# Statistical Arbitrage — Pairs Trading

We estimate a hedge ratio via OLS, compute the spread and its rolling z-score, generate entry/exit signals, simulate PnL, and visualize the results.

In [None]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
# Build a reproducible synthetic pair: `x` is a random walk starting near 100,
# and `y` tracks 0.7 * x plus its own, much smaller, random-walk noise.
np.random.seed(1)
N = 3000

# Draw order matters for reproducibility: x's increments first, then y's noise.
x_increments = 0.3 * np.random.randn(N)
x = 100 + np.cumsum(x_increments)

y_noise_increments = 0.05 * np.random.randn(N)
y = 0.7 * x + np.cumsum(y_noise_increments)


## 1. Hedge ratio (OLS)

In [None]:
# Regress y on [x, 1] by least squares; the slope is the hedge ratio.
# Design matrix: first column is x, second column is the intercept term.
X = np.column_stack((x, np.ones_like(x)))

lstsq_result = np.linalg.lstsq(X, y, rcond=None)
beta = lstsq_result[0]  # [slope, intercept]
res = lstsq_result[1]   # residual sum of squares as returned by lstsq

hedge = beta[0]
print('hedge ratio:', hedge)


## 2. Spread & z-score

In [None]:
# Spread of y against the hedged x, standardised with rolling statistics.
spread = y - hedge * x
w = 200  # rolling window length, in bars

spread_series = pd.Series(spread)
rolling_window = spread_series.rolling(w)

mu = rolling_window.mean()
# A zero rolling std would make the z-score divide by zero; swap it for a tiny value.
sig = rolling_window.std().replace(0, 1e-9)

# NaN until the rolling window has filled.
z = (spread_series - mu) / sig


## 3. Signals
Enter when |z|>2, exit when |z|<0.5.

In [None]:
# Entry flags: 1 where the z-score breaches the -2 / +2 thresholds, else 0.
# Comparisons against NaN are False, so no entry fires while z is still NaN.
enter_long = (z < -2).astype(int)
enter_short = (z > 2).astype(int)

# Pull plain arrays out once so the loop below does cheap positional lookups.
long_flags = enter_long.to_numpy()
short_flags = enter_short.to_numpy()
z_values = z.to_numpy()

# Walk the bars with a tiny state machine: flat (0), long (+1) or short (-1).
position = np.zeros(N)
in_pos = 0
for t in range(N):
    if in_pos == 0:
        # Flat: a long entry takes precedence over a short entry.
        if long_flags[t] == 1:
            in_pos = 1
        elif short_flags[t] == 1:
            in_pos = -1
    elif abs(z_values[t]) < 0.5:
        # In a trade: close once the z-score has reverted inside the exit band.
        in_pos = 0
    position[t] = in_pos


## 4. PnL simulation
We go long the spread when z < -2 and short the spread when z > 2.

In [None]:
# Per-bar change in the spread. `prepend` makes the first change 0 so the
# array keeps the same length as `spread` and `position`.
ret_spread = np.diff(spread, prepend=spread[0])

# position[t] is decided from z[t], which already contains spread[t]. Crediting
# it with the move spread[t] - spread[t-1] would trade on information that was
# not yet available (look-ahead bias). A position opened on bar t can only earn
# the move into bar t+1, so lag the position by one bar (flat on the first bar).
held_position = np.concatenate(([0.0], position[:-1]))

# Cumulative PnL in spread units, before costs.
pnl = np.cumsum(held_position * ret_spread)


## 5. Risk metrics

In [None]:
# Per-bar PnL increments recovered from the cumulative curve (first entry is 0).
rets = np.diff(pnl, prepend=pnl[0])

# The epsilon must be added to the standard deviation itself: adding a constant
# to every return (as in std(rets + eps)) leaves the std unchanged and gives no
# protection against division by zero when the strategy never trades.
ret_std = np.std(rets) + 1e-9

# NOTE(review): the 252*6*60 annualisation factor presumably assumes one-minute
# bars, 6-hour sessions and 252 trading days; the synthetic series carries no
# explicit time unit, so confirm before reading the Sharpe figure literally.
sh = np.mean(rets) / ret_std * np.sqrt(252*6*60)
print('Final PnL:', pnl[-1],' Sharpe~', sh)


## 6. Plots

In [None]:
# Figure 1: spread with its rolling mean and a +/- 2 sigma band.
plt.figure()
plt.plot(spread, label='spread')
plt.plot(mu, label='mu')
lower_band = mu - 2*sig
upper_band = mu + 2*sig
plt.fill_between(range(N), lower_band, upper_band, alpha=0.2)
plt.legend()
plt.title('Spread & bands')

# Figure 2: rolling z-score of the spread.
plt.figure()
plt.plot(z)
plt.title('Z-score')

# Figure 3: cumulative PnL of the strategy.
plt.figure()
plt.plot(pnl)
plt.title('PnL')

plt.show()


## 7. Robustness: rolling hedge ratio

In [None]:
# Re-estimate the OLS hedge ratio on every full window of `win` consecutive
# bars to see how stable the slope is over the sample.
win = 500
intercept_col = np.ones(win)  # constant column, identical for every window

hedges = []
# N - win + 1 windows fit in N bars; the "+ 1" keeps the final window, which
# ends on the last observation.
for i in range(N - win + 1):
    # Window-specific names so the full-sample `X` / `beta` are not overwritten.
    X_win = np.column_stack((x[i:i+win], intercept_col))
    beta_win, *_ = np.linalg.lstsq(X_win, y[i:i+win], rcond=None)
    hedges.append(beta_win[0])  # slope of this window's fit

plt.figure(); plt.plot(hedges); plt.title('Rolling hedge ratio'); plt.show()


## 8. Stationarity check (ADF surrogate)
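As a crude stand-in for an ADF test, we compare the variance of the spread in the first and second halves of the sample. A ratio close to 1 is consistent with a stationary spread, but it is only a heuristic and does not replace a proper unit-root test.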

In [None]:
# Ratio of the spread's variance in the first half of the sample to its
# variance in the second half.
first_half_var = np.var(spread[:N//2])
second_half_var = np.var(spread[N//2:])

# The epsilon guards the denominator. It must be added to the variance itself:
# adding a constant to the data (var(data + eps)) does not change the variance.
vr = first_half_var / (second_half_var + 1e-9)
print('variance ratio ~', vr)


## 9. Slippage/fees sensitivity

In [None]:
# Cost model: a flat fee charged per unit of position change.
fee = 0.0001

# Turnover is the total absolute change in position across consecutive bars.
position_changes = np.diff(position)
turnover = np.abs(position_changes).sum()

cost = fee * turnover
print('turnover',turnover,' cost',cost,' pnl_net', pnl[-1]-cost)


## 10. Takeaways