In [1]:
# Using the CADF test for cointegration

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as sm
import statsmodels.tsa.stattools as ts
import statsmodels.tsa.vector_ar.vecm as vm

df=pd.read_csv('data\inputData_EWA_EWC_IGE.csv')
df['Date']=pd.to_datetime(df['Date'],  format='%Y%m%d').dt.date # remove HH:MM:SS
df.set_index('Date', inplace=True)


df.plot()
df.plot.scatter(x='EWA', y='EWC')
plt.xlabel('EWA share price')
plt.ylabel('EWC share price')
plt.xlabel('April 4,2006, to April 9,2012')
plt.ylabel('Share price $')
plt.show()
results=sm.ols(formula="EWC ~ EWA", data=df[['EWA', 'EWC']]).fit()
print(results.params)
hedgeRatio=results.params[1]
print('hedgeRatio=%f' % hedgeRatio)

(df['EWC']-hedgeRatio*df['EWA']).plot()
plt.xlabel('Stationarity of Residuals of Linear ')
plt.ylabel('EWC - hedgeRatio*EWA')
plt.show()

print(ts.coint(df['EWA'], df['EWC']))

# cadf test
coint_t, pvalue, crit_value=ts.coint(df['EWA'], df['EWC'])
print('t-statistic=%f' % coint_t)
print('pvalue=%f' % pvalue)
print(crit_value)

# Johansen test
result=vm.coint_johansen(df[['EWA', 'EWC']].values, det_order=0, k_ar_diff=1)
print('Johansen test')
print(result.lr1)
print(result.cvt)
print(result.lr2)
print(result.cvm)

# Add IGE for Johansen test
result=vm.coint_johansen(df.values, det_order=0, k_ar_diff=1)
print('Add IGE for Johansen test')
print(result.lr1)
print(result.cvt)
print(result.lr2)
print(result.cvm)

print('eigenvalues')
print(result.eig)  # eigenvalues
print('eigenvectors')
print(result.evec)  # eigenvectors

yport=pd.DataFrame(np.dot(df.values, result.evec[:, 0])) #  (net) market value of portfolio

ylag=yport.shift()
deltaY=yport-ylag
df2=pd.concat([ylag, deltaY], axis=1)
df2.columns=['ylag', 'deltaY']
regress_results=sm.ols(formula="deltaY ~ ylag", data=df2).fit() # Note this can deal with NaN in top row
print(regress_results.params)

halflife=-np.log(2)/regress_results.params['ylag']
print('halflife=%f days' % halflife)

#  Apply a simple linear mean reversion strategy to EWA-EWC-IGE
lookback=np.round(halflife).astype(int) #  setting lookback to the halflife found above
numUnits =-(yport-yport.rolling(lookback).mean())/yport.rolling(lookback).std() # capital invested in portfolio in dollars.  movingAvg and movingStd are functions from epchan.com/book2
positions=pd.DataFrame(np.dot(numUnits.values, np.expand_dims(result.evec[:, 0], axis=1).T)*df.values) # results.evec(:, 1)' can be viewed as the capital allocation, while positions is the dollar capital in each ETF.
pnl=np.sum((positions.shift().values)*(df.pct_change().values), axis=1) # daily P&L of the strategy
ret=pnl/np.sum(np.abs(positions.shift()), axis=1)
(np.cumprod(1+ret)-1).plot()
print('APR=%f Sharpe=%f' % (np.prod(1+ret)**(252/len(ret))-1, np.sqrt(252)*np.mean(ret)/np.std(ret)))
# APR=0.125739 Sharpe=1.391310


<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

Intercept    6.411331
EWA          0.962429
dtype: float64
hedgeRatio=0.962429


<Figure size 640x480 with 1 Axes>

(-3.063528097618719, 0.09586561374353242, array([-3.90376106, -3.34020915, -3.04728056]))
t-statistic=-3.063528
pvalue=0.095866
[-3.90376106 -3.34020915 -3.04728056]
Johansen test
[19.98321869  3.98276124]
[[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
[16.00045745  3.98276124]
[[12.2971 14.2639 18.52  ]
 [ 2.7055  3.8415  6.6349]]
Add IGE for Johansen test
[34.42862022 17.53171895  4.47102054]
[[27.0669 29.7961 35.4628]
 [13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
[16.89690127 13.06069841  4.47102054]
[[18.8928 21.1314 25.865 ]
 [12.2971 14.2639 18.52  ]
 [ 2.7055  3.8415  6.6349]]
eigenvalues
[0.01121626 0.00868086 0.00298021]
eigenvectors
[[ 0.7599635  -0.11204898  0.0789828 ]
 [-1.04602749 -0.5796762   0.26467204]
 [ 0.22330592  0.53159644 -0.09515547]]
Intercept   -0.115768
ylag        -0.030586
dtype: float64
halflife=22.662578 days
APR=0.125739 Sharpe=1.402653


In [2]:
regress_results.params['ylag']

-0.030585539965171365

In [4]:
regress_results.params

Intercept   -0.115768
ylag        -0.030586
dtype: float64