In [11]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize
train_spread = pd.read_csv("train_spread.csv", index_col=0)
train_z_score = pd.read_csv("train_z_score.csv", index_col=0) # 2012/01/30+
train_spread = train_spread.iloc[18:,:] # Resccale to 2012/01/30+
# train_spread

In [5]:
def objective(params, spread, z_score, pair_name):
    entryZscore, exitZscore = params
    PnL = pd.DataFrame({f"{pair_name}": z_score})
    PnL.index = pd.to_datetime(z_score.index)
    PnL['long entry'] = [x < (-entryZscore) and z_score[-1] > (-entryZscore) for x in z_score]
    PnL['long exit'] = [x > (-exitZscore) and z_score[-1] < (-exitZscore) for x in z_score]
    PnL['num units long'] = np.zeros(len(PnL))
    PnL.loc[PnL['long entry'],'num units long'] = 1 
    PnL.loc[PnL['long exit'],'num units long'] = 0 
        
    PnL['short entry'] = [x > (-entryZscore) and z_score[-1] < (-entryZscore) for x in z_score]
    PnL['short exit'] = [x < (-exitZscore) and z_score[-1] > (-exitZscore) for x in z_score]
    PnL['num units short'] = np.zeros(len(PnL))
    PnL.loc[PnL['short entry'],'num units short'] = -1
    PnL.loc[PnL['short exit'],'num units short'] = 0
    
    PnL['numUnits'] = PnL['num units long'] + PnL['num units short']
    PnL['spread pct ch'] = (spread - spread.shift(1)).values
    PnL['port rets'] = PnL['spread pct ch'] * PnL['numUnits'].shift(1)
    PnL['cum rets'] = PnL['port rets'].cumsum()
    PnL['cum rets'] = PnL['cum rets'] + 1
    end_val = PnL['cum rets'].iat[-1]
    return(-end_val)

In [6]:
def backtest(spread, z_score, pair_name):
    result = minimize(objective, [0.7, -0.05], args=(spread, z_score, pair_name), bounds=[(-1.5, 1.5), (-1.5, 1.5)], method='Nelder-Mead')
    best_entryZscore, best_exitZscore = result.x
    best_return = -result.fun
    return best_return, best_entryZscore, best_exitZscore

In [8]:
optim_table = pd.DataFrame()
optim_table.index = train_z_score.columns
optim_table["MaxProfit"] = np.zeros(len(train_z_score.columns))
optim_table["entryZscore"] = np.zeros(len(train_z_score.columns))
optim_table["exitZscore"] = np.zeros(len(train_z_score.columns))

for pair in range(train_z_score.shape[1]):
    pair_name = train_z_score.columns[pair]
    z_score = train_z_score[pair_name]
    spread = train_spread.iloc[:,pair]
    optim_table.iloc[pair,:] = backtest(spread, z_score, pair_name)

optim_table

Unnamed: 0,MaxProfit,entryZscore,exitZscore
ORCL GOOGL,117.367601,0.700000,-0.050000
USLV GLD,508.791784,0.697812,-0.050234
USLV AAPL,344.948694,0.700000,-0.050000
USLV GOOGL,473.927218,0.019687,-0.074297
USLV AMD,170.490896,0.752500,-0.048125
...,...,...,...
CMCSA VDC,339.956116,-0.070000,-0.077500
CMCSA KXI,88.793036,0.564922,-0.054824
CMCSA VHT,304.897946,0.259219,-0.066680
CMCSA VNQ,174.113230,0.633281,-0.053633


In [9]:
print(optim_table["entryZscore"].describe())
print(optim_table["exitZscore"].describe())

count    73.000000
mean      0.496197
std       0.290159
min      -0.175000
25%       0.259219
50%       0.634375
75%       0.713125
max       0.789688
Name: entryZscore, dtype: float64
count    73.000000
mean     -0.057878
std       0.010322
min      -0.082500
25%      -0.066680
50%      -0.052344
75%      -0.050000
max      -0.047266
Name: exitZscore, dtype: float64


In [10]:
optim_table.to_csv('z_score_optim.csv', index=True)