In [9]:
from TradingAgent import PairsTradingAgent
import numpy as np
import pandas as pd
from utils import compute_spread, zscore_normalization
from copy import deepcopy as dc
from tqdm import tqdm
import utils

In [3]:
# Load the raw table (one column per ticker symbol) and index it by parsed dates.
raw_data = pd.read_csv(
    "../data/raw_data.csv",
    index_col=0,
)
raw_data = raw_data.set_axis(pd.to_datetime(raw_data.index), axis=0)
raw_data.head(5)

Unnamed: 0_level_0,A,AAL,AAPL,ABBV,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-04,40.689999,40.91,26.3375,57.610001,42.93,22.950001,101.830002,91.970001,54.439999,35.84,...,124.980133,29.83,68.769997,35.700001,77.459999,36.080002,51.912292,98.844658,66.489998,47.27
2016-01-05,40.549999,40.52,25.6775,57.369999,42.919998,23.033333,102.360001,92.339996,54.040001,36.240002,...,125.839996,29.9,70.07,36.060001,78.120003,36.07,51.78289,100.902916,64.82,48.009998
2016-01-06,40.73,41.23,25.174999,57.380001,42.560001,23.07,102.160004,91.019997,51.740002,35.360001,...,125.839996,29.24,66.440002,36.439999,77.470001,35.619999,51.416248,101.339806,62.23,48.02
2016-01-07,39.0,40.450001,24.112499,57.209999,41.540001,23.046667,99.160004,89.110001,50.419998,34.52,...,114.949997,28.16,60.189999,36.580002,76.230003,34.700001,49.662113,99.009712,59.41,46.560001
2016-01-08,38.59,40.369999,24.24,55.650002,40.669998,22.806667,98.199997,87.849998,49.98,34.389999,...,116.620003,27.9,57.740002,36.18,74.690002,34.369999,48.98634,98.592232,59.25,45.880001


In [8]:
def grid_search(trading_agent: "PairsTradingAgent", interval: float,
                if_fixed_capital: bool, trading_capital=1.0, trading_capital_ratio=0.05,
                entry_start: float = 0.8, entry_stop: float = 2.0, exit_threshold: float = 0.5):
    """Scan entry thresholds on a regular grid and keep the best-performing one.

    For every threshold in ``[entry_start, entry_stop)`` spaced by ``interval`` the
    agent's ``trading_simulation`` is run, and the first value of the last row of
    its result is taken as the score of that threshold (presumably the final
    cumulative return of the backtest -- confirm against ``PairsTradingAgent``).

    Parameters
    ----------
    trading_agent : PairsTradingAgent
        Object exposing ``trading_simulation(entry=, exit=, if_fixed_capital=,
        trading_capital=, trading_capital_ratio=)``.
    interval : float
        Spacing between consecutive entry thresholds; must be positive.
    if_fixed_capital : bool
        Forwarded unchanged to ``trading_simulation``.
    trading_capital, trading_capital_ratio
        Forwarded unchanged to ``trading_simulation``.
    entry_start, entry_stop : float
        Half-open range of entry thresholds to scan (defaults keep the
        historical 0.8 .. 2.0 range).
    exit_threshold : float
        Value passed as ``exit`` to ``trading_simulation`` (default 0.5, the
        value that used to be hard-coded).

    Returns
    -------
    tuple
        ``(best_entry_threshold, best_return)``. If no simulation produces a
        score greater than ``-inf`` (empty grid, or only NaN scores) the result
        is ``(0, -inf)``.

    Raises
    ------
    ValueError
        If ``interval`` is not strictly positive.
    """
    if interval <= 0:
        raise ValueError(f"interval must be positive, got {interval!r}")

    # Derive the grid from an integer step count instead of np.arange with a float
    # step: arange accumulates rounding error (e.g. 1.2000000000000002) and may or
    # may not include the stop value depending on that error.
    n_steps = max(int(np.ceil((entry_stop - entry_start) / interval - 1e-9)), 0)

    best_return = -np.inf
    best_entry_threshold = 0
    for step in range(n_steps):
        # Rounding removes binary floating-point drift so thresholds read as 0.85, 1.2, ...
        entry_threshold = round(entry_start + step * interval, 10)
        backtest_result = trading_agent.trading_simulation(
            entry=entry_threshold, exit=exit_threshold, if_fixed_capital=if_fixed_capital,
            trading_capital=trading_capital, trading_capital_ratio=trading_capital_ratio)
        # Last row, first value; works for a one-column DataFrame as well as a Series.
        backtest_return = np.ravel(np.asarray(backtest_result.iloc[-1]))[0]
        # A NaN score never compares greater, so it is skipped automatically.
        if backtest_return > best_return:
            best_return = backtest_return
            best_entry_threshold = entry_threshold

    return best_entry_threshold, best_return

In [2]:
# Candidate trading pairs. The grid-search loop reads the "leg1"/"leg2" columns of this
# frame and adds "best_threshold" and "best_return" columns to it.
trading_pairs = pd.read_csv(
    "../cluster_find_trading_pairs/trading_pairs.csv",
    index_col=0,
)

In [10]:
# Grid-search the best entry threshold for every candidate pair.
# This is expensive (the recorded run took about 4 hours for 2069 pairs), so the
# results are written to disk once the loop finishes; the following cell reloads
# them from that same file.
for ind in tqdm(trading_pairs.index):
    leg1 = trading_pairs.loc[ind, "leg1"]
    leg2 = trading_pairs.loc[ind, "leg2"]

    # Normalised spread between the two legs (helpers come from utils).
    spread = compute_spread(raw_data, leg1, leg2)
    spread = zscore_normalization(spread)

    trading_agent = PairsTradingAgent(raw_data.loc[:, leg1], raw_data.loc[:, leg2], spread, initial_capital=1.0)
    best_entry_threshold, best_return = grid_search(trading_agent, interval=0.05, if_fixed_capital=True, trading_capital=1.0)

    # The first assignment creates the column; later ones fill it in row by row.
    trading_pairs.loc[ind, "best_threshold"] = best_entry_threshold
    trading_pairs.loc[ind, "best_return"] = best_return

# Persist once, after all pairs are scored, so the computed results are not lost.
trading_pairs.to_csv("./trading_pairs_potential_fixed_capital.csv")

100%|██████████| 2069/2069 [4:05:27<00:00,  7.12s/it]  


In [4]:
# Reload the saved grid-search results (the table carries best_threshold / best_return).
trading_pairs = pd.read_csv(
    "./trading_pairs_potential_fixed_capital.csv",
    index_col=0,
)
trading_pairs.head(5)

Unnamed: 0,leg1,leg2,coint_t,coint_pvalue,hedge_ratio,adf_pvalue,hurst_exponent,half_life,zero_cross,best_threshold,best_return
0,A,AVY,-3.992017,0.007372,1.245725,0.001497,0.39768,37.003035,117,0.8,2.695672
1,A,DGX,-4.20536,0.00358,0.561827,0.000161,0.422547,36.591205,96,0.8,3.297833
2,A,MS,-3.843708,0.011842,0.597435,0.002614,0.407522,50.032469,78,0.8,3.320636
3,A,MTD,-3.81459,0.012961,10.115935,0.001257,0.418362,40.976883,105,0.8,2.307756
4,A,NDAQ,-4.611753,0.000798,0.395134,0.000121,0.373605,29.043458,90,1.15,3.236758


In [5]:
# Rank the pairs by their best grid-search return, highest first.
sorted_trading_pairs = trading_pairs.sort_values(by="best_return", ascending=False)
sorted_trading_pairs.head(10)

Unnamed: 0,leg1,leg2,coint_t,coint_pvalue,hedge_ratio,adf_pvalue,hurst_exponent,half_life,zero_cross,best_threshold,best_return
901,DUK,ENPH,-3.922743,0.009225,7.188884,0.006725,0.409722,52.154031,111,0.95,10.517144
849,DLTR,ENPH,-3.673073,0.019838,2.724662,0.018532,0.407459,70.402656,59,0.8,10.195229
742,CZR,ETSY,-4.411852,0.001703,2.288153,0.006295,0.44065,57.508409,60,1.6,8.490281
744,CZR,NKE,-4.245217,0.003112,1.159681,0.013336,0.407289,65.688975,38,1.2,8.342575
319,APTV,ETSY,-4.130476,0.004638,1.955878,0.001353,0.376077,46.333395,121,0.9,7.031082
741,CZR,EPAM,-4.372294,0.00197,5.230379,0.014134,0.45638,74.706082,40,1.55,6.760948
1287,HOLX,TSLA,-3.800707,0.013526,6.041251,0.013859,0.449642,64.92793,87,0.85,6.687967
1045,ENPH,JNJ,-4.05591,0.005967,0.186662,0.000147,0.405688,39.557506,83,0.9,6.653791
159,ALGN,CZR,-5.141951,9e-05,0.147969,0.000114,0.373032,34.104557,92,1.7,6.38809
970,DXCM,SPGI,-3.792384,0.013876,2.452707,0.002625,0.413481,48.201554,55,0.85,6.11824


In [6]:
# Keep the first 300 rows of the ranking and write them to disk.
top_trading_pairs = sorted_trading_pairs.iloc[:300]
top_trading_pairs.to_csv("./top_trading_pairs.csv")

In [7]:
# Reload the saved top pairs and renumber the rows 0..n-1.
# NOTE: the variable is spelled "paris" (not "pairs"); the spread-building cell
# refers to it under this exact name, so the spelling is kept.
top_trading_paris = pd.read_csv("./top_trading_pairs.csv", index_col=0).reset_index(drop=True)
top_trading_paris.head(10)

Unnamed: 0,leg1,leg2,coint_t,coint_pvalue,hedge_ratio,adf_pvalue,hurst_exponent,half_life,zero_cross,best_threshold,best_return
0,DUK,ENPH,-3.922743,0.009225,7.188884,0.006725,0.409722,52.154031,111,0.95,10.517144
1,DLTR,ENPH,-3.673073,0.019838,2.724662,0.018532,0.407459,70.402656,59,0.8,10.195229
2,CZR,ETSY,-4.411852,0.001703,2.288153,0.006295,0.44065,57.508409,60,1.6,8.490281
3,CZR,NKE,-4.245217,0.003112,1.159681,0.013336,0.407289,65.688975,38,1.2,8.342575
4,APTV,ETSY,-4.130476,0.004638,1.955878,0.001353,0.376077,46.333395,121,0.9,7.031082
5,CZR,EPAM,-4.372294,0.00197,5.230379,0.014134,0.45638,74.706082,40,1.55,6.760948
6,HOLX,TSLA,-3.800707,0.013526,6.041251,0.013859,0.449642,64.92793,87,0.85,6.687967
7,ENPH,JNJ,-4.05591,0.005967,0.186662,0.000147,0.405688,39.557506,83,0.9,6.653791
8,ALGN,CZR,-5.141951,9e-05,0.147969,0.000114,0.373032,34.104557,92,1.7,6.38809
9,DXCM,SPGI,-3.792384,0.013876,2.452707,0.002625,0.413481,48.201554,55,0.85,6.11824


In [10]:
# Build one normalised-spread column per top pair, labelled "<leg1>-<leg2>",
# then save the combined table indexed by the dates of raw_data.
spread_list = []
for ind in top_trading_paris.index:
    leg1, leg2 = top_trading_paris.loc[ind, ["leg1", "leg2"]]
    spread = pd.DataFrame(
        utils.zscore_normalization(utils.compute_spread(raw_data, leg1, leg2)),
        columns=[f"{leg1}-{leg2}"],
    )
    spread_list.append(spread)

spread_df = pd.concat(spread_list, axis=1)
# Re-use the date index of the source table (parsed to datetimes if it is not already).
spread_df.index = pd.to_datetime(raw_data.index)
spread_df.to_csv("./spread.csv")