In [1]:
import numpy as np
from numpy.linalg import norm
import pandas as pd
import itertools
import random
from matplotlib import pyplot as plt
from pair_trading_foundations.data_generation import ExecutePairTrading, generate_training_data
random.seed(23)
import cProfile
import pstats

In [2]:
# Load the raw price table from CSV (per the filename: S&P 500 rows, 2018-12-31 to 2023-12-29).
# Later cells read the Date, Ticker, Close and 'GICS Sector' columns from it.
data = pd.read_csv('Data/sp500_full_20181231_to_20231229.csv')

In [3]:
# Row count per ticker, as a two-column table (Ticker, Count).
value_count_tb = data.groupby('Ticker').size().reset_index(name='Count')

# Keep only the tickers whose row count equals the largest row count of any ticker.
has_max_count = value_count_tb['Count'] == value_count_tb['Count'].max()
stock_to_keep = value_count_tb.loc[has_max_count, 'Ticker']
data = data[data['Ticker'].isin(stock_to_keep)]

# Draw 200 of the retained tickers (the `random` module was seeded in the first cell).
sampled_tickers = random.sample(stock_to_keep.tolist(), 200)

# Two working subsets: the Information Technology sector, and the sampled tickers.
data_tech = data[data['GICS Sector'] == 'Information Technology']
data_sampled = data[data['Ticker'].isin(sampled_tickers)]

In [None]:
# data_sampled = data_sampled[['Date', 'Ticker','Close', 'GICS Sector', 'GICS Sub-Industry']]

In [None]:
# Average number of rows per sampled ticker (200 tickers were drawn with random.sample).
data_sampled.shape[0]/200

In [None]:
data_sampled

Candidate layouts for the sampled data, written as shapes:

- original: (1259 * 200, 10)
- option 1: (200, 1259 * 10)
- option 2: (1259, 200 * 10)

In [None]:
1259

In [None]:
# Scratch arithmetic: presumably 1259 dates per ticker minus training_len (500) and
# test_len (120), i.e. how many positions remain for a target date — TODO confirm.
1259 - 500 - 120

In [None]:
# Number of distinct Date values present in the sampled subset.
len(set(data_sampled.Date))

# Get the tech sector only

In [None]:
# Arguments for the tech-sector run of generate_training_data.
tech_generation_kwargs = dict(
    data=data_tech,
    training_len=500,
    test_len=120,
    sample_size_per_pair=10,
)

# Run the generation under cProfile so its cost can be inspected afterwards.
with cProfile.Profile() as pr:
    recorded_info_tb, features_tb, labels_tb = generate_training_data(**tech_generation_kwargs)

# Order the collected statistics by internal time; `stats` can then be printed
# (done here) or dumped to a file.
stats = pstats.Stats(pr).sort_stats(pstats.SortKey.TIME)
stats.print_stats()

In [None]:
# Write the collected profiling statistics to logs/path.prof for later inspection.
# NOTE(review): presumably the logs/ directory must already exist — confirm before running.
stats.dump_stats("logs/path.prof") # Saves the data

In [None]:
# Join the recorded info, features and labels on the pair/date key; inner joins
# keep only rows present in all three tables.
merge_keys = ['ticker1', 'ticker2', 'target_date']
combined = (
    recorded_info_tb
    .merge(features_tb, how='inner', on=merge_keys)
    .merge(labels_tb, how='inner', on=merge_keys)
)

In [None]:
# Persist the merged tech-sector table; index=False keeps the row index out of the CSV.
combined.to_csv('Data/tech_sector_pairs.csv', index=False)

In [None]:
combined

# Generate training data for the sampled ticker combinations

In [None]:
124750

In [None]:
# Back-of-envelope runtime estimate in minutes: a per-item timing in seconds
# (presumably measured per pair — TODO confirm its source) times 124750,
# which equals C(500, 2), divided by 60.
(0.013090133666992188 * 124750)/60

In [None]:
# Generate training samples for the 200 randomly sampled tickers only.
# The input must be `data_sampled`, not the full `data` frame: the result is saved as
# 'sampled_sector_pairs_200.csv' and the recorded output contains 19900 pairs,
# which is C(200, 2). Passing `data` would build pairs from every retained ticker.
recorded_info_tb, features_tb, labels_tb = generate_training_data(
    data=data_sampled,
    training_len=500,
    test_len=120,
    sample_size_per_pair=10
)

In [None]:
# Join the recorded info, features and labels on the pair/date key (inner joins),
# then write the merged table to disk without the row index.
merge_keys = ['ticker1', 'ticker2', 'target_date']
combined = (
    recorded_info_tb
    .merge(features_tb, how='inner', on=merge_keys)
    .merge(labels_tb, how='inner', on=merge_keys)
)
combined.to_csv('Data/sampled_sector_pairs_200.csv', index=False)

# Examining

In [4]:
# Reload the sampled-pairs table from the CSV written by the generation step,
# so the examination below can run without regenerating the data.
combined = pd.read_csv('Data/sampled_sector_pairs_200.csv')

In [5]:
combined

Unnamed: 0,ticker1,ticker2,target_date,abs_spread_mean,abs_spread_std,abs_spread_mean_l28,abs_spread_std_l28,same_sector_flag,same_sub_industry_flag,cos_sim,corr_coef,abs_spread_normed_max,abs_spread_normed_90th,abs_spread_normed_75th,abs_spread_normed_median,abs_spread_normed_l7_avg,abs_spread_normed_l14_avg,total_pnl,total_pnl_l28_mean_std
0,AOS,ACN,2023-05-01,240.776680,29.447227,210.128214,7.873468,False,False,0.996553,0.767593,3.051333,1.554588,1.101518,0.774493,1.154835,1.014099,0.000000,-0.023657
1,AOS,ACN,2023-05-22,240.485420,29.770582,208.078574,7.256505,False,False,0.996459,0.761829,3.027975,1.552428,1.119296,0.763016,0.921849,1.156237,0.000000,-0.010565
2,AOS,ACN,2023-03-30,241.075400,29.184488,196.992144,8.863890,False,False,0.996636,0.772436,3.068568,1.558348,1.100849,0.760678,1.347036,1.638972,0.000000,-0.114807
3,AOS,ACN,2021-03-12,158.070800,24.359007,194.141785,4.011568,False,False,0.995308,0.712080,2.156870,1.598808,1.336947,0.791321,1.342328,1.426427,0.075028,0.075028
4,AOS,ACN,2022-06-24,228.286960,40.043574,229.798572,9.412906,False,False,0.997186,0.888413,2.555792,1.494871,1.221832,0.832344,0.067493,0.070107,0.000000,-0.000349
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
207671,ZBH,ZION,2023-02-09,73.502347,19.940661,74.520357,2.113730,False,False,0.983694,-0.015494,2.314429,1.558013,1.195646,0.820050,0.073243,0.027249,-0.095009,-0.065728
207672,ZBH,ZION,2021-06-29,95.221879,11.447629,102.160617,3.942420,False,False,0.990127,0.795919,3.879434,1.472769,1.050586,0.689339,0.932763,0.646860,0.000000,0.000000
207673,ZBH,ZION,2022-07-22,84.696412,21.988832,54.221071,2.233056,False,False,0.965485,-0.182229,2.179695,1.432994,1.158252,0.923377,1.407434,1.389958,0.000000,-0.000384
207674,ZBH,ZION,2022-09-21,80.836752,22.686565,54.791430,2.836686,False,False,0.970473,-0.250925,1.942528,1.296551,1.164562,0.997263,1.050195,1.159480,0.000000,-0.268214


In [6]:
# Remove fully duplicated rows and renumber the index from 0 in one step.
combined = combined.drop_duplicates(ignore_index=True)

In [7]:
combined.shape

(197566, 19)

In [8]:
combined.head()

Unnamed: 0,ticker1,ticker2,target_date,abs_spread_mean,abs_spread_std,abs_spread_mean_l28,abs_spread_std_l28,same_sector_flag,same_sub_industry_flag,cos_sim,corr_coef,abs_spread_normed_max,abs_spread_normed_90th,abs_spread_normed_75th,abs_spread_normed_median,abs_spread_normed_l7_avg,abs_spread_normed_l14_avg,total_pnl,total_pnl_l28_mean_std
0,AOS,ACN,2023-05-01,240.77668,29.447227,210.128214,7.873468,False,False,0.996553,0.767593,3.051333,1.554588,1.101518,0.774493,1.154835,1.014099,0.0,-0.023657
1,AOS,ACN,2023-05-22,240.48542,29.770582,208.078574,7.256505,False,False,0.996459,0.761829,3.027975,1.552428,1.119296,0.763016,0.921849,1.156237,0.0,-0.010565
2,AOS,ACN,2023-03-30,241.0754,29.184488,196.992144,8.86389,False,False,0.996636,0.772436,3.068568,1.558348,1.100849,0.760678,1.347036,1.638972,0.0,-0.114807
3,AOS,ACN,2021-03-12,158.0708,24.359007,194.141785,4.011568,False,False,0.995308,0.71208,2.15687,1.598808,1.336947,0.791321,1.342328,1.426427,0.075028,0.075028
4,AOS,ACN,2022-06-24,228.28696,40.043574,229.798572,9.412906,False,False,0.997186,0.888413,2.555792,1.494871,1.221832,0.832344,0.067493,0.070107,0.0,-0.000349


In [9]:
combined.shape

(197566, 19)

In [10]:
# NOTE(review): this call omits the required `population` argument and raises
# TypeError (see the recorded output). Supply a population or remove the cell.
random.choices()

TypeError: Random.choices() missing 1 required positional argument: 'population'

In [11]:
# Number of rows per (ticker1, ticker2) pair, smallest counts first.
combined.groupby(['ticker1', 'ticker2']).size().sort_values()

ticker1  ticker2
AOS      NEM         8
MOS      PNC         8
LYB      WRK         8
MCD      WHR         8
HSIC     TSCO        8
                    ..
CL       RMD        10
         RF         10
         REGN       10
         PYPL       10
ZBH      ZION       10
Length: 19900, dtype: int64

In [12]:
# Show the distinct rows recorded for the COR / META pair.
is_cor_meta = combined['ticker1'].eq('COR') & combined['ticker2'].eq('META')
combined.loc[is_cor_meta].drop_duplicates()

Unnamed: 0,ticker1,ticker2,target_date,abs_spread_mean,abs_spread_std,abs_spread_mean_l28,abs_spread_std_l28,same_sector_flag,same_sub_industry_flag,cos_sim,corr_coef,abs_spread_normed_max,abs_spread_normed_90th,abs_spread_normed_75th,abs_spread_normed_median,abs_spread_normed_l7_avg,abs_spread_normed_l14_avg,total_pnl,total_pnl_l28_mean_std
65819,COR,META,2022-03-04,173.31058,46.65503,102.563214,45.747405,False,False,0.989441,0.64449,2.482274,1.784772,1.107604,0.688886,2.337105,2.257448,0.0,0.0
65820,COR,META,2021-01-04,119.80472,32.441846,176.367141,4.525372,False,False,0.993131,0.849093,2.664623,1.664988,1.250614,0.634828,1.687445,1.716487,0.042613,0.111987
65821,COR,META,2021-11-16,166.09522,45.924089,207.677501,7.772153,False,False,0.994149,0.915048,2.311972,1.684437,1.18876,0.780963,0.985892,0.882134,0.0,0.022934
65822,COR,META,2023-01-20,124.573541,86.944173,42.296786,9.199986,False,False,0.900391,-0.800255,1.570737,1.326661,1.181661,0.967041,1.091398,1.037373,0.0,-0.078859
65823,COR,META,2021-07-15,144.0803,39.873358,225.6225,10.153602,False,False,0.994088,0.906021,2.428181,1.611195,1.138727,0.800535,2.294077,2.278779,-0.091278,0.04286
65824,COR,META,2022-07-22,159.41016,67.52999,23.92,7.186597,False,False,0.955883,-0.423326,2.20021,1.723711,1.27166,0.691394,1.921561,1.953346,0.0,0.150726
65825,COR,META,2021-09-28,158.67022,46.829999,245.547145,10.713675,False,False,0.993297,0.920619,2.188123,1.691582,1.223419,0.729767,1.544823,1.773157,0.0,0.0
65826,COR,META,2022-03-24,172.69814,48.083886,59.894288,11.127205,False,False,0.98467,0.410404,2.749739,1.72144,1.07794,0.670703,2.407123,2.514332,0.0,0.051583


In [13]:
# Summary statistics of the total_pnl column.
combined['total_pnl'].describe()

count    197566.000000
mean          0.001262
std           0.077982
min          -0.781776
25%          -0.003163
50%           0.000000
75%           0.010343
max           0.765085
Name: total_pnl, dtype: float64

In [14]:
# Rows whose total_pnl exceeds 0.7.
combined.loc[combined['total_pnl'] > 0.7]

Unnamed: 0,ticker1,ticker2,target_date,abs_spread_mean,abs_spread_std,abs_spread_mean_l28,abs_spread_std_l28,same_sector_flag,same_sub_industry_flag,cos_sim,corr_coef,abs_spread_normed_max,abs_spread_normed_90th,abs_spread_normed_75th,abs_spread_normed_median,abs_spread_normed_l7_avg,abs_spread_normed_l14_avg,total_pnl,total_pnl_l28_mean_std
175506,META,WAT,2022-11-07,64.54322,61.537267,161.779644,27.712445,False,False,0.96849,0.431315,2.43327,1.465856,1.03471,0.877192,2.327155,1.892471,0.765085,0.420658
178882,MOS,ODFL,2021-12-02,187.216527,53.344449,309.870003,13.313581,False,False,0.984149,0.870763,2.574278,1.534646,1.256241,0.691543,2.47396,2.473371,0.711223,0.0


In [None]:
# Pair to inspect by hand.
ticker1, ticker2 = 'AOS', 'AMD'

# Unique dates, in order of first appearance, used later to locate a date's position.
all_dates = data['Date'].unique()

# Close values for each ticker as plain arrays, in the row order of `data`.
vec1_full = data.loc[data['Ticker'] == ticker1, 'Close'].values
vec2_full = data.loc[data['Ticker'] == ticker2, 'Close'].values

In [None]:
# Position of the target date within the unique-date array (first match).
target_date_positions = np.where(all_dates == '2021-11-23')[0]
idx = target_date_positions[0]

In [None]:
# Plot both full Close histories and mark the target date together with the
# positions 500 before and 120 after it. The 120-step span after idx is the slice
# later passed to ExecutePairTrading; idx - 500 presumably marks the start of the
# training window (training_len=500) — TODO confirm.
plt.plot(vec1_full, label='Stock1', color='green')
plt.plot(vec2_full, label='Stock2', color='blue')
# plt.plot(abs(vec1_full - vec2_full), label='Absolute Spread', color='grey')

# Each marker gets its own label so the legend can tell them apart.
plt.axvline(x=idx-500, color='red', linestyle='--', label='idx - 500')
plt.axvline(x=idx, color='red', linestyle='--', label='idx (target date)')
plt.axvline(x=idx+120, color='red', linestyle='--', label='idx + 120')

plt.xlabel('Position in date order')
plt.ylabel('Close')
# Without a legend the label= arguments above are never shown.
plt.legend();

In [None]:
# The 120 positions starting at the target date.
test_window = slice(idx, idx + 120)

# Spread statistics are hard-coded here.
# NOTE(review): presumably copied from this pair's row in the generated table — confirm.
executor = ExecutePairTrading(
    abs_spread_mean=21.923220,
    abs_spread_std=15.530315,
)
trade = executor.execute(
    vec1=vec1_full[test_window],
    vec2=vec2_full[test_window],
)

In [None]:
trade.trade_execution_table

In [None]:
# Plot both Close series over the 120 positions starting at the target date.
plt.plot(vec1_full[idx:(idx+120)], label='Stock1', color='green')
plt.plot(vec2_full[idx:(idx+120)], label='Stock2', color='blue')

# Markers at hard-coded positions within the window.
# NOTE(review): presumably the trade entry/exit rows from trade.trade_execution_table — confirm.
plt.axvline(x=1, color='red', linestyle='--', label='x = 1')
plt.axvline(x=106, color='red', linestyle='--', label='x = 106')

plt.xlabel('Position within window')
plt.ylabel('Close')
# Without a legend the label= arguments above are never shown.
plt.legend();

In [None]:
trade.final_pl_pct

In [None]:
trade.final_pl