# Implementing a Statistical Arbitrage Strategy

In [1]:
import pandas as pd
from datetime import datetime, timedelta
import sklearn
import numpy as np
from Utility import Utility
from TradingStrategy import TradingStrategy
from sklearn.linear_model import LinearRegression

### Read in and clean the datasets

In [2]:
# Load the prices table and the largest-cap tokens table. For each one the
# 'startTime' column is parsed into datetimes and then promoted to the index.
# Method chaining yields fresh frames instead of mutating with inplace=True.
tokens_price = (
    pd.read_csv('coin_all_prices_full.csv')
    .assign(startTime=lambda frame: pd.to_datetime(frame['startTime']))
    .set_index('startTime')
)
tokens_largest_cap = (
    pd.read_csv('coin_universe_150K_40.csv')
    .assign(startTime=lambda frame: pd.to_datetime(frame['startTime']))
    .set_index('startTime')
)


### Pick a start time and a look-back window

In [3]:
# Window length handed to Utility.select_window; later cells reuse M as well.
# NOTE(review): presumably a count of hourly observations (the recorded returns
# are hourly) — confirm against Utility.select_window.
M = 240
# Timezone-aware reference time t (offset +00:00); any time around 2021 works.
start_time = datetime.strptime('2021-03-08 05:00:00+00:00', '%Y-%m-%d %H:%M:%S%z')

# Apply the same (M, start_time) window to both tables, prices first.
tokens_price_window, tokens_largest_cap_window = (
    Utility.select_window(table, M, start_time)
    for table in (tokens_price, tokens_largest_cap)
)

### Locate common tokens and compute their hourly returns

In [4]:
# Determine the common token set from the windowed largest-cap table, pull the
# prices of those tokens from the price window, and convert them into hourly
# returns.
# NOTE(review): the saved output of this cell contains a pandas
# SettingWithCopyWarning pointing at
# `common_token_prices.fillna(method='ffill', axis=0, inplace=True)` —
# presumably inside Utility.get_common_token_prices; confirm and fix it there
# (e.g. work on an explicit copy and assign the result instead of inplace=True).
common_tokens = Utility.find_tokens(tokens_largest_cap_window, M)
common_tokens_prices = Utility.get_common_token_prices(tokens_price_window, common_tokens)
hourly_returns = Utility.get_hourly_returns(common_tokens_prices)
# Bare last expression: render the returns frame as the cell output.
hourly_returns

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  common_token_prices.fillna(method='ffill', axis=0, inplace=True)


Unnamed: 0_level_0,BTC,BNB,FTM,CEL,SUSHI,GRT,FTT,SNX,MATIC,SOL,...,RAY,ALPHA,SRM,ASD,HT,TRX,1INCH,LINA,CHZ,BAND
startTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-02-26 06:00:00+00:00,-0.032202,-0.061446,-0.098065,-0.020747,-0.046535,-0.068215,-0.032727,-0.046913,-0.067735,-0.038502,...,-0.042539,-0.059235,-0.055992,-0.003538,-0.070401,-0.037666,-0.083863,-0.031783,-0.050269,-0.045736
2021-02-26 07:00:00+00:00,0.012725,0.037147,0.066102,0.006685,0.012045,0.023371,0.006921,0.010550,0.041926,0.006090,...,0.007544,0.022836,0.016466,-0.001287,0.036610,0.010399,0.046351,-0.021371,0.015450,0.019934
2021-02-26 08:00:00+00:00,-0.024848,-0.030348,-0.075993,-0.016244,-0.019732,-0.033878,-0.036673,-0.030524,-0.022286,-0.019993,...,-0.074679,-0.041120,-0.019857,0.001303,-0.030280,-0.028163,-0.025643,-0.059544,-0.049991,-0.034500
2021-02-26 09:00:00+00:00,0.017625,0.028507,0.054953,0.012566,0.019185,0.020351,0.021217,0.045150,0.036077,0.014599,...,0.013723,0.021651,0.025413,0.002470,0.025327,0.026418,0.054672,0.017624,0.065237,0.038550
2021-02-26 10:00:00+00:00,0.015243,0.044196,0.030829,0.001641,0.031757,0.027923,0.018658,0.028361,0.044042,0.016787,...,0.012158,0.023774,0.036915,-0.002154,0.012757,0.019389,0.098529,0.014036,0.057340,0.030424
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-08 01:00:00+00:00,0.010000,-0.003230,0.018427,0.000900,-0.008205,0.004625,0.005714,-0.000115,0.009288,-0.005596,...,0.002320,-0.009432,-0.016276,-0.003087,-0.000963,-0.000048,0.017573,0.001938,0.054711,-0.000074
2021-03-08 02:00:00+00:00,0.000971,-0.006623,-0.006840,-0.000500,-0.007638,-0.009684,0.000484,0.004377,0.002993,0.001815,...,0.010414,0.003283,-0.008104,0.004807,-0.007002,-0.001931,-0.016581,-0.000903,0.003181,-0.007448
2021-03-08 03:00:00+00:00,-0.016078,-0.008002,-0.008887,0.003100,-0.016385,-0.021054,-0.014680,-0.019957,-0.020083,-0.018844,...,0.039216,-0.025260,-0.020085,-0.033630,-0.014781,-0.012332,-0.019837,-0.003227,-0.014872,-0.021312
2021-03-08 04:00:00+00:00,-0.002168,0.004731,-0.004707,0.005284,-0.007981,0.018095,-0.000295,0.006998,0.001715,-0.006279,...,0.031639,0.007875,0.014765,0.000400,0.004432,0.000539,0.003292,-0.017805,0.030477,0.013648


In [5]:
# Correlation matrix computed from the hourly returns. The saved output is a
# token-by-token table with 1.0 on the diagonal.
correlation_matrix =  Utility.calculate_correlation_matrix(hourly_returns)
# Bare last expression: render the matrix as the cell output.
correlation_matrix

Unnamed: 0,BTC,BNB,FTM,CEL,SUSHI,GRT,FTT,SNX,MATIC,SOL,...,RAY,ALPHA,SRM,ASD,HT,TRX,1INCH,LINA,CHZ,BAND
BTC,1.0,0.829674,0.607437,0.240938,0.679009,0.780632,0.870192,0.758822,0.612929,0.665413,...,0.532292,0.702203,0.737616,0.199912,0.685276,0.811458,0.803021,0.214696,0.323419,0.833677
BNB,0.829674,1.0,0.579687,0.246167,0.649066,0.720765,0.79069,0.707294,0.561019,0.659023,...,0.487405,0.687955,0.707957,0.145374,0.676995,0.735798,0.818948,0.178052,0.323232,0.76926
FTM,0.607437,0.579687,1.0,0.14841,0.52198,0.576091,0.586086,0.516621,0.442986,0.534907,...,0.355679,0.488019,0.55452,0.001797,0.476303,0.507795,0.557955,0.135451,0.284183,0.547259
CEL,0.240938,0.246167,0.14841,1.0,0.142522,0.191831,0.296714,0.186419,0.216299,0.164779,...,0.259032,0.177902,0.212249,0.045433,0.199479,0.206012,0.208689,0.128284,0.16896,0.250803
SUSHI,0.679009,0.649066,0.52198,0.142522,1.0,0.60538,0.622109,0.709639,0.430059,0.563583,...,0.43443,0.619315,0.660795,0.060733,0.553066,0.635138,0.704051,0.350086,0.193916,0.712016
GRT,0.780632,0.720765,0.576091,0.191831,0.60538,1.0,0.697043,0.643234,0.566137,0.575542,...,0.443398,0.659164,0.67377,0.118218,0.609963,0.700781,0.696465,0.160899,0.348154,0.72847
FTT,0.870192,0.79069,0.586086,0.296714,0.622109,0.697043,1.0,0.72944,0.494447,0.653922,...,0.509861,0.666608,0.721573,0.230936,0.652955,0.704996,0.734757,0.211563,0.31727,0.757915
SNX,0.758822,0.707294,0.516621,0.186419,0.709639,0.643234,0.72944,1.0,0.49094,0.597314,...,0.469459,0.660901,0.698748,0.233032,0.671753,0.697032,0.7319,0.303572,0.239722,0.738034
MATIC,0.612929,0.561019,0.442986,0.216299,0.430059,0.566137,0.494447,0.49094,1.0,0.48903,...,0.316671,0.460765,0.49156,0.096228,0.440152,0.578107,0.593196,0.023523,0.248396,0.576548
SOL,0.665413,0.659023,0.534907,0.164779,0.563583,0.575542,0.653922,0.597314,0.48903,1.0,...,0.553487,0.583774,0.715563,0.153138,0.519105,0.556595,0.6542,0.042876,0.17924,0.569373


In [6]:
# Run PCA on the correlation matrix and build the eigenportfolios at start_time
# from the resulting eigenvectors and the hourly returns.
# NOTE(review): this call passes `correlation_matrix` and unpacks TWO values,
# whereas the later factor-returns cell calls
# Utility.principal_component_analysis(hourly_returns, n_components=2) and
# unpacks THREE. Both usages cannot match a single signature, so one of the two
# cells is probably stale — confirm against the current Utility module and
# re-run from a fresh kernel (Restart & Run All).
# NOTE(review): the saved output shows two tables although this cell displays
# only `eigenportfolios`, and `eigenportfolios` is rebound by the later
# factor-returns cell — further signs that this cell's output is out of date.
eigenvectors, eigenvalues = Utility.principal_component_analysis(correlation_matrix)
eigenportfolios = Utility.compute_eigenportfolios(eigenvectors, hourly_returns, start_time)
eigenportfolios

Unnamed: 0,time,common_tokens,eigenportfolio1,eigenportfolio2
0,2021-03-08 05:00:00+00:00,BTC,-22.319786,-4.954129
1,2021-03-08 05:00:00+00:00,BNB,-13.406679,-2.852916
2,2021-03-08 05:00:00+00:00,FTM,-5.306828,1.042162
3,2021-03-08 05:00:00+00:00,CEL,2.060375,3.016732
4,2021-03-08 05:00:00+00:00,SUSHI,-8.29243,10.829239
5,2021-03-08 05:00:00+00:00,GRT,-10.302129,-1.932823
6,2021-03-08 05:00:00+00:00,FTT,-16.631442,-4.479839
7,2021-03-08 05:00:00+00:00,SNX,-11.461108,3.20527
8,2021-03-08 05:00:00+00:00,MATIC,-5.836792,-8.01491
9,2021-03-08 05:00:00+00:00,SOL,-9.886581,-10.335961


Unnamed: 0_level_0,BTC,BNB,FTM,CEL,SUSHI,GRT,FTT,SNX,MATIC,SOL,...,RAY,ALPHA,SRM,ASD,HT,TRX,1INCH,LINA,CHZ,BAND
startTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-02-26 06:00:00+00:00,-0.032202,-0.061446,-0.098065,-0.020747,-0.046535,-0.068215,-0.032727,-0.046913,-0.067735,-0.038502,...,-0.042539,-0.059235,-0.055992,-0.003538,-0.070401,-0.037666,-0.083863,-0.031783,-0.050269,-0.045736
2021-02-26 07:00:00+00:00,0.012725,0.037147,0.066102,0.006685,0.012045,0.023371,0.006921,0.010550,0.041926,0.006090,...,0.007544,0.022836,0.016466,-0.001287,0.036610,0.010399,0.046351,-0.021371,0.015450,0.019934
2021-02-26 08:00:00+00:00,-0.024848,-0.030348,-0.075993,-0.016244,-0.019732,-0.033878,-0.036673,-0.030524,-0.022286,-0.019993,...,-0.074679,-0.041120,-0.019857,0.001303,-0.030280,-0.028163,-0.025643,-0.059544,-0.049991,-0.034500
2021-02-26 09:00:00+00:00,0.017625,0.028507,0.054953,0.012566,0.019185,0.020351,0.021217,0.045150,0.036077,0.014599,...,0.013723,0.021651,0.025413,0.002470,0.025327,0.026418,0.054672,0.017624,0.065237,0.038550
2021-02-26 10:00:00+00:00,0.015243,0.044196,0.030829,0.001641,0.031757,0.027923,0.018658,0.028361,0.044042,0.016787,...,0.012158,0.023774,0.036915,-0.002154,0.012757,0.019389,0.098529,0.014036,0.057340,0.030424
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-08 01:00:00+00:00,0.010000,-0.003230,0.018427,0.000900,-0.008205,0.004625,0.005714,-0.000115,0.009288,-0.005596,...,0.002320,-0.009432,-0.016276,-0.003087,-0.000963,-0.000048,0.017573,0.001938,0.054711,-0.000074
2021-03-08 02:00:00+00:00,0.000971,-0.006623,-0.006840,-0.000500,-0.007638,-0.009684,0.000484,0.004377,0.002993,0.001815,...,0.010414,0.003283,-0.008104,0.004807,-0.007002,-0.001931,-0.016581,-0.000903,0.003181,-0.007448
2021-03-08 03:00:00+00:00,-0.016078,-0.008002,-0.008887,0.003100,-0.016385,-0.021054,-0.014680,-0.019957,-0.020083,-0.018844,...,0.039216,-0.025260,-0.020085,-0.033630,-0.014781,-0.012332,-0.019837,-0.003227,-0.014872,-0.021312
2021-03-08 04:00:00+00:00,-0.002168,0.004731,-0.004707,0.005284,-0.007981,0.018095,-0.000295,0.006998,0.001715,-0.006279,...,0.031639,0.007875,0.014765,0.000400,0.004432,0.000539,0.003292,-0.017805,0.030477,0.013648


### Compute factor returns of the two risk factors at time t

In [13]:
# Run PCA on the hourly returns keeping two components, show the per-token
# eigenportfolio table (columns PC1/PC2 in the saved output), then compute the
# factor-return series from the hourly returns and those eigenportfolios.
# NOTE(review): this cell's recorded execution count (13) is out of sequence
# with its neighbours (6 and 8), and it rebinds `eigenportfolios`, which an
# earlier cell already assigned via Utility.compute_eigenportfolios — verify
# the notebook still runs top to bottom on a fresh kernel.
principal_components, explained_variance, eigenportfolios = Utility.principal_component_analysis(hourly_returns, n_components=2)
# display() is used because this is not the last expression of the cell.
display(eigenportfolios)
factors_return = Utility.calculate_factor_returns(hourly_returns, eigenportfolios)
# Bare last expression: render the factor returns as the cell output.
factors_return

Unnamed: 0,PC1,PC2
BTC,-0.108642,-0.022379
BNB,-0.16441,-0.044495
FTM,-0.255386,-0.091723
CEL,-0.05238,0.027331
SUSHI,-0.216376,0.076542
GRT,-0.185573,-0.0556
FTT,-0.119639,-0.019521
SNX,-0.167875,0.019537
MATIC,-0.199834,-0.167347
SOL,-0.161556,-0.102451


Unnamed: 0_level_0,PC1,PC2
startTime,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-02-26 06:00:00+00:00,0.291025,0.017185
2021-02-26 07:00:00+00:00,-0.117222,-0.044933
2021-02-26 08:00:00+00:00,0.180234,-0.036103
2021-02-26 09:00:00+00:00,-0.160683,-0.005762
2021-02-26 10:00:00+00:00,-0.174868,-0.016454
...,...,...
2021-03-08 01:00:00+00:00,-0.015304,-0.001324
2021-03-08 02:00:00+00:00,0.019402,0.002850
2021-03-08 03:00:00+00:00,0.064800,0.016144
2021-03-08 04:00:00+00:00,-0.017694,-0.010995


In [8]:
# Estimate residual returns from the hourly returns and the factor returns.
# In the saved output the first frame holds one row of coefficients
# (beta_0, beta_1, beta_2) per token and the second holds one residual column
# per token, indexed by startTime.
regression_coeffs_df, residuals_df = Utility.estimate_residual_return(
    hourly_returns,
    factors_return,
)
# One display() call renders both frames, coefficients first.
display(regression_coeffs_df, residuals_df)

Unnamed: 0,beta_0,beta_1,beta_2
BTC,-0.000121,-0.108642,-0.022379
BNB,-0.000404,-0.16441,-0.044495
FTM,-0.00191,-0.255386,-0.091723
CEL,-5.7e-05,-0.05238,0.027331
SUSHI,8.9e-05,-0.216376,0.076542
GRT,-0.000334,-0.185573,-0.0556
FTT,-0.000102,-0.119639,-0.019521
SNX,6.1e-05,-0.167875,0.019537
MATIC,0.00037,-0.199834,-0.167347
SOL,-0.000666,-0.161556,-0.102451


Unnamed: 0_level_0,BTC,BNB,FTM,CEL,SUSHI,GRT,FTT,SNX,MATIC,SOL,...,RAY,ALPHA,SRM,ASD,HT,TRX,1INCH,LINA,CHZ,BAND
startTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-02-26 06:00:00+00:00,-0.000079,-0.012430,-0.020256,-0.005916,0.015031,-0.012919,0.002529,0.001546,-0.007072,0.010941,...,0.021529,0.010270,0.012535,0.010931,-0.020038,-0.000840,-0.018637,-0.005220,-0.004662,0.009224
2021-02-26 07:00:00+00:00,-0.000894,0.016279,0.033954,0.001831,-0.009969,-0.000547,-0.007878,-0.008313,0.010612,-0.016786,...,-0.014230,-0.007538,-0.011062,-0.009780,0.017417,-0.005331,0.020812,0.004402,-0.007597,-0.000261
2021-02-26 08:00:00+00:00,-0.005954,-0.001918,-0.031366,-0.005760,0.021941,-0.002105,-0.015712,0.000377,0.007319,0.006092,...,-0.030980,-0.000630,0.021636,0.007606,0.000961,-0.006418,0.014019,0.001445,-0.022138,0.001506
2021-02-26 09:00:00+00:00,0.000161,0.002237,0.015298,0.004365,-0.015231,-0.009454,0.001983,0.018226,0.002633,-0.011285,...,-0.020869,-0.016956,-0.011127,-0.006036,-0.000963,0.006122,0.020642,-0.000221,0.033779,0.008542
2021-02-26 10:00:00+00:00,-0.004002,0.015118,-0.013429,-0.007011,-0.004910,-0.005109,-0.002482,-0.000735,0.005974,-0.012484,...,-0.024730,-0.018751,-0.003188,-0.011912,-0.016007,-0.002926,0.061127,0.004278,0.023702,-0.001858
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-08 01:00:00+00:00,0.008429,-0.005401,0.016307,0.000192,-0.011504,0.002046,0.003959,-0.002720,0.005639,-0.007538,...,-0.000282,-0.013399,-0.019079,-0.004356,-0.002602,-0.002026,0.015412,0.001188,0.048128,-0.002619
2021-03-08 02:00:00+00:00,0.003264,-0.002902,0.000286,0.000496,-0.003748,-0.005591,0.002963,0.007518,0.006977,0.005908,...,0.015198,0.007750,-0.002771,0.005432,-0.002735,0.000534,-0.011053,-0.000534,0.002461,-0.003560
2021-03-08 03:00:00+00:00,-0.008556,0.003775,0.011052,0.006110,-0.003689,-0.007797,-0.006510,-0.009455,-0.004802,-0.006055,...,0.053033,-0.009352,-0.003901,-0.030113,-0.002737,-0.003879,-0.004053,-0.008843,-0.008112,-0.009318
2021-03-08 04:00:00+00:00,-0.004215,0.001737,-0.008325,0.004715,-0.011057,0.014534,-0.002524,0.004182,-0.004031,-0.009598,...,0.029282,0.002836,0.011153,-0.001497,0.002323,-0.001956,0.000362,-0.009710,0.023716,0.011032


In [9]:
# Estimate the OU parameters and drop entries with missing values in one chain.
# NOTE(review): the saved output shows a runtime warning raised at
# `sigma_eq = np.sqrt(residuals_variance / (1 - b**2))`; presumably that is
# where the NaNs removed by dropna() originate (e.g. |b| >= 1) — confirm in
# Utility.calculate_ou_parameters.
# NOTE(review): `residuals_df` from the residual-return cell is not passed
# here; verify that calculate_ou_parameters derives the residuals itself
# rather than fitting the OU process to raw returns.
ou_parameters = Utility.calculate_ou_parameters(hourly_returns).dropna()

  sigma_eq = np.sqrt(residuals_variance / (1 - b**2))


### Compute s-scores and generate trading signals at time t

In [11]:
# Build the strategy for time t = start_time, turn the OU parameters into
# s-scores, and map the s-scores to trading signals. The saved output has one
# row per token with columns time, Token, trading_signal and s_score.
# NOTE(review): in the saved output a clearly positive s-score (FTM, 1.49)
# maps to `buy_open`, while strongly negative ones (SUSHI, SNX, MATIC) map to
# `sell_open`. The usual mean-reversion convention (Avellaneda & Lee) is the
# opposite: buy to open when s is very negative, sell to open when s is very
# positive. Confirm the sign convention used by TradingStrategy.
strategy = TradingStrategy(time=start_time)
s_score = strategy.calculate_s_score(ou_parameters)
signals = strategy.generate_signals(s_score)
# Bare last expression: render the signal table as the cell output.
signals

Unnamed: 0,time,Token,trading_signal,s_score
0,2021-03-08 05:00:00+00:00,BTC,hold,-1.166492
1,2021-03-08 05:00:00+00:00,BNB,hold,-0.913787
2,2021-03-08 05:00:00+00:00,FTM,buy_open,1.488416
3,2021-03-08 05:00:00+00:00,CEL,hold,1.11055
4,2021-03-08 05:00:00+00:00,SUSHI,sell_open,-2.923935
5,2021-03-08 05:00:00+00:00,GRT,hold,-1.106941
6,2021-03-08 05:00:00+00:00,FTT,hold,-1.204145
7,2021-03-08 05:00:00+00:00,SNX,sell_open,-2.362183
8,2021-03-08 05:00:00+00:00,MATIC,sell_open,-4.093712
9,2021-03-08 05:00:00+00:00,SOL,hold,0.176414


In [None]:

# Example usage (illustrative sketch only; this cell has never been executed):
# strategy = TradingStrategy()
# s_scores = {'Token1': 1.5, 'Token2': -1.7}  # Example s-scores
# signals = strategy.generate_signals(s_scores)
# tstart = '2021-01-01 00:00:00'
# tend = '2021-01-31 23:00:00'
# hourly_returns = pd.DataFrame()  # Your hourly returns DataFrame here
# performance, sharpe_ratio, max_drawdown = strategy.evaluate_performance(hourly_returns, signals, tstart, tend)