In [1]:
import matplotlib.pyplot as plt   
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from tqdm import tqdm
import warnings

#ignore by message
warnings.filterwarnings("ignore", message="Maximum number of iterations")
warnings.filterwarnings("ignore", message="divide by zero encountered")
warnings.filterwarnings("ignore", message="invalid value encountered in matmul")

In [2]:
# Load the price history (one column per ticker, indexed by trading date),
# keep rows from 2014-01-01 onwards, and drop every ticker that has at least
# one missing price in that window.
#
# The steps are chained rather than applied with `inplace=True`: the object
# returned by `.loc[...]` may be a view of the frame read from disk, and
# mutating it in place is the classic trigger for SettingWithCopyWarning.
sp500_hist = (
    pd.read_csv(
        "./data/sp500_hist_prices.csv",
        parse_dates=['Date'],
        index_col='Date',
    )
    .loc['2014-01-01':]
    .dropna(axis=1)
)


print(f'Shape: {sp500_hist.shape}')
sp500_hist.head()

Shape: (2266, 470)


Unnamed: 0_level_0,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-02,37.141602,23.907921,101.185295,17.388981,35.240082,60.504734,31.847378,19.123333,68.658348,59.290001,...,169.137527,20.609207,66.582367,44.643036,30.301586,45.261662,83.170258,53.18,24.822664,30.329689
2014-01-03,37.610752,25.020359,104.080498,17.007023,35.457024,60.548027,32.188923,18.756666,68.886833,59.16,...,167.479828,20.601709,66.422142,44.857124,30.576576,45.544949,83.530914,53.580002,24.998468,30.039131
2014-01-06,37.425739,25.482304,103.084709,17.09976,34.16214,60.331593,32.613773,18.58,68.159042,58.119999,...,167.360214,20.489336,66.522278,44.587177,30.52335,45.508789,84.071892,53.400002,24.822664,29.973518
2014-01-07,37.960957,25.369169,104.357124,16.977465,34.229923,60.989532,32.363853,18.586666,68.988396,58.970001,...,172.188049,20.841444,67.463448,45.276005,30.612062,46.147724,85.749016,53.950001,24.898008,30.085989
2014-01-08,38.582062,26.047947,103.545738,17.08498,34.141792,61.586887,32.655426,18.476667,69.521561,58.900002,...,175.417999,20.759035,67.243179,45.359783,30.594316,46.129646,87.849907,53.91,25.115677,29.748577


In [3]:
# Chronological split boundaries. The end of one split is the start of the next.
TRAIN_START_DATE, TRAIN_END_DATE = "2014-01-01", "2019-01-01"
VAL_START_DATE, VAL_END_DATE = TRAIN_END_DATE, "2021-01-01"
TEST_START_DATE, TEST_END_DATE = VAL_END_DATE, "2022-12-30"

# Label-based `.loc[start:end]` slicing includes BOTH endpoints, so a trading
# day that falls exactly on a shared boundary would be placed in two splits.
# Half-open [start, end) masks make the splits disjoint by construction; only
# the final split keeps its end date inclusive so the last day is retained.
_dates = sp500_hist.index
_in_train = (_dates >= pd.Timestamp(TRAIN_START_DATE)) & (_dates < pd.Timestamp(TRAIN_END_DATE))
_in_val = (_dates >= pd.Timestamp(VAL_START_DATE)) & (_dates < pd.Timestamp(VAL_END_DATE))
_in_test = (_dates >= pd.Timestamp(TEST_START_DATE)) & (_dates <= pd.Timestamp(TEST_END_DATE))

df_train = sp500_hist.loc[_in_train, :]
df_val = sp500_hist.loc[_in_val, :]
df_test = sp500_hist.loc[_in_test, :]

print(f"Train shape: {df_train.shape}")
print(f"Validation shape: {df_val.shape}")
print(f"Test shape: {df_test.shape}")

# All splits must expose the same set of assets ...
assert df_train.shape[1] == df_val.shape[1] and df_val.shape[1] == df_test.shape[1]
# ... and no date may appear in more than one split.
assert not (_in_train & _in_val).any() and not (_in_val & _in_test).any()


Train shape: (1258, 470)
Validation shape: (505, 470)
Test shape: (503, 470)


In [4]:
# Day-over-day fractional change of every training column. Rows containing
# NaN are dropped (the first row always is, having no predecessor), and exact
# zeros are swapped for 1e-20.
df_train_pct = (
    df_train
    .pct_change()
    .dropna(axis=0)
    .replace(0.0, 1e-20)
)
df_train_pct

Unnamed: 0_level_0,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-03,0.012631,0.046530,0.028613,-0.021966,0.006156,0.000716,0.010724,-0.019174,0.003328,-0.002193,...,-0.009801,-0.000364,-0.002406,0.004796,0.009075,0.006259,0.004336,0.007522,0.007082,-0.009580
2014-01-06,-0.004919,0.018463,-0.009567,0.005453,-0.036520,-0.003575,0.013199,-0.009419,-0.010565,-0.017579,...,-0.000714,-0.005455,0.001508,-0.006018,-0.001741,-0.000794,0.006476,-0.003359,-0.007033,-0.002184
2014-01-07,0.014301,-0.004440,0.012343,-0.007152,0.001984,0.010905,-0.007663,0.000359,0.012168,0.014625,...,0.028847,0.017185,0.014148,0.015449,0.002906,0.014040,0.019949,0.010300,0.003035,0.003752
2014-01-08,0.016362,0.026756,-0.007775,0.006333,-0.002575,0.009794,0.009009,-0.005918,0.007728,-0.001187,...,0.018758,-0.003954,-0.003265,0.001850,-0.000580,-0.000392,0.024500,-0.000741,0.008742,-0.011215
2014-01-09,0.000343,0.064785,0.011131,-0.012770,0.017077,0.003374,0.001786,0.009020,0.009738,0.003226,...,-0.002533,0.003970,-0.009728,0.003694,0.004059,-0.019339,-0.009853,0.004637,0.007333,0.006931
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-24,-0.009796,-0.034438,-0.016445,-0.025874,-0.008950,-0.016949,-0.025420,-0.029354,-0.025729,-0.017433,...,-0.014988,-0.045187,-0.038315,-0.005330,-0.032472,-0.012707,-0.021634,-0.022906,-0.014081,-0.029383
2018-12-26,0.045795,0.086474,0.041029,0.070422,0.057985,0.032222,0.061928,0.045161,0.039949,0.086713,...,0.078470,0.019753,0.047779,0.048787,0.047084,0.034130,0.049265,0.067320,0.051935,0.045913
2018-12-27,0.014342,-0.007742,-0.002193,-0.006490,0.009771,0.006709,0.014507,0.023920,0.010071,0.009823,...,-0.011186,0.008540,0.004371,0.000485,0.016804,0.010779,-0.000688,0.018555,-0.001234,0.017004
2018-12-28,-0.005368,-0.006554,0.004978,0.000512,0.013458,0.005848,0.006513,-0.005652,-0.004202,-0.008928,...,-0.008969,-0.007056,-0.011169,-0.000539,-0.000765,0.007146,0.009242,0.003991,-0.000741,0.001897


In [5]:
assets = df_train_pct.columns.to_list()
n_assets = len(assets)
q = 0.1  # quantile passed to every quantile-regression fit
df_corr = pd.DataFrame(
    data=np.zeros(shape=(n_assets, n_assets)),
    index=assets,
    columns=assets)
parameters = []

# Memo of fitted slopes: _slope_cache[(x, y)] is the coefficient on `x` in the
# quantile regression `y ~ x`. Every ordered pair is needed twice by the loop
# below (as beta_12 of (x, y) and as beta_21 of (y, x)), so caching avoids
# fitting each model twice and halves the number of regressions.
_slope_cache = {}


def _quantile_slope(regressor, response):
    """Return the slope on `regressor` in the q-quantile regression `response ~ regressor`.

    Both arguments are column names of `df_train_pct`. The model is fitted at
    most once per ordered pair; later calls return the memoised value.
    """
    key = (regressor, response)
    if key not in _slope_cache:
        fitted = smf.quantreg(f'{response} ~ {regressor}', df_train_pct).fit(q=q)
        _slope_cache[key] = fitted.params.loc[regressor]
    return _slope_cache[key]


for i in tqdm(assets):
    for j in assets:
        # beta_1.2: slope on i in `j ~ i`
        beta_12 = _quantile_slope(i, j)

        # beta_2.1: slope on j in `i ~ j`
        beta_21 = _quantile_slope(j, i)

        # tau quantile correlation: signed geometric mean of the two slopes,
        # set to 0 when the slopes disagree in sign (negative product).
        rho = np.sign(beta_21) * np.sqrt(beta_21 * beta_12) if beta_21 * beta_12 >= 0 else 0

        # save values
        parameters.append((i, j, beta_12, beta_21, np.sign(beta_21), rho))
        df_corr.at[i, j] = rho

  fhat0 = 1. / (nobs * h) * np.sum(kernel(e / h))
  kernels['epa'] = lambda u: 3. / 4 * (1-u**2) * np.where(np.abs(u) <= 1, 1, 0)
  fhat0 = 1. / (nobs * h) * np.sum(kernel(e / h))
  kernels['epa'] = lambda u: 3. / 4 * (1-u**2) * np.where(np.abs(u) <= 1, 1, 0)
  fhat0 = 1. / (nobs * h) * np.sum(kernel(e / h))
  kernels['epa'] = lambda u: 3. / 4 * (1-u**2) * np.where(np.abs(u) <= 1, 1, 0)
  fhat0 = 1. / (nobs * h) * np.sum(kernel(e / h))
  kernels['epa'] = lambda u: 3. / 4 * (1-u**2) * np.where(np.abs(u) <= 1, 1, 0)
  fhat0 = 1. / (nobs * h) * np.sum(kernel(e / h))
  kernels['epa'] = lambda u: 3. / 4 * (1-u**2) * np.where(np.abs(u) <= 1, 1, 0)
  fhat0 = 1. / (nobs * h) * np.sum(kernel(e / h))
  kernels['epa'] = lambda u: 3. / 4 * (1-u**2) * np.where(np.abs(u) <= 1, 1, 0)
  fhat0 = 1. / (nobs * h) * np.sum(kernel(e / h))
  kernels['epa'] = lambda u: 3. / 4 * (1-u**2) * np.where(np.abs(u) <= 1, 1, 0)
  fhat0 = 1. / (nobs * h) * np.sum(kernel(e / h))
  kernels['epa'] = lambda u: 3. / 4 * (1

In [6]:
# Write the matrix, rounded to 10 decimals, to a CSV whose file name carries
# the quantile with its decimal point stripped.
df_corr_rounded = df_corr.round(10)
df_corr_rounded.to_csv(f"./data/correlation_matrix_q{str(q).replace('.', '')}.csv")

In [7]:
# Sanity check: True if any cell of the matrix is missing.
df_corr.isna().any().any()

False

In [8]:
# Largest entry of the matrix (max of the per-column maxima).
df_corr.max().max()

1.0000000000000033

In [9]:
# Smallest entry of the matrix (min of the per-column minima).
df_corr.min().min()

-0.11279471500161707

In [10]:
# Mean of the per-column means; the diagonal entries are included.
df_corr.mean().mean()

0.33380787305447596

In [11]:
# Display the matrix; rows and columns are labelled by the entries of `assets`.
df_corr

Unnamed: 0,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
A,1.000000,0.442407,0.290852,0.482742,0.466217,0.354084,0.608911,0.389624,0.590267,0.557184,...,0.340388,0.120108,0.451713,0.501018,0.542209,0.469523,0.544049,0.453144,0.495837,0.504091
AAL,0.442407,1.000000,0.276891,0.305480,0.312601,0.327899,0.433957,0.333718,0.414869,0.410106,...,0.238798,0.043975,0.219912,0.374898,0.406696,0.329421,0.350584,0.350868,0.389132,0.335027
AAP,0.290852,0.276891,1.000000,0.237547,0.224675,0.225011,0.313222,0.163076,0.275250,0.214045,...,0.119680,0.115138,0.220131,0.243206,0.250646,0.236228,0.242159,0.220252,0.300744,0.242389
AAPL,0.482742,0.305480,0.237547,1.000000,0.255802,0.269966,0.457019,0.270024,0.496182,0.454899,...,0.233425,0.128124,0.369967,0.355540,0.380421,0.330405,0.370461,0.344012,0.363045,0.391840
ABBV,0.466217,0.312601,0.224675,0.255802,1.000000,0.406099,0.513930,0.204487,0.369500,0.355155,...,0.252647,0.124428,0.324202,0.363873,0.338884,0.296838,0.429252,0.264158,0.341856,0.442376
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YUM,0.469523,0.329421,0.236228,0.330405,0.296838,0.243412,0.454263,0.358986,0.448422,0.421461,...,0.285874,0.241313,0.394253,0.321847,0.385334,1.000000,0.372222,0.288380,0.379268,0.391499
ZBH,0.544049,0.350584,0.242159,0.370461,0.429252,0.357780,0.582458,0.288539,0.444515,0.434214,...,0.262921,0.176315,0.367484,0.475992,0.436215,0.372222,1.000000,0.302545,0.372711,0.400666
ZBRA,0.453144,0.350868,0.220252,0.344012,0.264158,0.211369,0.390576,0.224146,0.392406,0.374701,...,0.259697,0.000000,0.336394,0.269326,0.374173,0.288380,0.302545,1.000000,0.375047,0.313999
ZION,0.495837,0.389132,0.300744,0.363045,0.341856,0.311061,0.446626,0.392509,0.451023,0.365858,...,0.323511,0.000000,0.436069,0.357941,0.516586,0.379268,0.372711,0.375047,1.000000,0.342212
