# Predictive Regression

## Imports

In [1]:
# <include-predictive_regression/utils.py>

In [103]:
# <imports>
import numpy as np
import pandas as pd
import plotly.io as pio

from predictive_regression import utils

pd.options.plotting.backend = "plotly"
pio.templates.default = "seaborn"
# pio.base_renderers.default = "vscode"

from patsy import dmatrices
import statsmodels.api as sm

## Summary

Requirements:

* box_car and ewm regressions of various window lengths for cds_resid_n ~ equity_resid_n-1

Interesting things to try:

* r-squared overall for equity, cds and cds_resd ~ eq_resid models
* r_squared by date and by ticker for each model
* robust regressions for each model type

* end_to_end model with cds_return_n ~ cds_n-1, equity_n-1, spy_n-1, index_n-1
* transformer model for end to end integrated model

* across all tickers together as oppossed to individually

## Contemporaneous CAPM Model

In [42]:
fig = utils.px.line(utils.np.arange(10))
fig.update_layout()
fig.show()

In [82]:
date_range = pd.date_range("2018-01-03", "2021-04-30", freq="7D")
df_data = utils.get_data(date_range)
df_data.tail()

Unnamed: 0_level_0,series,adj_close,r_equity,r_spread,spread5y,r_index,r_spy
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-04-28,LUV,61.77,-0.004362,-0.017298,0.008339,-0.010192,0.003191
2021-04-28,MAR,149.45,0.031885,-0.008381,0.008758,-0.011003,0.003191
2021-04-28,T,30.96,0.027839,-0.023042,0.006872,-0.00967,0.003191
2021-04-28,WFC,44.983531,0.041673,-0.031137,0.005364,-0.008934,0.003191
2021-04-28,XOM,58.11,0.036986,0.009787,0.00385,-0.012655,0.003191


In [86]:
df_eq_errors, eq_summary = utils.get_errors(df_data)


kurtosistest only valid for n>=20 ... continuing anyway, n=16



In [87]:
df_cds_errors, cds_summary = utils.get_errors(df_data, formula="r_spread ~ r_equity + r_index + 1")


kurtosistest only valid for n>=20 ... continuing anyway, n=16



In [90]:
df_eq_errors.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,resid,scale,sresid,model_date,distance
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-05-02,BA,0.064893,0.001752,37.032145,2018-01-10,1
2018-05-02,C,0.015434,0.000382,40.421242,2018-01-10,1
2018-05-02,DD,-0.006366,0.000185,-34.372388,2018-01-10,1
2018-05-02,F,-0.015277,0.000945,-16.16338,2018-01-10,1
2018-05-02,GE,-0.024766,0.001452,-17.056614,2018-01-10,1


In [89]:
df_cds_errors.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,resid,scale,sresid,model_date,distance
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-05-02,BA,-0.023965,0.002592,-9.245123,2018-01-10,1
2018-05-02,C,-0.018142,0.001087,-16.69048,2018-01-10,1
2018-05-02,DD,0.032307,0.001503,21.494086,2018-01-10,1
2018-05-02,F,-0.026872,0.001788,-15.03101,2018-01-10,1
2018-05-02,GE,0.024747,0.005124,4.829331,2018-01-10,1


In [108]:
df_resid = pd.concat([df_cds_errors.resid, df_eq_errors.resid.groupby("ticker").shift()], axis=1)
df_resid.columns = ["cds_resid", "eq_resid"]
df_resid = df_resid.unstack("ticker")
df_resid

Unnamed: 0_level_0,cds_resid,cds_resid,cds_resid,cds_resid,cds_resid,cds_resid,cds_resid,cds_resid,cds_resid,cds_resid,...,eq_resid,eq_resid,eq_resid,eq_resid,eq_resid,eq_resid,eq_resid,eq_resid,eq_resid,eq_resid
ticker,BA,C,DD,F,GE,JPM,LOW,LUV,MAR,T,...,DD,F,GE,JPM,LOW,LUV,MAR,T,WFC,XOM
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-05-02,-0.023965,-0.018142,0.032307,-0.026872,0.024747,0.003363,0.036606,0.031649,0.016281,-0.053479,...,,,,,,,,,,
2018-05-09,-0.021706,0.032670,-0.015516,-0.067377,-0.056519,0.041347,0.038904,0.003282,-0.094782,-0.001285,...,-0.006366,-0.015277,-0.024766,0.020534,-0.037295,0.018750,0.022898,0.090877,0.008267,0.029411
2018-05-16,0.023761,-0.075160,0.037687,-0.018176,0.011207,-0.050720,0.049054,0.027403,0.029787,-0.001424,...,-0.008642,0.011162,-0.045043,-0.032694,0.010633,0.025311,-0.000192,0.020085,-0.017324,-0.012450
2018-05-23,-0.008951,-0.017493,0.008775,0.047571,0.028472,0.005813,-0.040002,0.011268,-0.047954,-0.003406,...,-0.018808,-0.029694,-0.031514,0.010680,-0.016246,-0.034370,-0.006729,-0.035467,-0.017834,-0.024698
2018-05-30,0.028033,-0.053678,0.058067,-0.009378,0.001472,-0.058052,0.052912,0.043470,-0.014984,-0.007946,...,0.018463,-0.001864,0.057496,0.011228,-0.112103,0.047915,0.022857,-0.007326,-0.022409,0.003893
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-31,-0.071666,-0.046801,-0.037242,0.028774,0.075175,-0.029784,0.119921,0.019200,-0.029520,-0.026238,...,0.031606,0.038051,0.118387,0.017405,-0.067160,0.033524,0.052065,0.003610,0.027328,0.026805
2021-04-07,-0.013804,0.019338,-0.028963,0.042296,0.012803,0.011651,0.065905,0.028434,-0.009815,0.010864,...,0.010240,0.031722,-0.034214,0.024207,-0.020239,-0.005023,0.010683,-0.002242,0.022377,0.070671
2021-04-14,-0.081772,0.056963,-0.014011,0.012213,0.009154,0.083278,-0.023491,-0.011357,-0.033637,0.003719,...,0.037908,0.001046,-0.002020,0.012084,0.017309,0.020853,0.043188,0.000556,0.025553,0.042207
2021-04-21,-0.008310,-0.012406,-0.006644,0.062043,0.005952,-0.045300,-0.012605,0.013919,-0.013024,-0.029066,...,-0.005106,0.067199,-0.006067,0.046201,-0.022514,0.039469,0.012998,0.008668,-0.022250,0.013852


In [104]:
win_lengths = np.array([3, 6, 10, 30, 60, 90])

In [131]:
col_list = []
for w in win_lengths:
    for s in df_resid.columns.levels[1]:
        df_pair = df_resid.loc[:, [("cds_resid", s), ("eq_resid", s)]]
        df_pair.columns = ["cds_resid", "eq_resid"]
        for win_type, df_win in {
            "exp": df_pair.ewm(alpha=1 / w),
            "boxcar": df_pair.rolling(window=2 * w)
        }.items():
            df_cov = df_win.cov()
            df_mean = df_win.mean()

            s_var = df_cov["eq_resid"].xs("eq_resid", level=1)
            s_var.name = ("var_x", win_type, f"t_{w:02d}", s)

            s_cov = df_cov["eq_resid"].xs("cds_resid", level=1)
            s_cov.name = ("cov_xy", win_type, f"t_{w:02d}", s)

            s_beta_1 = s_cov / s_var
            s_beta_1.name = ("beta_1", win_type, f"t_{w:02d}", s)

            s_beta_0 = s_beta_1 * df_mean.eq_resid - df_mean.cds_resid
            s_beta_0.name = ("beta_0", win_type, f"t_{w:02d}", s)

            col_list.extend([s_beta_0, s_beta_1, s_var, s_cov])

df_betas = pd.concat(col_list, axis=1)
df_betas.columns.names = ["stat", "win_type", "win_length", "ticker"]
df_betas = df_betas.stack("ticker")
df_betas = df_betas.swaplevel("stat", "win_type", axis=1)
df_betas = df_betas.sort_index(axis=1)
df_betas

Unnamed: 0_level_0,win_type,boxcar,boxcar,boxcar,boxcar,boxcar,boxcar,boxcar,boxcar,boxcar,boxcar,...,exp,exp,exp,exp,exp,exp,exp,exp,exp,exp
Unnamed: 0_level_1,stat,beta_0,beta_0,beta_0,beta_0,beta_0,beta_0,beta_1,beta_1,beta_1,beta_1,...,cov_xy,cov_xy,cov_xy,cov_xy,var_x,var_x,var_x,var_x,var_x,var_x
Unnamed: 0_level_2,win_length,t_03,t_06,t_10,t_30,t_60,t_90,t_03,t_06,t_10,t_30,...,t_10,t_30,t_60,t_90,t_03,t_06,t_10,t_30,t_60,t_90
date,ticker,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3
2018-05-16,BA,,,,,,,,,,,...,-0.002427,-0.002427,-0.002427,-0.002427,0.005700,0.005700,0.005700,0.005700,0.005700,0.005700
2018-05-16,C,,,,,,,,,,,...,0.003355,0.003355,0.003355,0.003355,0.001936,0.001936,0.001936,0.001936,0.001936,0.001936
2018-05-16,DD,,,,,,,,,,,...,-0.000061,-0.000061,-0.000061,-0.000061,0.000003,0.000003,0.000003,0.000003,0.000003,0.000003
2018-05-16,F,,,,,,,,,,,...,0.000650,0.000650,0.000650,0.000650,0.000350,0.000350,0.000350,0.000350,0.000350,0.000350
2018-05-16,GE,,,,,,,,,,,...,-0.000687,-0.000687,-0.000687,-0.000687,0.000206,0.000206,0.000206,0.000206,0.000206,0.000206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-04-28,LUV,-0.009048,-0.010328,0.011121,-0.022038,-0.005798,,0.077914,-0.309721,-0.557295,-0.692852,...,-0.000741,-0.002057,-0.002436,-0.002432,0.000945,0.001797,0.002155,0.003108,0.003159,0.003052
2021-04-28,MAR,0.055747,0.011312,0.011773,-0.008494,-0.000809,,0.916310,-0.537071,-0.540226,-0.443525,...,-0.001387,-0.001671,-0.001620,-0.001523,0.001350,0.002622,0.003029,0.003639,0.003507,0.003318
2021-04-28,T,0.000782,0.002669,-0.001508,-0.005985,-0.004105,,-2.569993,0.074105,0.047191,0.132328,...,-0.000017,0.000030,0.000049,0.000054,0.000139,0.000345,0.000546,0.000916,0.000997,0.001004
2021-04-28,WFC,-0.021386,-0.006116,-0.004902,-0.001663,-0.005022,,0.024673,0.541634,0.018514,-0.255580,...,0.000271,-0.000054,-0.000170,-0.000181,0.000631,0.001088,0.001524,0.002136,0.002026,0.001906


In [129]:
df_betas

Unnamed: 0_level_0,beta_0,beta_1,var_x,cov_xy,beta_0,beta_1,var_x,cov_xy,beta_0,beta_1,...,var_x,cov_xy,beta_0,beta_1,var_x,cov_xy,beta_0,beta_1,var_x,cov_xy
Unnamed: 0_level_1,exp,exp,exp,exp,boxcar,boxcar,boxcar,boxcar,exp,exp,...,boxcar,boxcar,exp,exp,exp,exp,boxcar,boxcar,boxcar,boxcar
Unnamed: 0_level_2,t_03,t_03,t_03,t_03,t_03,t_03,t_03,t_03,t_03,t_03,...,t_90,t_90,t_90,t_90,t_90,t_90,t_90,t_90,t_90,t_90
Unnamed: 0_level_3,BA,BA,BA,BA,BA,BA,BA,BA,C,C,...,WFC,WFC,XOM,XOM,XOM,XOM,XOM,XOM,XOM,XOM
date,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4
2018-05-02,,,,,,,,,,,...,,,,,,,,,,
2018-05-09,,,,,,,,,,,...,,,,,,,,,,
2018-05-16,0.000292,-0.425834,0.005700,-0.002427,,,,,-0.008845,1.733061,...,,,0.012593,0.944350,0.000876,0.000827,,,,
2018-05-23,0.001415,-0.444178,0.002328,-0.001034,,,,,0.013419,1.356056,...,,,0.003678,0.417597,0.000802,0.000335,,,,
2018-05-30,-0.000529,-0.496496,0.002158,-0.001071,,,,,0.038262,0.507062,...,,,0.003263,0.441160,0.000543,0.000240,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-31,0.057818,0.019264,0.003302,0.000064,0.065041,-0.034104,0.002391,-0.000082,0.010754,-0.113240,...,,,0.004716,-0.061388,0.001455,-0.000089,,,,
2021-04-07,0.043783,-0.069290,0.002276,-0.000158,0.053439,0.020914,0.002314,0.000048,0.000172,-0.214420,...,,,0.005120,-0.080426,0.001505,-0.000121,,,,
2021-04-14,0.051856,-0.297657,0.002803,-0.000834,0.073606,0.282420,0.003041,0.000859,-0.014891,0.171065,...,,,0.005049,-0.078385,0.001509,-0.000118,,,,
2021-04-21,0.036010,-0.264062,0.001875,-0.000495,0.045805,-0.277314,0.002052,-0.000569,-0.004614,0.192042,...,,,0.004759,-0.075758,0.001491,-0.000113,,,,


In [122]:
w = 3
s = "BA"
df_pair = df_resid.loc[:, [("cds_resid", s), ("eq_resid", s)]].dropna()
df_pair.columns = ["cds_resid", "eq_resid"]
df_pair.ewm(alpha=0.3).mean()

Unnamed: 0_level_0,cds_resid,eq_resid
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-05-09,-0.021706,0.064893
2018-05-16,0.005039,0.002086
2018-05-23,-0.001349,0.006529
2018-05-30,0.010251,-0.014159
2018-06-06,0.002896,-0.007791
...,...,...
2021-03-31,-0.055337,-0.004981
2021-04-07,-0.042877,-0.009333
2021-04-14,-0.054546,0.011300
2021-04-21,-0.040675,0.013521


In [119]:
df_pair.ewm(alpha=0.3).cov()a

Unnamed: 0_level_0,Unnamed: 1_level_0,cds_resid,eq_resid
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-05-09,cds_resid,,
2018-05-09,eq_resid,,
2018-05-16,cds_resid,0.001034,-0.002427
2018-05-16,eq_resid,-0.002427,0.005700
2018-05-23,cds_resid,0.000502,-0.001052
...,...,...,...
2021-04-14,eq_resid,-0.000718,0.002776
2021-04-21,cds_resid,0.002508,-0.000416
2021-04-21,eq_resid,-0.000416,0.001957
2021-04-28,cds_resid,0.001761,-0.000206


In [None]:
models = [
    {"model": "OLS", "penalty": None, "B0": 1, "name": "OLS"},
    {"model": "RLM", "penalty": "Huber", "B0": 1, "name": "RLM:Huber"},
    {"model": "RLM", "penalty": "Tukey", "B0": 1, "name": "RLM:Tukey"},
]

In [None]:
dfs_error = {model["name"]: utils.get_errors(df_data, **model)[0] for model in models}