# Predictive Regression

## Imports

In [1]:
# <include-predictive_regression/utils.py>

In [1]:
# <imports>
import numpy as np
import pandas as pd
import plotly.io as pio

from predictive_regression import utils

pd.options.plotting.backend = "plotly"
pio.templates.default = "seaborn"
# pio.base_renderers.default = "vscode"

from patsy import dmatrices
import statsmodels.api as sm

## Summary

In [44]:
df_data.index.levels[1]

Index(['BA', 'C', 'DD', 'F', 'GE', 'JPM', 'LOW', 'LUV', 'MAR', 'T', 'WFC',
       'XOM'],
      dtype='object', name='ticker')

In this assignment we create a two-stage regression model to predict the future weekly "returns" of five-year credit default swaps for 12 large-capitalization, publicly traded companies. The first entails creating contemporaneous models of the CDS "return" and stock price return and then calculating residuals. In the second stage, we create a model that predicts the CDS "return" residual based on the prior day's equity return residual. Our period of analysis spans from 2018-01-03 to 2018-4-30.

#### Contemporaneous Models

We model contemporaneous CDS "returns" as a function of stock price return and "return" an index of other CDS "returns", defined as the arithmetic average the CDS's of the other 11 companies in our universe. For each of these models we perform ordinary least squares regression for each ticker, for each contiguous 16 week period throughout our entire period of analysis. 

$$
r_{E}^{CDS} \sim r_{E}^{Equity} + r_{Index}^{CDS} + \epsilon
$$

We model contemporaneous equity returns as a function of the return on the market, defined as the return on SPY.

$$
r_{E}^{Equity} \sim r_{SPY}^{Equity} + \epsilon
$$

To predict the contemporaneous CDS return we end up with
$$
f_{E,n} = \beta_{E,n}^{Intercept} + \beta_{E,n}^{Equity} \cdot r_{E,n}^{Equity} + \beta_{E,n}^{Index} \cdot r_{E,n}^{Index}
$$

$$
g_{E,n} = \gamma_{E,n}^{Intercept} + \gamma_{E,n}^{SPY} \cdot r_{SPY,n}^{Equity}
$$

Contemporanous residuals can the be defined as for CDS
$$
\rho_{E,n} = r_{E,n}^{CDS} - f_{E,n}
$$

and

$$
c_{E,n} = r_{E,n}^{Equity} - g_{E,n}
$$

#### Predictive Model

Our predictive model then becomes

$$
\rho_{E,n} = c_{E,n-1} + \epsilon
$$

where we are lagging the equity residual back one observation from the CDS residual and using various window methodologies and lengths to calculate the regression coefficients.

Requirements:

* box_car and ewm regressions of various window lengths for cds_resid_n ~ equity_resid_n-1

Interesting things to try:

* r-squared overall for equity, cds and cds_resd ~ eq_resid models
* r_squared by date and by ticker for each model
* robust regressions for each model type

* end_to_end model with cds_return_n ~ cds_n-1, equity_n-1, spy_n-1, index_n-1
* transformer model for end to end integrated model

* across all tickers together as oppossed to individually

## Contemporaneous CAPM Model

In [2]:
date_range = pd.date_range("2018-01-03", "2021-04-30", freq="7D")
df_data = utils.get_data(date_range)
df_data.tail()

Unnamed: 0_level_0,series,adj_close,r_equity,r_spread,spread5y,r_index,r_spy
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-04-28,LUV,61.77,-0.004362,-0.017298,0.008339,-0.010192,0.003191
2021-04-28,MAR,149.45,0.031885,-0.008381,0.008758,-0.011003,0.003191
2021-04-28,T,30.96,0.027839,-0.023042,0.006872,-0.00967,0.003191
2021-04-28,WFC,44.983531,0.041673,-0.031137,0.005364,-0.008934,0.003191
2021-04-28,XOM,58.11,0.036986,0.009787,0.00385,-0.012655,0.003191


In [29]:
df_eq_errors, eq_summary = utils.get_errors(df_data, model="RLM")

In [35]:
df_cds_errors, cds_summary = utils.get_errors(df_data, model="RLM", formula="r_spread ~ r_equity + r_index + 1")

In [36]:
df_resid = pd.concat([df_cds_errors.resid, df_eq_errors.resid.groupby("ticker").shift()], axis=1)
df_resid.columns = ["cds_resid", "eq_resid"]
df_resid = df_resid.unstack("ticker")
df_resid

Unnamed: 0_level_0,cds_resid,cds_resid,cds_resid,cds_resid,cds_resid,cds_resid,cds_resid,cds_resid,cds_resid,cds_resid,...,eq_resid,eq_resid,eq_resid,eq_resid,eq_resid,eq_resid,eq_resid,eq_resid,eq_resid,eq_resid
ticker,BA,C,DD,F,GE,JPM,LOW,LUV,MAR,T,...,DD,F,GE,JPM,LOW,LUV,MAR,T,WFC,XOM
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-05-02,-0.025113,-0.018046,0.032198,-0.028223,0.026602,0.004113,0.015891,0.028792,0.015846,-0.026628,...,,,,,,,,,,
2018-05-09,-0.041016,0.035478,-0.016231,-0.067737,-0.052186,0.038183,0.032949,-0.010534,-0.094166,0.000600,...,-0.005991,-0.011397,-0.027041,0.022754,-0.037866,0.021486,0.023560,0.091845,0.006137,0.032137
2018-05-16,0.019178,-0.075946,0.033173,-0.018585,0.021928,-0.050763,0.041921,0.002748,0.025423,-0.000250,...,-0.008350,0.016492,-0.048740,-0.032638,0.009505,0.028936,-0.000261,0.025363,-0.012648,-0.018023
2018-05-23,-0.020246,-0.020514,0.009462,0.047375,0.011939,0.005813,-0.043524,0.019622,-0.050097,-0.001256,...,-0.017731,-0.029959,-0.031649,0.011019,-0.016197,-0.033420,-0.006868,-0.029981,-0.015973,-0.025735
2018-05-30,0.024082,-0.057301,0.052816,-0.010973,0.004920,-0.058052,0.061210,0.030632,-0.015132,-0.003654,...,0.019622,-0.001384,0.057012,0.011225,-0.111260,0.047955,0.022784,-0.002864,-0.022581,0.004106
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-31,-0.066483,-0.040304,-0.040181,0.033832,0.073766,-0.029731,0.111618,0.017395,-0.031394,-0.030551,...,0.047641,0.037435,0.110701,0.017484,-0.060987,0.024703,0.031129,0.000912,0.028157,0.026193
2021-04-07,-0.011485,0.021210,-0.028963,0.051149,0.008956,0.012997,0.066706,0.032072,-0.012402,0.007130,...,0.009938,0.027596,-0.039561,0.024891,-0.016783,-0.008228,0.007647,-0.003580,0.020143,0.074611
2021-04-14,-0.080350,0.058123,-0.014011,0.009428,0.008991,0.077935,-0.023491,-0.006063,-0.033278,0.000662,...,0.038059,-0.003320,-0.005559,0.012474,0.022144,0.017528,0.039289,-0.001811,0.022989,0.042207
2021-04-21,-0.008596,-0.012307,-0.005475,0.058544,0.012861,-0.041268,-0.010135,0.017703,-0.008046,-0.029433,...,-0.006435,0.063250,-0.006985,0.046571,-0.020004,0.034666,0.004559,0.007111,-0.025593,0.013852


In [37]:
win_lengths = np.array([3, 6, 10, 30, 60])

In [38]:
col_list = []
for w in win_lengths:
    for s in df_resid.columns.levels[1]:
        df_pair = df_resid.loc[:, [("cds_resid", s), ("eq_resid", s)]]
        df_pair.columns = ["cds_resid", "eq_resid"]
        for win_type, df_win in {
            "exp_wm": df_pair.ewm(alpha=1 / w),
            "boxcar": df_pair.rolling(window=2 * w)
        }.items():
            df_cov = df_win.cov()
            df_mean = df_win.mean()

            s_var = df_cov["eq_resid"].xs("eq_resid", level=1)
            s_var.name = ("var_x", win_type, f"t_{w:02d}", s)

            s_cov = df_cov["eq_resid"].xs("cds_resid", level=1)
            s_cov.name = ("cov_xy", win_type, f"t_{w:02d}", s)

            s_beta_1 = s_cov / s_var
            s_beta_1.name = ("beta_1", win_type, f"t_{w:02d}", s)

            s_beta_0 = s_beta_1 * df_mean.eq_resid - df_mean.cds_resid
            s_beta_0.name = ("beta_0", win_type, f"t_{w:02d}", s)

            s_resid_sq = (s_beta_0 + df_pair.eq_resid * s_beta_1 - df_pair.cds_resid).pow(2)
            s_resid_sq.name = ("resid_sq", win_type, f"t_{w:02d}", s)

            s_error_sq = (df_pair.cds_resid.loc[~s_resid_sq.isna()] - df_pair.cds_resid.loc[~s_resid_sq.isna()].mean()).pow(2)
            s_error_sq.name = ("error_sq", win_type, f"t_{w:02d}", s)

            col_list.extend([s_resid_sq, s_error_sq, s_beta_0, s_beta_1, s_var, s_cov])

df_betas = pd.concat(col_list, axis=1)
df_betas.columns.names = ["stat", "win_type", "win_length", "ticker"]
df_betas = df_betas.stack("ticker")
df_betas = df_betas.swaplevel("stat", "win_type", axis=1)
df_betas = df_betas.sort_index(axis=1)
df_betas.tail(60)

Unnamed: 0_level_0,win_type,boxcar,boxcar,boxcar,boxcar,boxcar,boxcar,boxcar,boxcar,boxcar,boxcar,...,exp_wm,exp_wm,exp_wm,exp_wm,exp_wm,exp_wm,exp_wm,exp_wm,exp_wm,exp_wm
Unnamed: 0_level_1,stat,beta_0,beta_0,beta_0,beta_0,beta_0,beta_1,beta_1,beta_1,beta_1,beta_1,...,resid_sq,resid_sq,resid_sq,resid_sq,resid_sq,var_x,var_x,var_x,var_x,var_x
Unnamed: 0_level_2,win_length,t_03,t_06,t_10,t_30,t_60,t_03,t_06,t_10,t_30,t_60,...,t_03,t_06,t_10,t_30,t_60,t_03,t_06,t_10,t_30,t_60
date,ticker,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3
2021-03-31,BA,0.054376,0.009141,0.001138,3.5e-05,0.006138,-0.011463,0.430815,-0.144311,0.026272,0.006193,...,0.013076,0.01013533,0.006902942,0.004613776,0.004774521,0.003478,0.002729,0.002833,0.004738,0.005131
2021-03-31,C,0.00796,2.3e-05,0.003558,-0.001399,-0.000316,-0.028721,-0.139663,0.036011,-0.077839,-0.057777,...,0.001356,0.001645959,0.001802889,0.001725187,0.001536336,0.002541,0.002402,0.002431,0.00232,0.001987
2021-03-31,DD,-0.01276,-0.005523,0.002789,0.005823,0.011121,-1.018256,-0.033936,-0.067648,-0.297029,-0.346114,...,0.000718,3.550798e-05,0.0005871845,0.001285639,0.0009635353,0.001831,0.002065,0.002049,0.002011,0.00198
2021-03-31,F,0.04787,0.0058,0.008098,0.010913,0.003646,0.71011,0.065154,0.090835,-0.863093,-0.678817,...,0.000884,1.732548e-05,0.000380951,0.001834061,0.002546882,0.001586,0.00181,0.001956,0.002372,0.002353
2021-03-31,GE,0.022958,0.028081,0.016034,0.007689,0.007333,0.400507,0.303957,0.042775,-0.303613,-0.17284,...,0.002883,2.098505e-05,0.002471462,0.008250858,0.007705402,0.004699,0.003522,0.003341,0.003509,0.003781
2021-03-31,JPM,0.001605,-0.005888,0.004469,-0.010142,-0.006048,0.161862,-0.168611,0.031272,-0.464054,-0.322285,...,0.001161,0.0009101078,0.0009006217,0.0006220857,0.0004576778,0.001266,0.00142,0.001447,0.00143,0.001277
2021-03-31,LOW,-0.043445,0.0009,0.006154,0.006165,-0.000935,-0.930284,-0.846436,-1.15106,0.265111,0.200319,...,0.007266,0.004386099,0.003694748,0.009203006,0.01224106,0.001887,0.00171,0.00166,0.00209,0.002171
2021-03-31,LUV,-0.00186,0.013452,0.017177,0.017138,0.009323,-0.438067,-0.497493,-0.536656,-0.810574,-0.712163,...,0.000525,0.0004077803,0.0003761764,0.0004616073,0.0005498518,0.002482,0.002666,0.002729,0.003379,0.003253
2021-03-31,MAR,0.03172,0.022074,0.018311,0.000223,0.00039,-0.711282,-0.639014,-0.346888,-0.605484,-0.547257,...,0.002424,0.001571149,0.001206183,0.0005684476,0.0003791253,0.003942,0.004064,0.003793,0.003935,0.003679
2021-03-31,T,0.012399,0.003088,-0.008481,-0.002971,-0.001902,-0.039949,0.449481,-0.067637,0.080708,0.073315,...,0.001582,0.001174557,0.0009637224,0.0008268077,0.0008205271,0.000471,0.000652,0.000791,0.001004,0.001015


In [39]:
1 - df_betas.boxcar.resid_sq.sum() / df_betas.boxcar.error_sq.sum()

win_length
t_03   -0.316958
t_06   -0.008247
t_10    0.089397
t_30    0.079890
t_60   -0.114675
dtype: float64

In [40]:
1 - df_betas.exp_wm.resid_sq.sum() / df_betas.exp_wm.error_sq.sum()

win_length
t_03   -0.496354
t_06   -0.074866
t_10    0.033386
t_30    0.073777
t_60    0.064288
dtype: float64