# Rolling Regressions

## Imports

http://www.mit.edu/~6.s085/notes/lecture5.pdf

jt -t monokai -f fira -fs 13 -nf ptsans -nfs 11 -N -kl -cursw 5 -cursc r -cellw 95% -T

In [None]:
# <include-rolling_regressions/utils.py>

In [None]:
# <imports>
import numpy as np
import pandas as pd
import plotly.io as pio
from patsy import dmatrices
import statsmodels.api as sm

from rolling_regressions import utils

# Render plotly figures with the "vscode" renderer by default.
pio.renderers.default = "vscode"
# Route pandas `.plot(...)` calls through plotly.
pd.options.plotting.backend = "plotly"
# Use the "seaborn" plotly template as the default figure theme.
pio.templates.default = "seaborn"

In [None]:
# One-off download of the price history, written to a local CSV. The branch is
# disabled (`if False`), so the notebook reads the cached CSV below instead.
# NOTE(review): `tickers` is only assigned in a later cell, and `[tickers]`
# wraps it in another list - confirm the intended argument before re-enabling.
if False:
    df_prices = utils.fetch_all_tickers([tickers] + ["SPY"], query_params={"start_date": "2016-01-01", "end_date": "2020-12-31"})
    df_prices.to_csv("df_prices.csv")
# Load the cached prices, keeping only the adjusted close, indexed by (ticker, date).
df_prices = pd.read_csv("df_prices.csv", usecols=["ticker", "date", "adj_close"]).set_index(["ticker", "date"])
# Fetch SPY separately over the same date range and append it to the cached prices.
data = utils.fetch_ticker("SPY", query_params={"start_date": "2016-01-01", "end_date": "2020-12-31"})
df_prices = pd.concat([df_prices, data[["ticker", "date", "adj_close"]].set_index(["ticker", "date"])], axis=0)


In [None]:
# Tickers removed from the universe.
# NOTE(review): the reason for excluding these three is not recorded here.
excl_tickers = ["AMRC", "AT", "CCO"]
# Take the first 210 labels of the ticker index level and drop the exclusions.
tickers = [s for s in df_prices.index.levels[0][:210].to_list() if s not in excl_tickers]
# Restrict the prices to the selected tickers plus SPY.
df_prices = df_prices.loc[tickers + ["SPY"]].sort_index()
# Sanity check: no ticker may contain a missing value. The grouped max flags
# each ticker that has any NaN and the sum counts those tickers per column.
# `.iloc[0]` reads the first column's count by position; plain `[0]` on a
# label-indexed Series relies on a deprecated positional fallback.
assert df_prices.isna().groupby("ticker").max().sum().iloc[0] == 0

## Summary

## Analysis

Here we specify times in terms of numbers of days (which we take to be numbers of observations, ignoring differences in duration between observations) and then calculate the characteristic time, $\lambda$, as $\frac{1}{\text{days}}$. We start out with infinite series, not correcting for the long small tails of the weights, even for the shorter time periods. The chart below is a bit of a simplification in that it normalizes the weights based on their sum through 360 days, excluding the weights still applicable to observations beyond 360 days, but since they are small, doing so does not alter the point of the chart. The main point is that observations well past the time $t$ continue to impact the moving average. It appears that about 65% of the statistic is a function of observations up to $t$, with the remaining 35% coming from after that. Even a boxcar window of $\frac{2}{\lambda}$, which for 90 days results in a window of 180 days, only accounts for approximately 88% of the statistic according to the chart below, with another 12% coming from observations after 180 days.

### Potential Analyses
* Different box car window periods relative to the same characteristic times

In [None]:
# Characteristic times, expressed as numbers of observations.
times = np.array((3, 6, 10, 30, 60))
# Matching decay rates: lambda = 1 / t for each characteristic time.
lambdas = 1.0 / times

In [None]:
# Chart, for every characteristic time, the cumulative share of the total
# weight carried by the first `nobs` observations.
nobs = 180
lags = np.arange(nobs)
fig = utils.go.Figure()
for t in times:
    # Geometric decay (1 - 1/t)**k over the lags, normalised to sum to one.
    raw = (1 - 1 / t) ** lags
    weights = raw / raw.sum()
    cumulative = pd.Series(weights).cumsum()
    fig.add_scatter(x=lags, y=cumulative, name=f"t = {t}")
fig.update_layout(title_text="Cumulative Weight by Time Period")
fig.show()

In [None]:
# Log returns per ticker: ln(P_t / P_{t-1}) on the adjusted close, with the
# tickers pivoted into columns.
adj_close_wide = df_prices.adj_close.unstack("ticker")
df_ret = np.log(adj_close_wide / adj_close_wide.shift())
df_ret

In [None]:
# Compute the beta table for every characteristic time with the project helper.
df_betas = utils.get_betas(df_ret, times)
# Average the boxcar_fwd / beta_1 / t_03 series per ticker and show it as a bar chart.
mean_beta_t03 = df_betas.boxcar_fwd.beta_1.t_03.groupby("ticker").mean()
mean_beta_t03.plot(kind="bar", title="Mean Beta")

In [None]:
# For each characteristic time, overlay the histograms of the beta_1 estimates
# from the three window types and print their summary statistics.
time = times[0]
win_types = ("exp_wm", "boxcar", "boxcar_fwd")
for time in times:
    label = f"t_{time:02d}"
    # Keep only rows where all three window types have an estimate.
    df_select = df_betas[[(win, "beta_1", label) for win in win_types]].dropna()
    # Long format: one row per (observation, window type) with the value in column `label`.
    long_form = df_select.stack(["win_type", "stat"]).reset_index()
    fig = utils.px.histogram(
        long_form,
        x=label,
        color="win_type",
        barmode="overlay",
        title=f"Histogram of Coefficients: {label}",
        histnorm="percent",
        marginal="box",
        height=600,
        opacity=0.7,
    )
    fig.show()
    print(df_select.describe())

In [None]:
# Correlation matrix of the beta_1 estimates across window types and times.
beta_1_by_window = df_betas.swaplevel("stat", "win_type", axis=1).beta_1
df_corr = beta_1_by_window.corr()
# Collapse the two-level labels into single "<level0>_<level1>" strings.
flat_index = df_corr.index.to_flat_index().map(lambda pair: f"{pair[0]}_{pair[1]}")
df_corr.index = flat_index
df_corr.columns = flat_index
# Keep every series except the boxcar_fwd ones whose trailing two digits are not 03.
incl_cols = [f for f in flat_index if "boxcar_fwd" not in f or int(f[-2:]) == 3]
df_corr = df_corr.loc[incl_cols, incl_cols]
utils.px.imshow(df_corr, height=600, title="Beta Correlation Matrix")

In [None]:
# Print summary statistics of the beta_1 estimates for one ticker under the
# boxcar, boxcar_fwd and exp_wm window types.
# NOTE(review): `ticker` is not assigned in any cell visible above, and `time`
# is whatever value the earlier histogram loop left behind - confirm both.
# NOTE(review): "t_05" is hard-coded for boxcar_fwd, but 5 is not one of the
# values in `times` as defined above - confirm the column exists.
print(df_betas[[("boxcar", "beta_1", f"t_{time:02d}"), ("boxcar_fwd", "beta_1", "t_05"), ("exp_wm", "beta_1", f"t_{time:02d}")]].loc[ticker].dropna().describe())

As a check to make sure that the calculations are being performed correctly, we can plot one ticker, SPY, and the variance and covariance. This looks like it's doing the right thing - the 90-day moving average shows much more muted responses to changes in AAPL variance.

In [None]:
# AAPL price on the primary axis against its exp_wm variance for two
# characteristic times on the secondary axis.
fig = utils.make_subplots(specs=[[{"secondary_y": True}]])
dates = df_betas.index.levels[1]
aapl_var = df_betas.loc["AAPL"].exp_wm.var_x
fig.add_scatter(x=dates, y=df_prices.loc["AAPL"].adj_close, name="AAPL")
for label in ("t_05", "t_90"):
    fig.add_scatter(x=dates, y=aapl_var[label], name=label, secondary_y=True)
fig.update_layout(title="EWM Variance", showlegend=True)

Covariance appears to be working as well.

In [None]:
# AAPL and SPY prices rebased to 100 at their first observation (primary axis)
# against the AAPL cov_xy series for two characteristic times (secondary axis).
fig = utils.make_subplots(specs=[[{"secondary_y": True}]])
dates = df_betas.index.levels[1]
aapl_px = df_prices.loc["AAPL"].adj_close
spy_px = df_prices.loc["SPY"].adj_close
aapl_cov = df_betas.loc["AAPL"].cov_xy
fig.add_scatter(x=dates, y=aapl_px / aapl_px.iloc[0] * 100, name="AAPL")
fig.add_scatter(x=dates, y=spy_px / spy_px.iloc[0] * 100, name="SPY", line=dict(color=utils.COLORS[3]))
fig.add_scatter(x=dates, y=aapl_cov.t_05, name="cov_xy.t_05", secondary_y=True, line=dict(color=utils.COLORS[1]))
fig.add_scatter(x=dates, y=aapl_cov.t_90, name="cov_xy.t_90", secondary_y=True, line=dict(color=utils.COLORS[2]))
fig.update_layout(title="EWM Covariance", showlegend=True)

This can be simplified with the calculation of the EWMA above by just calculating the variances and covariances; the rest is the same.

In [None]:
# Boxcar (rolling-window) variances, covariances with SPY, and the resulting
# betas for every characteristic time, using a window of 2 * t observations.

# Mask that keeps, within each date block of the pairwise rolling covariance
# (one row per (date, ticker), one column per ticker), only the diagonal
# entry, i.e. each ticker's own variance. It does not depend on the window
# length, so it is built once instead of once per iteration.
diag_mask = np.tile(np.eye(len(df_ret.columns)), (len(df_ret.index), 1))

col_list = []
for t in times:
    label = f"t_{t:02d}"
    # Pairwise rolling covariance, computed once and reused for both the
    # variance (diagonal) and the covariance with SPY.
    rolling_cov = df_ret.rolling(window=2 * t).cov()
    s_var = pd.Series((diag_mask * rolling_cov).sum(axis=1), name=("var_x", label))
    s_cov = pd.Series(rolling_cov["SPY"], name=("cov_xy", label))
    col_list.extend([s_var, s_cov])
df_vars = pd.concat(col_list, axis=1)
# SPY against itself is not of interest; drop its rows.
df_vars = df_vars.loc[df_vars.index.get_level_values("ticker") != "SPY"]
df_vars.columns.names = ["stat", "time"]

# NOTE(review): var_x is each ticker's own rolling variance, so beta_1 here is
# cov(ticker, SPY) / var(ticker). The manual check further down appears to
# divide by the SPY variance instead - confirm which denominator is intended.
df_beta = df_vars["cov_xy"].divide(df_vars["var_x"])
df_beta.columns = pd.MultiIndex.from_tuples([("beta_1", c) for c in df_beta.columns], names=["stat", "time"])

df_betas = pd.concat([df_vars, df_beta], axis=1)
df_betas = df_betas.sort_index(axis=1)
# Put the ticker level first so a single ticker can be selected with `.loc[ticker]`.
df_betas.index = df_betas.index.swaplevel()
df_betas = df_betas.sort_index()
df_betas.loc["AAPL"].head(60)

### Forward Looking 5 Box Car Coefficients

In [None]:
# Display the returns for the window used in the manual checks below
# (label-based slice from 2020-04-27 to 2020-05-11).
df_ret.loc["2020-04-27":"2020-05-11"]

In [None]:
# Cross-check the betas with an ordinary least-squares fit.
# `OLS.from_formula` expects a formula string and a data frame and returns an
# unfitted model, which has no `summary()`. The response and design matrices
# therefore go to the `OLS` constructor, and the model is fitted first.
# NOTE(review): `y` and `X` are not assigned in any visible cell; presumably
# they are the response and design matrices built with `dmatrices` - confirm.
res = sm.OLS(y, X).fit()
res.summary()

In [None]:
# Sample covariance matrix of the returns inside the check window.
check_window = df_ret.loc["2020-04-27":"2020-05-11"]
df_cov = check_window.cov()
df_cov

In [None]:
# Beta as the ratio of the second to the first entry of the SPY covariance column.
# NOTE(review): this relies on row order - presumably row 0 is SPY (its
# variance) and row 1 the other ticker; confirm against df_cov.
spy_cov_col = df_cov.SPY
beta = spy_cov_col.iloc[1] / spy_cov_col.iloc[0]
beta

In [None]:
# Number of observations in the check window.
len(df_ret.loc["2020-04-27":"2020-05-11"])

In [None]:
# Display the full returns table for inspection.
df_ret

In [None]:
# Manual sample covariance of SPY and SUN over the check window.
# The two columns are selected explicitly so the row-wise product is the
# product of exactly these two demeaned series, whatever else df_ret holds.
window = df_ret.loc["2020-04-27":"2020-05-11", ["SPY", "SUN"]]
demeaned = window - window.mean()
# Divide by n - 1 (the convention DataFrame.cov uses by default) rather than
# a hard-coded 10, so the result stays correct if the window length changes.
cov_SPY_SUN = demeaned.prod(axis=1).sum() / (len(window) - 1)
cov_SPY_SUN

In [None]:
# Manual per-column sample variance over the check window.
window = df_ret.loc["2020-04-27":"2020-05-11"]
squared_dev = (window - window.mean()).pow(2)
# Divide by n - 1 rather than a hard-coded 10, so the result stays correct if
# the window length changes.
var_SPY_SUN = squared_dev.sum() / (len(window) - 1)
var_SPY_SUN

In [None]:
# Regression intercept implied by the manual beta: mean(SUN) - beta * mean(SPY)
# over the check window.
window_means = df_ret.loc["2020-04-27":"2020-05-11"].mean()
window_means.SUN - beta * window_means.SPY

## Check to see if