In [56]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn.metrics

# Global plot styling: "paper" context with enlarged fonts, and the "ticks"
# style with a light (0.95 gray) solid grid switched on.
sns.set_context(context="paper", font_scale=1.7)
sns.set_style(
    style="ticks",
    rc={
        "axes.grid": True,
        "grid.color": "0.95",
        "grid.linestyle": "-",
    },
)

# Import forecast results

In [57]:
# Both artifacts below share the same timestamp prefix; keeping it in a single
# constant ensures the results and the log are always read from the same run.
FORECAST_RUN_ID = '20211203_214201'

# Prefix every forecast column with 'model_' so later cells can tell forecast
# columns apart from columns added afterwards (such as 'truth').
forecast_results_df = pd.read_parquet(
    '../../data/_temp/' + FORECAST_RUN_ID + '_test_.parquet'
).add_prefix('model_')

# NOTE(review): unpickling can execute arbitrary code; only load this file if
# it was produced by a trusted run.
forecast_log_df = pd.read_pickle('../../data/_temp/' + FORECAST_RUN_ID + '_test_log.pkl')

## Load true values

In [58]:
# Load the single column used as ground truth for the forecasts.
# NOTE(review): presumably 'ff__mkt' is a market-return series — confirm.
fret_df = pd.read_parquet('../../data/proc/_temp/1996_all.parquet', columns=['ff__mkt'])

# Column assignment aligns on the index: a forecast row whose index label has
# no counterpart in fret_df silently receives NaN. Fail early with a clear
# message instead of letting NaNs leak into the metrics below.
forecast_results_df['truth'] = fret_df['ff__mkt']
missing_truth = forecast_results_df['truth'].isna()
assert not missing_truth.any(), (
    str(int(missing_truth.sum()))
    + ' forecast rows have no true value (index mismatch or missing source data)'
)

# Check forecast error

In [59]:
# Quick sanity check: mean squared error of the 'model_0' forecast only.
sklearn.metrics.mean_squared_error(
    y_true=forecast_results_df['truth'],
    y_pred=forecast_results_df['model_0'],
)

1.8216701605209295e-06

In [90]:
def compute_rsquared(truth, pred):
    """Return the R-squared of ``pred`` relative to an all-zero forecast.

    Computed as ``1 - sum((truth - pred)**2) / sum(truth**2)``. The
    denominator uses the raw squared ``truth`` values (``truth`` is not
    demeaned), so the result is negative whenever ``pred`` has a larger
    squared error than predicting zero everywhere.

    Parameters
    ----------
    truth, pred
        Values supporting elementwise subtraction (e.g. NumPy arrays or
        pandas Series).

    Returns
    -------
    The scalar ``1 - SSE / sum(truth**2)``.
    """
    squared_error = np.square(truth - pred)
    squared_truth = np.square(truth)
    return 1 - np.sum(squared_error) / np.sum(squared_truth)

# Score every forecast column against the true values.
# Forecast columns are those whose name contains 'model'.
model_cols = [col for col in forecast_results_df.columns if 'model' in col]
truth_values = forecast_results_df['truth']

# Build the table in a single constructor call (one row per forecast column)
# rather than enlarging an empty frame cell by cell with .loc inside a loop.
metrics_df = pd.DataFrame(
    {
        'MSE': [
            sklearn.metrics.mean_squared_error(truth_values, forecast_results_df[col])
            for col in model_cols
        ],
        'R2': [
            compute_rsquared(truth_values, forecast_results_df[col])
            for col in model_cols
        ],
    },
    index=model_cols,
)

# Displayed scaled by 100 so R2 reads as a percentage; note that the MSE
# column is scaled by the same factor.
metrics_df*100

Unnamed: 0,MSE,R2
model_0,0.000182,-2.861721
model_1,0.000177,-0.013125
model_2,0.000177,0.009773
model_3,0.000177,-0.195133
model_4,0.000178,-0.506698


In [79]:
# List the columns that are not model forecasts (name lacks 'model').
list(filter(lambda col: 'model' not in col, forecast_results_df.columns))

['truth']

In [61]:
# Regress the true values on all model forecasts jointly: one regressor per
# column whose name contains 'model_'.
model_terms = [x for x in forecast_results_df.columns if 'model_' in x]
ols_formula = 'truth ~ ' + ' + '.join(model_terms)

fit = smf.ols(ols_formula, forecast_results_df).fit()
fit.summary()

0,1,2,3
Dep. Variable:,truth,R-squared:,0.004
Model:,OLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,3.214
Date:,"Sat, 04 Dec 2021",Prob (F-statistic):,0.00672
Time:,13:16:19,Log-Likelihood:,23048.0
No. Observations:,4428,AIC:,-46080.0
Df Residuals:,4422,BIC:,-46050.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.221e-05,2.18e-05,0.559,0.576,-3.06e-05,5.5e-05
model_0,0.0473,0.088,0.537,0.591,-0.125,0.220
model_1,0.9385,0.908,1.034,0.301,-0.842,2.719
model_2,0.5612,0.734,0.764,0.445,-0.878,2.000
model_3,0.0422,0.231,0.182,0.855,-0.411,0.496
model_4,-1.0899,0.421,-2.590,0.010,-1.915,-0.265

0,1,2,3
Omnibus:,2284.841,Durbin-Watson:,1.855
Prob(Omnibus):,0.0,Jarque-Bera (JB):,372730.425
Skew:,-1.394,Prob(JB):,0.0
Kurtosis:,47.86,Cond. No.,56900.0
