In [3]:
import pandas as pd 
import numpy as np
from regularized_var import VAR, MinnesotaVAR, WalkForward, WalkForwardValidator, mse, mae, pseudo_r2

In [2]:
# Read the sample dataset from disk and index it by the parsed `date` column.
X = pd.read_csv('data.csv')
X['date'] = pd.to_datetime(X['date'])
X = X.set_index('date')

# Preview the first rows of the four `*_bps` columns.
X[['ct2_bps', 'ct5_bps', 'ct10_bps', 'ct30_bps']].head()

Unnamed: 0_level_0,ct2_bps,ct5_bps,ct10_bps,ct30_bps
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1977-02-15,6.04,6.72,7.36,7.7
1977-02-16,6.02,6.7,7.34,7.67
1977-02-17,5.99,6.67,7.26,7.67
1977-02-18,6.06,6.82,7.41,7.76
1977-02-21,6.06,6.82,7.41,7.76


In [6]:
# Variables to include in the model. Note the model assumes these series are
# already differenced/stationary.
# Standardization is not needed here: the walk-forward functions handle it per
# train/test split, without leakage.
# NOTE(review): no visible cell creates the `*_bps_ret` columns; presumably
# they are already present in data.csv -- confirm.
ENDOG = [f'{key}_bps_ret' for key in ('ct2', 'ct5', 'ct10', 'ct30')]

**Run Walk-forward Validation**

In [7]:
# Walk-forward split configuration shared by every run of the grid search.
splitter = WalkForward(
    train_size=1000,
    min_train_size=1000,
    horizon=5,
    step=1,
)

# Regularization strengths to grid search: 0.0 followed by six log-spaced
# values from 1e-3 to 1e2.
alphas = np.concatenate([[0.0], np.logspace(-3, 2, num=6)])

# One empty list per result column; the grid-search loop appends to these.
output = {name: [] for name in ("alpha", "mae", "mse", "corr", "r2")}

In [8]:
# Grid search over regularization strengths: one walk-forward run per alpha.
#
# Start from empty result lists so that re-running this cell replaces earlier
# results instead of appending duplicate rows to them.
output = {"alpha": [], "mae": [], "mse": [], "corr": [], "r2": []}

for a in alphas:

    wf = WalkForwardValidator(
        estimator_class=MinnesotaVAR,
        # the cross penalty (alpha_cross) is set to twice the own penalty (alpha_own)
        params={"n_lags": 5, "alpha_own": a, "alpha_cross": a*2, "power": 2.0, "include_const": False},
        splitter=splitter,
        metric=mse,
        readd_mean_on_inverse=False,
        verbose=True
    )
    wf.run(X[ENDOG])

    # Predictions and true values returned by the validator
    # (presumably gathered across all splits -- confirm against
    # WalkForwardValidator.concatenated).
    pred_h, true_h = wf.concatenated()

    # Flatten all columns into single 1-D vectors so the metrics are pooled
    # over every variable at once.
    yt = true_h.values.ravel()
    yp = pred_h.values.ravel()

    # Note: pseudo_r2 is given the unflattened objects, unlike mae/mse/corr.
    pooled = {
        "mae": mae(yt, yp),
        "mse": mse(yt, yp),
        "corr": float(pd.Series(yp).corr(pd.Series(yt))),
        "r2": pseudo_r2(true_h, pred_h)
    }

    # Store this alpha and its pooled metrics as one result row.
    output["alpha"].append(a)
    for name, value in pooled.items():
        output[name].append(value)

Walk-forward splits: 100%|█████████████████████████████████████████████████████████| 11654/11654 [00:39<00:00, 295.05it/s]
Walk-forward splits: 100%|█████████████████████████████████████████████████████████| 11654/11654 [00:39<00:00, 294.66it/s]
Walk-forward splits: 100%|█████████████████████████████████████████████████████████| 11654/11654 [00:38<00:00, 303.41it/s]
Walk-forward splits: 100%|█████████████████████████████████████████████████████████| 11654/11654 [00:38<00:00, 304.43it/s]
Walk-forward splits: 100%|█████████████████████████████████████████████████████████| 11654/11654 [00:38<00:00, 303.44it/s]
Walk-forward splits: 100%|█████████████████████████████████████████████████████████| 11654/11654 [00:39<00:00, 295.17it/s]
Walk-forward splits: 100%|█████████████████████████████████████████████████████████| 11654/11654 [00:38<00:00, 303.09it/s]


In [20]:
# Tabulate the grid-search results: one row per alpha, one column per metric.
output_df = pd.DataFrame(data=output)

In [21]:
# Show the pooled metrics for every alpha in the grid.
output_df

Unnamed: 0,alpha,mae,mse,corr,r2
0,0.0,0.111169,0.025835,-0.003267,-0.024424
1,0.001,0.111014,0.02576,-0.001302,-0.021445
2,0.01,0.11064,0.025554,0.007077,-0.013248
3,0.1,0.110272,0.025323,0.023717,-0.004091
4,1.0,0.110076,0.025196,0.037045,0.000943
5,10.0,0.110056,0.025186,0.03817,0.001306
6,100.0,0.110085,0.025211,0.036771,0.000317
