In [1]:
import pandas as pd 
import numpy as np
from regularized_var.var import VAR, MinnesotaVAR
from regularized_var.model_selection import WalkForward, WalkForwardValidator
from regularized_var.metrics import mse, mae, pseudo_r2

In [2]:
# Read the sample dataset and index it by the parsed 'date' column.
# Written as a single chain so X is always derived fresh from the CSV
# rather than being mutated step by step.
X = (
    pd.read_csv('data.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

# Preview the first rows of the four ct*_bps columns.
X[['ct2_bps', 'ct5_bps', 'ct10_bps', 'ct30_bps']].head()

Unnamed: 0_level_0,ct2_bps,ct5_bps,ct10_bps,ct30_bps
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1977-02-15,6.04,6.72,7.36,7.7
1977-02-16,6.02,6.7,7.34,7.67
1977-02-17,5.99,6.67,7.26,7.67
1977-02-18,6.06,6.82,7.41,7.76
1977-02-21,6.06,6.82,7.41,7.76


In [3]:
# Columns fed to the model as endogenous variables.
# The model assumes these series are already differenced / stationary.
# Standardization is not needed here: per the walk-forward utilities, scaling
# is handled on each train/test split so no test information leaks in.
ENDOG = ['ct2_bps_ret', 'ct5_bps_ret', 'ct10_bps_ret', 'ct30_bps_ret']

**Run Walk-forward Validation**

In [4]:
# Walk-forward splitter reused by every validation run in the grid search.
splitter = WalkForward(
    train_size=1000,
    min_train_size=1000,
    horizon=5,
    step=1,
)

# Regularization strengths for the grid search: 0.0 followed by six
# log-spaced values from 1e-3 to 1e2.
alphas = np.array([0.0, *np.logspace(-3, 2, num=6)])

# Result accumulator: one independent list per reported column.
output = {column: [] for column in ("alpha", "mae", "mse", "corr", "r2")}

In [5]:
# Grid search: one walk-forward validation run per regularization strength,
# recording pooled error metrics for each run in `output`.

# Reset the accumulator lists (keeping the keys defined where `output` was
# created) so that re-running this cell starts from empty lists instead of
# appending a second copy of every row to the results of a previous run.
output = {key: [] for key in output}

for a in alphas:

    # alpha_cross is set to twice alpha_own for every grid point.
    wf = WalkForwardValidator(
        estimator_class=MinnesotaVAR,
        params={"n_lags": 5, "alpha_own": a, "alpha_cross": a*2, "power": 2.0, "include_const": False},
        splitter=splitter,
        metric=mse,
        readd_mean_on_inverse=False,
        verbose=True
    )
    wf.run(X[ENDOG])

    # Predicted and true values returned by the validator for this run
    # (presumably stacked over all walk-forward splits; confirm against
    # WalkForwardValidator.concatenated).
    pred_h, true_h = wf.concatenated()

    # Flatten to 1-D so mae / mse / corr pool every value into a single sample.
    yt = true_h.values.ravel()
    yp = pred_h.values.ravel()

    # Note: pseudo_r2 receives the un-flattened objects, unlike the other metrics.
    pooled = {
        "mae": mae(yt, yp),
        "mse": mse(yt, yp),
        "corr": float(pd.Series(yp).corr(pd.Series(yt))),
        "r2": pseudo_r2(true_h, pred_h)
    }

    # Append exactly one row per alpha.
    output["alpha"].append(a)
    output["mae"].append(pooled["mae"])
    output["mse"].append(pooled["mse"])
    output["corr"].append(pooled["corr"])
    output["r2"].append(pooled["r2"])

Walk-forward splits: 100%|█████████████████████████████████████████████████████████| 11654/11654 [00:39<00:00, 295.59it/s]
Walk-forward splits: 100%|█████████████████████████████████████████████████████████| 11654/11654 [00:38<00:00, 304.34it/s]
Walk-forward splits: 100%|█████████████████████████████████████████████████████████| 11654/11654 [00:38<00:00, 303.41it/s]
Walk-forward splits: 100%|█████████████████████████████████████████████████████████| 11654/11654 [00:38<00:00, 303.85it/s]
Walk-forward splits: 100%|█████████████████████████████████████████████████████████| 11654/11654 [00:40<00:00, 284.90it/s]
Walk-forward splits: 100%|█████████████████████████████████████████████████████████| 11654/11654 [00:38<00:00, 301.65it/s]
Walk-forward splits: 100%|█████████████████████████████████████████████████████████| 11654/11654 [00:38<00:00, 303.37it/s]


In [6]:
# Tabulate the collected metrics: one column per key of `output`.
output_df = pd.DataFrame.from_dict(output)

In [7]:
# Display the metrics table built from `output` (one row per alpha appended by the grid search).
output_df

Unnamed: 0,alpha,mae,mse,corr,r2
0,0.0,0.111169,0.025835,-0.003267,-0.024424
1,0.001,0.110622,0.025543,0.007639,-0.012839
2,0.01,0.11026,0.025315,0.024454,-0.00378
3,0.1,0.110073,0.025193,0.037316,0.001032
4,1.0,0.110057,0.025187,0.038085,0.001267
5,10.0,0.110085,0.025212,0.036748,0.000292
6,100.0,0.110091,0.025219,0.036529,3.3e-05
