In [1]:
pip install numpy pandas tqdm torch scikit-learn optuna

Collecting pandas
  Downloading pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib>=1.2.0 (from scikit-learn)
  Downloading jobli

In [2]:
# ==============================================================
#  LSTM Regression on DNS_KF Forecast Errors
#  --------------------------------------------------------------
#  • Expanding‑window CV (train → val blocks)
#  • Rolling look‑back  = 756 b‑days  (3 yrs)
#  • Validation block   = 504 b‑days  (≈2 yrs; val_y_len in make_folds)
#  • Forecast horizon h = configurable (here default = 1)
#  --------------------------------------------------------------
#  This file merges the working CV logic from the “second model”
#  into the original DNS_KF error‑prediction script.
# ==============================================================

import os, time, random, ast, gc
from typing import List, Tuple

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler

# -------------------------- Repro ----------------------------- #
RNG_SEED = 42

# Seed each random number generator the notebook draws from.
random.seed(RNG_SEED)
np.random.seed(RNG_SEED)
torch.manual_seed(RNG_SEED)

# Pick the compute device once; when CUDA is present, seed every GPU too.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(RNG_SEED)
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"[INFO] Device: {device}")
if device.type == "cuda":
    print(f"  • GPU: {torch.cuda.get_device_name(0)}")
    # NOTE(review): cuDNN autotuning is enabled for speed; it may pick
    # different kernels between runs, which can work against the seeding
    # above — confirm this trade-off is intended.
    torch.backends.cudnn.benchmark = True

# --------------------------- Config --------------------------- #
# Calendar conventions, all expressed in business days.
BUSINESS_DAYS_YEAR = 252
ROLL_YEARS = 3
SEQ_LEN_DEFAULT = ROLL_YEARS * BUSINESS_DAYS_YEAR    # 756-row look-back
VAL_WINDOW = BUSINESS_DAYS_YEAR                      # 252 (not referenced elsewhere in the visible cells)
HOLDOUT_WINDOW = 3 * BUSINESS_DAYS_YEAR              # 756  (≈ 3 yrs)

# Look-back length per forecast horizon. Every listed horizon currently uses
# the same 756 rows; horizons missing from the map fall back to
# SEQ_LEN_DEFAULT at the lookup site.
SEQ_LEN_MAP = {h: 756 for h in (1, 5, 21, 63, 252)}

# Epochs without validation improvement tolerated before a fold stops training.
EARLY_STOP_PATIENCE = 20

# Optuna search space: tuples are (low, high) ranges, lists are categorical choices.
HSPACE = dict(
    hidden_dim=(32, 192),
    num_layers=[1, 2, 3],
    dropout=(0.0, 0.6),
    learning_rate=(1e-4, 5e-3),
    batch_size=[32, 64, 128],
    epochs=(40, 80),
)

# --------------------------- Model --------------------------- #
class LSTMRegressor(nn.Module):
    """LSTM encoder with a dropout → layer-norm → bias-free linear head.

    The input is batch-first, i.e. ``(batch, seq_len, in_dim)``. Only the
    final hidden state of the top LSTM layer is used, so the output has shape
    ``(batch, out_dim)``.

    Args:
        in_dim:  number of input features per time step.
        hid:     LSTM hidden size (also the LayerNorm width).
        layers:  number of stacked LSTM layers.
        out_dim: number of regression outputs.
        drop:    dropout probability for the head; it is also used between
                 LSTM layers, but only when ``layers > 1``.
    """

    def __init__(self, in_dim: int, hid: int, layers: int, out_dim: int = 1, drop: float = 0.0):
        super().__init__()
        # Inter-layer dropout is only passed on for a stacked LSTM.
        stacked_dropout = drop if layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=in_dim,
            hidden_size=hid,
            num_layers=layers,
            batch_first=True,
            dropout=stacked_dropout,
        )
        self.drop = nn.Dropout(p=drop)
        self.norm = nn.LayerNorm(hid)
        self.fc = nn.Linear(hid, out_dim, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map a batch of sequences to one prediction vector per sequence."""
        _, (hidden, _) = self.lstm(x)
        top_state = hidden[-1]              # final hidden state of the last layer
        regularised = self.drop(top_state)
        return self.fc(self.norm(regularised))

# --------------------------- Utility Functions --------------------------- #

def load_target(horizon: int) -> pd.DataFrame:
    """Load the per-maturity forecast errors for one forecast horizon.

    Reads ``dns_kf_total_h{horizon}_full_dataset.csv`` from the working
    directory, sorts it by ``eval_date``, parses the ``true_yields`` and
    ``forecast_yields`` columns with ``_parse_vec`` and returns
    ``forecast - true`` with one column per vector element.

    Args:
        horizon: forecast horizon used to build the file name.

    Returns:
        DataFrame indexed by ``eval_date`` with columns ``err_0 … err_{k-1}``,
        where ``k`` is the length of the parsed vectors.

    Raises:
        FileNotFoundError: if the CSV for this horizon does not exist.
    """
    path = fr"dns_kf_total_h{horizon}_full_dataset.csv"
    if not os.path.exists(path):
        raise FileNotFoundError(path)
    df = pd.read_csv(path, parse_dates=["eval_date"]).sort_values("eval_date")

    true = df["true_yields"].apply(_parse_vec)
    pred = df["forecast_yields"].apply(_parse_vec)
    rows = pred.subtract(true).tolist()     # one error vector per evaluation date

    # Size the columns from the data rather than assuming six maturities.
    # An empty file keeps the historical six-column layout.
    n_cols = len(rows[0]) if rows else 6

    return pd.DataFrame(rows,
                        index=df["eval_date"],
                        columns=[f"err_{i}" for i in range(n_cols)])

def _parse_vec(col: str) -> np.ndarray:
    import ast
    return np.asarray(ast.literal_eval(col), dtype=np.float32)

# -------------------- Debug Fold Info ------------------------ #

def debug_cv_folds(folds):
    """Print the date span and row count of every frame in every fold.

    Args:
        folds: iterable of dicts holding ``X_tr``, ``X_va``, ``Y_tr`` and
            ``Y_va``; each must be non-empty and indexed by timestamps.
    """
    # (printed label, fold key) in the order the lines are emitted.
    sections = (
        ("Train X", "X_tr"),
        ("Val   X", "X_va"),
        ("Train Y", "Y_tr"),
        ("Val   Y", "Y_va"),
    )

    print("\n[DEBUG] Fold summary:\n")
    for fold_no, fold in enumerate(folds, start=1):
        print(f"--- Fold {fold_no} ---")
        for label, key in sections:
            frame = fold[key]
            first_day = frame.index[0].date()
            last_day = frame.index[-1].date()
            print(f"{label}: {first_day} → {last_day} ({len(frame)} rows)")
        print("-" * 60)

# --------------------- Sequence Generator ------------------------ #

def gen_seq(X_df: pd.DataFrame, Y_fold: pd.Series, seq_len: int, h: int):
    """Build (window, target) pairs for an h-step-ahead sequence model.

    For every timestamp ``ts`` of ``Y_fold`` that also exists in ``X_df`` at
    row ``t``, the sample is

    * input:  rows ``t - seq_len + 1 … t`` of ``X_df`` (inclusive),
    * target: the value of ``Y_fold`` at ``X_df.index[t + h]``.

    A sample is skipped when the window would start before row 0, when
    ``X_df`` has no row ``t + h``, when the window contains NaN, or when the
    future timestamp is missing from ``Y_fold``.

    Args:
        X_df:    feature frame; its index defines the time grid.
        Y_fold:  targets. A Series gives one output column; a DataFrame gives
                 one output column per frame column.
        seq_len: number of rows in each input window.
        h:       forecast horizon, counted in rows of ``X_df``.

    Returns:
        ``(X, Y)`` as float32 arrays of shape ``(n, seq_len, n_features)`` and
        ``(n, n_outputs)``. With no valid sample both arrays have ``n == 0``
        and a warning is printed.
    """
    X_arr = X_df.values.astype(np.float32)
    row_of = {ts: i for i, ts in enumerate(X_df.index)}
    n_rows = len(X_df)

    # Membership is checked against the index explicitly: `ts in frame` on a
    # DataFrame looks at the column labels, which would reject every sample.
    y_index = Y_fold.index
    n_out = Y_fold.shape[1] if getattr(Y_fold, "ndim", 1) == 2 else 1

    X_seq, Y_seq = [], []

    for anchor_ts in y_index:
        t = row_of.get(anchor_ts)
        if t is None or t + h >= n_rows:    # X must contain the row h steps ahead
            continue

        start = t - seq_len + 1
        if start < 0:                       # not enough history for a full window
            continue

        window = X_arr[start:t + 1]
        if np.isnan(window).any():
            continue

        future_ts = X_df.index[t + h]
        if future_ts not in y_index:
            continue

        X_seq.append(window)
        # reshape(-1) turns a scalar (Series target) into a length-1 vector
        # and leaves a DataFrame row as a length-k vector.
        Y_seq.append(np.asarray(Y_fold.loc[future_ts], dtype=np.float32).reshape(-1))

    if not X_seq:
        print("[WARN] No sequences generated.")
        return (np.empty((0, seq_len, X_arr.shape[1]), dtype=np.float32),
                np.empty((0, n_out), dtype=np.float32))

    return np.stack(X_seq), np.stack(Y_seq)

# ----------------------- CV Generator ------------------------ #

def make_folds(X: pd.DataFrame, Y: pd.Series, horizon: int, *,
               seq_len=None, train_y_len: int = 252, val_y_len: int = 504,
               holdout_len: int = 756):
    """Build expanding-window train/validation folds by row position.

    The first fold trains on ``train_y_len`` target rows starting at row
    ``seq_len + horizon``. Every later fold keeps that start, extends the
    training targets by ``val_y_len`` rows and validates on the next
    ``val_y_len`` rows. Feature slices stop ``horizon`` rows before the end of
    the matching target slice and reach back ``seq_len`` extra rows so that
    sequence windows have history.

    Folds are produced while ``validation end + val_y_len + holdout_len`` still
    fits in ``len(X)``, so the tail of the data is never used here.

    Args:
        X, Y:        features and targets, aligned row by row.
        horizon:     forecast horizon in rows.
        seq_len:     look-back length; ``None`` (default) looks it up in
                     ``SEQ_LEN_MAP`` with ``SEQ_LEN_DEFAULT`` as fallback.
        train_y_len: number of target rows in the first training block.
        val_y_len:   number of target rows per validation block; also the
                     amount the training block grows by per fold.
        holdout_len: rows reserved at the end of the data.

    Returns:
        List of dicts with keys ``X_tr``, ``Y_tr``, ``X_va``, ``Y_va``
        (copies of the slices) and ``seq_len``.

    Raises:
        ValueError: if ``val_y_len`` is not positive (the window would never
            advance).
    """
    if val_y_len <= 0:
        raise ValueError("val_y_len must be positive")
    if seq_len is None:
        seq_len = SEQ_LEN_MAP.get(horizon, SEQ_LEN_DEFAULT)
    total = len(X)

    folds = []
    # Exclusive end row of the current validation block.
    val_end = seq_len + horizon + train_y_len + val_y_len

    while val_end + val_y_len + holdout_len <= total:
        # Training targets end where the validation targets begin.
        y_tr_end = val_end - val_y_len
        y_tr_start = y_tr_end - train_y_len
        if y_tr_start < 0:
            break
        x_tr_end = y_tr_end - horizon
        x_tr_start = max(0, x_tr_end - seq_len - train_y_len)

        # Validation targets and the feature rows needed to predict them.
        y_va_start, y_va_end = y_tr_end, val_end
        x_va_end = y_va_end - horizon
        x_va_start = max(0, y_va_start - seq_len - horizon)

        folds.append({
            "X_tr": X.iloc[x_tr_start:x_tr_end].copy(),
            "Y_tr": Y.iloc[y_tr_start:y_tr_end].copy(),
            "X_va": X.iloc[x_va_start:x_va_end].copy(),
            "Y_va": Y.iloc[y_va_start:y_va_end].copy(),
            "seq_len": seq_len,
        })

        # Slide forward one validation block; the training block absorbs it.
        val_end += val_y_len
        train_y_len += val_y_len

    return folds

# ---------------------- Debug Printer ------------------------ #

def debug_folds(folds:List[dict]):
    """Print the row count and date span of each frame of each fold.

    Args:
        folds: list of dicts holding ``Y_tr``, ``X_tr``, ``Y_va`` and
            ``X_va``; each must be non-empty and indexed by timestamps.
    """
    # (printed prefix, fold key) in output order.
    rows = (
        ("Train‑Y rows :", "Y_tr"),
        ("Train‑X rows :", "X_tr"),
        ("Val‑Y rows   :", "Y_va"),
        ("Val‑X rows   :", "X_va"),
    )

    print(f"[DEBUG] Created {len(folds)} folds\n")
    for fold_no, fold in enumerate(folds, start=1):
        print(f"--- Fold {fold_no} ---")
        for prefix, key in rows:
            frame = fold[key]
            span = f"{frame.index[0].date()} → {frame.index[-1].date()}"
            print(f"{prefix} {len(frame):4d}   ({span})")
        print("-")

# -------------------- Optuna Objective ----------------------- #

def optuna_objective(trial, folds, horizon):
    """Optuna objective: mean of the best per-fold validation MSE.

    Samples one hyper-parameter set from ``HSPACE``, then for every fold fits
    a StandardScaler on the training features, builds sequences with
    ``gen_seq``, trains a fresh ``LSTMRegressor`` and tracks the lowest
    validation MSE seen over the epochs (with early stopping after
    ``EARLY_STOP_PATIENCE`` non-improving epochs).

    Args:
        trial:   Optuna trial used for sampling, reporting and pruning.
        folds:   fold dicts with ``X_tr``, ``Y_tr``, ``X_va``, ``Y_va`` and
                 ``seq_len``.
        horizon: forecast horizon passed through to ``gen_seq``.

    Returns:
        Mean of the per-fold best validation MSE, or ``inf`` when no fold
        produced both training and validation sequences.

    Raises:
        optuna.exceptions.TrialPruned: when the pruner stops the trial.
    """
    p = {
        "hid": trial.suggest_int("hidden_dim", *HSPACE["hidden_dim"]),
        "lay": trial.suggest_categorical("num_layers", HSPACE["num_layers"]),
        "drp": trial.suggest_float("dropout", *HSPACE["dropout"]),
        "lr" : trial.suggest_float("learning_rate", *HSPACE["learning_rate"], log=True),
        "bs" : trial.suggest_categorical("batch_size", HSPACE["batch_size"]),
        "ep" : trial.suggest_int("epochs", *HSPACE["epochs"]),
    }

    # Mixed precision follows the selected device instead of assuming CUDA,
    # so the same code runs unchanged on a CPU-only machine.
    use_amp = device.type == "cuda"

    fold_mse = []
    # Flat counter so every report() call gets a unique step.
    # NOTE(review): early stopping ends folds at different epochs, so the same
    # step number can refer to different folds in different trials — confirm
    # this is acceptable for the median pruner.
    global_step = 0

    for f in tqdm(folds, desc=f"[Trial {trial.number}] Evaluating folds"):
        # Scale with training statistics only, then apply to validation.
        sc = StandardScaler()
        X_tr_s = pd.DataFrame(sc.fit_transform(f["X_tr"]), index=f["X_tr"].index, columns=f["X_tr"].columns)
        X_va_s = pd.DataFrame(sc.transform(f["X_va"]),     index=f["X_va"].index, columns=f["X_va"].columns)

        Xtr, Ytr = gen_seq(X_tr_s, f["Y_tr"], f["seq_len"], horizon)
        Xva, Yva = gen_seq(X_va_s, f["Y_va"], f["seq_len"], horizon)
        if len(Xtr) == 0 or len(Xva) == 0:
            continue

        model = LSTMRegressor(Xtr.shape[2], p["hid"], p["lay"], Ytr.shape[1], p["drp"]).to(device)
        opt   = torch.optim.Adam(model.parameters(), lr=p["lr"])
        # One loss scaler per model, so scale state is not carried between folds.
        scaler = torch.amp.GradScaler(device.type, enabled=use_amp)
        best  = np.inf
        patience = 0

        train_loader = DataLoader(TensorDataset(torch.tensor(Xtr), torch.tensor(Ytr)), batch_size=p["bs"], shuffle=True)
        va_loader    = DataLoader(TensorDataset(torch.tensor(Xva), torch.tensor(Yva)), batch_size=p["bs"])

        for epoch in range(p["ep"]):
            model.train()
            for xb, yb in train_loader:
                xb, yb = xb.to(device), yb.to(device)
                opt.zero_grad()
                with torch.amp.autocast(device_type=device.type, enabled=use_amp):
                    pred = model(xb)
                    loss = nn.functional.mse_loss(pred, yb)
                scaler.scale(loss).backward()
                scaler.step(opt)
                scaler.update()

            # Evaluation
            model.eval()
            preds, gts = [], []
            with torch.no_grad(), torch.amp.autocast(device_type=device.type, enabled=use_amp):
                for xb, yb in va_loader:
                    # Cast back to float32 so the metric is not computed on
                    # half-precision predictions.
                    preds.append(model(xb.to(device)).float().cpu())
                    gts.append(yb)

            mse = mean_squared_error(torch.cat(gts).numpy(), torch.cat(preds).numpy())

            trial.report(mse, step=global_step)
            global_step += 1

            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()

            if mse < best:
                best = mse
                patience = 0
            else:
                patience += 1
                if patience >= EARLY_STOP_PATIENCE:
                    break

        fold_mse.append(best)

    return np.mean(fold_mse) if fold_mse else float("inf")

# -------------------- Run Experiment ------------------------ #

def main_notebook(horizon: int, trials: int = 30, n_jobs: int = 1):
    """Run the full hyper-parameter search for one forecast horizon.

    Loads the feature CSV and the error targets, appends the error columns to
    the features, drops incomplete rows, builds the CV folds and runs an
    Optuna study with a seeded TPE sampler and a median pruner.

    Args:
        horizon: forecast horizon (selects the target file and the fold layout).
        trials:  number of Optuna trials.
        n_jobs:  number of parallel Optuna workers.

    Returns:
        The finished ``optuna.Study``, so later cells can read
        ``study.best_params`` instead of re-typing them.

    Raises:
        ValueError: if the data is too short to build a single fold.
    """
    # 1. Load features + target
    features = pd.read_csv("X_df_filtered_shap.csv", index_col=0, parse_dates=True)
    y_df = load_target(horizon)

    # 2. Join and clean (no in-place mutation)
    X_df = features.join(y_df).dropna()

    common_dates = X_df.index.intersection(y_df.index)
    X_df = X_df.loc[common_dates]
    y_df = y_df.loc[common_dates]

    y_ser = y_df.mean(axis=1).rename("err")  # raw directional error

    # 3. Generate CV folds
    folds = make_folds(X_df, y_ser, horizon)
    print(f"Generated {len(folds)} folds\n")
    if not folds:
        # Without folds every trial would just return inf; fail loudly instead.
        raise ValueError(
            f"No CV folds could be generated from {len(X_df)} rows for horizon {horizon}."
        )
    debug_cv_folds(folds)

    # 4. Run Optuna
    study = optuna.create_study(
        direction="minimize",
        sampler=TPESampler(seed=RNG_SEED),
        pruner=MedianPruner(n_startup_trials=8, n_warmup_steps=15)
    )

    t0 = time.time()
    study.optimize(lambda tr: optuna_objective(tr, folds, horizon),
                   n_trials=trials, n_jobs=n_jobs, show_progress_bar=True)
    duration = time.time() - t0

    # 5. Print results
    print("=== Best Trial ===")
    print(f"MSE   : {study.best_value:.6f}")
    print(f"Params: {study.best_trial.params}")
    print(f"Duration: {duration/60:.1f} min")

    return study

[INFO] Device: cuda
  • GPU: NVIDIA GeForce RTX 4090


In [3]:
# -------------------- Execution ------------------------ #

# Launch the hyper-parameter search: horizon of 63, 10 Optuna trials, one worker.
if __name__ == "__main__": 
    main_notebook(horizon=63, trials=10, n_jobs=1)

[I 2025-05-17 20:22:16,612] A new study created in memory with name: no-name-334e9434-1c50-46e0-b5c7-7518428d0639


Generated 4 folds


[DEBUG] Fold summary:

--- Fold 1 ---
Train X: 2006-11-15 → 2010-09-27 (1008 rows)
Val   X: 2007-11-02 → 2012-09-03 (1260 rows)
Train Y: 2010-01-06 → 2010-12-23 (252 rows)
Val   Y: 2010-12-24 → 2012-11-29 (504 rows)
------------------------------------------------------------
--- Fold 2 ---
Train X: 2006-11-15 → 2012-09-03 (1512 rows)
Val   X: 2009-10-09 → 2014-08-08 (1260 rows)
Train Y: 2010-01-06 → 2012-11-29 (756 rows)
Val   Y: 2012-11-30 → 2014-11-05 (504 rows)
------------------------------------------------------------
--- Fold 3 ---
Train X: 2006-11-15 → 2014-08-08 (2016 rows)
Val   X: 2011-09-15 → 2016-07-20 (1260 rows)
Train Y: 2010-01-06 → 2014-11-05 (1260 rows)
Val   Y: 2014-11-06 → 2016-10-17 (504 rows)
------------------------------------------------------------
--- Fold 4 ---
Train X: 2006-11-15 → 2016-07-20 (2520 rows)
Val   X: 2013-08-22 → 2018-06-26 (1260 rows)
Train Y: 2010-01-06 → 2016-10-17 (1764 rows)
Val   Y: 2016-10-18 → 2018-09-21 (504 rows)


  0%|          | 0/10 [00:00<?, ?it/s]


[Trial 0] Evaluating folds:   0%|          | 0/4 [00:00<?, ?it/s][A
[Trial 0] Evaluating folds:  25%|██▌       | 1/4 [00:11<00:35, 11.75s/it][A
[Trial 0] Evaluating folds:  50%|█████     | 2/4 [00:30<00:32, 16.12s/it][A
[Trial 0] Evaluating folds:  75%|███████▌  | 3/4 [02:08<00:53, 53.24s/it][A
[Trial 0] Evaluating folds: 100%|██████████| 4/4 [02:51<00:00, 42.88s/it][A


[I 2025-05-17 20:25:08,200] Trial 0 finished with value: 0.115943418815732 and parameters: {'hidden_dim': 92, 'num_layers': 1, 'dropout': 0.0936111842654619, 'learning_rate': 0.00018408992080552527, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.115943418815732.



[Trial 1] Evaluating folds:   0%|          | 0/4 [00:00<?, ?it/s][A
[Trial 1] Evaluating folds:  25%|██▌       | 1/4 [00:19<00:58, 19.45s/it][A
[Trial 1] Evaluating folds:  50%|█████     | 2/4 [00:59<01:03, 31.72s/it][A
[Trial 1] Evaluating folds:  75%|███████▌  | 3/4 [02:06<00:47, 47.79s/it][A
[Trial 1] Evaluating folds: 100%|██████████| 4/4 [03:22<00:00, 50.56s/it][A


[I 2025-05-17 20:28:30,497] Trial 1 finished with value: 0.2480349913239479 and parameters: {'hidden_dim': 35, 'num_layers': 1, 'dropout': 0.10909498032426036, 'learning_rate': 0.0002049268011541737, 'batch_size': 64, 'epochs': 51}. Best is trial 0 with value: 0.115943418815732.



[Trial 2] Evaluating folds:   0%|          | 0/4 [00:00<?, ?it/s][A
[Trial 2] Evaluating folds:  25%|██▌       | 1/4 [00:12<00:36, 12.19s/it][A
[Trial 2] Evaluating folds:  50%|█████     | 2/4 [00:32<00:33, 16.90s/it][A
[Trial 2] Evaluating folds:  75%|███████▌  | 3/4 [01:21<00:31, 31.57s/it][A
[Trial 2] Evaluating folds: 100%|██████████| 4/4 [02:00<00:00, 30.05s/it][A


[I 2025-05-17 20:30:30,729] Trial 2 finished with value: 0.03647817112505436 and parameters: {'hidden_dim': 130, 'num_layers': 3, 'dropout': 0.27364199053022153, 'learning_rate': 0.0021576967455896826, 'batch_size': 128, 'epochs': 41}. Best is trial 2 with value: 0.03647817112505436.



[Trial 3] Evaluating folds:   0%|          | 0/4 [00:00<?, ?it/s][A
[Trial 3] Evaluating folds:  25%|██▌       | 1/4 [00:18<00:54, 18.26s/it][A
[Trial 3] Evaluating folds:  50%|█████     | 2/4 [00:37<00:38, 19.11s/it][A
[Trial 3] Evaluating folds:  75%|███████▌  | 3/4 [01:22<00:30, 30.52s/it][A
[Trial 3] Evaluating folds: 100%|██████████| 4/4 [02:05<00:00, 31.29s/it][A


[I 2025-05-17 20:32:35,932] Trial 3 finished with value: 0.029958782717585564 and parameters: {'hidden_dim': 129, 'num_layers': 3, 'dropout': 0.5793792198447356, 'learning_rate': 0.0023628864184236428, 'batch_size': 128, 'epochs': 58}. Best is trial 3 with value: 0.029958782717585564.



[Trial 4] Evaluating folds:   0%|          | 0/4 [00:00<?, ?it/s][A
[Trial 4] Evaluating folds:  25%|██▌       | 1/4 [00:12<00:38, 12.74s/it][A
[Trial 4] Evaluating folds:  50%|█████     | 2/4 [00:31<00:32, 16.49s/it][A
[Trial 4] Evaluating folds:  75%|███████▌  | 3/4 [01:31<00:36, 36.17s/it][A
[Trial 4] Evaluating folds: 100%|██████████| 4/4 [02:11<00:00, 32.76s/it][A


[I 2025-05-17 20:34:47,013] Trial 4 finished with value: 0.02706875652074814 and parameters: {'hidden_dim': 51, 'num_layers': 3, 'dropout': 0.15526798896001015, 'learning_rate': 0.0013353819088790589, 'batch_size': 128, 'epochs': 47}. Best is trial 4 with value: 0.02706875652074814.



[Trial 5] Evaluating folds:   0%|          | 0/4 [00:00<?, ?it/s][A
[Trial 5] Evaluating folds:  25%|██▌       | 1/4 [00:13<00:38, 12.98s/it][A
[Trial 5] Evaluating folds:  50%|█████     | 2/4 [00:35<00:36, 18.44s/it][A
[Trial 5] Evaluating folds:  75%|███████▌  | 3/4 [01:47<00:43, 43.00s/it][A
[Trial 5] Evaluating folds: 100%|██████████| 4/4 [02:43<00:00, 40.89s/it][A


[I 2025-05-17 20:37:30,594] Trial 5 finished with value: 0.025974877178668976 and parameters: {'hidden_dim': 188, 'num_layers': 2, 'dropout': 0.3587399872866511, 'learning_rate': 0.0036832964384234204, 'batch_size': 64, 'epochs': 53}. Best is trial 5 with value: 0.025974877178668976.



[Trial 6] Evaluating folds:   0%|          | 0/4 [00:00<?, ?it/s][A
[Trial 6] Evaluating folds:  25%|██▌       | 1/4 [00:09<00:27,  9.26s/it][A
[Trial 6] Evaluating folds:  50%|█████     | 2/4 [00:32<00:35, 17.58s/it][A
[Trial 6] Evaluating folds:  75%|███████▌  | 3/4 [01:36<00:38, 38.92s/it][A
[Trial 6] Evaluating folds: 100%|██████████| 4/4 [02:22<00:00, 35.72s/it][A


[I 2025-05-17 20:39:53,589] Trial 6 finished with value: 0.056067117024213076 and parameters: {'hidden_dim': 94, 'num_layers': 2, 'dropout': 0.16856070581242846, 'learning_rate': 0.0008356499023325525, 'batch_size': 64, 'epochs': 80}. Best is trial 5 with value: 0.025974877178668976.



[Trial 7] Evaluating folds:   0%|          | 0/4 [00:00<?, ?it/s][A
[Trial 7] Evaluating folds:  25%|██▌       | 1/4 [00:20<01:02, 20.69s/it][A
[Trial 7] Evaluating folds:  50%|█████     | 2/4 [00:57<01:00, 30.17s/it][A
[Trial 7] Evaluating folds:  75%|███████▌  | 3/4 [02:28<00:58, 58.07s/it][A
[Trial 7] Evaluating folds: 100%|██████████| 4/4 [03:42<00:00, 55.59s/it][A


[I 2025-05-17 20:43:36,009] Trial 7 finished with value: 0.02843462862074375 and parameters: {'hidden_dim': 156, 'num_layers': 3, 'dropout': 0.4241144063085703, 'learning_rate': 0.001732053535845956, 'batch_size': 32, 'epochs': 44}. Best is trial 5 with value: 0.025974877178668976.



[Trial 8] Evaluating folds:   0%|          | 0/4 [00:06<?, ?it/s][A


[I 2025-05-17 20:43:42,289] Trial 8 pruned. 



[Trial 9] Evaluating folds:   0%|          | 0/4 [00:08<?, ?it/s][A


[I 2025-05-17 20:43:51,275] Trial 9 pruned. 
=== Best Trial ===
MSE   : 0.025975
Params: {'hidden_dim': 188, 'num_layers': 2, 'dropout': 0.3587399872866511, 'learning_rate': 0.0036832964384234204, 'batch_size': 64, 'epochs': 53}
Duration: 21.6 min


In [81]:
# -------------------- Holdout Evaluation --------------------
# Ensure best hyperparameters are defined (e.g., from previous Optuna run)
# NOTE(review): X_df and y_ser are not created in this cell; they must already
# exist in the kernel (the data-loading cell has to run first).
# NOTE(review): these values equal the best trial printed by the horizon=63
# search above, while `horizon` below is 1 — confirm which horizon is intended.
best_params = {
    'hidden_dim': 188,
    'num_layers': 2,
    'dropout': 0.3587399872866511,
    'learning_rate': 0.0036832964384234204,
    'batch_size': 64,
    'epochs': 53
}

# Define forecast horizon (must match your Optuna setup)
horizon = 1

# 1. Split data into full-training and holdout
HOLDOUT_LEN = HOLDOUT_WINDOW  # 756 business days
holdout_dates = y_ser.index[-HOLDOUT_LEN:]
holdout_start = holdout_dates[0]

# Training set: before holdout
train_dates = y_ser.index[y_ser.index < holdout_start]
X_train = X_df.loc[train_dates]
y_train = y_ser.loc[train_dates]

# Context for holdout: include seq_len history before holdout start
seq_len = SEQ_LEN_MAP.get(horizon, SEQ_LEN_DEFAULT)
# Clamp at 0: a negative start would make iloc count from the end of the frame.
hold_context_start = max(0, X_df.index.get_loc(holdout_start) - seq_len + 1)
X_hold_context = X_df.iloc[hold_context_start:]
y_hold = y_ser.loc[holdout_dates]

# 2. Scale features (statistics come from the training rows only)
scaler = StandardScaler()
X_tr_scaled = pd.DataFrame(scaler.fit_transform(X_train), index=X_train.index, columns=X_train.columns)
X_hold_scaled = pd.DataFrame(scaler.transform(X_hold_context), index=X_hold_context.index, columns=X_hold_context.columns)

# 3. Generate sequences and capture timestamps for holdout
# Training sequences
X_tr_seq, Y_tr_seq = gen_seq(X_tr_scaled, y_train, seq_len, horizon)
# Holdout sequences with timestamps
# We'll replicate gen_seq logic to also record eval timestamps
def gen_seq_with_times(X_df, Y_fold, seq_len, h):
    """Like ``gen_seq``, but also return the timestamp of every target.

    For each timestamp of ``Y_fold`` found in ``X_df`` at row ``t``, the input
    is rows ``t - seq_len + 1 … t`` of ``X_df`` and the target is ``Y_fold`` at
    ``X_df.index[t + h]``. Samples are skipped when history is too short, when
    row ``t + h`` does not exist, when the window contains NaN, or when the
    future timestamp is missing from ``Y_fold``.

    Args:
        X_df:    feature frame; its index defines the time grid.
        Y_fold:  targets (Series, or DataFrame for several outputs).
        seq_len: rows per input window.
        h:       forecast horizon in rows of ``X_df``.

    Returns:
        ``(X, Y, times)``: float32 arrays of shape
        ``(n, seq_len, n_features)`` and ``(n, n_outputs)``, plus the list of
        target timestamps. With no valid sample the arrays have ``n == 0`` and
        ``times`` is empty, so callers can test for emptiness.
    """
    X_arr = X_df.values.astype(np.float32)
    idx_map = {ts: i for i, ts in enumerate(X_df.index)}
    # Check membership on the index: `ts in frame` on a DataFrame would look
    # at column labels.
    y_index = Y_fold.index
    n_out = Y_fold.shape[1] if getattr(Y_fold, "ndim", 1) == 2 else 1

    X_seq, Y_seq, times = [], [], []
    for target_ts in y_index:
        t = idx_map.get(target_ts)
        if t is None or t + h >= len(X_df):
            continue
        start = t - seq_len + 1
        end = t + 1
        if start < 0:
            continue
        window = X_arr[start:end]
        if np.isnan(window).any():
            continue
        future_ts = X_df.index[t + h]
        if future_ts not in y_index:
            continue
        X_seq.append(window)
        Y_seq.append(np.asarray(Y_fold.loc[future_ts], dtype=np.float32).reshape(-1))
        times.append(future_ts)

    if not X_seq:
        # np.stack([]) would raise; return empty arrays instead.
        return (np.empty((0, seq_len, X_arr.shape[1]), dtype=np.float32),
                np.empty((0, n_out), dtype=np.float32),
                [])

    return np.stack(X_seq), np.stack(Y_seq), times

# Build the holdout samples; hold_times holds the timestamp of each target value.
X_hold_seq, Y_hold_seq, hold_times = gen_seq_with_times(X_hold_scaled, y_hold, seq_len, horizon)
if X_hold_seq.shape[0] == 0:
    raise ValueError("No holdout sequences generated: ensure data length >= seq_len + horizon.")

# 4. Instantiate model
# Input/output widths are read from the training sequences built earlier.
model = LSTMRegressor(
    in_dim=X_tr_seq.shape[2], hid=best_params['hidden_dim'], layers=best_params['num_layers'],
    out_dim=Y_tr_seq.shape[1], drop=best_params['dropout']
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=best_params['learning_rate'])

# 5. Train on full training
# Runs the fixed number of epochs from best_params: no validation set and no
# early stopping here, and no autocast (unlike optuna_objective).
train_loader = DataLoader(TensorDataset(torch.tensor(X_tr_seq), torch.tensor(Y_tr_seq)),
                          batch_size=best_params['batch_size'], shuffle=True)
for epoch in range(1, best_params['epochs'] + 1):
    model.train()
    losses = []
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        pred = model(xb)
        loss = nn.functional.mse_loss(pred, yb)
        loss.backward(); optimizer.step()
        losses.append(loss.item())
    # Mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch}/{best_params['epochs']} - Train MSE: {np.mean(losses):.6f}")

# 6. Evaluate on holdout and save results
model.eval()
with torch.no_grad():
    # The whole holdout set is pushed through the model as a single batch.
    Xh = torch.tensor(X_hold_seq).to(device)
    preds = model(Xh).cpu().numpy().flatten()
# Build results DataFrame
# 'eval_date' is filled from hold_times, i.e. the dates of the target values.
df_results = pd.DataFrame({
    'eval_date': hold_times,
    'horizon': horizon,
    'forecasted_error': preds,
    'true_error': Y_hold_seq.flatten()
})
df_results.to_csv(f'holdout_forecasts_h{horizon}.csv', index=False)

# 7. Report holdout MSE
mse_hold = mean_squared_error(Y_hold_seq, preds)
print(f"Holdout MSE (h={horizon}): {mse_hold:.6f}")

Epoch 1/53 - Train MSE: 0.173504
Epoch 2/53 - Train MSE: 0.003445
Epoch 3/53 - Train MSE: 0.001817
Epoch 4/53 - Train MSE: 0.001521
Epoch 5/53 - Train MSE: 0.001462
Epoch 6/53 - Train MSE: 0.001432
Epoch 7/53 - Train MSE: 0.001465
Epoch 8/53 - Train MSE: 0.001478
Epoch 9/53 - Train MSE: 0.001358
Epoch 10/53 - Train MSE: 0.001600
Epoch 11/53 - Train MSE: 0.001446
Epoch 12/53 - Train MSE: 0.001324
Epoch 13/53 - Train MSE: 0.001291
Epoch 14/53 - Train MSE: 0.001273
Epoch 15/53 - Train MSE: 0.001455
Epoch 16/53 - Train MSE: 0.001293
Epoch 17/53 - Train MSE: 0.001279
Epoch 18/53 - Train MSE: 0.001223
Epoch 19/53 - Train MSE: 0.001139
Epoch 20/53 - Train MSE: 0.001122
Epoch 21/53 - Train MSE: 0.001156
Epoch 22/53 - Train MSE: 0.001129
Epoch 23/53 - Train MSE: 0.001071
Epoch 24/53 - Train MSE: 0.001085
Epoch 25/53 - Train MSE: 0.001196
Epoch 26/53 - Train MSE: 0.001016
Epoch 27/53 - Train MSE: 0.001163
Epoch 28/53 - Train MSE: 0.001086
Epoch 29/53 - Train MSE: 0.001034
Epoch 30/53 - Train MSE

In [30]:
horizon = 1

# Read the feature table and the error targets for this horizon.
X_df = pd.read_csv("X_df_filtered_shap.csv", index_col=0, parse_dates=True)
y_df = load_target(horizon)

# Append the error columns to the features and keep only complete rows.
X_df = X_df.join(y_df).dropna()

# Restrict both frames to the dates they have in common.
common_dates = X_df.index.intersection(y_df.index)
X_df, y_df = X_df.loc[common_dates], y_df.loc[common_dates]

# Target series: row-wise mean of the error columns.
y_ser = y_df.mean(axis=1).rename("err")  # raw directional error

In [24]:
def find_y_not_in_x(X_df: pd.DataFrame, y_df: pd.DataFrame) -> pd.DatetimeIndex:
    """List the dates that index ``y_df`` but are absent from ``X_df``.

    Both indexes are converted with ``pd.to_datetime`` before the comparison.

    Args:
        X_df: frame whose index is the reference set of dates.
        y_df: frame whose index is checked against it.

    Returns:
        The index difference ``y_df.index - X_df.index``.
    """
    reference = pd.to_datetime(X_df.index)
    return pd.to_datetime(y_df.index).difference(reference)

# X_df and y_df are not created in this cell; they must already exist in the kernel.
missing = find_y_not_in_x(X_df, y_df)
print(f"{len(missing)} dates in y_df not found in X_df:\n", missing)


7 dates in y_df not found in X_df:
 DatetimeIndex(['2008-12-25', '2011-12-16', '2015-10-01', '2015-10-02',
               '2015-10-07', '2015-10-08', '2020-03-26'],
              dtype='datetime64[ns]', freq=None)


In [16]:
# ---------------------- Debug Folds ---------------------- #
def debug_folds(folds, forecast_horizon=1):
    """Print shapes, date ranges and simple alignment checks for each fold.

    For every fold this prints the shapes and date spans of the four frames,
    the date ``forecast_horizon`` calendar days before the first validation
    target, the last validation feature date, and how many dates the
    validation features and targets share. Warnings are printed when dates
    are shared or when the features reach the first validation target date.

    NOTE(review): the offset uses ``pd.Timedelta(days=...)``, i.e. calendar
    days rather than rows or business days — confirm that is intended.
    NOTE(review): this reuses the name of an earlier one-argument
    ``debug_folds``; whichever cell runs last wins.

    Args:
        folds: list of dicts holding ``X_tr``, ``Y_tr``, ``X_va``, ``Y_va``.
        forecast_horizon: offset, in days, used for the expected end date.
    """
    print(f"[DEBUG] Total folds generated: {len(folds)}\n")

    for fold_no, fold in enumerate(folds, start=1):
        X_tr, Y_tr = fold['X_tr'], fold['Y_tr']
        X_va, Y_va = fold['X_va'], fold['Y_va']

        print(f"\n--- Fold {fold_no} ---")

        # Shapes
        print(f"Train X: {X_tr.shape}, Y: {Y_tr.shape}")
        print(f"Valid X: {X_va.shape}, Y: {Y_va.shape}")

        # Date ranges, in the same order as the original report
        for title, frame in (("Train X", X_tr), ("Train Y", Y_tr),
                             ("Valid X", X_va), ("Valid Y", Y_va)):
            print(f"{title} range: {frame.index[0].date()} → {frame.index[-1].date()}")

        # Alignment between the validation features and targets
        first_val_target = Y_va.index[0]
        actual_end_x = X_va.index[-1]
        expected_end_x = first_val_target - pd.Timedelta(days=forecast_horizon)
        print(f"Expected X_va end before Y_va start: {expected_end_x.date()}")
        print(f"Actual X_va end: {actual_end_x.date()}")

        # Shared dates
        n_shared = len(set(X_va.index).intersection(Y_va.index))
        print(f"Overlap between X_va and Y_va: {n_shared} dates")

        if n_shared > 0:
            print("⚠️ Overlap detected between X_va and Y_va – check alignment logic.")
        if actual_end_x >= first_val_target:
            print("❗ X_va may leak into Y_va – check sequence slicing.")
# Call it
# `folds` and `FORECAST_HORIZON` are not created in this cell; they must already exist in the kernel.
debug_folds(folds, forecast_horizon=FORECAST_HORIZON)


[DEBUG] Total folds generated: 6


--- Fold 1 ---
Train X: (757, 56), Y: (757, 6)
Valid X: (1259, 56), Y: (504, 6)
Train X range: 2006-08-25 → 2009-07-21
Train Y range: 2009-07-21 → 2012-06-13
Valid X range: 2006-08-28 → 2011-06-24
Valid Y range: 2012-06-14 → 2014-05-20
Expected X_va end before Y_va start: 2012-06-13
Actual X_va end: 2011-06-24
Overlap between X_va and Y_va: 0 dates

--- Fold 2 ---
Train X: (1261, 56), Y: (1261, 6)
Valid X: (1259, 56), Y: (504, 6)
Train X range: 2006-08-25 → 2011-06-27
Train Y range: 2009-07-21 → 2014-05-20
Valid X range: 2008-08-01 → 2013-05-31
Valid Y range: 2014-05-21 → 2016-04-25
Expected X_va end before Y_va start: 2014-05-20
Actual X_va end: 2013-05-31
Overlap between X_va and Y_va: 0 dates

--- Fold 3 ---
Train X: (1765, 56), Y: (1765, 6)
Valid X: (1259, 56), Y: (504, 6)
Train X range: 2006-08-25 → 2013-06-03
Train Y range: 2009-07-21 → 2016-04-25
Valid X range: 2010-07-09 → 2015-05-07
Valid Y range: 2016-04-26 → 2018-03-30
Expected X_va end bef

In [5]:


# Example usage:
# NOTE(review): get_forecast_errors_only is not defined in any visible cell of
# this notebook — it must come from an earlier session or a removed cell.
# NOTE(review): the path is an absolute, machine-specific Windows location —
# TODO make it relative or configurable.
df_with_errors = get_forecast_errors_only(r"C:\Users\azorb\PycharmProjects\Predicting the Yield Curve\Model Fit\Output\DNS_Full_Forecast\dns_kf_total_h5_full_dataset.csv")

In [8]:
# Display the per-maturity error frame built in the previous cell.
df_with_errors

Unnamed: 0_level_0,error_3m,error_6m,error_1y,error_3y,error_5y,error_10y
eval_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2006-08-25,0.049399,-0.041120,-0.007011,0.128507,0.121623,0.093890
2006-08-28,0.067899,-0.056128,-0.027910,0.103968,0.091551,0.060019
2006-08-29,0.100689,-0.036597,-0.013761,0.096428,0.070023,0.046928
2006-08-30,0.121152,-0.016889,0.014671,0.121862,0.114195,0.070162
2006-08-31,0.122509,0.014783,0.036736,0.173692,0.134809,0.088192
...,...,...,...,...,...,...
2025-02-27,0.018743,0.026411,0.132412,0.196475,0.234137,0.218851
2025-02-28,0.001930,0.039014,0.164062,0.236158,0.273256,0.247947
2025-03-03,-0.062340,-0.058485,0.141231,0.241750,0.274899,0.268048
2025-03-04,-0.053563,-0.032764,0.151810,0.210346,0.208558,0.178195


In [3]:
    print(f"MSE   : {study.best_value:.6f}")
    print(f"Params: {study.best_trial.params}")
    print(f"Total run time: {dur:.1f} s")

MSE   : 0.018671
Params: {'hidden_dim': 156, 'num_layers': 3, 'dropout': 0.4241144063085703, 'learning_rate': 0.001732053535845956, 'batch_size': 32, 'epochs': 44}
Total run time: 6340.3 s


In [10]:
if __name__ == "__main__":
    # Final evaluation: train once on everything before the last TEST_SIZE
    # rows, then score on those rows. The names defined here (y_true, y_pred,
    # Y_test, ...) stay in the notebook namespace for the cells that follow.

    FORECAST_HORIZON = 1
    BEST_PARAMS = {
        'hidden_dim': 156,
        'num_layers': 3,
        'dropout': 0.4241144063085703,
        'learning_rate': 0.001732053535845956,
        'batch_size': 32,
        'epochs': 44
    }
    # NOTE(review): 1512 differs from the 756-row look-back in SEQ_LEN_MAP —
    # confirm which window these parameters were tuned with.
    SEQUENCE_LENGTH = 1512

    print("[INFO] Running final model evaluation on test set")

    X = pd.read_csv("X_df_filtered_shap.csv", index_col=0, parse_dates=True)
    Y = pd.read_csv("Y_df_change_1.csv", index_col=0, parse_dates=True)

    TEST_SIZE = 756             # 3-year hold-out
    seq_buffer = SEQUENCE_LENGTH + FORECAST_HORIZON - 1

    X_train = X.iloc[:-TEST_SIZE]
    Y_train = Y.iloc[:-TEST_SIZE]

    X_test_start = -TEST_SIZE - seq_buffer   # keep enough context for sequences
    X_test = X.iloc[X_test_start:]
    Y_test = Y.iloc[-TEST_SIZE:]

    # Scaling statistics come from the training rows only.
    sc = StandardScaler()
    X_train_std = pd.DataFrame(sc.fit_transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test_std  = pd.DataFrame(sc.transform(X_test),     index=X_test.index,  columns=X_test.columns)

    X_tr_seq, Y_tr_seq = gen_seq(X_train_std, Y_train, SEQUENCE_LENGTH, FORECAST_HORIZON)
    X_te_seq, Y_te_seq = gen_seq(X_test_std,  Y_test,  SEQUENCE_LENGTH, FORECAST_HORIZON)

    if len(X_te_seq) == 0 or len(Y_te_seq) == 0:
        print("[ERROR] No valid test sequences generated. Check alignment or sequence length.")
        # Same effect as sys.exit(1), without needing `sys` to be imported.
        raise SystemExit(1)
    else:
        print("[DEBUG] It's working")

    model = LSTMRegressor(
        in_dim=X_tr_seq.shape[2],
        hid=BEST_PARAMS['hidden_dim'],
        layers=BEST_PARAMS['num_layers'],
        out_dim=Y_tr_seq.shape[1],
        drop=BEST_PARAMS['dropout']
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=BEST_PARAMS['learning_rate'])

    # Mixed precision and pinned memory only when actually running on CUDA.
    # `torch.amp` is used through the already-imported `torch`; the bare `amp`
    # name is not imported anywhere in the visible notebook.
    use_amp = device.type == "cuda"
    scaler = torch.amp.GradScaler(device.type, enabled=use_amp)

    train_loader = DataLoader(TensorDataset(torch.tensor(X_tr_seq), torch.tensor(Y_tr_seq)),
                              batch_size=BEST_PARAMS['batch_size'], shuffle=True, pin_memory=use_amp)

    model.train()
    for epoch in range(BEST_PARAMS['epochs']):
        for xb, yb in train_loader:
            xb, yb = xb.to(device, non_blocking=True), yb.to(device, non_blocking=True)
            optimizer.zero_grad(set_to_none=True)
            with torch.amp.autocast(device_type=device.type, enabled=use_amp):
                loss = nn.functional.mse_loss(model(xb), yb)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

    model.eval()
    preds, gts = [], []
    test_loader = DataLoader(TensorDataset(torch.tensor(X_te_seq), torch.tensor(Y_te_seq)),
                             batch_size=BEST_PARAMS['batch_size'], pin_memory=use_amp)

    with torch.no_grad(), torch.amp.autocast(device_type=device.type, enabled=use_amp):
        for xb, yb in test_loader:
            xb = xb.to(device, non_blocking=True)
            # Cast back to float32 so the metric is not computed in half precision.
            preds.append(model(xb).float().cpu())
            gts.append(yb)

    if len(preds) == 0 or len(gts) == 0:
        print("[ERROR] No predictions generated. Check test data preprocessing.")
        raise SystemExit(1)

    y_true = torch.cat(gts).numpy()
    y_pred = torch.cat(preds).numpy()
    mse = mean_squared_error(y_true, y_pred)
    print(f"\n[RESULT] Final Test Set MSE: {mse:.6f}")

[INFO] Running final model evaluation on test set
[DEBUG] It's working

[RESULT] Final Test Set MSE: 0.000339


In [18]:
# ---------------------- Save Multi-Output Results ---------------------- #
# One label per output column of the prediction matrix: m1, m2, ...
maturity_labels = [f"m{k}" for k in range(1, y_true.shape[1] + 1)]

# Dates come first: the trailing len(y_true) entries of the test index, so the
# rows line up with the predictions.
results_dict = {"date": Y_test.index[-len(y_true):]}

# Then one (true, pred) column pair per maturity, in label order.
for i, label in enumerate(maturity_labels):
    results_dict.update({
        f"{label}_true": y_true[:, i],
        f"{label}_pred": y_pred[:, i],
    })

# Assemble the frame with the dates as index.
results_df = pd.DataFrame(results_dict).set_index("date")

# Write to disk and confirm.
results_df.to_csv("final_test_predictions_multioutput.csv")
print("[INFO] Multi-output predictions saved to 'final_test_predictions_multioutput.csv'")


[INFO] Multi-output predictions saved to 'final_test_predictions_multioutput.csv'


In [15]:
# ---------------------- Save Results ---------------------- #
import os

# Show the dates that get paired with the predictions (last len(y_true) test dates).
Y_test.index[-len(y_true):]

DatetimeIndex(['2022-04-13', '2022-04-14', '2022-04-15', '2022-04-18',
               '2022-04-19', '2022-04-20', '2022-04-21', '2022-04-22',
               '2022-04-25', '2022-04-26',
               ...
               '2025-02-20', '2025-02-21', '2025-02-24', '2025-02-25',
               '2025-02-26', '2025-02-27', '2025-02-28', '2025-03-03',
               '2025-03-04', '2025-03-05'],
              dtype='datetime64[ns]', length=756, freq=None)

In [17]:
# Number of test samples that were scored.
len(y_true)

756