In [1]:
pip install numpy pandas tqdm torch scikit-learn optuna

Collecting pandas
  Downloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m27.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.5/13.5 MB[0m [31m59.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 KB[0m [31m145.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pytz>=2020.1
  Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m509.2/509.2 KB[0m [31m160.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tzdata>=2022.7
  Downloading tzdata-2025.2-py2.py3-none-any.

In [2]:
# ==============================================================
#  LSTM Regression on Yield‑Curve Δ  |  Optuna (50 trials, h=1)
#  • Original loop‑based sequence logic
#  • Duplicate‑step warning fixed (unique global_step)
#  • Clean output: only final fold MSE shown
# ==============================================================

# ---------------------- Imports ---------------------- #
import os, sys, gc, time, random
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch import amp
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler

# ---------------------- Reproducibility ---------------------- #
RNG_SEED = 42
random.seed(RNG_SEED); np.random.seed(RNG_SEED); torch.manual_seed(RNG_SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(RNG_SEED)

# ---------------------- Device & CuDNN ---------------------- #
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"[INFO] Device: {device}")
if device.type == "cuda":
    print(f"  • GPU: {torch.cuda.get_device_name(0)}")
    cudnn.benchmark = True

# ---------------------- Config ---------------------- #
FORECAST_HORIZON = 21
TRIALS           = 30
EARLY_STOP       = 20
val_window_num_sequences = 504
holdout_base            = 756
seq_len_map             = {21: 1323}

HSPACE = {
    "hidden_dim"   : (32, 192),
    "num_layers"   : [1, 2, 3],
    "dropout"      : (0.0, 0.6),
    "learning_rate": (1e-4, 5e-3),
    "batch_size"   : [32, 64, 128],
    "epochs"       : (40, 80),
}

# ---------------------- Model ---------------------- #
class LSTMRegressor(nn.Module):
    def __init__(self, in_dim, hid, layers, out_dim, drop=0.0):
        super().__init__()
        self.lstm = nn.LSTM(in_dim, hid, layers, batch_first=True,
                            dropout=(drop if layers > 1 else 0.0))
        self.drop = nn.Dropout(drop)
        self.norm = nn.LayerNorm(hid)
        self.fc   = nn.Linear(hid, out_dim, bias=False)
    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        return self.fc(self.norm(self.drop(hn[-1])))

# ---------------------- Data Utilities ---------------------- #
def gen_seq(X_df, Y_fold, seq_len, h):
    X_arr = X_df.values.astype(np.float32)
    Y_arr = Y_fold.reindex(X_df.index).values.astype(np.float32)
    idx   = {ts: i for i, ts in enumerate(X_df.index)}
    X_seq, Y_seq = [], []
    for t in Y_fold.index:
        ti = idx.get(t)
        if ti is None:
            continue
        end = ti - h + 1; start = end - seq_len
        if start < 0 or end > len(X_arr):
            continue
        win = X_arr[start:end]
        if win.shape[0] != seq_len or np.isnan(win).any() or not np.isfinite(Y_arr[ti]).all():
            continue
        X_seq.append(win); Y_seq.append(Y_arr[ti])
    return np.asarray(X_seq, np.float32), np.asarray(Y_seq, np.float32)


def std_fold(Xtr, Xva):
    sc = StandardScaler()
    return (
        pd.DataFrame(sc.fit_transform(Xtr), index=Xtr.index, columns=Xtr.columns),
        pd.DataFrame(sc.transform(Xva),     index=Xva.index, columns=Xva.columns),
    )

# ---------------------- CV ---------------------- #
def expanding_folds(X, Y, h):
    seq_len = seq_len_map[h]; total = len(X); min_train = seq_len + h
    folds, i = [], min_train
    while i + val_window_num_sequences + holdout_base <= total:
        vs, ve = i, i + val_window_num_sequences
        folds.append({
            "X_tr": X.iloc[:i].copy(),
            "Y_tr": Y.iloc[:i].copy(),
            "X_va": X.iloc[vs - seq_len - h + 1: ve - h].copy(),
            "Y_va": Y.iloc[vs:ve].copy(),
            "seq_len": seq_len,
        })
        i += val_window_num_sequences
    return folds

# ---------------------- Optuna Objective ---------------------- #
def objective(trial, folds):
    p = {
        "hid": trial.suggest_int("hidden_dim", *HSPACE["hidden_dim"]),
        "lay": trial.suggest_categorical("num_layers", HSPACE["num_layers"]),
        "drp": trial.suggest_float("dropout", *HSPACE["dropout"]),
        "lr" : trial.suggest_float("learning_rate", *HSPACE["learning_rate"], log=True),
        "bs" : trial.suggest_categorical("batch_size", HSPACE["batch_size"]),
        "ep" : trial.suggest_int("epochs", *HSPACE["epochs"]),
    }
    scaler = amp.GradScaler(); mse_fold = []

    for f_idx, f in enumerate(tqdm(folds, desc="Folds", leave=False)):
        Xtr_s, Xva_s = std_fold(f["X_tr"], f["X_va"])
        Xtr, Ytr = gen_seq(Xtr_s, f["Y_tr"], f["seq_len"], FORECAST_HORIZON)
        Xva, Yva = gen_seq(Xva_s, f["Y_va"], f["seq_len"], FORECAST_HORIZON)
        if len(Xtr)==0 or len(Xva)==0:
            continue

        model = LSTMRegressor(Xtr.shape[2], p["hid"], p["lay"], Ytr.shape[1], p["drp"]).to(device)
        opt   = torch.optim.Adam(model.parameters(), lr=p["lr"])
        best, pat = np.inf, 0; report_every = max(1, p["ep"]//3)

        tr_loader = DataLoader(TensorDataset(torch.tensor(Xtr), torch.tensor(Ytr)), batch_size=p["bs"], shuffle=True, pin_memory=True)
        va_loader = DataLoader(TensorDataset(torch.tensor(Xva), torch.tensor(Yva)), batch_size=p["bs"], pin_memory=True)

        for epoch in range(p["ep"]):
            model.train()
            for xb, yb in tr_loader:
                xb, yb = xb.to(device, non_blocking=True), yb.to(device, non_blocking=True)
                opt.zero_grad(set_to_none=True)
                with amp.autocast(device_type='cuda'):
                    loss = nn.functional.mse_loss(model(xb), yb)
                scaler.scale(loss).backward(); scaler.step(opt); scaler.update()

            model.eval(); preds, gts = [], []
            with torch.no_grad(), amp.autocast(device_type='cuda'):
                for xb, yb in va_loader:
                    preds.append(model(xb.to(device, non_blocking=True)).cpu()); gts.append(yb)
            mse = mean_squared_error(torch.cat(gts).numpy(), torch.cat(preds).numpy())

            global_step = f_idx * p["ep"] + epoch
            if epoch % report_every == 0:
                trial.report(mse, global_step)
                if trial.should_prune():
                    raise optuna.TrialPruned()

            if mse + 1e-6 < best:
                best, pat = mse, 0
            else:
                pat += 1
                if pat >= EARLY_STOP:
                    break
        tqdm.write(f"Fold {f_idx+1} best MSE = {best:.4f}")
        mse_fold.append(best)
    return np.mean(mse_fold) if mse_fold else np.inf

# ---------------------- Main ---------------------- #
if __name__ == "__main__":
    X = pd.read_csv("X_df_filtered_shap.csv", index_col=0, parse_dates=True)
    Y = pd.read_csv(f"Y_df_change_{FORECAST_HORIZON}.csv", index_col=0, parse_dates=True)
    folds = expanding_folds(X, Y, FORECAST_HORIZON)
    print(f"Generated {len(folds)} folds\n")    

    study = optuna.create_study(
        direction="minimize",
        sampler=TPESampler(seed=RNG_SEED),
        pruner=MedianPruner(8, 15)
    )

    t0 = time.time()
    study.optimize(
        lambda tr: objective(tr, folds),
        n_trials=TRIALS,
        n_jobs=1,
        show_progress_bar=True
    )
    dur = time.time() - t0

    print("=== Best Trial ===")
    print(f"MSE   : {study.best_value:.6f}")
    print(f"Params: {study.best_trial.params}")
    print(f"Total run time: {dur:.1f} s")

[I 2025-05-13 12:13:06,942] A new study created in memory with name: no-name-ab0467ba-3cf3-461e-b23c-219f5b47bc1f


[INFO] Device: cuda
  • GPU: NVIDIA H100 80GB HBM3
Generated 6 folds



  0%|          | 0/30 [00:00<?, ?it/s]


Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:08<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:08<00:16,  4.09s/it][A

Fold 2 best MSE = 0.3747



[A                                                 
Folds:  33%|███▎      | 2/6 [00:30<00:16,  4.09s/it][A
Folds:  50%|█████     | 3/6 [00:30<00:35, 11.73s/it][A

Fold 3 best MSE = 0.4509



[A                                                 
Folds:  50%|█████     | 3/6 [01:03<00:35, 11.73s/it][A
Folds:  67%|██████▋   | 4/6 [01:03<00:38, 19.48s/it][A

Fold 4 best MSE = 1.0902



[A                                                 
Folds:  67%|██████▋   | 4/6 [01:23<00:38, 19.48s/it][A
Folds:  83%|████████▎ | 5/6 [01:23<00:19, 19.69s/it][A

Fold 5 best MSE = 0.1413



[A                                                 
Folds:  83%|████████▎ | 5/6 [01:40<00:19, 19.69s/it][A
Folds: 100%|██████████| 6/6 [01:40<00:00, 18.99s/it][A
                                                    [A

Fold 6 best MSE = 0.3880
[I 2025-05-13 12:14:47,788] Trial 0 finished with value: 0.489018115401268 and parameters: {'hidden_dim': 92, 'num_layers': 1, 'dropout': 0.0936111842654619, 'learning_rate': 0.00018408992080552527, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.489018115401268.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:08<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:08<00:16,  4.23s/it][A

Fold 2 best MSE = 0.3277



[A                                                 
Folds:  33%|███▎      | 2/6 [00:24<00:16,  4.23s/it][A
Folds:  50%|█████     | 3/6 [00:24<00:27,  9.06s/it][A

Fold 3 best MSE = 0.3301



[A                                                 
Folds:  50%|█████     | 3/6 [00:45<00:27,  9.06s/it][A
Folds:  67%|██████▋   | 4/6 [00:45<00:27, 13.58s/it][A

Fold 4 best MSE = 0.9847



[A                                                 
Folds:  67%|██████▋   | 4/6 [01:14<00:27, 13.58s/it][A
Folds:  83%|████████▎ | 5/6 [01:14<00:19, 19.08s/it][A

Fold 5 best MSE = 0.0328



[A                                                 
Folds:  83%|████████▎ | 5/6 [01:32<00:19, 19.08s/it][A
Folds: 100%|██████████| 6/6 [01:32<00:00, 18.70s/it][A
                                                    [A

Fold 6 best MSE = 0.3847
[I 2025-05-13 12:16:20,620] Trial 1 finished with value: 0.4120042182505131 and parameters: {'hidden_dim': 35, 'num_layers': 1, 'dropout': 0.10909498032426036, 'learning_rate': 0.0002049268011541737, 'batch_size': 64, 'epochs': 51}. Best is trial 1 with value: 0.4120042182505131.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:13<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:13<00:26,  6.62s/it][A

Fold 2 best MSE = 0.2212



[A                                                 
Folds:  33%|███▎      | 2/6 [00:30<00:26,  6.62s/it][A
Folds:  50%|█████     | 3/6 [00:30<00:32, 10.97s/it][A

Fold 3 best MSE = 0.2340



[A                                                 
Folds:  50%|█████     | 3/6 [00:49<00:32, 10.97s/it][A
Folds:  67%|██████▋   | 4/6 [00:49<00:28, 14.22s/it][A

Fold 4 best MSE = 0.8946



[A                                                 
Folds:  67%|██████▋   | 4/6 [01:14<00:28, 14.22s/it][A
Folds:  83%|████████▎ | 5/6 [01:14<00:17, 17.74s/it][A

Fold 5 best MSE = 0.0144



[A                                                 
Folds:  83%|████████▎ | 5/6 [01:45<00:17, 17.74s/it][A
Folds: 100%|██████████| 6/6 [01:45<00:00, 22.17s/it][A
                                                    [A

Fold 6 best MSE = 0.3178
[I 2025-05-13 12:18:06,067] Trial 2 finished with value: 0.33640558626502753 and parameters: {'hidden_dim': 130, 'num_layers': 3, 'dropout': 0.27364199053022153, 'learning_rate': 0.0021576967455896826, 'batch_size': 128, 'epochs': 41}. Best is trial 2 with value: 0.33640558626502753.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:10<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:10<00:20,  5.19s/it][A

Fold 2 best MSE = 0.1839



[A                                                 
Folds:  33%|███▎      | 2/6 [00:47<00:20,  5.19s/it][A
Folds:  50%|█████     | 3/6 [00:47<00:54, 18.30s/it][A

Fold 3 best MSE = 0.2097



[A                                                 
Folds:  50%|█████     | 3/6 [01:06<00:54, 18.30s/it][A
Folds:  67%|██████▋   | 4/6 [01:06<00:37, 18.79s/it][A

Fold 4 best MSE = 0.8958



[A                                                 
Folds:  67%|██████▋   | 4/6 [01:35<00:37, 18.79s/it][A
Folds:  83%|████████▎ | 5/6 [01:35<00:22, 22.42s/it][A

Fold 5 best MSE = 0.0050



[A                                                 
Folds:  83%|████████▎ | 5/6 [03:06<00:22, 22.42s/it][A
Folds: 100%|██████████| 6/6 [03:06<00:00, 45.04s/it][A
                                                    [A

Fold 6 best MSE = 0.2813
[I 2025-05-13 12:21:12,573] Trial 3 finished with value: 0.31512034544721246 and parameters: {'hidden_dim': 129, 'num_layers': 3, 'dropout': 0.5793792198447356, 'learning_rate': 0.0023628864184236428, 'batch_size': 128, 'epochs': 58}. Best is trial 3 with value: 0.31512034544721246.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:03<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:03<00:07,  1.78s/it][A

Fold 2 best MSE = 0.2409



[A                                                 
Folds:  33%|███▎      | 2/6 [00:19<00:07,  1.78s/it][A
Folds:  50%|█████     | 3/6 [00:19<00:22,  7.55s/it][A

Fold 3 best MSE = 0.2437



[A                                                 
Folds:  50%|█████     | 3/6 [00:32<00:22,  7.55s/it][A
Folds:  67%|██████▋   | 4/6 [00:32<00:19,  9.55s/it][A

Fold 4 best MSE = 0.9847



[A                                                 
Folds:  67%|██████▋   | 4/6 [00:47<00:19,  9.55s/it][A
Folds:  83%|████████▎ | 5/6 [00:47<00:11, 11.71s/it][A

Fold 5 best MSE = 0.0119



[A                                                 
Folds:  83%|████████▎ | 5/6 [01:04<00:11, 11.71s/it][A
Folds: 100%|██████████| 6/6 [01:04<00:00, 13.43s/it][A
                                                    [A

Fold 6 best MSE = 0.2887
[I 2025-05-13 12:22:17,373] Trial 4 finished with value: 0.3539853794500232 and parameters: {'hidden_dim': 51, 'num_layers': 3, 'dropout': 0.15526798896001015, 'learning_rate': 0.0013353819088790589, 'batch_size': 128, 'epochs': 47}. Best is trial 3 with value: 0.31512034544721246.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:15<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:15<00:31,  7.88s/it][A

Fold 2 best MSE = 0.2185



[A                                                 
Folds:  33%|███▎      | 2/6 [00:37<00:31,  7.88s/it][A
Folds:  50%|█████     | 3/6 [00:37<00:41, 13.83s/it][A

Fold 3 best MSE = 0.2375



[A                                                 
Folds:  50%|█████     | 3/6 [01:10<00:41, 13.83s/it][A
Folds:  67%|██████▋   | 4/6 [01:10<00:41, 20.85s/it][A

Fold 4 best MSE = 0.9470



[A                                                 
Folds:  67%|██████▋   | 4/6 [01:52<00:41, 20.85s/it][A
Folds:  83%|████████▎ | 5/6 [01:52<00:28, 28.27s/it][A

Fold 5 best MSE = 0.0088



[A                                                 
Folds:  83%|████████▎ | 5/6 [02:47<00:28, 28.27s/it][A
Folds: 100%|██████████| 6/6 [02:47<00:00, 37.03s/it][A
                                                    [A

Fold 6 best MSE = 0.2514
[I 2025-05-13 12:25:04,827] Trial 5 finished with value: 0.33263284917920827 and parameters: {'hidden_dim': 188, 'num_layers': 2, 'dropout': 0.3587399872866511, 'learning_rate': 0.0036832964384234204, 'batch_size': 64, 'epochs': 53}. Best is trial 3 with value: 0.31512034544721246.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:04<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:04<00:08,  2.23s/it][A

Fold 2 best MSE = 0.2325



[A                                                 
Folds:  33%|███▎      | 2/6 [00:13<00:08,  2.23s/it][A
Folds:  50%|█████     | 3/6 [00:13<00:14,  4.92s/it][A

Fold 3 best MSE = 0.2381



[A                                                 
Folds:  50%|█████     | 3/6 [00:34<00:14,  4.92s/it][A
Folds:  67%|██████▋   | 4/6 [00:34<00:21, 10.92s/it][A

Fold 4 best MSE = 0.8484



[A                                                 
Folds:  67%|██████▋   | 4/6 [00:50<00:21, 10.92s/it][A
Folds:  83%|████████▎ | 5/6 [00:50<00:12, 12.71s/it][A

Fold 5 best MSE = 0.0159



[A                                                 
Folds:  83%|████████▎ | 5/6 [01:06<00:12, 12.71s/it][A
Folds: 100%|██████████| 6/6 [01:06<00:00, 13.80s/it][A
                                                    [A

Fold 6 best MSE = 0.4202
[I 2025-05-13 12:26:11,107] Trial 6 finished with value: 0.35103695653378963 and parameters: {'hidden_dim': 94, 'num_layers': 2, 'dropout': 0.16856070581242846, 'learning_rate': 0.0008356499023325525, 'batch_size': 64, 'epochs': 80}. Best is trial 3 with value: 0.31512034544721246.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:21<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:21<00:42, 10.57s/it][A

Fold 2 best MSE = 0.2050



[A                                                 
Folds:  33%|███▎      | 2/6 [01:08<00:42, 10.57s/it][A
Folds:  50%|█████     | 3/6 [01:08<01:17, 25.98s/it][A

Fold 3 best MSE = 0.2412



[A                                                 
Folds:  50%|█████     | 3/6 [02:10<01:17, 25.98s/it][A
Folds:  67%|██████▋   | 4/6 [02:10<01:18, 39.24s/it][A

Fold 4 best MSE = 0.9412



[A                                                 
Folds:  67%|██████▋   | 4/6 [03:58<01:18, 39.24s/it][A
Folds:  83%|████████▎ | 5/6 [03:58<01:03, 63.19s/it][A

Fold 5 best MSE = 0.0100



[A                                                 
Folds:  83%|████████▎ | 5/6 [06:05<01:03, 63.19s/it][A
Folds: 100%|██████████| 6/6 [06:05<00:00, 84.36s/it][A
                                                    [A

Fold 6 best MSE = 0.3892
[I 2025-05-13 12:32:16,485] Trial 7 finished with value: 0.35732873249799013 and parameters: {'hidden_dim': 156, 'num_layers': 3, 'dropout': 0.4241144063085703, 'learning_rate': 0.001732053535845956, 'batch_size': 32, 'epochs': 44}. Best is trial 3 with value: 0.31512034544721246.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:15<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:15<00:30,  7.64s/it][A

Fold 2 best MSE = 0.2295



[A                                                 
Folds:  33%|███▎      | 2/6 [00:42<00:30,  7.64s/it][A
Folds:  50%|█████     | 3/6 [00:42<00:47, 15.77s/it][A

Fold 3 best MSE = 0.2591



                                                    [A

[I 2025-05-13 12:32:59,818] Trial 8 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 12:33:00,161] Trial 9 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:09<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:09<00:19,  4.85s/it][A

Fold 2 best MSE = 0.2009



[A                                                 
Folds:  33%|███▎      | 2/6 [00:20<00:19,  4.85s/it][A
Folds:  50%|█████     | 3/6 [00:20<00:22,  7.51s/it][A

Fold 3 best MSE = 0.2349



                                                    [A

[I 2025-05-13 12:33:21,733] Trial 10 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:14<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:14<00:28,  7.15s/it][A

Fold 2 best MSE = 0.2182



[A                                                 
Folds:  33%|███▎      | 2/6 [00:42<00:28,  7.15s/it][A
Folds:  50%|█████     | 3/6 [00:42<00:47, 15.93s/it][A

Fold 3 best MSE = 0.2413



[A                                                 
Folds:  50%|█████     | 3/6 [01:20<00:47, 15.93s/it][A
Folds:  67%|██████▋   | 4/6 [01:20<00:48, 24.29s/it][A

Fold 4 best MSE = 0.9330



[A                                                 
Folds:  67%|██████▋   | 4/6 [02:13<00:48, 24.29s/it][A
Folds:  83%|████████▎ | 5/6 [02:13<00:34, 34.04s/it][A

Fold 5 best MSE = 0.0053



[A                                                 
Folds:  83%|████████▎ | 5/6 [03:20<00:34, 34.04s/it][A
Folds: 100%|██████████| 6/6 [03:20<00:00, 45.00s/it][A
                                                    [A

Fold 6 best MSE = 0.2148
[I 2025-05-13 12:36:42,020] Trial 11 finished with value: 0.322499908041209 and parameters: {'hidden_dim': 190, 'num_layers': 2, 'dropout': 0.5852524085771199, 'learning_rate': 0.0049440434786950415, 'batch_size': 64, 'epochs': 56}. Best is trial 3 with value: 0.31512034544721246.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:14<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:14<00:28,  7.15s/it][A

Fold 2 best MSE = 0.2094



[A                                                 
Folds:  33%|███▎      | 2/6 [00:31<00:28,  7.15s/it][A
Folds:  50%|█████     | 3/6 [00:31<00:34, 11.45s/it][A

Fold 3 best MSE = 0.2539



                                                    [A

[I 2025-05-13 12:37:34,080] Trial 12 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:13<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:13<00:26,  6.54s/it][A

Fold 2 best MSE = 0.1947



[A                                                 
Folds:  33%|███▎      | 2/6 [00:29<00:26,  6.54s/it][A
Folds:  50%|█████     | 3/6 [00:29<00:31, 10.56s/it][A

Fold 3 best MSE = 0.2453



                                                    [A

[I 2025-05-13 12:38:04,442] Trial 13 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 12:38:04,933] Trial 14 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:15<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:15<00:31,  7.88s/it][A

Fold 2 best MSE = 0.2086



[A                                                 
Folds:  33%|███▎      | 2/6 [00:38<00:31,  7.88s/it][A
Folds:  50%|█████     | 3/6 [00:38<00:42, 14.25s/it][A

Fold 3 best MSE = 0.2508



[A                                                 
Folds:  50%|█████     | 3/6 [01:26<00:42, 14.25s/it][A
Folds:  67%|██████▋   | 4/6 [01:26<00:53, 26.71s/it][A

Fold 4 best MSE = 0.8890



[A                                                 
Folds:  67%|██████▋   | 4/6 [02:24<00:53, 26.71s/it][A
Folds:  83%|████████▎ | 5/6 [02:24<00:37, 37.69s/it][A

Fold 5 best MSE = 0.0063



[A                                                 
Folds:  83%|████████▎ | 5/6 [03:18<00:37, 37.69s/it][A
Folds: 100%|██████████| 6/6 [03:18<00:00, 42.90s/it][A
                                                    [A

Fold 6 best MSE = 0.3597
[I 2025-05-13 12:41:23,238] Trial 15 finished with value: 0.34289634646847844 and parameters: {'hidden_dim': 141, 'num_layers': 3, 'dropout': 0.015329696886325794, 'learning_rate': 0.004922625520664036, 'batch_size': 64, 'epochs': 55}. Best is trial 3 with value: 0.31512034544721246.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:04<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:04<00:09,  2.41s/it][A

Fold 2 best MSE = 0.2110



[A                                                 
Folds:  33%|███▎      | 2/6 [00:12<00:09,  2.41s/it][A
Folds:  50%|█████     | 3/6 [00:12<00:13,  4.45s/it][A

Fold 3 best MSE = 0.2280



[A                                                 
Folds:  50%|█████     | 3/6 [00:23<00:13,  4.45s/it][A
Folds:  67%|██████▋   | 4/6 [00:23<00:13,  6.99s/it][A

Fold 4 best MSE = 1.0274



[A                                                 
Folds:  67%|██████▋   | 4/6 [00:34<00:13,  6.99s/it][A
Folds:  83%|████████▎ | 5/6 [00:34<00:08,  8.39s/it][A

Fold 5 best MSE = 0.0139



[A                                                 
Folds:  83%|████████▎ | 5/6 [00:55<00:08,  8.39s/it][A
Folds: 100%|██████████| 6/6 [00:55<00:00, 12.66s/it][A
                                                    [A

Fold 6 best MSE = 0.2492
[I 2025-05-13 12:42:18,938] Trial 16 finished with value: 0.3459096210077405 and parameters: {'hidden_dim': 117, 'num_layers': 2, 'dropout': 0.38311975635854834, 'learning_rate': 0.002793259096669491, 'batch_size': 128, 'epochs': 64}. Best is trial 3 with value: 0.31512034544721246.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:04<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:04<00:09,  2.30s/it][A

Fold 2 best MSE = 0.2100



[A                                                 
Folds:  33%|███▎      | 2/6 [00:16<00:09,  2.30s/it][A
Folds:  50%|█████     | 3/6 [00:16<00:18,  6.31s/it][A

Fold 3 best MSE = 0.2477



[A                                                 
Folds:  50%|█████     | 3/6 [00:26<00:18,  6.31s/it][A
Folds:  67%|██████▋   | 4/6 [00:26<00:15,  7.61s/it][A

Fold 4 best MSE = 1.0108



[A                                                 
Folds:  67%|██████▋   | 4/6 [00:50<00:15,  7.61s/it][A
Folds:  83%|████████▎ | 5/6 [00:50<00:13, 13.31s/it][A

Fold 5 best MSE = 0.0071



[A                                                 
Folds:  83%|████████▎ | 5/6 [01:05<00:13, 13.31s/it][A
Folds: 100%|██████████| 6/6 [01:05<00:00, 13.83s/it][A
                                                    [A

Fold 6 best MSE = 0.2889
[I 2025-05-13 12:43:24,199] Trial 17 finished with value: 0.3529004011303186 and parameters: {'hidden_dim': 75, 'num_layers': 3, 'dropout': 0.5492336280142828, 'learning_rate': 0.0005285228085105334, 'batch_size': 128, 'epochs': 72}. Best is trial 3 with value: 0.31512034544721246.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:08<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:08<00:16,  4.11s/it][A

Fold 2 best MSE = 0.2251



                                                    [A

[I 2025-05-13 12:43:45,674] Trial 18 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 12:43:49,468] Trial 19 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:21<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:21<00:42, 10.71s/it][A

Fold 2 best MSE = 0.2070



[A                                                 
Folds:  33%|███▎      | 2/6 [01:02<00:42, 10.71s/it][A
Folds:  50%|█████     | 3/6 [01:02<01:10, 23.46s/it][A

Fold 3 best MSE = 0.2334



[A                                                 
Folds:  50%|█████     | 3/6 [01:40<01:10, 23.46s/it][A
Folds:  67%|██████▋   | 4/6 [01:40<00:57, 28.80s/it][A

Fold 4 best MSE = 0.9533



                                                    [A

[I 2025-05-13 12:45:32,578] Trial 20 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 12:45:33,082] Trial 21 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 12:45:33,496] Trial 22 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:11<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:11<00:22,  5.54s/it][A

Fold 2 best MSE = 0.2193



[A                                                 
Folds:  33%|███▎      | 2/6 [00:25<00:22,  5.54s/it][A
Folds:  50%|█████     | 3/6 [00:25<00:27,  9.30s/it][A

Fold 3 best MSE = 0.2404



                                                    [A

[I 2025-05-13 12:46:17,643] Trial 23 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 12:46:18,106] Trial 24 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 12:46:18,686] Trial 25 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 12:46:24,955] Trial 26 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 12:46:32,431] Trial 27 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 12:46:42,253] Trial 28 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:23<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:23<00:46, 11.54s/it][A

Fold 2 best MSE = 0.2197



[A                                                 
Folds:  33%|███▎      | 2/6 [00:55<00:46, 11.54s/it][A
Folds:  50%|█████     | 3/6 [00:55<01:00, 20.18s/it][A

Fold 3 best MSE = 0.2369



[A                                                 
Folds:  50%|█████     | 3/6 [01:51<01:00, 20.18s/it][A
Folds:  67%|██████▋   | 4/6 [01:51<01:06, 33.44s/it][A

Fold 4 best MSE = 0.8828



[A                                                 
Folds:  67%|██████▋   | 4/6 [02:42<01:06, 33.44s/it][A
Folds:  83%|████████▎ | 5/6 [02:42<00:39, 39.65s/it][A

Fold 5 best MSE = 0.0042



[A                                                 
Folds:  83%|████████▎ | 5/6 [03:44<00:39, 39.65s/it][A
Folds: 100%|██████████| 6/6 [03:44<00:00, 47.16s/it][A
                                                    [A

Fold 6 best MSE = 0.3362
[I 2025-05-13 12:50:26,987] Trial 29 finished with value: 0.3359738756902516 and parameters: {'hidden_dim': 163, 'num_layers': 3, 'dropout': 0.4594227864328111, 'learning_rate': 0.0015184426823866438, 'batch_size': 128, 'epochs': 71}. Best is trial 3 with value: 0.31512034544721246.
=== Best Trial ===
MSE   : 0.315120
Params: {'hidden_dim': 129, 'num_layers': 3, 'dropout': 0.5793792198447356, 'learning_rate': 0.0023628864184236428, 'batch_size': 128, 'epochs': 58}
Total run time: 2240.0 s


In [10]:

if __name__ == "__main__":
    BEST_PARAMS = {
        'hidden_dim': 130,
        'num_layers': 2,
        'dropout': 0.21638671447680213,
        'learning_rate': 0.0025299948542400023,
        'batch_size': 32,
        'epochs': 63
    }

    FORECAST_HORIZON = 21
    SEQUENCE_LENGTH = 1323

    
    print("[INFO] Running final model evaluation on test set")

    X = pd.read_csv("X_df_filtered_shap.csv", index_col=0, parse_dates=True)
    Y = pd.read_csv("Y_df_change_21.csv", index_col=0, parse_dates=True)

    TEST_SIZE = 756             # 3-year hold-out
    seq_buffer = SEQUENCE_LENGTH + FORECAST_HORIZON - 1
    
    X_train = X.iloc[:-TEST_SIZE]
    Y_train = Y.iloc[:-TEST_SIZE]
    
    X_test_start = -TEST_SIZE - seq_buffer   # keep enough context for sequences
    X_test = X.iloc[X_test_start:]
    Y_test = Y.iloc[-TEST_SIZE:]

    sc = StandardScaler()
    X_train_std = pd.DataFrame(sc.fit_transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test_std  = pd.DataFrame(sc.transform(X_test),     index=X_test.index,  columns=X_test.columns)

    # ---------------------- Data Leakage Check ---------------------- #
    # Recreate index map to find actual target timestamps
    idx_map = {ts: i for i, ts in enumerate(X_train_std.index)}
    train_target_dates = []
    
    for t in Y_train.index:
        target_idx = idx_map.get(t)
        if target_idx is None:
            continue
        x_end = target_idx - FORECAST_HORIZON + 1
        x_start = x_end - SEQUENCE_LENGTH
        if x_start >= 0 and x_end <= len(X_train_std):
            train_target_dates.append(t)

    
    idx_map_test = {ts: i for i, ts in enumerate(X_test_std.index)}
    test_target_dates = []
    
    for t in Y_test.index:
        target_idx = idx_map_test.get(t)
        if target_idx is None:
            continue
        x_end = target_idx - FORECAST_HORIZON + 1
        x_start = x_end - SEQUENCE_LENGTH
        if x_start >= 0 and x_end <= len(X_test_std):
            test_target_dates.append(t)

    
    # Check for overlap
    overlap = set(train_target_dates).intersection(test_target_dates)
    if overlap:
        print(f"[LEAK WARNING] {len(overlap)} overlapping target timestamps between train and test!")
        print(f"[LEAK WARNING] Example overlapping dates: {list(overlap)[:5]}")
    else:
        print("[LEAK CHECK] ✅ No overlap between training and testing targets — safe to proceed.")


    X_tr_seq, Y_tr_seq = gen_seq(X_train_std, Y_train, SEQUENCE_LENGTH, FORECAST_HORIZON)
    X_te_seq, Y_te_seq = gen_seq(X_test_std,  Y_test,  SEQUENCE_LENGTH, FORECAST_HORIZON)

    if len(X_te_seq) == 0 or len(Y_te_seq) == 0:
        print("[ERROR] No valid test sequences generated. Check alignment or sequence length.")
        sys.exit(1)
    else:
        print("[DEBUG] It's working")

    model = LSTMRegressor(
        in_dim=X_tr_seq.shape[2],
        hid=BEST_PARAMS['hidden_dim'],
        layers=BEST_PARAMS['num_layers'],
        out_dim=Y_tr_seq.shape[1],
        drop=BEST_PARAMS['dropout']
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=BEST_PARAMS['learning_rate'])
    scaler = amp.GradScaler()

    train_loader = DataLoader(TensorDataset(torch.tensor(X_tr_seq), torch.tensor(Y_tr_seq)),
                              batch_size=BEST_PARAMS['batch_size'], shuffle=True, pin_memory=True)

    model.train()
    for epoch in range(BEST_PARAMS['epochs']):
        for xb, yb in train_loader:
            xb, yb = xb.to(device, non_blocking=True), yb.to(device, non_blocking=True)
            optimizer.zero_grad(set_to_none=True)
            with amp.autocast(device_type="cuda"):
                loss = nn.functional.mse_loss(model(xb), yb)
            scaler.scale(loss).backward(); scaler.step(optimizer); scaler.update()

    model.eval(); preds, gts = [], []
    test_loader = DataLoader(TensorDataset(torch.tensor(X_te_seq), torch.tensor(Y_te_seq)),
                             batch_size=BEST_PARAMS['batch_size'], pin_memory=True)

    with torch.no_grad(), amp.autocast(device_type='cuda'):
        for xb, yb in test_loader:
            xb = xb.to(device, non_blocking=True)
            preds.append(model(xb).cpu())
            gts.append(yb)

    if len(preds) == 0 or len(gts) == 0:
        print("[ERROR] No predictions generated. Check test data preprocessing.")
        sys.exit(1)

    y_true = torch.cat(gts).numpy()
    y_pred = torch.cat(preds).numpy()
    mse = mean_squared_error(y_true, y_pred)
    print(f"\n[RESULT] Final Test Set MSE: {mse:.6f}")

[INFO] Running final model evaluation on test set
[LEAK CHECK] ✅ No overlap between training and testing targets — safe to proceed.
[DEBUG] It's working

[RESULT] Final Test Set MSE: 0.163979


In [11]:
# ---------------------- Save Multi-Output Results ---------------------- #
maturity_labels = [f"m{i+1}" for i in range(y_true.shape[1])]  # e.g., m1, m2, ..., m6

# Create column-wise dict
results_dict = {
    "date": Y_test.index[-len(y_true):]  # ensure alignment
}

# Add true and predicted values for each maturity
for i, label in enumerate(maturity_labels):
    results_dict[f"{label}_true"] = y_true[:, i]
    results_dict[f"{label}_pred"] = y_pred[:, i]

# Convert to DataFrame
results_df = pd.DataFrame(results_dict).set_index("date")

# Save
results_df.to_csv(f"final_test_predictions_multioutput_H{FORECAST_HORIZON}.csv")
print("[INFO] Multi-output predictions saved to 'final_test_predictions_multioutput.csv'")

[INFO] Multi-output predictions saved to 'final_test_predictions_multioutput.csv'
