In [1]:
pip install numpy pandas tqdm torch scikit-learn optuna

Collecting pandas
  Downloading pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib>=1.2.0 (from scikit-learn)
  Downloading jobl

In [2]:
# ==============================================================
#  LSTM Regression on Yield‑Curve Δ  |  Optuna (50 trials, h=1)
#  • Original loop‑based sequence logic
#  • Duplicate‑step warning fixed (unique global_step)
#  • Clean output: only final fold MSE shown
# ==============================================================

# ---------------------- Imports ---------------------- #
import os, sys, gc, time, random
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch import amp
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler

# ---------------------- Reproducibility ---------------------- #
RNG_SEED = 42
random.seed(RNG_SEED); np.random.seed(RNG_SEED); torch.manual_seed(RNG_SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(RNG_SEED)

# ---------------------- Device & CuDNN ---------------------- #
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"[INFO] Device: {device}")
if device.type == "cuda":
    print(f"  • GPU: {torch.cuda.get_device_name(0)}")
    cudnn.benchmark = True

# ---------------------- Config ---------------------- #
FORECAST_HORIZON = 5
TRIALS           = 30
EARLY_STOP       = 20
val_window_num_sequences = 504
holdout_base            = 756
seq_len_map             = {5: 1197}

HSPACE = {
    "hidden_dim"   : (32, 192),
    "num_layers"   : [1, 2, 3],
    "dropout"      : (0.0, 0.6),
    "learning_rate": (1e-4, 5e-3),
    "batch_size"   : [32, 64, 128],
    "epochs"       : (40, 80),
}

# ---------------------- Model ---------------------- #
class LSTMRegressor(nn.Module):
    def __init__(self, in_dim, hid, layers, out_dim, drop=0.0):
        super().__init__()
        self.lstm = nn.LSTM(in_dim, hid, layers, batch_first=True,
                            dropout=(drop if layers > 1 else 0.0))
        self.drop = nn.Dropout(drop)
        self.norm = nn.LayerNorm(hid)
        self.fc   = nn.Linear(hid, out_dim, bias=False)
    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        return self.fc(self.norm(self.drop(hn[-1])))

# ---------------------- Data Utilities ---------------------- #
def gen_seq(X_df, Y_fold, seq_len, h):
    X_arr = X_df.values.astype(np.float32)
    Y_arr = Y_fold.reindex(X_df.index).values.astype(np.float32)
    idx   = {ts: i for i, ts in enumerate(X_df.index)}
    X_seq, Y_seq = [], []
    for t in Y_fold.index:
        ti = idx.get(t)
        if ti is None:
            continue
        end = ti - h + 1; start = end - seq_len
        if start < 0 or end > len(X_arr):
            continue
        win = X_arr[start:end]
        if win.shape[0] != seq_len or np.isnan(win).any() or not np.isfinite(Y_arr[ti]).all():
            continue
        X_seq.append(win); Y_seq.append(Y_arr[ti])
    return np.asarray(X_seq, np.float32), np.asarray(Y_seq, np.float32)


def std_fold(Xtr, Xva):
    sc = StandardScaler()
    return (
        pd.DataFrame(sc.fit_transform(Xtr), index=Xtr.index, columns=Xtr.columns),
        pd.DataFrame(sc.transform(Xva),     index=Xva.index, columns=Xva.columns),
    )

# ---------------------- CV ---------------------- #
def expanding_folds(X, Y, h):
    seq_len = seq_len_map[h]; total = len(X); min_train = seq_len + h
    folds, i = [], min_train
    while i + val_window_num_sequences + holdout_base <= total:
        vs, ve = i, i + val_window_num_sequences
        folds.append({
            "X_tr": X.iloc[:i].copy(),
            "Y_tr": Y.iloc[:i].copy(),
            "X_va": X.iloc[vs - seq_len - h + 1: ve - h].copy(),
            "Y_va": Y.iloc[vs:ve].copy(),
            "seq_len": seq_len,
        })
        i += val_window_num_sequences
    return folds

# ---------------------- Optuna Objective ---------------------- #
def objective(trial, folds):
    p = {
        "hid": trial.suggest_int("hidden_dim", *HSPACE["hidden_dim"]),
        "lay": trial.suggest_categorical("num_layers", HSPACE["num_layers"]),
        "drp": trial.suggest_float("dropout", *HSPACE["dropout"]),
        "lr" : trial.suggest_float("learning_rate", *HSPACE["learning_rate"], log=True),
        "bs" : trial.suggest_categorical("batch_size", HSPACE["batch_size"]),
        "ep" : trial.suggest_int("epochs", *HSPACE["epochs"]),
    }
    scaler = amp.GradScaler(); mse_fold = []

    for f_idx, f in enumerate(tqdm(folds, desc="Folds", leave=False)):
        Xtr_s, Xva_s = std_fold(f["X_tr"], f["X_va"])
        Xtr, Ytr = gen_seq(Xtr_s, f["Y_tr"], f["seq_len"], FORECAST_HORIZON)
        Xva, Yva = gen_seq(Xva_s, f["Y_va"], f["seq_len"], FORECAST_HORIZON)
        if len(Xtr)==0 or len(Xva)==0:
            continue

        model = LSTMRegressor(Xtr.shape[2], p["hid"], p["lay"], Ytr.shape[1], p["drp"]).to(device)
        opt   = torch.optim.Adam(model.parameters(), lr=p["lr"])
        best, pat = np.inf, 0; report_every = max(1, p["ep"]//3)

        tr_loader = DataLoader(TensorDataset(torch.tensor(Xtr), torch.tensor(Ytr)), batch_size=p["bs"], shuffle=True, pin_memory=True)
        va_loader = DataLoader(TensorDataset(torch.tensor(Xva), torch.tensor(Yva)), batch_size=p["bs"], pin_memory=True)

        for epoch in range(p["ep"]):
            model.train()
            for xb, yb in tr_loader:
                xb, yb = xb.to(device, non_blocking=True), yb.to(device, non_blocking=True)
                opt.zero_grad(set_to_none=True)
                with amp.autocast(device_type='cuda'):
                    loss = nn.functional.mse_loss(model(xb), yb)
                scaler.scale(loss).backward(); scaler.step(opt); scaler.update()

            model.eval(); preds, gts = [], []
            with torch.no_grad(), amp.autocast(device_type='cuda'):
                for xb, yb in va_loader:
                    preds.append(model(xb.to(device, non_blocking=True)).cpu()); gts.append(yb)
            mse = mean_squared_error(torch.cat(gts).numpy(), torch.cat(preds).numpy())

            global_step = f_idx * p["ep"] + epoch
            if epoch % report_every == 0:
                trial.report(mse, global_step)
                if trial.should_prune():
                    raise optuna.TrialPruned()

            if mse + 1e-6 < best:
                best, pat = mse, 0
            else:
                pat += 1
                if pat >= EARLY_STOP:
                    break
        tqdm.write(f"Fold {f_idx+1} best MSE = {best:.4f}")
        mse_fold.append(best)
    return np.mean(mse_fold) if mse_fold else np.inf

# ---------------------- Main ---------------------- #
if __name__ == "__main__":
    X = pd.read_csv("X_df_filtered_shap.csv", index_col=0, parse_dates=True)
    Y = pd.read_csv(f"Y_df_change_{FORECAST_HORIZON}.csv", index_col=0, parse_dates=True)
    folds = expanding_folds(X, Y, FORECAST_HORIZON)
    print(f"Generated {len(folds)} folds\n")    

    study = optuna.create_study(
        direction="minimize",
        sampler=TPESampler(seed=RNG_SEED),
        pruner=MedianPruner(8, 15)
    )

    t0 = time.time()
    study.optimize(
        lambda tr: objective(tr, folds),
        n_trials=TRIALS,
        n_jobs=1,
        show_progress_bar=True
    )
    dur = time.time() - t0

    print("=== Best Trial ===")
    print(f"MSE   : {study.best_value:.6f}")
    print(f"Params: {study.best_trial.params}")
    print(f"Total run time: {dur:.1f} s")

[I 2025-05-13 12:11:18,180] A new study created in memory with name: no-name-6a246196-0a4f-476b-b798-ea868760bb3a


[INFO] Device: cuda
  • GPU: NVIDIA H100 80GB HBM3
Generated 6 folds



  0%|          | 0/30 [00:00<?, ?it/s]


Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:33<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:33<01:06, 16.58s/it][A

Fold 2 best MSE = 0.2064



[A                                                 
Folds:  33%|███▎      | 2/6 [01:33<01:06, 16.58s/it][A
Folds:  50%|█████     | 3/6 [01:33<01:44, 34.89s/it][A

Fold 3 best MSE = 0.0719



[A                                                 
Folds:  50%|█████     | 3/6 [02:47<01:44, 34.89s/it][A
Folds:  67%|██████▋   | 4/6 [02:47<01:38, 49.41s/it][A

Fold 4 best MSE = 0.1525



[A                                                 
Folds:  67%|██████▋   | 4/6 [04:31<01:38, 49.41s/it][A
Folds:  83%|████████▎ | 5/6 [04:31<01:08, 68.42s/it][A

Fold 5 best MSE = 0.0592



[A                                                 
Folds:  83%|████████▎ | 5/6 [06:11<01:08, 68.42s/it][A
Folds: 100%|██████████| 6/6 [06:11<00:00, 78.74s/it][A
                                                    [A

Fold 6 best MSE = 0.0288
[I 2025-05-13 12:17:29,324] Trial 0 finished with value: 0.10377488769590855 and parameters: {'hidden_dim': 92, 'num_layers': 1, 'dropout': 0.0936111842654619, 'learning_rate': 0.00018408992080552527, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.10377488769590855.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:18<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:18<00:36,  9.22s/it][A

Fold 2 best MSE = 0.1621



[A                                                 
Folds:  33%|███▎      | 2/6 [01:00<00:36,  9.22s/it][A
Folds:  50%|█████     | 3/6 [01:00<01:08, 22.84s/it][A

Fold 3 best MSE = 0.0665



[A                                                 
Folds:  50%|█████     | 3/6 [02:04<01:08, 22.84s/it][A
Folds:  67%|██████▋   | 4/6 [02:04<01:16, 38.23s/it][A

Fold 4 best MSE = 0.1991



[A                                                 
Folds:  67%|██████▋   | 4/6 [03:27<01:16, 38.23s/it][A
Folds:  83%|████████▎ | 5/6 [03:27<00:53, 53.83s/it][A

Fold 5 best MSE = 0.0188



[A                                                 
Folds:  83%|████████▎ | 5/6 [04:57<00:53, 53.83s/it][A
Folds: 100%|██████████| 6/6 [04:57<00:00, 65.81s/it][A
                                                    [A

Fold 6 best MSE = 0.0486
[I 2025-05-13 12:22:26,898] Trial 1 finished with value: 0.09900284893810748 and parameters: {'hidden_dim': 35, 'num_layers': 1, 'dropout': 0.10909498032426036, 'learning_rate': 0.0002049268011541737, 'batch_size': 64, 'epochs': 51}. Best is trial 1 with value: 0.09900284893810748.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:15<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:15<00:30,  7.59s/it][A

Fold 2 best MSE = 0.0963



[A                                                 
Folds:  33%|███▎      | 2/6 [00:39<00:30,  7.59s/it][A
Folds:  50%|█████     | 3/6 [00:39<00:44, 14.76s/it][A

Fold 3 best MSE = 0.0482



[A                                                 
Folds:  50%|█████     | 3/6 [01:15<00:44, 14.76s/it][A
Folds:  67%|██████▋   | 4/6 [01:15<00:44, 22.35s/it][A

Fold 4 best MSE = 0.1299



[A                                                 
Folds:  67%|██████▋   | 4/6 [02:06<00:44, 22.35s/it][A
Folds:  83%|████████▎ | 5/6 [02:06<00:32, 32.37s/it][A

Fold 5 best MSE = 0.0025



[A                                                 
Folds:  83%|████████▎ | 5/6 [02:58<00:32, 32.37s/it][A
Folds: 100%|██████████| 6/6 [02:58<00:00, 39.08s/it][A
                                                    [A

Fold 6 best MSE = 0.0032
[I 2025-05-13 12:25:25,810] Trial 2 finished with value: 0.056020981958135964 and parameters: {'hidden_dim': 130, 'num_layers': 3, 'dropout': 0.27364199053022153, 'learning_rate': 0.0021576967455896826, 'batch_size': 128, 'epochs': 41}. Best is trial 2 with value: 0.056020981958135964.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:23<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:23<00:46, 11.59s/it][A

Fold 2 best MSE = 0.0965



[A                                                 
Folds:  33%|███▎      | 2/6 [00:55<00:46, 11.59s/it][A
Folds:  50%|█████     | 3/6 [00:55<01:00, 20.18s/it][A

Fold 3 best MSE = 0.0475



[A                                                 
Folds:  50%|█████     | 3/6 [01:33<01:00, 20.18s/it][A
Folds:  67%|██████▋   | 4/6 [01:33<00:53, 26.79s/it][A

Fold 4 best MSE = 0.1291



[A                                                 
Folds:  67%|██████▋   | 4/6 [02:41<00:53, 26.79s/it][A
Folds:  83%|████████▎ | 5/6 [02:41<00:41, 41.37s/it][A

Fold 5 best MSE = 0.0019



[A                                                 
Folds:  83%|████████▎ | 5/6 [03:25<00:41, 41.37s/it][A
Folds: 100%|██████████| 6/6 [03:25<00:00, 42.06s/it][A
                                                    [A

Fold 6 best MSE = 0.0048
[I 2025-05-13 12:28:51,327] Trial 3 finished with value: 0.05595783882308751 and parameters: {'hidden_dim': 129, 'num_layers': 3, 'dropout': 0.5793792198447356, 'learning_rate': 0.0023628864184236428, 'batch_size': 128, 'epochs': 58}. Best is trial 3 with value: 0.05595783882308751.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:18<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:18<00:37,  9.46s/it][A

Fold 2 best MSE = 0.0954



[A                                                 
Folds:  33%|███▎      | 2/6 [00:39<00:37,  9.46s/it][A
Folds:  50%|█████     | 3/6 [00:39<00:41, 13.90s/it][A

Fold 3 best MSE = 0.0515



[A                                                 
Folds:  50%|█████     | 3/6 [01:05<00:41, 13.90s/it][A
Folds:  67%|██████▋   | 4/6 [01:05<00:37, 18.56s/it][A

Fold 4 best MSE = 0.1387



[A                                                 
Folds:  67%|██████▋   | 4/6 [01:53<00:37, 18.56s/it][A
Folds:  83%|████████▎ | 5/6 [01:53<00:28, 28.83s/it][A

Fold 5 best MSE = 0.0026



[A                                                 
Folds:  83%|████████▎ | 5/6 [02:35<00:28, 28.83s/it][A
Folds: 100%|██████████| 6/6 [02:35<00:00, 33.30s/it][A
                                                    [A

Fold 6 best MSE = 0.0102
[I 2025-05-13 12:31:27,231] Trial 4 finished with value: 0.05966897546313703 and parameters: {'hidden_dim': 51, 'num_layers': 3, 'dropout': 0.15526798896001015, 'learning_rate': 0.0013353819088790589, 'batch_size': 128, 'epochs': 47}. Best is trial 3 with value: 0.05595783882308751.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:25<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:25<00:50, 12.62s/it][A

Fold 2 best MSE = 0.0974



[A                                                 
Folds:  33%|███▎      | 2/6 [01:29<00:50, 12.62s/it][A
Folds:  50%|█████     | 3/6 [01:29<01:42, 34.03s/it][A

Fold 3 best MSE = 0.0480



[A                                                 
Folds:  50%|█████     | 3/6 [02:12<01:42, 34.03s/it][A
Folds:  67%|██████▋   | 4/6 [02:12<01:14, 37.38s/it][A

Fold 4 best MSE = 0.1321



[A                                                 
Folds:  67%|██████▋   | 4/6 [03:21<01:14, 37.38s/it][A
Folds:  83%|████████▎ | 5/6 [03:21<00:48, 48.61s/it][A

Fold 5 best MSE = 0.0025



[A                                                 
Folds:  83%|████████▎ | 5/6 [04:38<00:48, 48.61s/it][A
Folds: 100%|██████████| 6/6 [04:38<00:00, 57.90s/it][A
                                                    [A

Fold 6 best MSE = 0.0062
[I 2025-05-13 12:36:05,809] Trial 5 finished with value: 0.05725272037088871 and parameters: {'hidden_dim': 188, 'num_layers': 2, 'dropout': 0.3587399872866511, 'learning_rate': 0.0036832964384234204, 'batch_size': 64, 'epochs': 53}. Best is trial 3 with value: 0.05595783882308751.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:18<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:18<00:36,  9.07s/it][A

Fold 2 best MSE = 0.1005



[A                                                 
Folds:  33%|███▎      | 2/6 [00:40<00:36,  9.07s/it][A
Folds:  50%|█████     | 3/6 [00:40<00:44, 14.80s/it][A

Fold 3 best MSE = 0.0525



[A                                                 
Folds:  50%|█████     | 3/6 [01:12<00:44, 14.80s/it][A
Folds:  67%|██████▋   | 4/6 [01:12<00:42, 21.03s/it][A

Fold 4 best MSE = 0.1318



[A                                                 
Folds:  67%|██████▋   | 4/6 [01:59<00:42, 21.03s/it][A
Folds:  83%|████████▎ | 5/6 [01:59<00:30, 30.09s/it][A

Fold 5 best MSE = 0.0036



[A                                                 
Folds:  83%|████████▎ | 5/6 [02:52<00:30, 30.09s/it][A
Folds: 100%|██████████| 6/6 [02:52<00:00, 37.66s/it][A
                                                    [A

Fold 6 best MSE = 0.0225
[I 2025-05-13 12:38:58,374] Trial 6 finished with value: 0.062168549234047535 and parameters: {'hidden_dim': 94, 'num_layers': 2, 'dropout': 0.16856070581242846, 'learning_rate': 0.0008356499023325525, 'batch_size': 64, 'epochs': 80}. Best is trial 3 with value: 0.05595783882308751.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:46<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:46<01:32, 23.20s/it][A

Fold 2 best MSE = 0.0933



[A                                                 
Folds:  33%|███▎      | 2/6 [01:50<01:32, 23.20s/it][A
Folds:  50%|█████     | 3/6 [01:50<02:00, 40.32s/it][A

Fold 3 best MSE = 0.0478



[A                                                 
Folds:  50%|█████     | 3/6 [04:07<02:00, 40.32s/it][A
Folds:  67%|██████▋   | 4/6 [04:07<02:32, 76.44s/it][A

Fold 4 best MSE = 0.1318



[A                                                 
Folds:  67%|██████▋   | 4/6 [06:08<02:32, 76.44s/it][A
Folds:  83%|████████▎ | 5/6 [06:08<01:31, 91.76s/it][A

Fold 5 best MSE = 0.0020



[A                                                 
Folds:  83%|████████▎ | 5/6 [08:24<01:31, 91.76s/it][A
Folds: 100%|██████████| 6/6 [08:24<00:00, 106.37s/it][A
                                                     [A

Fold 6 best MSE = 0.0042
[I 2025-05-13 12:47:22,523] Trial 7 finished with value: 0.05582235571928322 and parameters: {'hidden_dim': 156, 'num_layers': 3, 'dropout': 0.4241144063085703, 'learning_rate': 0.001732053535845956, 'batch_size': 32, 'epochs': 44}. Best is trial 7 with value: 0.05582235571928322.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:20<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:20<00:40, 10.18s/it][A

Fold 2 best MSE = 0.1501



[A                                                 
Folds:  33%|███▎      | 2/6 [01:05<00:40, 10.18s/it][A
Folds:  50%|█████     | 3/6 [01:05<01:14, 24.94s/it][A

Fold 3 best MSE = 0.0563



                                                    [A

[I 2025-05-13 12:48:29,785] Trial 8 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 12:48:30,462] Trial 9 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [01:10<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [01:10<02:21, 35.28s/it][A

Fold 2 best MSE = 0.0962



[A                                                 
Folds:  33%|███▎      | 2/6 [02:52<02:21, 35.28s/it][A
Folds:  50%|█████     | 3/6 [02:52<03:09, 63.13s/it][A

Fold 3 best MSE = 0.0497



                                                    [A

[I 2025-05-13 12:55:24,151] Trial 10 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [01:00<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [01:00<02:00, 30.20s/it][A

Fold 2 best MSE = 0.0922



[A                                                 
Folds:  33%|███▎      | 2/6 [02:06<02:00, 30.20s/it][A
Folds:  50%|█████     | 3/6 [02:06<02:15, 45.25s/it][A

Fold 3 best MSE = 0.0482



                                                    [A

[I 2025-05-13 12:57:34,594] Trial 11 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 12:57:43,453] Trial 12 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:40<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:40<01:21, 20.30s/it][A

Fold 2 best MSE = 0.0944



                                                    [A

[I 2025-05-13 12:59:17,653] Trial 13 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 12:59:18,216] Trial 14 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:52<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:52<01:45, 26.38s/it][A

Fold 2 best MSE = 0.0920



[A                                                 
Folds:  33%|███▎      | 2/6 [02:21<01:45, 26.38s/it][A
Folds:  50%|█████     | 3/6 [02:21<02:37, 52.34s/it][A

Fold 3 best MSE = 0.0482



[A                                                 
Folds:  50%|█████     | 3/6 [04:04<02:37, 52.34s/it][A
Folds:  67%|██████▋   | 4/6 [04:04<02:22, 71.12s/it][A

Fold 4 best MSE = 0.1354



[A                                                 
Folds:  67%|██████▋   | 4/6 [06:08<02:22, 71.12s/it][A
Folds:  83%|████████▎ | 5/6 [06:08<01:29, 89.55s/it][A

Fold 5 best MSE = 0.0025



[A                                                 
Folds:  83%|████████▎ | 5/6 [08:32<01:29, 89.55s/it][A
Folds: 100%|██████████| 6/6 [08:32<00:00, 107.86s/it][A
                                                     [A

Fold 6 best MSE = 0.0032
[I 2025-05-13 13:07:51,253] Trial 15 finished with value: 0.05626687780022621 and parameters: {'hidden_dim': 192, 'num_layers': 3, 'dropout': 0.31531717940643383, 'learning_rate': 0.0015684800651357047, 'batch_size': 32, 'epochs': 49}. Best is trial 7 with value: 0.05582235571928322.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 13:07:51,666] Trial 16 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [01:00<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [01:00<02:00, 30.16s/it][A

Fold 2 best MSE = 0.0936



[A                                                 
Folds:  33%|███▎      | 2/6 [02:03<02:00, 30.16s/it][A
Folds:  50%|█████     | 3/6 [02:03<02:12, 44.04s/it][A

Fold 3 best MSE = 0.0461



[A                                                 
Folds:  50%|█████     | 3/6 [03:38<02:12, 44.04s/it][A
Folds:  67%|██████▋   | 4/6 [03:38<02:05, 62.88s/it][A

Fold 4 best MSE = 0.1299



                                                    [A

[I 2025-05-13 13:13:07,692] Trial 17 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:37<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:37<01:15, 18.99s/it][A

Fold 2 best MSE = 0.0935



[A                                                 
Folds:  33%|███▎      | 2/6 [02:08<01:15, 18.99s/it][A
Folds:  50%|█████     | 3/6 [02:08<02:26, 48.70s/it][A

Fold 3 best MSE = 0.0478



[A                                                 
Folds:  50%|█████     | 3/6 [03:56<02:26, 48.70s/it][A
Folds:  67%|██████▋   | 4/6 [03:56<02:21, 70.75s/it][A

Fold 4 best MSE = 0.1326



[A                                                 
Folds:  67%|██████▋   | 4/6 [05:45<02:21, 70.75s/it][A
Folds:  83%|████████▎ | 5/6 [05:45<01:24, 84.19s/it][A

Fold 5 best MSE = 0.0024



[A                                                 
Folds:  83%|████████▎ | 5/6 [08:23<01:24, 84.19s/it][A
Folds: 100%|██████████| 6/6 [08:23<00:00, 108.78s/it][A
                                                     [A

Fold 6 best MSE = 0.0045
[I 2025-05-13 13:21:31,521] Trial 18 finished with value: 0.05616435413248837 and parameters: {'hidden_dim': 173, 'num_layers': 2, 'dropout': 0.4420620035148045, 'learning_rate': 0.0013851174672028994, 'batch_size': 32, 'epochs': 64}. Best is trial 7 with value: 0.05582235571928322.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:29<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:29<00:59, 14.96s/it][A

Fold 2 best MSE = 0.0973



[A                                                 
Folds:  33%|███▎      | 2/6 [01:01<00:59, 14.96s/it][A
Folds:  50%|█████     | 3/6 [01:01<01:06, 22.06s/it][A

Fold 3 best MSE = 0.0695



                                                    [A

[I 2025-05-13 13:23:04,659] Trial 19 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 13:23:26,658] Trial 20 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 13:23:27,064] Trial 21 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:18<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:18<00:36,  9.21s/it][A

Fold 2 best MSE = 0.0944



[A                                                 
Folds:  33%|███▎      | 2/6 [00:48<00:36,  9.21s/it][A
Folds:  50%|█████     | 3/6 [00:48<00:53, 17.85s/it][A

Fold 3 best MSE = 0.0470



[A                                                 
Folds:  50%|█████     | 3/6 [01:35<00:53, 17.85s/it][A
Folds:  67%|██████▋   | 4/6 [01:35<00:57, 28.92s/it][A

Fold 4 best MSE = 0.1322



[A                                                 
Folds:  67%|██████▋   | 4/6 [02:25<00:57, 28.92s/it][A
Folds:  83%|████████▎ | 5/6 [02:25<00:36, 36.22s/it][A

Fold 5 best MSE = 0.0020



[A                                                 
Folds:  83%|████████▎ | 5/6 [03:14<00:36, 36.22s/it][A
Folds: 100%|██████████| 6/6 [03:14<00:00, 40.26s/it][A
                                                    [A

Fold 6 best MSE = 0.0072
[I 2025-05-13 13:26:41,319] Trial 22 finished with value: 0.0565667022485286 and parameters: {'hidden_dim': 149, 'num_layers': 3, 'dropout': 0.2611886493901656, 'learning_rate': 0.0025734875131168955, 'batch_size': 128, 'epochs': 46}. Best is trial 7 with value: 0.05582235571928322.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:13<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:13<00:27,  6.97s/it][A

Fold 2 best MSE = 0.0977



[A                                                 
Folds:  33%|███▎      | 2/6 [00:34<00:27,  6.97s/it][A
Folds:  50%|█████     | 3/6 [00:34<00:37, 12.65s/it][A

Fold 3 best MSE = 0.0481



[A                                                 
Folds:  50%|█████     | 3/6 [01:14<00:37, 12.65s/it][A
Folds:  67%|██████▋   | 4/6 [01:14<00:45, 22.86s/it][A

Fold 4 best MSE = 0.1298



[A                                                 
Folds:  67%|██████▋   | 4/6 [02:03<00:45, 22.86s/it][A
Folds:  83%|████████▎ | 5/6 [02:03<00:31, 31.76s/it][A

Fold 5 best MSE = 0.0024



[A                                                 
Folds:  83%|████████▎ | 5/6 [02:53<00:31, 31.76s/it][A
Folds: 100%|██████████| 6/6 [02:53<00:00, 37.89s/it][A
                                                    [A

Fold 6 best MSE = 0.0036
[I 2025-05-13 13:29:34,639] Trial 23 finished with value: 0.05631810231134295 and parameters: {'hidden_dim': 127, 'num_layers': 3, 'dropout': 0.3865159551546139, 'learning_rate': 0.004878693194817678, 'batch_size': 128, 'epochs': 40}. Best is trial 7 with value: 0.05582235571928322.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 13:29:35,053] Trial 24 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 13:29:35,974] Trial 25 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 13:29:36,360] Trial 26 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 13:29:55,019] Trial 27 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 13:30:03,859] Trial 28 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:46<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:46<01:33, 23.34s/it][A

Fold 2 best MSE = 0.0890



[A                                                 
Folds:  33%|███▎      | 2/6 [01:44<01:33, 23.34s/it][A
Folds:  50%|█████     | 3/6 [01:44<01:52, 37.54s/it][A

Fold 3 best MSE = 0.0488



[A                                                 
Folds:  50%|█████     | 3/6 [02:53<01:52, 37.54s/it][A
Folds:  67%|██████▋   | 4/6 [02:53<01:38, 49.32s/it][A

Fold 4 best MSE = 0.1298



[A                                                 
Folds:  67%|██████▋   | 4/6 [04:42<01:38, 49.32s/it][A
Folds:  83%|████████▎ | 5/6 [04:42<01:10, 70.14s/it][A

Fold 5 best MSE = 0.0039



[A                                                 
Folds:  83%|████████▎ | 5/6 [06:29<01:10, 70.14s/it][A
Folds: 100%|██████████| 6/6 [06:29<00:00, 82.44s/it][A
                                                    [A

Fold 6 best MSE = 0.0042
[I 2025-05-13 13:36:33,556] Trial 29 finished with value: 0.055132863111793995 and parameters: {'hidden_dim': 130, 'num_layers': 2, 'dropout': 0.21638671447680213, 'learning_rate': 0.0025299948542400023, 'batch_size': 32, 'epochs': 63}. Best is trial 29 with value: 0.055132863111793995.
=== Best Trial ===
MSE   : 0.055133
Params: {'hidden_dim': 130, 'num_layers': 2, 'dropout': 0.21638671447680213, 'learning_rate': 0.0025299948542400023, 'batch_size': 32, 'epochs': 63}
Total run time: 5115.4 s


In [6]:

if __name__ == "__main__":
    BEST_PARAMS = {
        'hidden_dim': 130,
        'num_layers': 2,
        'dropout': 0.21638671447680213,
        'learning_rate': 0.0025299948542400023,
        'batch_size': 32,
        'epochs': 63
    }
    
    FORECAST_HORIZON = 5
    SEQUENCE_LENGTH = 1197

    
    print("[INFO] Running final model evaluation on test set")

    X = pd.read_csv("X_df_filtered_shap.csv", index_col=0, parse_dates=True)
    Y = pd.read_csv("Y_df_change_5.csv", index_col=0, parse_dates=True)

    TEST_SIZE = 756             # 3-year hold-out
    seq_buffer = SEQUENCE_LENGTH + FORECAST_HORIZON - 1
    
    X_train = X.iloc[:-TEST_SIZE]
    Y_train = Y.iloc[:-TEST_SIZE]
    
    X_test_start = -TEST_SIZE - seq_buffer   # keep enough context for sequences
    X_test = X.iloc[X_test_start:]
    Y_test = Y.iloc[-TEST_SIZE:]

    sc = StandardScaler()
    X_train_std = pd.DataFrame(sc.fit_transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test_std  = pd.DataFrame(sc.transform(X_test),     index=X_test.index,  columns=X_test.columns)

    X_tr_seq, Y_tr_seq = gen_seq(X_train_std, Y_train, SEQUENCE_LENGTH, FORECAST_HORIZON)
    X_te_seq, Y_te_seq = gen_seq(X_test_std,  Y_test,  SEQUENCE_LENGTH, FORECAST_HORIZON)

    if len(X_te_seq) == 0 or len(Y_te_seq) == 0:
        print("[ERROR] No valid test sequences generated. Check alignment or sequence length.")
        sys.exit(1)
    else:
        print("[DEBUG] It's working")

    model = LSTMRegressor(
        in_dim=X_tr_seq.shape[2],
        hid=BEST_PARAMS['hidden_dim'],
        layers=BEST_PARAMS['num_layers'],
        out_dim=Y_tr_seq.shape[1],
        drop=BEST_PARAMS['dropout']
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=BEST_PARAMS['learning_rate'])
    scaler = amp.GradScaler()

    train_loader = DataLoader(TensorDataset(torch.tensor(X_tr_seq), torch.tensor(Y_tr_seq)),
                              batch_size=BEST_PARAMS['batch_size'], shuffle=True, pin_memory=True)

    model.train()
    for epoch in range(BEST_PARAMS['epochs']):
        for xb, yb in train_loader:
            xb, yb = xb.to(device, non_blocking=True), yb.to(device, non_blocking=True)
            optimizer.zero_grad(set_to_none=True)
            with amp.autocast(device_type="cuda"):
                loss = nn.functional.mse_loss(model(xb), yb)
            scaler.scale(loss).backward(); scaler.step(optimizer); scaler.update()

    model.eval(); preds, gts = [], []
    test_loader = DataLoader(TensorDataset(torch.tensor(X_te_seq), torch.tensor(Y_te_seq)),
                             batch_size=BEST_PARAMS['batch_size'], pin_memory=True)

    with torch.no_grad(), amp.autocast(device_type='cuda'):
        for xb, yb in test_loader:
            xb = xb.to(device, non_blocking=True)
            preds.append(model(xb).cpu())
            gts.append(yb)

    if len(preds) == 0 or len(gts) == 0:
        print("[ERROR] No predictions generated. Check test data preprocessing.")
        sys.exit(1)

    y_true = torch.cat(gts).numpy()
    y_pred = torch.cat(preds).numpy()
    mse = mean_squared_error(y_true, y_pred)
    print(f"\n[RESULT] Final Test Set MSE: {mse:.6f}")

[INFO] Running final model evaluation on test set
[DEBUG] It's working

[RESULT] Final Test Set MSE: 0.038973


In [7]:
# ---------------------- Save Multi-Output Results ---------------------- #
maturity_labels = [f"m{i+1}" for i in range(y_true.shape[1])]  # e.g., m1, m2, ..., m6

# Create column-wise dict
results_dict = {
    "date": Y_test.index[-len(y_true):]  # ensure alignment
}

# Add true and predicted values for each maturity
for i, label in enumerate(maturity_labels):
    results_dict[f"{label}_true"] = y_true[:, i]
    results_dict[f"{label}_pred"] = y_pred[:, i]

# Convert to DataFrame
results_df = pd.DataFrame(results_dict).set_index("date")

# Save
results_df.to_csv(f"final_test_predictions_multioutput_H{FORECAST_HORIZON}.csv")
print("[INFO] Multi-output predictions saved to 'final_test_predictions_multioutput.csv'")

[INFO] Multi-output predictions saved to 'final_test_predictions_multioutput.csv'
