In [2]:
pip install numpy pandas tqdm torch scikit-learn optuna

Collecting pandas
  Downloading pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib>=1.2.0 (from scikit-learn)
 

In [2]:
# ==============================================================
#  LSTM Regression on Yield‑Curve Δ  |  Optuna (50 trials, h=1)
#  • Original loop‑based sequence logic
#  • Duplicate‑step warning fixed (unique global_step)
#  • Clean output: only final fold MSE shown
# ==============================================================

# ---------------------- Imports ---------------------- #
import os, sys, gc, time, random
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch import amp
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler

# ---------------------- Reproducibility ---------------------- #
RNG_SEED = 42
random.seed(RNG_SEED); np.random.seed(RNG_SEED); torch.manual_seed(RNG_SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(RNG_SEED)

# ---------------------- Device & CuDNN ---------------------- #
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"[INFO] Device: {device}")
if device.type == "cuda":
    print(f"  • GPU: {torch.cuda.get_device_name(0)}")
    cudnn.benchmark = True

# ---------------------- Config ---------------------- #
FORECAST_HORIZON = 1
TRIALS           = 30
EARLY_STOP       = 20
val_window_num_sequences = 504
holdout_base            = 756
seq_len_map             = {1: 1512}

HSPACE = {
    "hidden_dim"   : (32, 192),
    "num_layers"   : [1, 2, 3],
    "dropout"      : (0.0, 0.6),
    "learning_rate": (1e-4, 5e-3),
    "batch_size"   : [32, 64, 128],
    "epochs"       : (40, 80),
}

# ---------------------- Model ---------------------- #
class LSTMRegressor(nn.Module):
    def __init__(self, in_dim, hid, layers, out_dim, drop=0.0):
        super().__init__()
        self.lstm = nn.LSTM(in_dim, hid, layers, batch_first=True,
                            dropout=(drop if layers > 1 else 0.0))
        self.drop = nn.Dropout(drop)
        self.norm = nn.LayerNorm(hid)
        self.fc   = nn.Linear(hid, out_dim, bias=False)
    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        return self.fc(self.norm(self.drop(hn[-1])))

# ---------------------- Data Utilities ---------------------- #
def gen_seq(X_df, Y_fold, seq_len, h):
    X_arr = X_df.values.astype(np.float32)
    Y_arr = Y_fold.reindex(X_df.index).values.astype(np.float32)
    idx   = {ts: i for i, ts in enumerate(X_df.index)}
    X_seq, Y_seq = [], []
    for t in Y_fold.index:
        ti = idx.get(t)
        if ti is None:
            continue
        end = ti - h + 1; start = end - seq_len
        if start < 0 or end > len(X_arr):
            continue
        win = X_arr[start:end]
        if win.shape[0] != seq_len or np.isnan(win).any() or not np.isfinite(Y_arr[ti]).all():
            continue
        X_seq.append(win); Y_seq.append(Y_arr[ti])
    return np.asarray(X_seq, np.float32), np.asarray(Y_seq, np.float32)


def std_fold(Xtr, Xva):
    sc = StandardScaler()
    return (
        pd.DataFrame(sc.fit_transform(Xtr), index=Xtr.index, columns=Xtr.columns),
        pd.DataFrame(sc.transform(Xva),     index=Xva.index, columns=Xva.columns),
    )

# ---------------------- CV ---------------------- #
def expanding_folds(X, Y, h):
    seq_len = seq_len_map[h]; total = len(X); min_train = seq_len + h
    folds, i = [], min_train
    while i + val_window_num_sequences + holdout_base <= total:
        vs, ve = i, i + val_window_num_sequences
        folds.append({
            "X_tr": X.iloc[:i].copy(),
            "Y_tr": Y.iloc[:i].copy(),
            "X_va": X.iloc[vs - seq_len - h + 1: ve - h].copy(),
            "Y_va": Y.iloc[vs:ve].copy(),
            "seq_len": seq_len,
        })
        i += val_window_num_sequences
    return folds

# ---------------------- Optuna Objective ---------------------- #
def objective(trial, folds):
    p = {
        "hid": trial.suggest_int("hidden_dim", *HSPACE["hidden_dim"]),
        "lay": trial.suggest_categorical("num_layers", HSPACE["num_layers"]),
        "drp": trial.suggest_float("dropout", *HSPACE["dropout"]),
        "lr" : trial.suggest_float("learning_rate", *HSPACE["learning_rate"], log=True),
        "bs" : trial.suggest_categorical("batch_size", HSPACE["batch_size"]),
        "ep" : trial.suggest_int("epochs", *HSPACE["epochs"]),
    }
    scaler = amp.GradScaler(); mse_fold = []

    for f_idx, f in enumerate(tqdm(folds, desc="Folds", leave=False)):
        Xtr_s, Xva_s = std_fold(f["X_tr"], f["X_va"])
        Xtr, Ytr = gen_seq(Xtr_s, f["Y_tr"], f["seq_len"], FORECAST_HORIZON)
        Xva, Yva = gen_seq(Xva_s, f["Y_va"], f["seq_len"], FORECAST_HORIZON)
        if len(Xtr)==0 or len(Xva)==0:
            continue

        model = LSTMRegressor(Xtr.shape[2], p["hid"], p["lay"], Ytr.shape[1], p["drp"]).to(device)
        opt   = torch.optim.Adam(model.parameters(), lr=p["lr"])
        best, pat = np.inf, 0; report_every = max(1, p["ep"]//3)

        tr_loader = DataLoader(TensorDataset(torch.tensor(Xtr), torch.tensor(Ytr)), batch_size=p["bs"], shuffle=True, pin_memory=True)
        va_loader = DataLoader(TensorDataset(torch.tensor(Xva), torch.tensor(Yva)), batch_size=p["bs"], pin_memory=True)

        for epoch in range(p["ep"]):
            model.train()
            for xb, yb in tr_loader:
                xb, yb = xb.to(device, non_blocking=True), yb.to(device, non_blocking=True)
                opt.zero_grad(set_to_none=True)
                with amp.autocast(device_type='cuda'):
                    loss = nn.functional.mse_loss(model(xb), yb)
                scaler.scale(loss).backward(); scaler.step(opt); scaler.update()

            model.eval(); preds, gts = [], []
            with torch.no_grad(), amp.autocast(device_type='cuda'):
                for xb, yb in va_loader:
                    preds.append(model(xb.to(device, non_blocking=True)).cpu()); gts.append(yb)
            mse = mean_squared_error(torch.cat(gts).numpy(), torch.cat(preds).numpy())

            global_step = f_idx * p["ep"] + epoch
            if epoch % report_every == 0:
                trial.report(mse, global_step)
                if trial.should_prune():
                    raise optuna.TrialPruned()

            if mse + 1e-6 < best:
                best, pat = mse, 0
            else:
                pat += 1
                if pat >= EARLY_STOP:
                    break
        tqdm.write(f"Fold {f_idx+1} best MSE = {best:.4f}")
        mse_fold.append(best)
    return np.mean(mse_fold) if mse_fold else np.inf

# ---------------------- Main ---------------------- #
if __name__ == "__main__":
    X = pd.read_csv("X_df_filtered_shap.csv", index_col=0, parse_dates=True)
    Y = pd.read_csv("Y_df_change_1.csv", index_col=0, parse_dates=True)
    folds = expanding_folds(X, Y, FORECAST_HORIZON)
    print(f"Generated {len(folds)} folds\n")    

    study = optuna.create_study(
        direction="minimize",
        sampler=TPESampler(seed=RNG_SEED),
        pruner=MedianPruner(8, 15)
    )

    t0 = time.time()
    study.optimize(
        lambda tr: objective(tr, folds),
        n_trials=TRIALS,
        n_jobs=1,
        show_progress_bar=True
    )
    dur = time.time() - t0

    print("=== Best Trial ===")
    print(f"MSE   : {study.best_value:.6f}")
    print(f"Params: {study.best_trial.params}")
    print(f"Total run time: {dur:.1f} s")

[I 2025-05-13 11:56:40,212] A new study created in memory with name: no-name-3927cc6f-bd77-4fd8-932a-3758f895ceb7


[INFO] Device: cuda
  • GPU: NVIDIA H200
Generated 6 folds



  0%|          | 0/30 [00:00<?, ?it/s]


Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:05<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:05<00:28,  5.74s/it][A

Fold 1 best MSE = 0.2068



[A                                                 
Folds:  17%|█▋        | 1/6 [00:34<00:28,  5.74s/it][A
Folds:  33%|███▎      | 2/6 [00:34<01:17, 19.49s/it][A

Fold 2 best MSE = 0.0364



[A                                                 
Folds:  33%|███▎      | 2/6 [01:23<01:17, 19.49s/it][A
Folds:  50%|█████     | 3/6 [01:23<01:38, 32.87s/it][A

Fold 3 best MSE = 0.0493



[A                                                 
Folds:  50%|█████     | 3/6 [02:30<01:38, 32.87s/it][A
Folds:  67%|██████▋   | 4/6 [02:30<01:32, 46.46s/it][A

Fold 4 best MSE = 0.0107



[A                                                 
Folds:  67%|██████▋   | 4/6 [03:56<01:32, 46.46s/it][A
Folds:  83%|████████▎ | 5/6 [03:56<01:00, 60.69s/it][A

Fold 5 best MSE = 0.0027



[A                                                 
Folds:  83%|████████▎ | 5/6 [07:56<01:00, 60.69s/it][A
Folds: 100%|██████████| 6/6 [07:56<00:00, 121.38s/it][A
                                                     [A

Fold 6 best MSE = 0.0321
[I 2025-05-13 12:04:36,315] Trial 0 finished with value: 0.05634353787172586 and parameters: {'hidden_dim': 92, 'num_layers': 1, 'dropout': 0.0936111842654619, 'learning_rate': 0.00018408992080552527, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.05634353787172586.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:03<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:03<00:17,  3.56s/it][A

Fold 1 best MSE = 0.1932



[A                                                 
Folds:  17%|█▋        | 1/6 [00:24<00:17,  3.56s/it][A
Folds:  33%|███▎      | 2/6 [00:24<00:55, 13.93s/it][A

Fold 2 best MSE = 0.0399



[A                                                 
Folds:  33%|███▎      | 2/6 [01:00<00:55, 13.93s/it][A
Folds:  50%|█████     | 3/6 [01:00<01:11, 23.88s/it][A

Fold 3 best MSE = 0.1068



[A                                                 
Folds:  50%|█████     | 3/6 [01:48<01:11, 23.88s/it][A
Folds:  67%|██████▋   | 4/6 [01:48<01:06, 33.28s/it][A

Fold 4 best MSE = 0.0256



[A                                                 
Folds:  67%|██████▋   | 4/6 [02:46<01:06, 33.28s/it][A
Folds:  83%|████████▎ | 5/6 [02:46<00:42, 42.48s/it][A

Fold 5 best MSE = 0.0154



[A                                                 
Folds:  83%|████████▎ | 5/6 [03:59<00:42, 42.48s/it][A
Folds: 100%|██████████| 6/6 [03:59<00:00, 52.55s/it][A
                                                    [A

Fold 6 best MSE = 0.0411
[I 2025-05-13 12:08:35,423] Trial 1 finished with value: 0.07033011270686984 and parameters: {'hidden_dim': 35, 'num_layers': 1, 'dropout': 0.10909498032426036, 'learning_rate': 0.0002049268011541737, 'batch_size': 64, 'epochs': 51}. Best is trial 0 with value: 0.05634353787172586.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:16<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:16<01:23, 16.79s/it][A

Fold 1 best MSE = 0.0486



[A                                                 
Folds:  17%|█▋        | 1/6 [00:49<01:23, 16.79s/it][A
Folds:  33%|███▎      | 2/6 [00:49<01:45, 26.26s/it][A

Fold 2 best MSE = 0.0180



[A                                                 
Folds:  33%|███▎      | 2/6 [01:38<01:45, 26.26s/it][A
Folds:  50%|█████     | 3/6 [01:38<01:50, 36.67s/it][A

Fold 3 best MSE = 0.0346



[A                                                 
Folds:  50%|█████     | 3/6 [02:26<01:50, 36.67s/it][A
Folds:  67%|██████▋   | 4/6 [02:26<01:22, 41.20s/it][A

Fold 4 best MSE = 0.0009



[A                                                 
Folds:  67%|██████▋   | 4/6 [03:24<01:22, 41.20s/it][A
Folds:  83%|████████▎ | 5/6 [03:24<00:47, 47.06s/it][A

Fold 5 best MSE = 0.0006



[A                                                 
Folds:  83%|████████▎ | 5/6 [05:07<00:47, 47.06s/it][A
Folds: 100%|██████████| 6/6 [05:07<00:00, 66.03s/it][A
                                                    [A

Fold 6 best MSE = 0.0298
[I 2025-05-13 12:13:42,659] Trial 2 finished with value: 0.022090455546276644 and parameters: {'hidden_dim': 130, 'num_layers': 3, 'dropout': 0.27364199053022153, 'learning_rate': 0.0021576967455896826, 'batch_size': 128, 'epochs': 41}. Best is trial 2 with value: 0.022090455546276644.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:19<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:19<01:35, 19.15s/it][A

Fold 1 best MSE = 0.0639



[A                                                 
Folds:  17%|█▋        | 1/6 [00:57<01:35, 19.15s/it][A
Folds:  33%|███▎      | 2/6 [00:57<02:02, 30.71s/it][A

Fold 2 best MSE = 0.0180



[A                                                 
Folds:  33%|███▎      | 2/6 [02:05<02:02, 30.71s/it][A
Folds:  50%|█████     | 3/6 [02:05<02:22, 47.46s/it][A

Fold 3 best MSE = 0.0348



[A                                                 
Folds:  50%|█████     | 3/6 [03:03<02:22, 47.46s/it][A
Folds:  67%|██████▋   | 4/6 [03:03<01:43, 51.50s/it][A

Fold 4 best MSE = 0.0009



[A                                                 
Folds:  67%|██████▋   | 4/6 [04:20<01:43, 51.50s/it][A
Folds:  83%|████████▎ | 5/6 [04:20<01:00, 60.76s/it][A

Fold 5 best MSE = 0.0005



[A                                                 
Folds:  83%|████████▎ | 5/6 [06:54<01:00, 60.76s/it][A
Folds: 100%|██████████| 6/6 [06:54<00:00, 92.41s/it][A
                                                    [A

Fold 6 best MSE = 0.0298
[I 2025-05-13 12:20:36,774] Trial 3 finished with value: 0.024672055878909305 and parameters: {'hidden_dim': 129, 'num_layers': 3, 'dropout': 0.5793792198447356, 'learning_rate': 0.0023628864184236428, 'batch_size': 128, 'epochs': 58}. Best is trial 2 with value: 0.022090455546276644.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:14<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:14<01:12, 14.40s/it][A

Fold 1 best MSE = 0.0300



[A                                                 
Folds:  17%|█▋        | 1/6 [00:41<01:12, 14.40s/it][A
Folds:  33%|███▎      | 2/6 [00:41<01:26, 21.69s/it][A

Fold 2 best MSE = 0.0183



[A                                                 
Folds:  33%|███▎      | 2/6 [01:12<01:26, 21.69s/it][A
Folds:  50%|█████     | 3/6 [01:12<01:18, 26.13s/it][A

Fold 3 best MSE = 0.0356



[A                                                 
Folds:  50%|█████     | 3/6 [01:52<01:18, 26.13s/it][A
Folds:  67%|██████▋   | 4/6 [01:52<01:03, 31.57s/it][A

Fold 4 best MSE = 0.0010



[A                                                 
Folds:  67%|██████▋   | 4/6 [02:39<01:03, 31.57s/it][A
Folds:  83%|████████▎ | 5/6 [02:39<00:37, 37.31s/it][A

Fold 5 best MSE = 0.0010



[A                                                 
Folds:  83%|████████▎ | 5/6 [03:53<00:37, 37.31s/it][A
Folds: 100%|██████████| 6/6 [03:53<00:00, 49.45s/it][A
                                                    [A

Fold 6 best MSE = 0.0300
[I 2025-05-13 12:24:29,836] Trial 4 finished with value: 0.019297625452357654 and parameters: {'hidden_dim': 51, 'num_layers': 3, 'dropout': 0.15526798896001015, 'learning_rate': 0.0013353819088790589, 'batch_size': 128, 'epochs': 47}. Best is trial 4 with value: 0.019297625452357654.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:23<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:23<01:59, 23.86s/it][A

Fold 1 best MSE = 0.0417



[A                                                 
Folds:  17%|█▋        | 1/6 [00:57<01:59, 23.86s/it][A
Folds:  33%|███▎      | 2/6 [00:57<01:57, 29.34s/it][A

Fold 2 best MSE = 0.0181



[A                                                 
Folds:  33%|███▎      | 2/6 [02:30<01:57, 29.34s/it][A
Folds:  50%|█████     | 3/6 [02:30<02:55, 58.41s/it][A

Fold 3 best MSE = 0.0339



[A                                                 
Folds:  50%|█████     | 3/6 [04:02<02:55, 58.41s/it][A
Folds:  67%|██████▋   | 4/6 [04:02<02:23, 71.72s/it][A

Fold 4 best MSE = 0.0014



[A                                                 
Folds:  67%|██████▋   | 4/6 [05:18<02:23, 71.72s/it][A
Folds:  83%|████████▎ | 5/6 [05:18<01:13, 73.54s/it][A

Fold 5 best MSE = 0.0006



[A                                                 
Folds:  83%|████████▎ | 5/6 [07:38<01:13, 73.54s/it][A
Folds: 100%|██████████| 6/6 [07:38<00:00, 96.01s/it][A
                                                    [A

Fold 6 best MSE = 0.0297
[I 2025-05-13 12:32:08,458] Trial 5 finished with value: 0.020885084978848074 and parameters: {'hidden_dim': 188, 'num_layers': 2, 'dropout': 0.3587399872866511, 'learning_rate': 0.0036832964384234204, 'batch_size': 64, 'epochs': 53}. Best is trial 4 with value: 0.019297625452357654.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:14<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:14<01:11, 14.33s/it][A

Fold 1 best MSE = 0.0484



[A                                                 
Folds:  17%|█▋        | 1/6 [00:38<01:11, 14.33s/it][A
Folds:  33%|███▎      | 2/6 [00:38<01:19, 19.83s/it][A

Fold 2 best MSE = 0.0189



[A                                                 
Folds:  33%|███▎      | 2/6 [01:38<01:19, 19.83s/it][A
Folds:  50%|█████     | 3/6 [01:38<01:55, 38.41s/it][A

Fold 3 best MSE = 0.0365



[A                                                 
Folds:  50%|█████     | 3/6 [02:14<01:55, 38.41s/it][A
Folds:  67%|██████▋   | 4/6 [02:14<01:14, 37.29s/it][A

Fold 4 best MSE = 0.0015



[A                                                 
Folds:  67%|██████▋   | 4/6 [03:02<01:14, 37.29s/it][A
Folds:  83%|████████▎ | 5/6 [03:02<00:41, 41.23s/it][A

Fold 5 best MSE = 0.0010



[A                                                 
Folds:  83%|████████▎ | 5/6 [04:38<00:41, 41.23s/it][A
Folds: 100%|██████████| 6/6 [04:38<00:00, 59.95s/it][A
                                                    [A

Fold 6 best MSE = 0.0297
[I 2025-05-13 12:36:47,144] Trial 6 finished with value: 0.02268317361207058 and parameters: {'hidden_dim': 94, 'num_layers': 2, 'dropout': 0.16856070581242846, 'learning_rate': 0.0008356499023325525, 'batch_size': 64, 'epochs': 80}. Best is trial 4 with value: 0.019297625452357654.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:37<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:37<03:05, 37.12s/it][A

Fold 1 best MSE = 0.0289



[A                                                 
Folds:  17%|█▋        | 1/6 [01:50<03:05, 37.12s/it][A
Folds:  33%|███▎      | 2/6 [01:50<03:53, 58.34s/it][A

Fold 2 best MSE = 0.0181



[A                                                 
Folds:  33%|███▎      | 2/6 [04:27<03:53, 58.34s/it][A
Folds:  50%|█████     | 3/6 [04:27<05:10, 103.62s/it][A

Fold 3 best MSE = 0.0335



[A                                                  
Folds:  50%|█████     | 3/6 [08:15<05:10, 103.62s/it][A
Folds:  67%|██████▋   | 4/6 [08:15<05:05, 152.54s/it][A

Fold 4 best MSE = 0.0010



[A                                                  
Folds:  67%|██████▋   | 4/6 [11:09<05:05, 152.54s/it][A
Folds:  83%|████████▎ | 5/6 [11:09<02:40, 160.30s/it][A

Fold 5 best MSE = 0.0007



[A                                                  
Folds:  83%|████████▎ | 5/6 [16:50<02:40, 160.30s/it][A
Folds: 100%|██████████| 6/6 [16:50<00:00, 221.77s/it][A
                                                     [A

Fold 6 best MSE = 0.0298
[I 2025-05-13 12:53:37,726] Trial 7 finished with value: 0.018670764499499153 and parameters: {'hidden_dim': 156, 'num_layers': 3, 'dropout': 0.4241144063085703, 'learning_rate': 0.001732053535845956, 'batch_size': 32, 'epochs': 44}. Best is trial 7 with value: 0.018670764499499153.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:23<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:23<01:57, 23.48s/it][A

Fold 1 best MSE = 0.1139



[A                                                 
Folds:  17%|█▋        | 1/6 [01:09<01:57, 23.48s/it][A
Folds:  33%|███▎      | 2/6 [01:09<02:27, 36.96s/it][A

Fold 2 best MSE = 0.0239



[A                                                 
Folds:  50%|█████     | 3/6 [02:39<02:05, 41.84s/it][A
Folds:  67%|██████▋   | 4/6 [02:39<01:30, 45.07s/it][A

Fold 4 best MSE = 0.0014



[A                                                 
Folds:  67%|██████▋   | 4/6 [03:52<01:30, 45.07s/it][A
Folds:  83%|████████▎ | 5/6 [03:52<00:55, 55.28s/it][A

Fold 5 best MSE = 0.0009



[A                                                 
Folds:  83%|████████▎ | 5/6 [05:20<00:55, 55.28s/it][A
Folds: 100%|██████████| 6/6 [05:20<00:00, 66.41s/it][A
                                                    [A

Fold 6 best MSE = 0.0300
[I 2025-05-13 13:01:39,339] Trial 11 finished with value: 0.01897050131810829 and parameters: {'hidden_dim': 73, 'num_layers': 3, 'dropout': 0.02855624015246097, 'learning_rate': 0.0011218824307463397, 'batch_size': 32, 'epochs': 46}. Best is trial 7 with value: 0.018670764499499153.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:14<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:14<01:11, 14.22s/it][A

Fold 1 best MSE = 0.0295



[A                                                 
Folds:  33%|███▎      | 2/6 [01:44<01:43, 25.98s/it][A
Folds:  50%|█████     | 3/6 [01:44<01:59, 39.69s/it][A

Fold 3 best MSE = 0.0350



[A                                                 
Folds:  50%|█████     | 3/6 [02:59<01:59, 39.69s/it][A
Folds:  67%|██████▋   | 4/6 [02:59<01:47, 53.62s/it][A

Fold 4 best MSE = 0.0015



[A                                                 
Folds:  67%|██████▋   | 4/6 [04:05<01:47, 53.62s/it][A
Folds:  83%|████████▎ | 5/6 [04:05<00:58, 58.02s/it][A

Fold 5 best MSE = 0.0010



[A                                                 
Folds:  83%|████████▎ | 5/6 [05:33<00:58, 58.02s/it][A
Folds: 100%|██████████| 6/6 [05:33<00:00, 68.34s/it][A
                                                    [A

Fold 6 best MSE = 0.0300
[I 2025-05-13 13:07:13,027] Trial 12 finished with value: 0.019209401740226895 and parameters: {'hidden_dim': 81, 'num_layers': 3, 'dropout': 0.018536163615168397, 'learning_rate': 0.0010065994866629551, 'batch_size': 32, 'epochs': 40}. Best is trial 7 with value: 0.018670764499499153.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:12<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:12<01:02, 12.54s/it][A

Fold 1 best MSE = 0.0608



                                                    [A

[I 2025-05-13 13:07:48,792] Trial 13 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 13:08:35,370] Trial 14 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:23<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:23<01:57, 23.49s/it][A

Fold 1 best MSE = 0.0252



                                                    [A

[I 2025-05-13 13:08:59,877] Trial 15 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:23<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:23<01:58, 23.80s/it][A

Fold 1 best MSE = 0.0296



                                                    [A

[I 2025-05-13 13:09:45,493] Trial 16 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 13:09:59,067] Trial 17 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 13:10:34,389] Trial 18 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:17<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:17<01:28, 17.66s/it][A

Fold 1 best MSE = 0.0991



[A                                                 
Folds:  17%|█▋        | 1/6 [01:12<01:28, 17.66s/it][A
Folds:  33%|███▎      | 2/6 [01:12<02:37, 39.46s/it][A

Fold 2 best MSE = 0.0198



[A                                                 
Folds:  33%|███▎      | 2/6 [02:46<02:37, 39.46s/it][A
Folds:  50%|█████     | 3/6 [02:46<03:13, 64.40s/it][A

Fold 3 best MSE = 0.0375



[A                                                 
Folds:  50%|█████     | 3/6 [03:52<03:13, 64.40s/it][A
Folds:  67%|██████▋   | 4/6 [03:52<02:10, 65.15s/it][A

Fold 4 best MSE = 0.0025



[A                                                 
Folds:  67%|██████▋   | 4/6 [05:53<02:10, 65.15s/it][A
Folds:  83%|████████▎ | 5/6 [05:53<01:25, 85.25s/it][A

Fold 5 best MSE = 0.0027



[A                                                 
Folds:  83%|████████▎ | 5/6 [07:53<01:25, 85.25s/it][A
Folds: 100%|██████████| 6/6 [07:53<00:00, 97.07s/it][A
                                                    [A

Fold 6 best MSE = 0.0310
[I 2025-05-13 13:18:28,135] Trial 19 finished with value: 0.03209943285522362 and parameters: {'hidden_dim': 107, 'num_layers': 1, 'dropout': 0.3339844782242135, 'learning_rate': 0.00037559230620767353, 'batch_size': 32, 'epochs': 75}. Best is trial 7 with value: 0.018670764499499153.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:16<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:16<01:22, 16.42s/it][A

Fold 1 best MSE = 0.0365



[A                                                 
Folds:  17%|█▋        | 1/6 [00:50<01:22, 16.42s/it][A
Folds:  33%|███▎      | 2/6 [00:50<01:47, 26.77s/it][A

Fold 2 best MSE = 0.0180



[A                                                 
Folds:  33%|███▎      | 2/6 [01:36<01:47, 26.77s/it][A
Folds:  50%|█████     | 3/6 [01:36<01:46, 35.45s/it][A

Fold 3 best MSE = 0.0350



[A                                                 
Folds:  50%|█████     | 3/6 [02:48<01:46, 35.45s/it][A
Folds:  67%|██████▋   | 4/6 [02:48<01:39, 49.85s/it][A

Fold 4 best MSE = 0.0009



[A                                                 
Folds:  67%|██████▋   | 4/6 [04:06<01:39, 49.85s/it][A
Folds:  83%|████████▎ | 5/6 [04:06<01:00, 60.25s/it][A

Fold 5 best MSE = 0.0005



[A                                                 
Folds:  83%|████████▎ | 5/6 [06:21<01:00, 60.25s/it][A
Folds: 100%|██████████| 6/6 [06:21<00:00, 85.42s/it][A
                                                    [A

Fold 6 best MSE = 0.0297
[I 2025-05-13 13:24:49,346] Trial 20 finished with value: 0.020105815123921882 and parameters: {'hidden_dim': 66, 'num_layers': 3, 'dropout': 0.4180172675910732, 'learning_rate': 0.0010441714346034964, 'batch_size': 32, 'epochs': 56}. Best is trial 7 with value: 0.018670764499499153.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:13<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:13<01:09, 13.83s/it][A

Fold 1 best MSE = 0.0295



                                                    [A

[I 2025-05-13 13:25:04,173] Trial 21 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:14<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:14<01:10, 14.19s/it][A

Fold 1 best MSE = 0.0323



                                                    [A

[I 2025-05-13 13:25:40,571] Trial 22 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:15<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:15<01:15, 15.19s/it][A

Fold 1 best MSE = 0.0271



[A                                                 
Folds:  17%|█▋        | 1/6 [00:51<01:15, 15.19s/it][A
Folds:  33%|███▎      | 2/6 [00:51<01:51, 27.84s/it][A

Fold 2 best MSE = 0.0179



                                                    [A

[I 2025-05-13 13:27:32,584] Trial 23 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:21<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:21<01:49, 21.88s/it][A

Fold 1 best MSE = 0.0268



[A                                                 
Folds:  17%|█▋        | 1/6 [01:17<01:49, 21.88s/it][A
Folds:  33%|███▎      | 2/6 [01:17<02:47, 41.89s/it][A

Fold 2 best MSE = 0.0179



[A                                                 
Folds:  33%|███▎      | 2/6 [02:40<02:47, 41.89s/it][A
Folds:  50%|█████     | 3/6 [02:40<03:01, 60.39s/it][A

Fold 3 best MSE = 0.0344



[A                                                 
Folds:  50%|█████     | 3/6 [03:51<03:01, 60.39s/it][A
Folds:  67%|██████▋   | 4/6 [03:51<02:09, 64.90s/it][A

Fold 4 best MSE = 0.0011



[A                                                 
Folds:  67%|██████▋   | 4/6 [05:23<02:09, 64.90s/it][A
Folds:  83%|████████▎ | 5/6 [05:23<01:14, 74.60s/it][A

Fold 5 best MSE = 0.0008



                                                    [A

[I 2025-05-13 13:34:03,678] Trial 24 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:13<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:13<01:08, 13.67s/it][A

Fold 1 best MSE = 0.0394



[A                                                 
Folds:  17%|█▋        | 1/6 [00:54<01:08, 13.67s/it][A
Folds:  33%|███▎      | 2/6 [00:54<01:57, 29.47s/it][A

Fold 2 best MSE = 0.0184



                                                    [A

[I 2025-05-13 13:34:59,569] Trial 25 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:13<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:13<01:05, 13.09s/it][A

Fold 1 best MSE = 0.0308



[A                                                 
Folds:  17%|█▋        | 1/6 [00:45<01:05, 13.09s/it][A
Folds:  33%|███▎      | 2/6 [00:45<01:36, 24.21s/it][A

Fold 2 best MSE = 0.0184



[A                                                 
Folds:  33%|███▎      | 2/6 [01:38<01:36, 24.21s/it][A
Folds:  50%|█████     | 3/6 [01:38<01:52, 37.36s/it][A

Fold 3 best MSE = 0.0348



[A                                                 
Folds:  50%|█████     | 3/6 [02:43<01:52, 37.36s/it][A
Folds:  67%|██████▋   | 4/6 [02:43<01:37, 48.51s/it][A

Fold 4 best MSE = 0.0010



[A                                                 
Folds:  67%|██████▋   | 4/6 [03:49<01:37, 48.51s/it][A
Folds:  83%|████████▎ | 5/6 [03:49<00:54, 54.89s/it][A

Fold 5 best MSE = 0.0006



[A                                                 
Folds:  83%|████████▎ | 5/6 [05:42<00:54, 54.89s/it][A
Folds: 100%|██████████| 6/6 [05:42<00:00, 74.61s/it][A
                                                    [A

Fold 6 best MSE = 0.0298
[I 2025-05-13 13:40:42,453] Trial 26 finished with value: 0.01922988099977374 and parameters: {'hidden_dim': 39, 'num_layers': 3, 'dropout': 0.2286653698851398, 'learning_rate': 0.0005209628920999434, 'batch_size': 32, 'epochs': 43}. Best is trial 7 with value: 0.018670764499499153.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/6 [00:44<?, ?it/s][A
Folds:  17%|█▋        | 1/6 [00:44<03:43, 44.73s/it][A

Fold 1 best MSE = 0.0263



                                                    [A

[I 2025-05-13 13:42:06,396] Trial 27 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 13:42:12,906] Trial 28 pruned. 



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 13:42:20,487] Trial 29 pruned. 
=== Best Trial ===
MSE   : 0.018671
Params: {'hidden_dim': 156, 'num_layers': 3, 'dropout': 0.4241144063085703, 'learning_rate': 0.001732053535845956, 'batch_size': 32, 'epochs': 44}
Total run time: 6340.3 s


In [3]:
    print(f"MSE   : {study.best_value:.6f}")
    print(f"Params: {study.best_trial.params}")
    print(f"Total run time: {dur:.1f} s")

MSE   : 0.018671
Params: {'hidden_dim': 156, 'num_layers': 3, 'dropout': 0.4241144063085703, 'learning_rate': 0.001732053535845956, 'batch_size': 32, 'epochs': 44}
Total run time: 6340.3 s


In [10]:
if __name__ == "__main__":

    FORECAST_HORIZON = 1
    BEST_PARAMS = {
        'hidden_dim': 156,
        'num_layers': 3,
        'dropout': 0.4241144063085703,
        'learning_rate': 0.001732053535845956,
        'batch_size': 32,
        'epochs': 44
    }
    SEQUENCE_LENGTH = 1512
    
    print("[INFO] Running final model evaluation on test set")

    X = pd.read_csv("X_df_filtered_shap.csv", index_col=0, parse_dates=True)
    Y = pd.read_csv("Y_df_change_1.csv", index_col=0, parse_dates=True)

    TEST_SIZE = 756             # 3-year hold-out
    seq_buffer = SEQUENCE_LENGTH + FORECAST_HORIZON - 1
    
    X_train = X.iloc[:-TEST_SIZE]
    Y_train = Y.iloc[:-TEST_SIZE]
    
    X_test_start = -TEST_SIZE - seq_buffer   # keep enough context for sequences
    X_test = X.iloc[X_test_start:]
    Y_test = Y.iloc[-TEST_SIZE:]

    sc = StandardScaler()
    X_train_std = pd.DataFrame(sc.fit_transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test_std  = pd.DataFrame(sc.transform(X_test),     index=X_test.index,  columns=X_test.columns)

    X_tr_seq, Y_tr_seq = gen_seq(X_train_std, Y_train, SEQUENCE_LENGTH, FORECAST_HORIZON)
    X_te_seq, Y_te_seq = gen_seq(X_test_std,  Y_test,  SEQUENCE_LENGTH, FORECAST_HORIZON)

    if len(X_te_seq) == 0 or len(Y_te_seq) == 0:
        print("[ERROR] No valid test sequences generated. Check alignment or sequence length.")
        sys.exit(1)
    else:
        print("[DEBUG] It's working")

    model = LSTMRegressor(
        in_dim=X_tr_seq.shape[2],
        hid=BEST_PARAMS['hidden_dim'],
        layers=BEST_PARAMS['num_layers'],
        out_dim=Y_tr_seq.shape[1],
        drop=BEST_PARAMS['dropout']
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=BEST_PARAMS['learning_rate'])
    scaler = amp.GradScaler()

    train_loader = DataLoader(TensorDataset(torch.tensor(X_tr_seq), torch.tensor(Y_tr_seq)),
                              batch_size=BEST_PARAMS['batch_size'], shuffle=True, pin_memory=True)

    model.train()
    for epoch in range(BEST_PARAMS['epochs']):
        for xb, yb in train_loader:
            xb, yb = xb.to(device, non_blocking=True), yb.to(device, non_blocking=True)
            optimizer.zero_grad(set_to_none=True)
            with amp.autocast(device_type="cuda"):
                loss = nn.functional.mse_loss(model(xb), yb)
            scaler.scale(loss).backward(); scaler.step(optimizer); scaler.update()

    model.eval(); preds, gts = [], []
    test_loader = DataLoader(TensorDataset(torch.tensor(X_te_seq), torch.tensor(Y_te_seq)),
                             batch_size=BEST_PARAMS['batch_size'], pin_memory=True)

    with torch.no_grad(), amp.autocast(device_type='cuda'):
        for xb, yb in test_loader:
            xb = xb.to(device, non_blocking=True)
            preds.append(model(xb).cpu())
            gts.append(yb)

    if len(preds) == 0 or len(gts) == 0:
        print("[ERROR] No predictions generated. Check test data preprocessing.")
        sys.exit(1)

    y_true = torch.cat(gts).numpy()
    y_pred = torch.cat(preds).numpy()
    mse = mean_squared_error(y_true, y_pred)
    print(f"\n[RESULT] Final Test Set MSE: {mse:.6f}")

[INFO] Running final model evaluation on test set
[DEBUG] It's working

[RESULT] Final Test Set MSE: 0.000339


In [18]:
# ---------------------- Save Multi-Output Results ---------------------- #
maturity_labels = [f"m{i+1}" for i in range(y_true.shape[1])]  # e.g., m1, m2, ..., m6

# Create column-wise dict
results_dict = {
    "date": Y_test.index[-len(y_true):]  # ensure alignment
}

# Add true and predicted values for each maturity
for i, label in enumerate(maturity_labels):
    results_dict[f"{label}_true"] = y_true[:, i]
    results_dict[f"{label}_pred"] = y_pred[:, i]

# Convert to DataFrame
results_df = pd.DataFrame(results_dict).set_index("date")

# Save
results_df.to_csv("final_test_predictions_multioutput.csv")
print("[INFO] Multi-output predictions saved to 'final_test_predictions_multioutput.csv'")


[INFO] Multi-output predictions saved to 'final_test_predictions_multioutput.csv'


In [15]:
# ---------------------- Save Results ---------------------- #
import os

Y_test.index[-len(y_true):]

DatetimeIndex(['2022-04-13', '2022-04-14', '2022-04-15', '2022-04-18',
               '2022-04-19', '2022-04-20', '2022-04-21', '2022-04-22',
               '2022-04-25', '2022-04-26',
               ...
               '2025-02-20', '2025-02-21', '2025-02-24', '2025-02-25',
               '2025-02-26', '2025-02-27', '2025-02-28', '2025-03-03',
               '2025-03-04', '2025-03-05'],
              dtype='datetime64[ns]', length=756, freq=None)

In [17]:
len(y_true)

756

In [1]:
import optuna
from optuna.visualization import (
    plot_parallel_coordinate,
    plot_slice,
    plot_contour,
)

# ----------------------------------------------------------------------
# Assume you have an existing Optuna Study in memory called `study`
# (e.g. from your previous .optimize(...) run)
# ----------------------------------------------------------------------

# 1. Parallel‐Coordinates Plot
#    This shows each trial as a polyline crossing one axis per hyperparameter,
#    colored by the objective value.
fig_pc = plot_parallel_coordinate(study)
fig_pc.show()

# 2. Slice Plot
#    This shows the relationship between each individual hyperparameter and
#    the objective, with one scatter‐and‐density plot per parameter.
fig_slice = plot_slice(study)
fig_slice.show()

# 3. Contour Plot
#    This gives a heat‐map style view of pairwise interactions between two
#    hyperparameters, colored by the objective value.
fig_contour = plot_contour(study)
fig_contour.show()


  from .autonotebook import tqdm as notebook_tqdm


NameError: name 'study' is not defined