In [1]:
pip install numpy pandas tqdm torch scikit-learn optuna

Collecting pandas
  Downloading pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib>=1.2.0 (from scikit-learn)
  Downloading jobli

In [5]:
# ==============================================================
#  LSTM Regression on Yield‑Curve Δ  |  Optuna (50 trials, h=1)
#  • Original loop‑based sequence logic
#  • Duplicate‑step warning fixed (unique global_step)
#  • Clean output: only final fold MSE shown
# ==============================================================

# ---------------------- Imports ---------------------- #
import os, sys, gc, time, random
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch import amp
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler

# ---------------------- Reproducibility ---------------------- #
RNG_SEED = 42
random.seed(RNG_SEED); np.random.seed(RNG_SEED); torch.manual_seed(RNG_SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(RNG_SEED)

# ---------------------- Device & CuDNN ---------------------- #
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"[INFO] Device: {device}")
if device.type == "cuda":
    print(f"  • GPU: {torch.cuda.get_device_name(0)}")
    cudnn.benchmark = True

# ---------------------- Config ---------------------- #
FORECAST_HORIZON = 252
TRIALS           = 75
EARLY_STOP       = 20
val_window_num_sequences = 504
holdout_base            = 756
seq_len_map             = {252: 1323}

HSPACE = {
    "hidden_dim"   : (32, 192),
    "num_layers"   : [1, 2, 3],
    "dropout"      : (0.0, 0.6),
    "learning_rate": (1e-4, 5e-3),
    "batch_size"   : [32, 64, 128],
    "epochs"       : (40, 80),
}

# ---------------------- Model ---------------------- #
class LSTMRegressor(nn.Module):
    def __init__(self, in_dim, hid, layers, out_dim, drop=0.0):
        super().__init__()
        self.lstm = nn.LSTM(in_dim, hid, layers, batch_first=True,
                            dropout=(drop if layers > 1 else 0.0))
        self.drop = nn.Dropout(drop)
        self.norm = nn.LayerNorm(hid)
        self.fc   = nn.Linear(hid, out_dim, bias=False)
    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        return self.fc(self.norm(self.drop(hn[-1])))

# ---------------------- Data Utilities ---------------------- #
def gen_seq(X_df, Y_fold, seq_len, h):
    X_arr = X_df.values.astype(np.float32)
    Y_arr = Y_fold.reindex(X_df.index).values.astype(np.float32)
    idx   = {ts: i for i, ts in enumerate(X_df.index)}
    X_seq, Y_seq = [], []
    for t in Y_fold.index:
        ti = idx.get(t)
        if ti is None:
            continue
        end = ti - h + 1; start = end - seq_len
        if start < 0 or end > len(X_arr):
            continue
        win = X_arr[start:end]
        if win.shape[0] != seq_len or np.isnan(win).any() or not np.isfinite(Y_arr[ti]).all():
            continue
        X_seq.append(win); Y_seq.append(Y_arr[ti])
    return np.asarray(X_seq, np.float32), np.asarray(Y_seq, np.float32)


def std_fold(Xtr, Xva):
    sc = StandardScaler()
    return (
        pd.DataFrame(sc.fit_transform(Xtr), index=Xtr.index, columns=Xtr.columns),
        pd.DataFrame(sc.transform(Xva),     index=Xva.index, columns=Xva.columns),
    )

# ---------------------- CV ---------------------- #
def expanding_folds(X, Y, h):
    seq_len = seq_len_map[h]; total = len(X); min_train = seq_len + h
    folds, i = [], min_train
    while i + val_window_num_sequences + holdout_base <= total:
        vs, ve = i, i + val_window_num_sequences
        folds.append({
            "X_tr": X.iloc[:i].copy(),
            "Y_tr": Y.iloc[:i].copy(),
            "X_va": X.iloc[vs - seq_len - h + 1: ve - h].copy(),
            "Y_va": Y.iloc[vs:ve].copy(),
            "seq_len": seq_len,
        })
        i += val_window_num_sequences
    return folds

# ---------------------- Optuna Objective ---------------------- #
def objective(trial, folds):
    p = {
        "hid": trial.suggest_int("hidden_dim", *HSPACE["hidden_dim"]),
        "lay": trial.suggest_categorical("num_layers", HSPACE["num_layers"]),
        "drp": trial.suggest_float("dropout", *HSPACE["dropout"]),
        "lr" : trial.suggest_float("learning_rate", *HSPACE["learning_rate"], log=True),
        "bs" : trial.suggest_categorical("batch_size", HSPACE["batch_size"]),
        "ep" : trial.suggest_int("epochs", *HSPACE["epochs"]),
    }
    scaler = amp.GradScaler(); mse_fold = []

    for f_idx, f in enumerate(tqdm(folds, desc="Folds", leave=False)):
        Xtr_s, Xva_s = std_fold(f["X_tr"], f["X_va"])
        Xtr, Ytr = gen_seq(Xtr_s, f["Y_tr"], f["seq_len"], FORECAST_HORIZON)
        Xva, Yva = gen_seq(Xva_s, f["Y_va"], f["seq_len"], FORECAST_HORIZON)
        if len(Xtr)==0 or len(Xva)==0:
            continue

        model = LSTMRegressor(Xtr.shape[2], p["hid"], p["lay"], Ytr.shape[1], p["drp"]).to(device)
        opt   = torch.optim.Adam(model.parameters(), lr=p["lr"])
        best, pat = np.inf, 0; report_every = max(1, p["ep"]//3)

        tr_loader = DataLoader(TensorDataset(torch.tensor(Xtr), torch.tensor(Ytr)), batch_size=p["bs"], shuffle=True, pin_memory=True)
        va_loader = DataLoader(TensorDataset(torch.tensor(Xva), torch.tensor(Yva)), batch_size=p["bs"], pin_memory=True)

        for epoch in range(p["ep"]):
            model.train()
            for xb, yb in tr_loader:
                xb, yb = xb.to(device, non_blocking=True), yb.to(device, non_blocking=True)
                opt.zero_grad(set_to_none=True)
                with amp.autocast(device_type='cuda'):
                    loss = nn.functional.mse_loss(model(xb), yb)
                scaler.scale(loss).backward(); scaler.step(opt); scaler.update()

            model.eval(); preds, gts = [], []
            with torch.no_grad(), amp.autocast(device_type='cuda'):
                for xb, yb in va_loader:
                    preds.append(model(xb.to(device, non_blocking=True)).cpu()); gts.append(yb)
            mse = mean_squared_error(torch.cat(gts).numpy(), torch.cat(preds).numpy())

            global_step = f_idx * p["ep"] + epoch
            if epoch % report_every == 0:
                trial.report(mse, global_step)
                if trial.should_prune():
                    raise optuna.TrialPruned()

            if mse + 1e-6 < best:
                best, pat = mse, 0
            else:
                pat += 1
                if pat >= EARLY_STOP:
                    break
        tqdm.write(f"Fold {f_idx+1} best MSE = {best:.4f}")
        mse_fold.append(best)
    return np.mean(mse_fold) if mse_fold else np.inf

# ---------------------- Main ---------------------- #
if __name__ == "__main__":
    X = pd.read_csv("X_df_filtered_shap.csv", index_col=0, parse_dates=True)
    Y = pd.read_csv(f"Y_df_change_{FORECAST_HORIZON}.csv", index_col=0, parse_dates=True)
    folds = expanding_folds(X, Y, FORECAST_HORIZON)
    print(f"Generated {len(folds)} folds\n")    

    study = optuna.create_study(
        direction="minimize",
        sampler=TPESampler(seed=RNG_SEED),
        pruner=MedianPruner(8, 15)
    )

    t0 = time.time()
    study.optimize(
        lambda tr: objective(tr, folds),
        n_trials=TRIALS,
        n_jobs=1,
        show_progress_bar=True
    )
    dur = time.time() - t0

    print("=== Best Trial ===")
    print(f"MSE   : {study.best_value:.6f}")
    print(f"Params: {study.best_trial.params}")
    print(f"Total run time: {dur:.1f} s")

[I 2025-05-13 21:36:50,337] A new study created in memory with name: no-name-3d686cc2-0a28-44d1-8aab-ab4ed9d9fdb1


[INFO] Device: cuda
  • GPU: NVIDIA H100 80GB HBM3
Generated 5 folds



  0%|          | 0/75 [00:00<?, ?it/s]


Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:25<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:25<00:38, 12.76s/it][A

Fold 2 best MSE = 1.6314



[A                                                 
Folds:  40%|████      | 2/5 [01:37<00:38, 12.76s/it][A
Folds:  60%|██████    | 3/5 [01:37<01:15, 37.61s/it][A

Fold 3 best MSE = 0.4596



[A                                                 
Folds:  60%|██████    | 3/5 [02:14<01:15, 37.61s/it][A
Folds:  80%|████████  | 4/5 [02:14<00:37, 37.12s/it][A

Fold 4 best MSE = 8.1141



[A                                                 
Folds:  80%|████████  | 4/5 [02:54<00:37, 37.12s/it][A
Folds: 100%|██████████| 5/5 [02:54<00:00, 38.29s/it][A
                                                    [A

Fold 5 best MSE = 0.5653
[I 2025-05-13 21:39:45,106] Trial 0 finished with value: 2.692591331899166 and parameters: {'hidden_dim': 92, 'num_layers': 1, 'dropout': 0.0936111842654619, 'learning_rate': 0.00018408992080552527, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 2.692591331899166.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:32<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:32<00:49, 16.47s/it][A

Fold 2 best MSE = 1.4614



[A                                                 
Folds:  40%|████      | 2/5 [01:27<00:49, 16.47s/it][A
Folds:  60%|██████    | 3/5 [01:27<01:04, 32.49s/it][A

Fold 3 best MSE = 0.3699



[A                                                 
Folds:  60%|██████    | 3/5 [02:14<01:04, 32.49s/it][A
Folds:  80%|████████  | 4/5 [02:14<00:37, 37.72s/it][A

Fold 4 best MSE = 8.1092



[A                                                 
Folds:  80%|████████  | 4/5 [03:02<00:37, 37.72s/it][A
Folds: 100%|██████████| 5/5 [03:02<00:00, 41.47s/it][A
                                                    [A

Fold 5 best MSE = 0.2091
[I 2025-05-13 21:42:48,031] Trial 1 finished with value: 2.5374038182199 and parameters: {'hidden_dim': 35, 'num_layers': 1, 'dropout': 0.10909498032426036, 'learning_rate': 0.0002049268011541737, 'batch_size': 64, 'epochs': 51}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:14<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:14<00:22,  7.46s/it][A

Fold 2 best MSE = 1.4746



[A                                                 
Folds:  40%|████      | 2/5 [00:53<00:22,  7.46s/it][A
Folds:  60%|██████    | 3/5 [00:53<00:41, 20.56s/it][A

Fold 3 best MSE = 0.3375



[A                                                 
Folds:  60%|██████    | 3/5 [01:26<00:41, 20.56s/it][A
Folds:  80%|████████  | 4/5 [01:26<00:24, 24.91s/it][A

Fold 4 best MSE = 11.6626



[A                                                 
Folds:  80%|████████  | 4/5 [02:11<00:24, 24.91s/it][A
Folds: 100%|██████████| 5/5 [02:11<00:00, 32.21s/it][A
                                                    [A

Fold 5 best MSE = 0.5852
[I 2025-05-13 21:45:00,037] Trial 2 finished with value: 3.5149842724204063 and parameters: {'hidden_dim': 130, 'num_layers': 3, 'dropout': 0.27364199053022153, 'learning_rate': 0.0021576967455896826, 'batch_size': 128, 'epochs': 41}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:16<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:16<00:24,  8.21s/it][A

Fold 2 best MSE = 1.1875



[A                                                 
Folds:  40%|████      | 2/5 [01:09<00:24,  8.21s/it][A
Folds:  60%|██████    | 3/5 [01:09<00:54, 27.09s/it][A

Fold 3 best MSE = 0.4416



[A                                                 
Folds:  60%|██████    | 3/5 [01:41<00:54, 27.09s/it][A
Folds:  80%|████████  | 4/5 [01:41<00:28, 28.92s/it][A

Fold 4 best MSE = 10.4413



[A                                                 
Folds:  80%|████████  | 4/5 [03:04<00:28, 28.92s/it][A
Folds: 100%|██████████| 5/5 [03:04<00:00, 47.62s/it][A
                                                    [A

Fold 5 best MSE = 0.5851
[I 2025-05-13 21:48:04,739] Trial 3 finished with value: 3.1638926342129707 and parameters: {'hidden_dim': 129, 'num_layers': 3, 'dropout': 0.5793792198447356, 'learning_rate': 0.0023628864184236428, 'batch_size': 128, 'epochs': 58}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:18<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:18<00:27,  9.06s/it][A

Fold 2 best MSE = 1.2442



[A                                                 
Folds:  40%|████      | 2/5 [00:59<00:27,  9.06s/it][A
Folds:  60%|██████    | 3/5 [00:59<00:45, 22.71s/it][A

Fold 3 best MSE = 0.4607



[A                                                 
Folds:  60%|██████    | 3/5 [01:27<00:45, 22.71s/it][A
Folds:  80%|████████  | 4/5 [01:27<00:24, 24.45s/it][A

Fold 4 best MSE = 8.6261



[A                                                 
Folds:  80%|████████  | 4/5 [02:05<00:24, 24.45s/it][A
Folds: 100%|██████████| 5/5 [02:05<00:00, 29.23s/it][A
                                                    [A

Fold 5 best MSE = 0.5978
[I 2025-05-13 21:50:10,308] Trial 4 finished with value: 2.7321842238307 and parameters: {'hidden_dim': 51, 'num_layers': 3, 'dropout': 0.15526798896001015, 'learning_rate': 0.0013353819088790589, 'batch_size': 128, 'epochs': 47}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:24<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:24<00:36, 12.13s/it][A

Fold 2 best MSE = 1.0311



[A                                                 
Folds:  40%|████      | 2/5 [01:27<00:36, 12.13s/it][A
Folds:  60%|██████    | 3/5 [01:27<01:06, 33.45s/it][A

Fold 3 best MSE = 0.3393



[A                                                 
Folds:  60%|██████    | 3/5 [02:32<01:06, 33.45s/it][A
Folds:  80%|████████  | 4/5 [02:32<00:45, 45.37s/it][A

Fold 4 best MSE = 21.0456



[A                                                 
Folds:  80%|████████  | 4/5 [03:51<00:45, 45.37s/it][A
Folds: 100%|██████████| 5/5 [03:51<00:00, 56.89s/it][A
                                                    [A

Fold 5 best MSE = 0.1571
[I 2025-05-13 21:54:01,827] Trial 5 finished with value: 5.643290229141712 and parameters: {'hidden_dim': 188, 'num_layers': 2, 'dropout': 0.3587399872866511, 'learning_rate': 0.0036832964384234204, 'batch_size': 64, 'epochs': 53}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:17<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:17<00:26,  8.72s/it][A

Fold 2 best MSE = 1.5967



[A                                                 
Folds:  40%|████      | 2/5 [01:25<00:26,  8.72s/it][A
Folds:  60%|██████    | 3/5 [01:25<01:06, 33.34s/it][A

Fold 3 best MSE = 0.3429



[A                                                 
Folds:  60%|██████    | 3/5 [01:57<01:06, 33.34s/it][A
Folds:  80%|████████  | 4/5 [01:57<00:33, 33.06s/it][A

Fold 4 best MSE = 9.2543



[A                                                 
Folds:  80%|████████  | 4/5 [02:39<00:33, 33.06s/it][A
Folds: 100%|██████████| 5/5 [02:39<00:00, 36.03s/it][A
                                                    [A

Fold 5 best MSE = 0.8607
[I 2025-05-13 21:56:41,319] Trial 6 finished with value: 3.013662226498127 and parameters: {'hidden_dim': 94, 'num_layers': 2, 'dropout': 0.16856070581242846, 'learning_rate': 0.0008356499023325525, 'batch_size': 64, 'epochs': 80}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:43<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:43<01:05, 21.78s/it][A

Fold 2 best MSE = 1.0817



[A                                                 
Folds:  40%|████      | 2/5 [02:01<01:05, 21.78s/it][A
Folds:  60%|██████    | 3/5 [02:01<01:30, 45.13s/it][A

Fold 3 best MSE = 0.3922



[A                                                 
Folds:  60%|██████    | 3/5 [03:32<01:30, 45.13s/it][A
Folds:  80%|████████  | 4/5 [03:32<01:02, 62.15s/it][A

Fold 4 best MSE = 21.6409



[A                                                 
Folds:  80%|████████  | 4/5 [07:46<01:02, 62.15s/it][A
Folds: 100%|██████████| 5/5 [07:46<00:00, 128.96s/it][A
                                                     [A

Fold 5 best MSE = 0.5946
[I 2025-05-13 22:04:27,796] Trial 7 finished with value: 5.927362248301506 and parameters: {'hidden_dim': 156, 'num_layers': 3, 'dropout': 0.4241144063085703, 'learning_rate': 0.001732053535845956, 'batch_size': 32, 'epochs': 44}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:26<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:26<00:39, 13.04s/it][A

Fold 2 best MSE = 1.2369



[A                                                 
Folds:  40%|████      | 2/5 [00:48<00:39, 13.04s/it][A
Folds:  60%|██████    | 3/5 [00:48<00:34, 17.03s/it][A

Fold 3 best MSE = 0.3869



[A                                                 
Folds:  60%|██████    | 3/5 [01:20<00:34, 17.03s/it][A
Folds:  80%|████████  | 4/5 [01:20<00:22, 22.65s/it][A

Fold 4 best MSE = 8.3211



[A                                                 
Folds:  80%|████████  | 4/5 [02:04<00:22, 22.65s/it][A
Folds: 100%|██████████| 5/5 [02:04<00:00, 30.04s/it][A
                                                    [A

Fold 5 best MSE = 0.6083
[I 2025-05-13 22:06:32,532] Trial 8 finished with value: 2.6382987946271896 and parameters: {'hidden_dim': 170, 'num_layers': 1, 'dropout': 0.1865893930293973, 'learning_rate': 0.00035684261232554244, 'batch_size': 128, 'epochs': 59}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:06:33,662] Trial 9 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:06:34,364] Trial 10 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:06:34,952] Trial 11 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:06:56,164] Trial 12 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:25<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:25<00:38, 12.89s/it][A

Fold 2 best MSE = 1.5688



                                                    [A

[I 2025-05-13 22:07:23,255] Trial 13 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:07:24,569] Trial 14 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:30<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:30<00:45, 15.09s/it][A

Fold 2 best MSE = 1.4649



[A                                                 
Folds:  40%|████      | 2/5 [01:36<00:45, 15.09s/it][A
Folds:  60%|██████    | 3/5 [01:36<01:13, 36.63s/it][A

Fold 3 best MSE = 0.3371



[A                                                 
Folds:  60%|██████    | 3/5 [02:10<01:13, 36.63s/it][A
Folds:  80%|████████  | 4/5 [02:10<00:35, 35.51s/it][A

Fold 4 best MSE = 8.3114



[A                                                 
Folds:  80%|████████  | 4/5 [02:52<00:35, 35.51s/it][A
Folds: 100%|██████████| 5/5 [02:52<00:00, 37.76s/it][A
                                                    [A

Fold 5 best MSE = 0.8075
[I 2025-05-13 22:10:17,212] Trial 15 finished with value: 2.7302504032850266 and parameters: {'hidden_dim': 71, 'num_layers': 1, 'dropout': 0.234230813266048, 'learning_rate': 0.0005674717306853513, 'batch_size': 64, 'epochs': 63}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:10:41,572] Trial 16 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:10:42,084] Trial 17 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:11:05,856] Trial 18 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:32<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:33<00:49, 16.50s/it][A

Fold 2 best MSE = 1.7751



[A                                                 
Folds:  40%|████      | 2/5 [01:59<00:49, 16.50s/it][A
Folds:  60%|██████    | 3/5 [01:59<01:31, 45.79s/it][A

Fold 3 best MSE = 0.3777



[A                                                 
Folds:  60%|██████    | 3/5 [02:51<01:31, 45.79s/it][A
Folds:  80%|████████  | 4/5 [02:51<00:47, 47.99s/it][A

Fold 4 best MSE = 8.5030



[A                                                 
Folds:  80%|████████  | 4/5 [04:00<00:47, 47.99s/it][A
Folds: 100%|██████████| 5/5 [04:00<00:00, 55.26s/it][A
                                                    [A

Fold 5 best MSE = 1.0494
[I 2025-05-13 22:15:06,310] Trial 19 finished with value: 2.9263038262724876 and parameters: {'hidden_dim': 107, 'num_layers': 2, 'dropout': 0.07432130251385269, 'learning_rate': 0.00024677295740564465, 'batch_size': 32, 'epochs': 48}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:17<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:17<00:26,  8.69s/it][A

Fold 2 best MSE = 1.3848



                                                    [A

[I 2025-05-13 22:15:24,786] Trial 20 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:15:39,757] Trial 21 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:15:40,454] Trial 22 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:28<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:28<00:42, 14.25s/it][A

Fold 2 best MSE = 1.3101



[A                                                 
Folds:  40%|████      | 2/5 [00:50<00:42, 14.25s/it][A
Folds:  60%|██████    | 3/5 [00:50<00:35, 17.52s/it][A

Fold 3 best MSE = 0.3273



[A                                                 
Folds:  60%|██████    | 3/5 [01:22<00:35, 17.52s/it][A
Folds:  80%|████████  | 4/5 [01:22<00:22, 22.99s/it][A

Fold 4 best MSE = 8.3386



[A                                                 
Folds:  80%|████████  | 4/5 [02:05<00:22, 22.99s/it][A
Folds: 100%|██████████| 5/5 [02:05<00:00, 29.95s/it][A
                                                    [A

Fold 5 best MSE = 0.5576
[I 2025-05-13 22:17:46,306] Trial 23 finished with value: 2.633403852581978 and parameters: {'hidden_dim': 109, 'num_layers': 1, 'dropout': 0.21187723381764362, 'learning_rate': 0.00026452119117093654, 'batch_size': 64, 'epochs': 67}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:23<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:23<00:35, 11.93s/it][A

Fold 2 best MSE = 1.3396



[A                                                 
Folds:  40%|████      | 2/5 [01:23<00:35, 11.93s/it][A
Folds:  60%|██████    | 3/5 [01:23<01:03, 31.87s/it][A

Fold 3 best MSE = 0.4087



[A                                                 
Folds:  60%|██████    | 3/5 [02:06<01:03, 31.87s/it][A
Folds:  80%|████████  | 4/5 [02:06<00:35, 35.83s/it][A

Fold 4 best MSE = 8.3774



[A                                                 
Folds:  80%|████████  | 4/5 [03:09<00:35, 35.83s/it][A
Folds: 100%|██████████| 5/5 [03:09<00:00, 45.45s/it][A
                                                    [A

Fold 5 best MSE = 0.6514
[I 2025-05-13 22:20:56,000] Trial 24 finished with value: 2.6942782253026962 and parameters: {'hidden_dim': 143, 'num_layers': 1, 'dropout': 0.22180021375270814, 'learning_rate': 0.00046600318274811703, 'batch_size': 64, 'epochs': 65}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:25<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:25<00:37, 12.57s/it][A

Fold 2 best MSE = 1.4273



[A                                                 
Folds:  40%|████      | 2/5 [00:48<00:37, 12.57s/it][A
Folds:  60%|██████    | 3/5 [00:48<00:33, 16.96s/it][A

Fold 3 best MSE = 0.4068



[A                                                 
Folds:  60%|██████    | 3/5 [01:20<00:33, 16.96s/it][A
Folds:  80%|████████  | 4/5 [01:20<00:22, 22.84s/it][A

Fold 4 best MSE = 8.4243



[A                                                 
Folds:  80%|████████  | 4/5 [02:06<00:22, 22.84s/it][A
Folds: 100%|██████████| 5/5 [02:06<00:00, 30.86s/it][A
                                                    [A

Fold 5 best MSE = 0.3661
[I 2025-05-13 22:23:02,954] Trial 25 finished with value: 2.656140759587288 and parameters: {'hidden_dim': 110, 'num_layers': 1, 'dropout': 0.3112952324919726, 'learning_rate': 0.00028259400367428077, 'batch_size': 64, 'epochs': 74}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:23:17,554] Trial 26 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:23:18,358] Trial 27 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:42<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:42<01:03, 21.10s/it][A

Fold 2 best MSE = 1.4858



                                                    [A

[I 2025-05-13 22:24:42,065] Trial 28 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:25:00,360] Trial 29 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:25:16,570] Trial 30 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:25:17,168] Trial 31 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:30<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:30<00:46, 15.44s/it][A

Fold 2 best MSE = 1.3505



[A                                                 
Folds:  40%|████      | 2/5 [01:48<00:46, 15.44s/it][A
Folds:  60%|██████    | 3/5 [01:48<01:22, 41.42s/it][A

Fold 3 best MSE = 0.3820



[A                                                 
Folds:  60%|██████    | 3/5 [02:21<01:22, 41.42s/it][A
Folds:  80%|████████  | 4/5 [02:21<00:38, 38.06s/it][A

Fold 4 best MSE = 8.0522



[A                                                 
Folds:  80%|████████  | 4/5 [03:01<00:38, 38.06s/it][A
Folds: 100%|██████████| 5/5 [03:01<00:00, 38.84s/it][A
                                                    [A

Fold 5 best MSE = 0.4555
[I 2025-05-13 22:28:18,695] Trial 32 finished with value: 2.5600352212786674 and parameters: {'hidden_dim': 115, 'num_layers': 1, 'dropout': 0.30605899609568793, 'learning_rate': 0.00022837222778991547, 'batch_size': 64, 'epochs': 73}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:29<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:29<00:44, 14.93s/it][A

Fold 2 best MSE = 1.1318



[A                                                 
Folds:  40%|████      | 2/5 [01:43<00:44, 14.93s/it][A
Folds:  60%|██████    | 3/5 [01:43<01:19, 39.58s/it][A

Fold 3 best MSE = 0.4210



[A                                                 
Folds:  60%|██████    | 3/5 [02:22<01:19, 39.58s/it][A
Folds:  80%|████████  | 4/5 [02:22<00:39, 39.10s/it][A

Fold 4 best MSE = 8.2864



[A                                                 
Folds:  80%|████████  | 4/5 [03:10<00:39, 39.10s/it][A
Folds: 100%|██████████| 5/5 [03:10<00:00, 42.37s/it][A
                                                    [A

Fold 5 best MSE = 0.3253
[I 2025-05-13 22:31:29,518] Trial 33 finished with value: 2.541120409965515 and parameters: {'hidden_dim': 141, 'num_layers': 1, 'dropout': 0.4086692432023684, 'learning_rate': 0.00021864968904309026, 'batch_size': 64, 'epochs': 61}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:31:48,576] Trial 34 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:32:04,770] Trial 35 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:32:05,577] Trial 36 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:20<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:20<00:30, 10.24s/it][A

Fold 2 best MSE = 1.0828



                                                    [A

[I 2025-05-13 22:32:27,362] Trial 37 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:32:44,752] Trial 38 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:32:45,454] Trial 39 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:29<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:29<00:43, 14.61s/it][A

Fold 2 best MSE = 1.7022



                                                    [A

[I 2025-05-13 22:33:34,570] Trial 40 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:24<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:24<00:36, 12.10s/it][A

Fold 2 best MSE = 1.3912



                                                    [A

[I 2025-05-13 22:33:59,995] Trial 41 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:34:10,170] Trial 42 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:27<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:27<00:41, 13.99s/it][A

Fold 2 best MSE = 1.4075



                                                    [A

[I 2025-05-13 22:34:39,568] Trial 43 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:34:40,874] Trial 44 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:34:41,386] Trial 45 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:34:42,156] Trial 46 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:34:57,452] Trial 47 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:34:58,057] Trial 48 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:34:59,166] Trial 49 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:34:59,965] Trial 50 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:35:15,662] Trial 51 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:35:31,570] Trial 52 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:35:32,352] Trial 53 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:25<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:25<00:38, 12.99s/it][A

Fold 2 best MSE = 1.5551



[A                                                 
Folds:  40%|████      | 2/5 [01:25<00:38, 12.99s/it][A
Folds:  60%|██████    | 3/5 [01:25<01:04, 32.41s/it][A

Fold 3 best MSE = 0.4433



[A                                                 
Folds:  60%|██████    | 3/5 [01:57<01:04, 32.41s/it][A
Folds:  80%|████████  | 4/5 [01:57<00:32, 32.26s/it][A

Fold 4 best MSE = 8.0730



[A                                                 
Folds:  80%|████████  | 4/5 [02:38<00:32, 32.26s/it][A
Folds: 100%|██████████| 5/5 [02:38<00:00, 35.19s/it][A
                                                    [A

Fold 5 best MSE = 0.7756
[I 2025-05-13 22:38:10,706] Trial 54 finished with value: 2.7117336317896843 and parameters: {'hidden_dim': 109, 'num_layers': 1, 'dropout': 0.3066417751108878, 'learning_rate': 0.0003371936304367111, 'batch_size': 64, 'epochs': 75}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:38:26,356] Trial 55 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:38:26,988] Trial 56 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:38:27,762] Trial 57 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:38:28,564] Trial 58 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:30<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:30<00:45, 15.04s/it][A

Fold 2 best MSE = 1.1236



[A                                                 
Folds:  40%|████      | 2/5 [02:13<00:45, 15.04s/it][A
Folds:  60%|██████    | 3/5 [02:13<01:43, 51.86s/it][A

Fold 3 best MSE = 0.3624



[A                                                 
Folds:  60%|██████    | 3/5 [03:11<01:43, 51.86s/it][A
Folds:  80%|████████  | 4/5 [03:11<00:54, 54.26s/it][A

Fold 4 best MSE = 10.2033



[A                                                 
Folds:  80%|████████  | 4/5 [04:39<00:54, 54.26s/it][A
Folds: 100%|██████████| 5/5 [04:39<00:00, 66.03s/it][A
                                                    [A

Fold 5 best MSE = 0.9227
[I 2025-05-13 22:43:08,519] Trial 59 finished with value: 3.1529987677931786 and parameters: {'hidden_dim': 115, 'num_layers': 3, 'dropout': 0.18644988426123088, 'learning_rate': 0.0004892150707470191, 'batch_size': 32, 'epochs': 52}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:43:17,977] Trial 60 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:43:18,657] Trial 61 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:43:34,158] Trial 62 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:43:34,762] Trial 63 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:34<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:34<00:52, 17.49s/it][A

Fold 2 best MSE = 1.3114



                                                    [A

[I 2025-05-13 22:44:10,955] Trial 64 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:44:11,564] Trial 65 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:44:25,669] Trial 66 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:44:26,355] Trial 67 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:44:44,377] Trial 68 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:44:45,069] Trial 69 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
[A                                         
Folds:   0%|          | 0/5 [00:32<?, ?it/s][A
Folds:  40%|████      | 2/5 [00:32<00:49, 16.39s/it][A

Fold 2 best MSE = 1.2842



[A                                                 
Folds:  40%|████      | 2/5 [01:28<00:49, 16.39s/it][A
Folds:  60%|██████    | 3/5 [01:28<01:05, 32.73s/it][A

Fold 3 best MSE = 0.3414



[A                                                 
Folds:  60%|██████    | 3/5 [02:06<01:05, 32.73s/it][A
Folds:  80%|████████  | 4/5 [02:06<00:34, 34.69s/it][A

Fold 4 best MSE = 8.3896



[A                                                 
Folds:  80%|████████  | 4/5 [02:58<00:34, 34.69s/it][A
Folds: 100%|██████████| 5/5 [02:58<00:00, 40.81s/it][A
                                                    [A

Fold 5 best MSE = 0.4206
[I 2025-05-13 22:47:43,803] Trial 70 finished with value: 2.6089649349451065 and parameters: {'hidden_dim': 129, 'num_layers': 1, 'dropout': 0.09458752281759475, 'learning_rate': 0.00020686315930554133, 'batch_size': 64, 'epochs': 46}. Best is trial 1 with value: 2.5374038182199.



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:48:03,859] Trial 71 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:48:04,652] Trial 72 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
                                            [A

[I 2025-05-13 22:48:25,173] Trial 73 pruned. 



Folds:   0%|          | 0/5 [00:00<?, ?it/s][A
Folds:  20%|██        | 1/5 [00:00<00:00,  9.33it/s][A
                                                    [A

[I 2025-05-13 22:48:25,979] Trial 74 pruned. 
=== Best Trial ===
MSE   : 2.537404
Params: {'hidden_dim': 35, 'num_layers': 1, 'dropout': 0.10909498032426036, 'learning_rate': 0.0002049268011541737, 'batch_size': 64, 'epochs': 51}
Total run time: 4295.6 s


In [12]:
if __name__ == "__main__":
    BEST_PARAMS = {
        'hidden_dim': 35,
        'num_layers': 1,
        'dropout': 0.10909498032426036,
        'learning_rate': 0.0002049268011541737,
        'batch_size': 64,
        'epochs': 51
    }
    
    FORECAST_HORIZON = 252
    SEQUENCE_LENGTH = 1323
    
    print("[INFO] Running final model evaluation on test set")

    X = pd.read_csv("X_df_filtered_shap.csv", index_col=0, parse_dates=True)
    Y = pd.read_csv("Y_df_change_5.csv", index_col=0, parse_dates=True)

    TEST_SIZE = 756             # 3-year hold-out
    seq_buffer = SEQUENCE_LENGTH + FORECAST_HORIZON - 1
    
    X_train = X.iloc[:-TEST_SIZE]
    Y_train = Y.iloc[:-TEST_SIZE]
    
    X_test_start = -TEST_SIZE - seq_buffer   # keep enough context for sequences
    X_test = X.iloc[X_test_start:]
    Y_test = Y.iloc[-TEST_SIZE:]

    sc = StandardScaler()
    X_train_std = pd.DataFrame(sc.fit_transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test_std  = pd.DataFrame(sc.transform(X_test),     index=X_test.index,  columns=X_test.columns)

    X_tr_seq, Y_tr_seq = gen_seq(X_train_std, Y_train, SEQUENCE_LENGTH, FORECAST_HORIZON)
    X_te_seq, Y_te_seq = gen_seq(X_test_std,  Y_test,  SEQUENCE_LENGTH, FORECAST_HORIZON)

    if len(X_te_seq) == 0 or len(Y_te_seq) == 0:
        print("[ERROR] No valid test sequences generated. Check alignment or sequence length.")
        sys.exit(1)
    else:
        print("[DEBUG] It's working")

    model = LSTMRegressor(
        in_dim=X_tr_seq.shape[2],
        hid=BEST_PARAMS['hidden_dim'],
        layers=BEST_PARAMS['num_layers'],
        out_dim=Y_tr_seq.shape[1],
        drop=BEST_PARAMS['dropout']
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=BEST_PARAMS['learning_rate'])
    scaler = amp.GradScaler()

    train_loader = DataLoader(TensorDataset(torch.tensor(X_tr_seq), torch.tensor(Y_tr_seq)),
                              batch_size=BEST_PARAMS['batch_size'], shuffle=True, pin_memory=True)

    model.train()
    for epoch in range(BEST_PARAMS['epochs']):
        for xb, yb in train_loader:
            xb, yb = xb.to(device, non_blocking=True), yb.to(device, non_blocking=True)
            optimizer.zero_grad(set_to_none=True)
            with amp.autocast(device_type="cuda"):
                loss = nn.functional.mse_loss(model(xb), yb)
            scaler.scale(loss).backward(); scaler.step(optimizer); scaler.update()

    model.eval(); preds, gts = [], []
    test_loader = DataLoader(TensorDataset(torch.tensor(X_te_seq), torch.tensor(Y_te_seq)),
                             batch_size=BEST_PARAMS['batch_size'], pin_memory=True)

    with torch.no_grad(), amp.autocast(device_type='cuda'):
        for xb, yb in test_loader:
            xb = xb.to(device, non_blocking=True)
            preds.append(model(xb).cpu())
            gts.append(yb)

    if len(preds) == 0 or len(gts) == 0:
        print("[ERROR] No predictions generated. Check test data preprocessing.")
        sys.exit(1)

    y_true = torch.cat(gts).numpy()
    y_pred = torch.cat(preds).numpy()
    mse = mean_squared_error(y_true, y_pred)
    print(f"\n[RESULT] Final Test Set MSE: {mse:.6f}")

[INFO] Running final model evaluation on test set
[DEBUG] It's working

[RESULT] Final Test Set MSE: 0.046121


In [13]:
# ---------------------- Save Multi-Output Results ---------------------- #
maturity_labels = [f"m{i+1}" for i in range(y_true.shape[1])]  # e.g., m1, m2, ..., m6

# Create column-wise dict
results_dict = {
    "date": Y_test.index[-len(y_true):]  # ensure alignment
}

# Add true and predicted values for each maturity
for i, label in enumerate(maturity_labels):
    results_dict[f"{label}_true"] = y_true[:, i]
    results_dict[f"{label}_pred"] = y_pred[:, i]

# Convert to DataFrame
results_df = pd.DataFrame(results_dict).set_index("date")

# Save
results_df.to_csv(f"final_test_predictions_multioutput_H{FORECAST_HORIZON}.csv")
print("[INFO] Multi-output predictions saved to 'final_test_predictions_multioutput.csv'")

[INFO] Multi-output predictions saved to 'final_test_predictions_multioutput.csv'
