In [2]:
pip install numpy pandas tqdm torch scikit-learn optuna

Collecting pandas
  Downloading pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib>=1.2.0 (from scikit-learn)
 

In [15]:
# ==============================================================
#  LSTM Regression on Yield‑Curve Δ  |  Optuna (50 trials, h=1)
#  • Original loop‑based sequence logic
#  • Duplicate‑step warning fixed (unique global_step)
#  • Clean output: only final fold MSE shown
# ==============================================================

# ---------------------- Imports ---------------------- #
import os, sys, gc, time, random
import numpy as np
import pandas as pd
from tqdm import tqdm
import ast


import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch import amp
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler

# ---------------------- Reproducibility ---------------------- #
RNG_SEED = 42
random.seed(RNG_SEED); np.random.seed(RNG_SEED); torch.manual_seed(RNG_SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(RNG_SEED)

# ---------------------- Device & CuDNN ---------------------- #
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"[INFO] Device: {device}")
if device.type == "cuda":
    print(f"  • GPU: {torch.cuda.get_device_name(0)}")
    cudnn.benchmark = True

# ---------------------- Config ---------------------- #
FORECAST_HORIZON = 1
TRIALS           = 30
EARLY_STOP       = 20
val_window_num_sequences = 504
holdout_base            = 756
seq_len_map             = {1: 756}

HSPACE = {
    "hidden_dim"   : (32, 192),
    "num_layers"   : [1, 2, 3],
    "dropout"      : (0.0, 0.6),
    "learning_rate": (1e-4, 5e-3),
    "batch_size"   : [32, 64, 128],
    "epochs"       : (40, 80),
}

# ---------------------- Model ---------------------- #
class LSTMRegressor(nn.Module):
    def __init__(self, in_dim, hid, layers, out_dim, drop=0.0):
        super().__init__()
        self.lstm = nn.LSTM(in_dim, hid, layers, batch_first=True,
                            dropout=(drop if layers > 1 else 0.0))
        self.drop = nn.Dropout(drop)
        self.norm = nn.LayerNorm(hid)
        self.fc   = nn.Linear(hid, out_dim, bias=False)
    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        return self.fc(self.norm(self.drop(hn[-1])))

# ---------------------- Data Utilities ---------------------- #
def gen_seq(X_df, Y_fold, seq_len, h):
    X_arr = X_df.values.astype(np.float32)
    Y_arr = Y_fold.reindex(X_df.index).values.astype(np.float32)
    idx   = {ts: i for i, ts in enumerate(X_df.index)}
    X_seq, Y_seq = [], []
    for t in Y_fold.index:
        ti = idx.get(t)
        if ti is None:
            continue
        end = ti - h + 1; start = end - seq_len
        if start < 0 or end > len(X_arr):
            continue
        win = X_arr[start:end]
        if win.shape[0] != seq_len or np.isnan(win).any() or not np.isfinite(Y_arr[ti]).all():
            continue
        X_seq.append(win); Y_seq.append(Y_arr[ti])
    return np.asarray(X_seq, np.float32), np.asarray(Y_seq, np.float32)


def std_fold(Xtr, Xva):
    sc = StandardScaler()
    return (
        pd.DataFrame(sc.fit_transform(Xtr), index=Xtr.index, columns=Xtr.columns),
        pd.DataFrame(sc.transform(Xva),     index=Xva.index, columns=Xva.columns),
    )

def get_forecast_errors_only(file_path):
    df = pd.read_csv(file_path, parse_dates=["eval_date"])
    df["forecast_yields"] = df["forecast_yields"].apply(ast.literal_eval)
    df["true_yields"] = df["true_yields"].apply(ast.literal_eval)

    error_matrix = df.apply(lambda row: [f - t for f, t in zip(row["forecast_yields"], row["true_yields"])], axis=1)
    error_df = pd.DataFrame(error_matrix.tolist(), index=df["eval_date"])
    error_df.columns = ["error_3m", "error_6m", "error_1y", "error_3y", "error_5y", "error_10y"]

    return error_df

# ---------------------- CV ---------------------- #
def expanding_folds(X, Y, h):
    seq_len = seq_len_map[h]; total = len(X); min_train = seq_len + h
    folds, i = [], min_train
    while i + val_window_num_sequences + holdout_base <= total:
        vs, ve = i, i + val_window_num_sequences
        folds.append({
            "X_tr": X.iloc[:i].copy(),
            "Y_tr": Y.iloc[:i].copy(),
            "X_va": X.iloc[vs - seq_len - h + 1: ve - h].copy(),
            "Y_va": Y.iloc[vs:ve].copy(),
            "seq_len": seq_len,
        })
        i += val_window_num_sequences
    return folds

# ---------------------- Optuna Objective ---------------------- #
def objective(trial, folds):
    p = {
        "hid": trial.suggest_int("hidden_dim", *HSPACE["hidden_dim"]),
        "lay": trial.suggest_categorical("num_layers", HSPACE["num_layers"]),
        "drp": trial.suggest_float("dropout", *HSPACE["dropout"]),
        "lr" : trial.suggest_float("learning_rate", *HSPACE["learning_rate"], log=True),
        "bs" : trial.suggest_categorical("batch_size", HSPACE["batch_size"]),
        "ep" : trial.suggest_int("epochs", *HSPACE["epochs"]),
    }
    scaler = amp.GradScaler(); mse_fold = []

    for f_idx, f in enumerate(tqdm(folds, desc="Folds", leave=False)):
        Xtr_s, Xva_s = std_fold(f["X_tr"], f["X_va"])
        Xtr, Ytr = gen_seq(Xtr_s, f["Y_tr"], f["seq_len"], FORECAST_HORIZON)
        Xva, Yva = gen_seq(Xva_s, f["Y_va"], f["seq_len"], FORECAST_HORIZON)
        if len(Xtr)==0 or len(Xva)==0:
            continue

        model = LSTMRegressor(Xtr.shape[2], p["hid"], p["lay"], Ytr.shape[1], p["drp"]).to(device)
        opt   = torch.optim.Adam(model.parameters(), lr=p["lr"])
        best, pat = np.inf, 0; report_every = max(1, p["ep"]//3)

        tr_loader = DataLoader(TensorDataset(torch.tensor(Xtr), torch.tensor(Ytr)), batch_size=p["bs"], shuffle=True, pin_memory=True)
        va_loader = DataLoader(TensorDataset(torch.tensor(Xva), torch.tensor(Yva)), batch_size=p["bs"], pin_memory=True)

        for epoch in range(p["ep"]):
            model.train()
            for xb, yb in tr_loader:
                xb, yb = xb.to(device, non_blocking=True), yb.to(device, non_blocking=True)
                opt.zero_grad(set_to_none=True)
                with amp.autocast(device_type='cuda'):
                    loss = nn.functional.mse_loss(model(xb), yb)
                scaler.scale(loss).backward(); scaler.step(opt); scaler.update()

            model.eval(); preds, gts = [], []
            with torch.no_grad(), amp.autocast(device_type='cuda'):
                for xb, yb in va_loader:
                    preds.append(model(xb.to(device, non_blocking=True)).cpu()); gts.append(yb)
            mse = mean_squared_error(torch.cat(gts).numpy(), torch.cat(preds).numpy())

            global_step = f_idx * p["ep"] + epoch
            if epoch % report_every == 0:
                trial.report(mse, global_step)
                if trial.should_prune():
                    raise optuna.TrialPruned()

            if mse + 1e-6 < best:
                best, pat = mse, 0
            else:
                pat += 1
                if pat >= EARLY_STOP:
                    break
        tqdm.write(f"Fold {f_idx+1} best MSE = {best:.4f}")
        mse_fold.append(best)
    return np.mean(mse_fold) if mse_fold else np.inf

# ---------------------- Main ---------------------- #
if __name__ == "__main__":
    X = pd.read_csv("X_df_filtered_shap.csv", index_col=0, parse_dates=True)
    Y = get_forecast_errors_only(r"C:\Users\azorb\PycharmProjects\Predicting the Yield Curve\Model Fit\Output\DNS_Full_Forecast\dns_kf_total_h5_full_dataset.csv")

    X = X.join(Y, how="left")
    X.dropna(inplace=True)

    seq_h_gap = seq_len_map[FORECAST_HORIZON] + FORECAST_HORIZON
    min_y_date = X.index[seq_h_gap - 1]

    Y = Y[Y.index >= min_y_date]  # Trim Y to prevent misaligned targets
    folds = expanding_folds(X, Y, FORECAST_HORIZON)
    print(f"Generated {len(folds)} folds\n")    

    study = optuna.create_study(
        direction="minimize",
        sampler=TPESampler(seed=RNG_SEED),
        pruner=MedianPruner(8, 15)
    )

    t0 = time.time()
    study.optimize(
        lambda tr: objective(tr, folds),
        n_trials=TRIALS,
        n_jobs=1,
        show_progress_bar=True
    )
    dur = time.time() - t0

    print("=== Best Trial ===")
    print(f"MSE   : {study.best_value:.6f}")
    print(f"Params: {study.best_trial.params}")
    print(f"Total run time: {dur:.1f} s")

[INFO] Device: cpu


[I 2025-05-15 20:13:49,106] A new study created in memory with name: no-name-ae5451f0-b695-471c-8a7d-617d3bd3248d


Generated 6 folds




Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
Folds:  50%|█████     | 3/6 [00:00<00:00, 15.97it/s][A
Folds:  83%|████████▎ | 5/6 [00:00<00:00,  6.51it/s][A
Folds: 100%|██████████| 6/6 [00:01<00:00,  4.80it/s][A


[I 2025-05-15 20:13:50,240] Trial 0 finished with value: inf and parameters: {'hidden_dim': 92, 'num_layers': 1, 'dropout': 0.0936111842654619, 'learning_rate': 0.00018408992080552527, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: inf.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
Folds:  33%|███▎      | 2/6 [00:00<00:00,  5.95it/s][A
Folds:  50%|█████     | 3/6 [00:00<00:00,  6.75it/s][A
Folds:  67%|██████▋   | 4/6 [00:00<00:00,  6.43it/s][A
Folds:  83%|████████▎ | 5/6 [00:00<00:00,  5.33it/s][A
Folds: 100%|██████████| 6/6 [00:01<00:00,  4.19it/s][A


[I 2025-05-15 20:13:51,525] Trial 1 finished with value: inf and parameters: {'hidden_dim': 35, 'num_layers': 1, 'dropout': 0.10909498032426036, 'learning_rate': 0.0002049268011541737, 'batch_size': 64, 'epochs': 51}. Best is trial 0 with value: inf.



Folds:   0%|          | 0/6 [00:00<?, ?it/s][A
Best trial: 0. Best value: inf:   7%|▋         | 2/30 [00:02<00:35,  1.26s/it]


[W 2025-05-15 20:13:51,622] Trial 2 failed with parameters: {'hidden_dim': 130, 'num_layers': 3, 'dropout': 0.27364199053022153, 'learning_rate': 0.0021576967455896826, 'batch_size': 128, 'epochs': 41} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "C:\Users\azorb\PycharmProjects\Predicting the Yield Curve\.venv\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\azorb\AppData\Local\Temp\ipykernel_335664\1775920448.py", line 205, in <lambda>
    lambda tr: objective(tr, folds),
               ^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azorb\AppData\Local\Temp\ipykernel_335664\1775920448.py", line 139, in objective
    Xtr, Ytr = gen_seq(Xtr_s, f["Y_tr"], f["seq_len"], FORECAST_HORIZON)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azorb\AppData\Local\Temp\ipykernel_335664\1775920448.py", line 88, in

KeyboardInterrupt: 

In [16]:
# ---------------------- Debug Folds ---------------------- #
def debug_folds(folds, forecast_horizon=1):
    print(f"[DEBUG] Total folds generated: {len(folds)}\n")

    for i, f in enumerate(folds):
        print(f"\n--- Fold {i+1} ---")

        # Print shapes
        print(f"Train X: {f['X_tr'].shape}, Y: {f['Y_tr'].shape}")
        print(f"Valid X: {f['X_va'].shape}, Y: {f['Y_va'].shape}")

        # Show date ranges
        print(f"Train X range: {f['X_tr'].index[0].date()} → {f['X_tr'].index[-1].date()}")
        print(f"Train Y range: {f['Y_tr'].index[0].date()} → {f['Y_tr'].index[-1].date()}")
        print(f"Valid X range: {f['X_va'].index[0].date()} → {f['X_va'].index[-1].date()}")
        print(f"Valid Y range: {f['Y_va'].index[0].date()} → {f['Y_va'].index[-1].date()}")

        # Check alignment
        expected_end_x = f['Y_va'].index[0] - pd.Timedelta(days=forecast_horizon)
        actual_end_x   = f['X_va'].index[-1]
        print(f"Expected X_va end before Y_va start: {expected_end_x.date()}")
        print(f"Actual X_va end: {actual_end_x.date()}")

        # Check overlap
        overlap = set(f['X_va'].index).intersection(f['Y_va'].index)
        print(f"Overlap between X_va and Y_va: {len(overlap)} dates")

        if len(overlap) > 0:
            print("⚠️ Overlap detected between X_va and Y_va – check alignment logic.")
        if actual_end_x >= f['Y_va'].index[0]:
            print("❗ X_va may leak into Y_va – check sequence slicing.")

# Call it
debug_folds(folds, forecast_horizon=FORECAST_HORIZON)


[DEBUG] Total folds generated: 6


--- Fold 1 ---
Train X: (757, 56), Y: (757, 6)
Valid X: (1259, 56), Y: (504, 6)
Train X range: 2006-08-25 → 2009-07-21
Train Y range: 2009-07-21 → 2012-06-13
Valid X range: 2006-08-28 → 2011-06-24
Valid Y range: 2012-06-14 → 2014-05-20
Expected X_va end before Y_va start: 2012-06-13
Actual X_va end: 2011-06-24
Overlap between X_va and Y_va: 0 dates

--- Fold 2 ---
Train X: (1261, 56), Y: (1261, 6)
Valid X: (1259, 56), Y: (504, 6)
Train X range: 2006-08-25 → 2011-06-27
Train Y range: 2009-07-21 → 2014-05-20
Valid X range: 2008-08-01 → 2013-05-31
Valid Y range: 2014-05-21 → 2016-04-25
Expected X_va end before Y_va start: 2014-05-20
Actual X_va end: 2013-05-31
Overlap between X_va and Y_va: 0 dates

--- Fold 3 ---
Train X: (1765, 56), Y: (1765, 6)
Valid X: (1259, 56), Y: (504, 6)
Train X range: 2006-08-25 → 2013-06-03
Train Y range: 2009-07-21 → 2016-04-25
Valid X range: 2010-07-09 → 2015-05-07
Valid Y range: 2016-04-26 → 2018-03-30
Expected X_va end bef

In [5]:


# Example usage:
df_with_errors = get_forecast_errors_only(r"C:\Users\azorb\PycharmProjects\Predicting the Yield Curve\Model Fit\Output\DNS_Full_Forecast\dns_kf_total_h5_full_dataset.csv")

In [8]:
df_with_errors

Unnamed: 0_level_0,error_3m,error_6m,error_1y,error_3y,error_5y,error_10y
eval_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2006-08-25,0.049399,-0.041120,-0.007011,0.128507,0.121623,0.093890
2006-08-28,0.067899,-0.056128,-0.027910,0.103968,0.091551,0.060019
2006-08-29,0.100689,-0.036597,-0.013761,0.096428,0.070023,0.046928
2006-08-30,0.121152,-0.016889,0.014671,0.121862,0.114195,0.070162
2006-08-31,0.122509,0.014783,0.036736,0.173692,0.134809,0.088192
...,...,...,...,...,...,...
2025-02-27,0.018743,0.026411,0.132412,0.196475,0.234137,0.218851
2025-02-28,0.001930,0.039014,0.164062,0.236158,0.273256,0.247947
2025-03-03,-0.062340,-0.058485,0.141231,0.241750,0.274899,0.268048
2025-03-04,-0.053563,-0.032764,0.151810,0.210346,0.208558,0.178195


In [3]:
    print(f"MSE   : {study.best_value:.6f}")
    print(f"Params: {study.best_trial.params}")
    print(f"Total run time: {dur:.1f} s")

MSE   : 0.018671
Params: {'hidden_dim': 156, 'num_layers': 3, 'dropout': 0.4241144063085703, 'learning_rate': 0.001732053535845956, 'batch_size': 32, 'epochs': 44}
Total run time: 6340.3 s


In [10]:
if __name__ == "__main__":

    FORECAST_HORIZON = 1
    BEST_PARAMS = {
        'hidden_dim': 156,
        'num_layers': 3,
        'dropout': 0.4241144063085703,
        'learning_rate': 0.001732053535845956,
        'batch_size': 32,
        'epochs': 44
    }
    SEQUENCE_LENGTH = 1512
    
    print("[INFO] Running final model evaluation on test set")

    X = pd.read_csv("X_df_filtered_shap.csv", index_col=0, parse_dates=True)
    Y = pd.read_csv("Y_df_change_1.csv", index_col=0, parse_dates=True)

    TEST_SIZE = 756             # 3-year hold-out
    seq_buffer = SEQUENCE_LENGTH + FORECAST_HORIZON - 1
    
    X_train = X.iloc[:-TEST_SIZE]
    Y_train = Y.iloc[:-TEST_SIZE]
    
    X_test_start = -TEST_SIZE - seq_buffer   # keep enough context for sequences
    X_test = X.iloc[X_test_start:]
    Y_test = Y.iloc[-TEST_SIZE:]

    sc = StandardScaler()
    X_train_std = pd.DataFrame(sc.fit_transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test_std  = pd.DataFrame(sc.transform(X_test),     index=X_test.index,  columns=X_test.columns)

    X_tr_seq, Y_tr_seq = gen_seq(X_train_std, Y_train, SEQUENCE_LENGTH, FORECAST_HORIZON)
    X_te_seq, Y_te_seq = gen_seq(X_test_std,  Y_test,  SEQUENCE_LENGTH, FORECAST_HORIZON)

    if len(X_te_seq) == 0 or len(Y_te_seq) == 0:
        print("[ERROR] No valid test sequences generated. Check alignment or sequence length.")
        sys.exit(1)
    else:
        print("[DEBUG] It's working")

    model = LSTMRegressor(
        in_dim=X_tr_seq.shape[2],
        hid=BEST_PARAMS['hidden_dim'],
        layers=BEST_PARAMS['num_layers'],
        out_dim=Y_tr_seq.shape[1],
        drop=BEST_PARAMS['dropout']
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=BEST_PARAMS['learning_rate'])
    scaler = amp.GradScaler()

    train_loader = DataLoader(TensorDataset(torch.tensor(X_tr_seq), torch.tensor(Y_tr_seq)),
                              batch_size=BEST_PARAMS['batch_size'], shuffle=True, pin_memory=True)

    model.train()
    for epoch in range(BEST_PARAMS['epochs']):
        for xb, yb in train_loader:
            xb, yb = xb.to(device, non_blocking=True), yb.to(device, non_blocking=True)
            optimizer.zero_grad(set_to_none=True)
            with amp.autocast(device_type="cuda"):
                loss = nn.functional.mse_loss(model(xb), yb)
            scaler.scale(loss).backward(); scaler.step(optimizer); scaler.update()

    model.eval(); preds, gts = [], []
    test_loader = DataLoader(TensorDataset(torch.tensor(X_te_seq), torch.tensor(Y_te_seq)),
                             batch_size=BEST_PARAMS['batch_size'], pin_memory=True)

    with torch.no_grad(), amp.autocast(device_type='cuda'):
        for xb, yb in test_loader:
            xb = xb.to(device, non_blocking=True)
            preds.append(model(xb).cpu())
            gts.append(yb)

    if len(preds) == 0 or len(gts) == 0:
        print("[ERROR] No predictions generated. Check test data preprocessing.")
        sys.exit(1)

    y_true = torch.cat(gts).numpy()
    y_pred = torch.cat(preds).numpy()
    mse = mean_squared_error(y_true, y_pred)
    print(f"\n[RESULT] Final Test Set MSE: {mse:.6f}")

[INFO] Running final model evaluation on test set
[DEBUG] It's working

[RESULT] Final Test Set MSE: 0.000339


In [18]:
# ---------------------- Save Multi-Output Results ---------------------- #
maturity_labels = [f"m{i+1}" for i in range(y_true.shape[1])]  # e.g., m1, m2, ..., m6

# Create column-wise dict
results_dict = {
    "date": Y_test.index[-len(y_true):]  # ensure alignment
}

# Add true and predicted values for each maturity
for i, label in enumerate(maturity_labels):
    results_dict[f"{label}_true"] = y_true[:, i]
    results_dict[f"{label}_pred"] = y_pred[:, i]

# Convert to DataFrame
results_df = pd.DataFrame(results_dict).set_index("date")

# Save
results_df.to_csv("final_test_predictions_multioutput.csv")
print("[INFO] Multi-output predictions saved to 'final_test_predictions_multioutput.csv'")


[INFO] Multi-output predictions saved to 'final_test_predictions_multioutput.csv'


In [15]:
# ---------------------- Save Results ---------------------- #
import os

Y_test.index[-len(y_true):]

DatetimeIndex(['2022-04-13', '2022-04-14', '2022-04-15', '2022-04-18',
               '2022-04-19', '2022-04-20', '2022-04-21', '2022-04-22',
               '2022-04-25', '2022-04-26',
               ...
               '2025-02-20', '2025-02-21', '2025-02-24', '2025-02-25',
               '2025-02-26', '2025-02-27', '2025-02-28', '2025-03-03',
               '2025-03-04', '2025-03-05'],
              dtype='datetime64[ns]', length=756, freq=None)

In [17]:
len(y_true)

756