In [19]:
pip install pandas numpy scikit-learn tqdm torch geneticalgorithm matplotlib optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading sqlalchemy-2.0.40-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading mako-1.3.10-py3-none-any.whl.metadata (2.9 kB)
Collecting greenlet>=1 (from sqlalchemy>=1.4.2->optuna)
  Downloading greenlet-3.2.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (4.1 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m37.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/

In [20]:
# ---------------------- Importing Packages ---------------------- #
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score, roc_auc_score
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import optuna
import random

# ---------------------- Reproducibility ---------------------- #
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed()

# Select CPU or GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---------------------- Fold Configuration Settings ---------------------- #
sequence_length = 252
val_window_num_sequences = 252
holdout_base = 756
forecast_horizons = [1, 5, 21, 63, 252]
num_epochs = 50
patience = 10

# ---------------------- LSTM Classifier ---------------------- #
class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, dropout=0.0):
        super().__init__()
        effective_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=effective_dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        return self.fc(hn[-1])

# ---------------------- Helper: Sequence Creation ---------------------- #
def create_sequences(X_df, Y_df, seq_len, forecast_horizon):
    X_df = X_df.dropna()
    Y_df = Y_df.dropna()
    common_idx = X_df.index.intersection(Y_df.index)
    X_df = X_df.loc[common_idx]
    Y_df = Y_df.loc[common_idx]

    X_arr = X_df.values.astype(np.float32)
    Y_arr = Y_df.values.astype(np.float32).reshape(-1, 1)

    X_seq, Y_seq = [], []
    max_i = len(X_arr) - seq_len - forecast_horizon + 1
    for i in range(max_i):
        X_seq.append(X_arr[i:i + seq_len])
        Y_seq.append(Y_arr[i + seq_len + forecast_horizon - 1])

    X_seq = np.array(X_seq)
    Y_seq = np.array(Y_seq)
    assert X_seq.shape[0] == Y_seq.shape[0], f"Mismatch: X={X_seq.shape}, Y={Y_seq.shape}"
    return X_seq, Y_seq

# ---------------------- Feature Shifting ---------------------- #
def shift_X_by_horizon(X_df, horizon):
    return X_df.shift(horizon).dropna()

# ---------------------- Standardization ---------------------- #
def standardize_fold(X_train, X_val):
    scaler = StandardScaler()
    X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), index=X_train.index, columns=X_train.columns)
    X_val_scaled = pd.DataFrame(scaler.transform(X_val), index=X_val.index, columns=X_val.columns)
    return X_train_scaled, X_val_scaled

# ---------------------- Generate Expanding Folds ---------------------- #
def get_expanding_folds(X_df, Y_df, forecast_horizon, sequence_length, val_window_num_sequences, holdout_base):
    assert X_df.index.equals(Y_df.index)
    total_days = len(X_df)
    val_window = sequence_length + forecast_horizon + val_window_num_sequences - 1
    min_train_window = sequence_length + forecast_horizon + val_window_num_sequences - 1

    fold_starts = []
    i = min_train_window
    while i + val_window + holdout_base <= total_days:
        fold_starts.append(i)
        i += val_window

    folds = []
    for fold_start in fold_starts:
        train_end = fold_start
        val_start = fold_start
        val_end = fold_start + val_window

        X_train = X_df.iloc[:train_end].copy()
        Y_train = Y_df.iloc[:train_end].copy()
        X_val = X_df.iloc[val_start:val_end].copy()
        Y_val = Y_df.iloc[val_start:val_end].copy()

        folds.append({
            "X_train": X_train,
            "Y_train": Y_train,
            "X_val": X_val,
            "Y_val": Y_val,
            "fold_start": fold_start,
            "fold_end": val_end,
        })

    last_val_end = folds[-1]['fold_end']
    holdout_days = total_days - last_val_end
    print(f"[INFO] Generated {len(folds)} folds for forecast horizon {forecast_horizon}")
    return folds, last_val_end, holdout_days

# ---------------------- Optuna Objective ---------------------- #
def optuna_objective(trial, folds, forecast_horizon):
    hidden_dim = trial.suggest_int("hidden_dim", 32, 128)
    num_layers = trial.suggest_int("num_layers", 1, 2)
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])

    total_f1 = 0
    for fold in folds:
        X_train_std, X_val_std = standardize_fold(fold["X_train"], fold["X_val"])
        X_train_seq, Y_train_seq = create_sequences(X_train_std, fold["Y_train"], sequence_length, forecast_horizon)
        X_val_seq, Y_val_seq = create_sequences(X_val_std, fold["Y_val"], sequence_length, forecast_horizon)

        X_train_tensor = torch.tensor(X_train_seq, dtype=torch.float32).to(device)
        Y_train_tensor = torch.tensor(Y_train_seq, dtype=torch.float32).view(-1, 1).to(device)
        X_val_tensor = torch.tensor(X_val_seq, dtype=torch.float32).to(device)
        Y_val_tensor = torch.tensor(Y_val_seq, dtype=torch.float32).view(-1, 1).to(device)

        model = LSTMClassifier(X_train_seq.shape[2], hidden_dim, num_layers, 1, dropout).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        pos_weight_val = (Y_train_tensor == 0).sum() / (Y_train_tensor == 1).sum()
        criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight_val)

        train_loader = DataLoader(TensorDataset(X_train_tensor, Y_train_tensor), batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(TensorDataset(X_val_tensor, Y_val_tensor), batch_size=batch_size)

        best_f1 = 0
        patience_counter = 0
        for epoch in range(num_epochs):
            model.train()
            for xb, yb in train_loader:
                optimizer.zero_grad()
                loss = criterion(model(xb), yb)
                loss.backward()
                optimizer.step()

            model.eval()
            preds = []
            with torch.no_grad():
                for xb, _ in val_loader:
                    preds.append(torch.sigmoid(model(xb)))
            pred_tensor = torch.cat(preds, dim=0).squeeze()
            pred_class = (pred_tensor > 0.5).int()
            f1 = f1_score(Y_val_tensor.cpu(), pred_class.cpu())

            if f1 > best_f1:
                best_f1 = f1
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    break

        total_f1 += best_f1

    return -total_f1 / len(folds)  # Minimize negative F1

# ---------------------- Run Optuna for a Single Horizon ---------------------- #
def run_optuna_optimization(folds, forecast_horizon, n_trials=30):
    study = optuna.create_study(direction="minimize")
    study.optimize(lambda trial: optuna_objective(trial, folds, forecast_horizon), n_trials=n_trials)
    return study.best_params, -study.best_value

# ---------------------- Run Optimization for All Horizons ---------------------- #
def run_for_all_horizons(X_df, Y_df_dict):
    all_results = {}
    for h in forecast_horizons:
        print(f"\n=== Forecast Horizon: {h} ===")
        Y_df = Y_df_dict[h]
        X_shifted = shift_X_by_horizon(X_df, h)
        Y_aligned = Y_df.loc[X_shifted.index]
        X_final, Y_final = X_shifted, Y_aligned
        folds, _, _ = get_expanding_folds(X_final, Y_final, h, sequence_length, val_window_num_sequences, holdout_base)
        best_params, best_f1 = run_optuna_optimization(folds, h)
        all_results[h] = {"best_params": best_params, "best_f1": best_f1}
        print(f"[RESULT] Horizon {h}: Best Params = {best_params}, Best F1 = {best_f1:.4f}")
    return all_results

In [21]:
# Load data
X_df = pd.read_csv(r"X_df.csv", index_col=0, parse_dates=True)
Y_df_dict = {
    1: pd.read_csv(r"Y_df_change_dir_1.csv", index_col=0, parse_dates=True),
    5: pd.read_csv(r"Y_df_change_dir_5.csv", index_col=0, parse_dates=True),
    21: pd.read_csv(r"Y_df_change_dir_21.csv", index_col=0, parse_dates=True),
    63: pd.read_csv(r"Y_df_change_dir_63.csv", index_col=0, parse_dates=True),
    252: pd.read_csv(r"Y_df_change_dir_252.csv", index_col=0, parse_dates=True),
}

# Run optimization
results = run_for_all_horizons(X_df, Y_df_dict)

[I 2025-05-11 11:28:07,326] A new study created in memory with name: no-name-af171a09-e814-4e7e-be0d-3b412b82f251



=== Forecast Horizon: 1 ===
[INFO] Generated 8 folds for forecast horizon 1


[I 2025-05-11 11:28:36,273] Trial 0 finished with value: -0.4247086622213306 and parameters: {'hidden_dim': 98, 'num_layers': 1, 'dropout': 0.2276490796352143, 'learning_rate': 0.0004122960660443855, 'batch_size': 128}. Best is trial 0 with value: -0.4247086622213306.
[I 2025-05-11 11:29:32,680] Trial 1 finished with value: -0.4863466531338707 and parameters: {'hidden_dim': 126, 'num_layers': 2, 'dropout': 0.2895212610734162, 'learning_rate': 0.0035656209620512943, 'batch_size': 16}. Best is trial 1 with value: -0.4863466531338707.
[I 2025-05-11 11:29:59,235] Trial 2 finished with value: -0.47561975036998483 and parameters: {'hidden_dim': 69, 'num_layers': 2, 'dropout': 0.32774522966985487, 'learning_rate': 0.0002023610070712248, 'batch_size': 128}. Best is trial 1 with value: -0.4863466531338707.
[I 2025-05-11 11:30:30,935] Trial 3 finished with value: -0.45252246447470207 and parameters: {'hidden_dim': 70, 'num_layers': 2, 'dropout': 0.172800444770923, 'learning_rate': 0.000684207786

[RESULT] Horizon 1: Best Params = {'hidden_dim': 84, 'num_layers': 2, 'dropout': 0.45710576033956674, 'learning_rate': 0.004688535399076738, 'batch_size': 16}, Best F1 = 0.5549

=== Forecast Horizon: 5 ===
[INFO] Generated 8 folds for forecast horizon 5


[I 2025-05-11 11:51:53,612] Trial 0 finished with value: -0.32030847099713206 and parameters: {'hidden_dim': 69, 'num_layers': 2, 'dropout': 0.3148187575916974, 'learning_rate': 0.0004258480821630382, 'batch_size': 16}. Best is trial 0 with value: -0.32030847099713206.
[I 2025-05-11 11:53:18,725] Trial 1 finished with value: -0.5108031206681662 and parameters: {'hidden_dim': 110, 'num_layers': 2, 'dropout': 0.4595409993291274, 'learning_rate': 0.0013881241700925003, 'batch_size': 16}. Best is trial 1 with value: -0.5108031206681662.
[I 2025-05-11 11:54:06,179] Trial 2 finished with value: -0.4090906835667759 and parameters: {'hidden_dim': 54, 'num_layers': 1, 'dropout': 0.19127230712937734, 'learning_rate': 0.0027655499093746216, 'batch_size': 16}. Best is trial 1 with value: -0.5108031206681662.
[I 2025-05-11 11:54:52,791] Trial 3 finished with value: -0.40551460319060095 and parameters: {'hidden_dim': 32, 'num_layers': 2, 'dropout': 0.4940906514754819, 'learning_rate': 0.001408328538

[RESULT] Horizon 5: Best Params = {'hidden_dim': 92, 'num_layers': 1, 'dropout': 0.21557525139220088, 'learning_rate': 0.0004883533160474792, 'batch_size': 16}, Best F1 = 0.5483

=== Forecast Horizon: 21 ===
[INFO] Generated 8 folds for forecast horizon 21


[I 2025-05-11 12:16:11,188] Trial 0 finished with value: -0.44044651986851063 and parameters: {'hidden_dim': 91, 'num_layers': 1, 'dropout': 0.21806228453940268, 'learning_rate': 0.007851756185983515, 'batch_size': 16}. Best is trial 0 with value: -0.44044651986851063.
[I 2025-05-11 12:16:53,270] Trial 1 finished with value: -0.43664169096580063 and parameters: {'hidden_dim': 52, 'num_layers': 1, 'dropout': 0.16366168509839463, 'learning_rate': 0.0004151428371325672, 'batch_size': 16}. Best is trial 0 with value: -0.44044651986851063.
[I 2025-05-11 12:17:26,153] Trial 2 finished with value: -0.5485618193240784 and parameters: {'hidden_dim': 79, 'num_layers': 2, 'dropout': 0.41867948098018837, 'learning_rate': 0.00027072268062802376, 'batch_size': 64}. Best is trial 2 with value: -0.5485618193240784.
[I 2025-05-11 12:18:27,819] Trial 3 finished with value: -0.5671924723740861 and parameters: {'hidden_dim': 96, 'num_layers': 2, 'dropout': 0.45793954832430006, 'learning_rate': 0.000296655

[RESULT] Horizon 21: Best Params = {'hidden_dim': 87, 'num_layers': 2, 'dropout': 0.3618853403120836, 'learning_rate': 0.0005634761029617077, 'batch_size': 64}, Best F1 = 0.5954

=== Forecast Horizon: 63 ===
[INFO] Generated 7 folds for forecast horizon 63


[I 2025-05-11 12:29:56,397] Trial 0 finished with value: -0.4308276583330954 and parameters: {'hidden_dim': 93, 'num_layers': 2, 'dropout': 0.4828864432826748, 'learning_rate': 0.0003728019759414556, 'batch_size': 16}. Best is trial 0 with value: -0.4308276583330954.
[I 2025-05-11 12:30:25,684] Trial 1 finished with value: -0.48518948833402314 and parameters: {'hidden_dim': 58, 'num_layers': 1, 'dropout': 0.2869298427868413, 'learning_rate': 0.00203125102369817, 'batch_size': 16}. Best is trial 1 with value: -0.48518948833402314.
[I 2025-05-11 12:30:40,204] Trial 2 finished with value: -0.4362359558762244 and parameters: {'hidden_dim': 118, 'num_layers': 2, 'dropout': 0.3337429431787884, 'learning_rate': 0.005431819316835627, 'batch_size': 64}. Best is trial 1 with value: -0.48518948833402314.
[I 2025-05-11 12:30:49,395] Trial 3 finished with value: -0.5068014503068874 and parameters: {'hidden_dim': 78, 'num_layers': 2, 'dropout': 0.22938497689400014, 'learning_rate': 0.000991809578944

[RESULT] Horizon 63: Best Params = {'hidden_dim': 109, 'num_layers': 1, 'dropout': 0.39267957978771995, 'learning_rate': 0.00018813096222396772, 'batch_size': 128}, Best F1 = 0.5420

=== Forecast Horizon: 252 ===
[INFO] Generated 5 folds for forecast horizon 252


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-05-11 12:40:41,091] Trial 0 finished with value: -0.4166321445615794 and parameters: {'hidden_dim': 125, 'num_layers': 2, 'dropout': 0.3018016204054782, 'learning_rate': 0.00063778304976431, 'batch_size': 128}. Best is trial 0 with value: -0.4166321445615794.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  

[RESULT] Horizon 252: Best Params = {'hidden_dim': 125, 'num_layers': 2, 'dropout': 0.3018016204054782, 'learning_rate': 0.00063778304976431, 'batch_size': 128}, Best F1 = 0.4166


In [None]:
# ---------------------- Fold Debugging Script ---------------------- #
# Load feature data
X_df = pd.read_csv(r'C:\Users\azorb\PycharmProjects\Predicting the Yield Curve\Data Processing\Output\Independent\X_df.csv', index_col=0, parse_dates=True)
X_df.index = pd.to_datetime(X_df.index)

# Configs
sequence_length = 252
holdout_base = 756
forecast_horizons = [1, 5, 21, 63, 252]

# Run diagnostics for each forecast horizon
for h in forecast_horizons:
    print(f"\n=== Forecast Horizon: {h} ===")

    # Load directional labels for the current horizon
    Y_df = pd.read_csv(fr'C:\Users\azorb\PycharmProjects\Predicting the Yield Curve\Data Processing\Output\Dependent\Classification\Y_df_change_dir_{h}.csv', index_col=0, parse_dates=True)
    Y_df.index = pd.to_datetime(Y_df.index)

    # Shift and align
    X_shifted = shift_X_by_horizon(X_df, h)
    common_idx = X_shifted.index.intersection(Y_df.index)
    X_aligned = X_shifted.loc[common_idx]
    Y_aligned = Y_df.loc[common_idx]

    assert X_aligned.index.equals(Y_aligned.index), "Index mismatch after aligning!"

    # Generate folds
    val_window = val_window = sequence_length + h + 252 - 1  # or 1008 + forecast_horizon
    folds, last_val_end, holdout_days = get_expanding_folds(X_aligned, Y_aligned, h, sequence_length, val_window, holdout_base)

    # First fold shapes
    X_train, Y_train = folds[0]["X_train"], folds[0]["Y_train"]
    X_val, Y_val = folds[0]["X_val"], folds[0]["Y_val"]
    print(f"[DEBUG] First fold train X: {X_train.shape}, Y: {Y_train.shape}")
    print(f"[DEBUG] First fold val X: {X_val.shape}, Y: {Y_val.shape}")

    # Standardize
    X_train_std, X_val_std = standardize_fold(X_train, X_val)

    # Sequences
    X_train_seq, Y_train_seq = create_sequences(X_train_std, Y_train, sequence_length, h)
    X_val_seq, Y_val_seq = create_sequences(X_val_std, Y_val, sequence_length, h)

    print(f"[DEBUG] Train sequences X: {X_train_seq.shape}, Y: {Y_train_seq.shape}")
    print(f"[DEBUG] Val sequences X: {X_val_seq.shape}, Y: {Y_val_seq.shape}")

    # Check correspondence of sequence and target alignment
    if len(X_train_seq) > 0:
        print(f"[INFO] First target index: {Y_train.index[sequence_length + h - 1]}")
        print(f"[INFO] Expected Y index from X: {X_train.index[0]} to {X_train.index[sequence_length - 1]}")

In [22]:
results

{1: {'best_params': {'hidden_dim': 84,
   'num_layers': 2,
   'dropout': 0.45710576033956674,
   'learning_rate': 0.004688535399076738,
   'batch_size': 16},
  'best_f1': 0.5548958508540118},
 5: {'best_params': {'hidden_dim': 92,
   'num_layers': 1,
   'dropout': 0.21557525139220088,
   'learning_rate': 0.0004883533160474792,
   'batch_size': 16},
  'best_f1': 0.5482964955974565},
 21: {'best_params': {'hidden_dim': 87,
   'num_layers': 2,
   'dropout': 0.3618853403120836,
   'learning_rate': 0.0005634761029617077,
   'batch_size': 64},
  'best_f1': 0.5953609544051803},
 63: {'best_params': {'hidden_dim': 109,
   'num_layers': 1,
   'dropout': 0.39267957978771995,
   'learning_rate': 0.00018813096222396772,
   'batch_size': 128},
  'best_f1': 0.5419828679402623},
 252: {'best_params': {'hidden_dim': 125,
   'num_layers': 2,
   'dropout': 0.3018016204054782,
   'learning_rate': 0.00063778304976431,
   'batch_size': 128},
  'best_f1': 0.4166321445615794}}

In [27]:
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

def evaluate_on_holdout(X_df, Y_df_dict, best_params_dict):
    holdout_results = {}
    for h in forecast_horizons:
        print(f"\n[TEST] Forecast Horizon: {h}")
        params = best_params_dict[h]['best_params']
        hidden_dim = int(params['hidden_dim'])
        num_layers = int(params['num_layers'])
        dropout = float(params['dropout'])
        learning_rate = float(params['learning_rate'])
        batch_size = int(params['batch_size'])

        Y_df = Y_df_dict[h]
        X_shifted = shift_X_by_horizon(X_df, h)
        Y_aligned = Y_df.loc[X_shifted.index]
        X_final, Y_final = X_shifted, Y_aligned

        folds, last_val_end, _ = get_expanding_folds(X_final, Y_final, h, sequence_length, val_window_num_sequences, holdout_base)
        X_train, Y_train = X_final.iloc[:last_val_end], Y_final.iloc[:last_val_end]
        X_test, Y_test = X_final.iloc[last_val_end:], Y_final.iloc[last_val_end:]

        print(f"[INFO] Holdout Label Distribution (0s/1s): {np.bincount(Y_test.values.astype(int).flatten())}")

        scaler = StandardScaler()
        X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), index=X_train.index, columns=X_train.columns)
        X_test_scaled = pd.DataFrame(scaler.transform(X_test), index=X_test.index, columns=X_test.columns)

        X_train_seq, Y_train_seq = create_sequences(X_train_scaled, Y_train, sequence_length, h)
        X_test_seq, Y_test_seq = create_sequences(X_test_scaled, Y_test, sequence_length, h)

        X_train_tensor = torch.tensor(X_train_seq, dtype=torch.float32).to(device)
        Y_train_tensor = torch.tensor(Y_train_seq, dtype=torch.float32).to(device)
        X_test_tensor = torch.tensor(X_test_seq, dtype=torch.float32).to(device)
        Y_test_tensor = torch.tensor(Y_test_seq, dtype=torch.float32).to(device)

        model = LSTMClassifier(X_train_seq.shape[2], hidden_dim, num_layers, Y_train_seq.shape[1], dropout).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        criterion = nn.BCEWithLogitsLoss()

        train_loader = DataLoader(TensorDataset(X_train_tensor, Y_train_tensor), batch_size=batch_size, shuffle=True)

        for epoch in range(num_epochs):
            model.train()
            for xb, yb in train_loader:
                optimizer.zero_grad()
                loss = criterion(model(xb), yb)
                loss.backward()
                optimizer.step()

        model.eval()
        preds = []
        with torch.no_grad():
            for xb in DataLoader(X_test_tensor, batch_size=batch_size):
                preds.append(torch.sigmoid(model(xb)))

        pred_tensor = torch.cat(preds, dim=0).squeeze()
        pred_bin = (pred_tensor > 0.5).int().cpu().numpy()
        y_true = Y_test_tensor.int().cpu().numpy()

        acc = accuracy_score(y_true, pred_bin)
        f1 = f1_score(y_true, pred_bin, average='macro')
        precision = precision_score(y_true, pred_bin, average='macro')
        recall = recall_score(y_true, pred_bin, average='macro')

        holdout_results[h] = {
            "accuracy": acc,
            "f1": f1,
            "precision": precision,
            "recall": recall
        }
        print(f"[RESULT] Horizon {h}: Accuracy = {acc:.4f}, F1 = {f1:.4f}, Precision = {precision:.4f}, Recall = {recall:.4f}")

    return holdout_results

In [28]:
holdout_results = evaluate_on_holdout(X_df, Y_df_dict, results)


[TEST] Forecast Horizon: 1
[INFO] Generated 8 folds for forecast horizon 1
[INFO] Holdout Label Distribution (0s/1s): [3620 2728]
[RESULT] Horizon 1: Accuracy = 0.6365, F1 = 0.4328, Precision = 0.4679, Recall = 0.4908

[TEST] Forecast Horizon: 5
[INFO] Generated 8 folds for forecast horizon 5
[INFO] Holdout Label Distribution (0s/1s): [2856 3252]
[RESULT] Horizon 5: Accuracy = 0.6063, F1 = 0.6063, Precision = 0.6099, Recall = 0.6098

[TEST] Forecast Horizon: 21
[INFO] Generated 8 folds for forecast horizon 21
[INFO] Holdout Label Distribution (0s/1s): [2072 3076]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[RESULT] Horizon 21: Accuracy = 0.9010, F1 = 0.4740, Precision = 0.4505, Recall = 0.5000

[TEST] Forecast Horizon: 63
[INFO] Generated 7 folds for forecast horizon 63
[INFO] Holdout Label Distribution (0s/1s): [1979 4045]
[RESULT] Horizon 63: Accuracy = 0.3101, F1 = 0.2759, Precision = 0.5606, Recall = 0.5159

[TEST] Forecast Horizon: 252
[INFO] Generated 5 folds for forecast horizon 252
[INFO] Holdout Label Distribution (0s/1s): [ 993 3885]
[RESULT] Horizon 252: Accuracy = 0.0839, F1 = 0.0774, Precision = 0.5000, Recall = 0.0419


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
