In [19]:
pip install pandas numpy scikit-learn tqdm torch geneticalgorithm matplotlib optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading sqlalchemy-2.0.40-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading mako-1.3.10-py3-none-any.whl.metadata (2.9 kB)
Collecting greenlet>=1 (from sqlalchemy>=1.4.2->optuna)
  Downloading greenlet-3.2.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (4.1 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m37.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/

In [3]:
# ---------------------- Importing Packages ---------------------- #
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import optuna
import random

# ---------------------- Reproducibility ---------------------- #
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed()

# Select CPU or GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---------------------- Fold Configuration Settings ---------------------- #
val_window_num_sequences = 252
holdout_base = 756
forecast_horizons = [1, 5, 21, 63, 252]
num_epochs = 50
patience = 10

# ---------------------- LSTM Classifier ---------------------- #
class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, dropout=0.0):
        super().__init__()
        effective_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=effective_dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        return self.fc(hn[-1])

# ---------------------- Helper: Sequence Creation ---------------------- #
def create_sequences(X_df, Y_df, seq_len, forecast_horizon):
    X_df = X_df.dropna()
    Y_df = Y_df.dropna()
    common_idx = X_df.index.intersection(Y_df.index)
    X_df = X_df.loc[common_idx]
    Y_df = Y_df.loc[common_idx]

    X_arr = X_df.values.astype(np.float32)
    Y_arr = Y_df.values.astype(np.float32).reshape(-1, 1)

    X_seq, Y_seq = [], []
    max_i = len(X_arr) - seq_len - forecast_horizon + 1
    for i in range(max_i):
        X_seq.append(X_arr[i:i + seq_len])
        Y_seq.append(Y_arr[i + seq_len + forecast_horizon - 1])

    X_seq = np.array(X_seq)
    Y_seq = np.array(Y_seq)
    return X_seq, Y_seq

# ---------------------- Feature Shifting ---------------------- #
def shift_X_by_horizon(X_df, horizon):
    return X_df.shift(horizon).dropna()

# ---------------------- Standardization ---------------------- #
def standardize_fold(X_train, X_val):
    scaler = StandardScaler()
    X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), index=X_train.index, columns=X_train.columns)
    X_val_scaled = pd.DataFrame(scaler.transform(X_val), index=X_val.index, columns=X_val.columns)
    return X_train_scaled, X_val_scaled

# ---------------------- Generate Expanding Folds ---------------------- #
def get_expanding_folds(X_df, Y_df, forecast_horizon, sequence_length, val_window_num_sequences, holdout_base):
    assert X_df.index.equals(Y_df.index)
    total_days = len(X_df)
    val_window = sequence_length + forecast_horizon + val_window_num_sequences - 1
    min_train_window = sequence_length + forecast_horizon + val_window_num_sequences - 1

    fold_starts = []
    i = min_train_window
    while i + val_window + holdout_base <= total_days:
        fold_starts.append(i)
        i += val_window

    folds = []
    for fold_start in fold_starts:
        train_end = fold_start
        val_start = fold_start
        val_end = fold_start + val_window

        X_train = X_df.iloc[:train_end].copy()
        Y_train = Y_df.iloc[:train_end].copy()
        X_val = X_df.iloc[val_start:val_end].copy()
        Y_val = Y_df.iloc[val_start:val_end].copy()

        folds.append({
            "X_train": X_train,
            "Y_train": Y_train,
            "X_val": X_val,
            "Y_val": Y_val,
            "fold_start": fold_start,
            "fold_end": val_end,
        })

    last_val_end = folds[-1]['fold_end']
    holdout_days = total_days - last_val_end
    print(f"[INFO] Generated {len(folds)} folds for forecast horizon {forecast_horizon}")
    return folds, last_val_end, holdout_days

# ---------------------- Optuna Objective (Sequence Only) ---------------------- #
def optuna_objective(trial, X_df, Y_df, forecast_horizon):
    sequence_len = trial.suggest_int("sequence_length", 126, 1512, step=63)

    # Regenerate folds based on sampled sequence_len
    folds, _, _ = get_expanding_folds(X_df, Y_df, forecast_horizon, sequence_len, val_window_num_sequences, holdout_base)

    total_f1 = 0
    for fold in folds:
        X_train_raw, X_val_raw = fold["X_train"], fold["X_val"]
        Y_train_raw, Y_val_raw = fold["Y_train"], fold["Y_val"]
        X_train_std, X_val_std = standardize_fold(X_train_raw, X_val_raw)

        X_train_seq, Y_train_seq = create_sequences(X_train_std, Y_train_raw, sequence_len, forecast_horizon)
        X_val_seq, Y_val_seq = create_sequences(X_val_std, Y_val_raw, sequence_len, forecast_horizon)

        if len(X_train_seq) == 0 or len(X_val_seq) == 0:
            continue

        X_train_tensor = torch.tensor(X_train_seq, dtype=torch.float32).to(device)
        Y_train_tensor = torch.tensor(Y_train_seq, dtype=torch.float32).view(-1, 1).to(device)
        X_val_tensor = torch.tensor(X_val_seq, dtype=torch.float32).to(device)
        Y_val_tensor = torch.tensor(Y_val_seq, dtype=torch.float32).view(-1, 1).to(device)

        model = LSTMClassifier(input_dim=X_train_seq.shape[2], hidden_dim=64, num_layers=1, output_dim=1, dropout=0.2).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        pos_weight_val = (Y_train_tensor == 0).sum() / (Y_train_tensor == 1).sum()
        criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight_val)

        train_loader = DataLoader(TensorDataset(X_train_tensor, Y_train_tensor), batch_size=64, shuffle=True)
        val_loader = DataLoader(TensorDataset(X_val_tensor, Y_val_tensor), batch_size=64)

        best_f1 = 0
        patience_counter = 0
        for epoch in range(num_epochs):
            model.train()
            for xb, yb in train_loader:
                optimizer.zero_grad()
                loss = criterion(model(xb), yb)
                loss.backward()
                optimizer.step()

            model.eval()
            preds = []
            with torch.no_grad():
                for xb, _ in val_loader:
                    preds.append(torch.sigmoid(model(xb)))
            pred_tensor = torch.cat(preds, dim=0).squeeze()
            pred_class = (pred_tensor > 0.5).int()
            f1 = f1_score(Y_val_tensor.cpu(), pred_class.cpu())

            if f1 > best_f1:
                best_f1 = f1
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    break

        total_f1 += best_f1

    return -total_f1 / len(folds)

# ---------------------- Run Optuna ---------------------- #
def run_optuna_optimization(X_df, Y_df, forecast_horizon, n_trials=30):
    study = optuna.create_study(direction="minimize")
    study.optimize(lambda trial: optuna_objective(trial, X_df, Y_df, forecast_horizon), n_trials=n_trials)
    return study.best_params, -study.best_value

# ---------------------- Run All Horizons ---------------------- #
def run_for_all_horizons(X_df, Y_df_dict):
    all_results = {}
    for h in forecast_horizons:
        print(f"\n=== Forecast Horizon: {h} ===")
        Y_df = Y_df_dict[h]
        X_shifted = shift_X_by_horizon(X_df, h)
        Y_aligned = Y_df.loc[X_shifted.index]
        X_final, Y_final = X_shifted, Y_aligned
        best_params, best_f1 = run_optuna_optimization(X_final, Y_final, h)
        all_results[h] = {"best_params": best_params, "best_f1": best_f1}
        print(f"[RESULT] Horizon {h}: Best Params = {best_params}, Best F1 = {best_f1:.4f}")
    return all_results


In [4]:
# Load data
X_df = pd.read_csv(r"X_df_filtered.csv", index_col=0, parse_dates=True)
Y_df_dict = {
    1: pd.read_csv(r"Y_df_change_dir_1.csv", index_col=0, parse_dates=True),
    5: pd.read_csv(r"Y_df_change_dir_5.csv", index_col=0, parse_dates=True),
    21: pd.read_csv(r"Y_df_change_dir_21.csv", index_col=0, parse_dates=True),
    63: pd.read_csv(r"Y_df_change_dir_63.csv", index_col=0, parse_dates=True),
    252: pd.read_csv(r"Y_df_change_dir_252.csv", index_col=0, parse_dates=True),
}

# Run optimization
results = run_for_all_horizons(X_df, Y_df_dict)

[I 2025-05-11 15:13:56,609] A new study created in memory with name: no-name-64794256-b17d-4262-8d63-66f87f1076eb



=== Forecast Horizon: 1 ===
[INFO] Generated 6 folds for forecast horizon 1


[I 2025-05-11 15:14:04,307] Trial 0 finished with value: -0.36302842733773466 and parameters: {'sequence_length': 378}. Best is trial 0 with value: -0.36302842733773466.


[INFO] Generated 2 folds for forecast horizon 1


[I 2025-05-11 15:14:07,000] Trial 1 finished with value: -0.27167630057803466 and parameters: {'sequence_length': 1134}. Best is trial 0 with value: -0.36302842733773466.


[INFO] Generated 11 folds for forecast horizon 1


[I 2025-05-11 15:14:22,044] Trial 2 finished with value: -0.4305042404081049 and parameters: {'sequence_length': 126}. Best is trial 2 with value: -0.4305042404081049.


[INFO] Generated 3 folds for forecast horizon 1


[I 2025-05-11 15:14:27,432] Trial 3 finished with value: -0.5412425345082007 and parameters: {'sequence_length': 819}. Best is trial 3 with value: -0.5412425345082007.


[INFO] Generated 5 folds for forecast horizon 1


[I 2025-05-11 15:14:33,645] Trial 4 finished with value: -0.4376490161029947 and parameters: {'sequence_length': 441}. Best is trial 3 with value: -0.5412425345082007.


[INFO] Generated 4 folds for forecast horizon 1


[I 2025-05-11 15:14:42,691] Trial 5 finished with value: -0.42522855241408297 and parameters: {'sequence_length': 630}. Best is trial 3 with value: -0.5412425345082007.


[INFO] Generated 9 folds for forecast horizon 1


[I 2025-05-11 15:14:55,124] Trial 6 finished with value: -0.46355671420252303 and parameters: {'sequence_length': 189}. Best is trial 3 with value: -0.5412425345082007.


[INFO] Generated 3 folds for forecast horizon 1


[I 2025-05-11 15:15:00,840] Trial 7 finished with value: -0.21766607877718988 and parameters: {'sequence_length': 882}. Best is trial 3 with value: -0.5412425345082007.


[INFO] Generated 2 folds for forecast horizon 1


[I 2025-05-11 15:15:02,917] Trial 8 finished with value: -0.5348341953148767 and parameters: {'sequence_length': 1134}. Best is trial 3 with value: -0.5412425345082007.


[INFO] Generated 6 folds for forecast horizon 1


[I 2025-05-11 15:15:10,295] Trial 9 finished with value: -0.4132200055640946 and parameters: {'sequence_length': 378}. Best is trial 3 with value: -0.5412425345082007.


[INFO] Generated 1 folds for forecast horizon 1


[I 2025-05-11 15:15:10,915] Trial 10 finished with value: -0.5688622754491018 and parameters: {'sequence_length': 1512}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 1 folds for forecast horizon 1


[I 2025-05-11 15:15:11,557] Trial 11 finished with value: -0.5558739255014327 and parameters: {'sequence_length': 1512}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 1 folds for forecast horizon 1


[I 2025-05-11 15:15:12,109] Trial 12 finished with value: -0.5621301775147929 and parameters: {'sequence_length': 1512}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 1 folds for forecast horizon 1


[I 2025-05-11 15:15:12,602] Trial 13 finished with value: -0.5558739255014327 and parameters: {'sequence_length': 1512}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 2 folds for forecast horizon 1


[I 2025-05-11 15:15:15,896] Trial 14 finished with value: -0.3520363681654004 and parameters: {'sequence_length': 1260}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 2 folds for forecast horizon 1


[I 2025-05-11 15:15:19,552] Trial 15 finished with value: -0.4317893637651973 and parameters: {'sequence_length': 1323}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 2 folds for forecast horizon 1


[I 2025-05-11 15:15:21,611] Trial 16 finished with value: -0.40583003952569174 and parameters: {'sequence_length': 1008}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 1 folds for forecast horizon 1


[I 2025-05-11 15:15:22,308] Trial 17 finished with value: -0.15517241379310345 and parameters: {'sequence_length': 1386}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 1 folds for forecast horizon 1


[I 2025-05-11 15:15:23,054] Trial 18 finished with value: 0.0 and parameters: {'sequence_length': 1386}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 2 folds for forecast horizon 1


[I 2025-05-11 15:15:25,110] Trial 19 finished with value: -0.554515050167224 and parameters: {'sequence_length': 1197}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 2 folds for forecast horizon 1


[I 2025-05-11 15:15:27,035] Trial 20 finished with value: -0.5462860310421287 and parameters: {'sequence_length': 1008}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 1 folds for forecast horizon 1


[I 2025-05-11 15:15:28,063] Trial 21 finished with value: -0.5558739255014327 and parameters: {'sequence_length': 1512}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 1 folds for forecast horizon 1


[I 2025-05-11 15:15:28,862] Trial 22 finished with value: -0.5558739255014327 and parameters: {'sequence_length': 1512}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 1 folds for forecast horizon 1


[I 2025-05-11 15:15:29,503] Trial 23 finished with value: -0.041237113402061855 and parameters: {'sequence_length': 1386}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 1 folds for forecast horizon 1


[I 2025-05-11 15:15:30,322] Trial 24 finished with value: -0.5558739255014327 and parameters: {'sequence_length': 1512}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 2 folds for forecast horizon 1


[I 2025-05-11 15:15:32,741] Trial 25 finished with value: -0.453288740245262 and parameters: {'sequence_length': 1323}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 2 folds for forecast horizon 1


[I 2025-05-11 15:15:36,194] Trial 26 finished with value: -0.26099706744868034 and parameters: {'sequence_length': 1008}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 1 folds for forecast horizon 1


[I 2025-05-11 15:15:36,754] Trial 27 finished with value: -0.5 and parameters: {'sequence_length': 1386}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 2 folds for forecast horizon 1


[I 2025-05-11 15:15:39,901] Trial 28 finished with value: -0.5631533101045296 and parameters: {'sequence_length': 1197}. Best is trial 10 with value: -0.5688622754491018.


[INFO] Generated 2 folds for forecast horizon 1


[I 2025-05-11 15:15:43,560] Trial 29 finished with value: -0.5545334434351775 and parameters: {'sequence_length': 1197}. Best is trial 10 with value: -0.5688622754491018.
[I 2025-05-11 15:15:43,564] A new study created in memory with name: no-name-779c9600-5750-49b8-8b6a-5b06989af37d


[RESULT] Horizon 1: Best Params = {'sequence_length': 1512}, Best F1 = 0.5689

=== Forecast Horizon: 5 ===
[INFO] Generated 2 folds for forecast horizon 5


[I 2025-05-11 15:15:45,595] Trial 0 finished with value: -0.6443678011322964 and parameters: {'sequence_length': 1008}. Best is trial 0 with value: -0.6443678011322964.


[INFO] Generated 1 folds for forecast horizon 5


[I 2025-05-11 15:15:46,314] Trial 1 finished with value: -0.5044510385756676 and parameters: {'sequence_length': 1449}. Best is trial 0 with value: -0.6443678011322964.


[INFO] Generated 9 folds for forecast horizon 5


[I 2025-05-11 15:15:54,555] Trial 2 finished with value: -0.49621763548671005 and parameters: {'sequence_length': 189}. Best is trial 0 with value: -0.6443678011322964.


[INFO] Generated 11 folds for forecast horizon 5


[I 2025-05-11 15:16:07,171] Trial 3 finished with value: -0.5358925296028464 and parameters: {'sequence_length': 126}. Best is trial 0 with value: -0.6443678011322964.


[INFO] Generated 5 folds for forecast horizon 5


[I 2025-05-11 15:16:14,191] Trial 4 finished with value: -0.5226267076236087 and parameters: {'sequence_length': 504}. Best is trial 0 with value: -0.6443678011322964.


[INFO] Generated 3 folds for forecast horizon 5


[I 2025-05-11 15:16:19,408] Trial 5 finished with value: -0.7076427083182962 and parameters: {'sequence_length': 945}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 8 folds for forecast horizon 5


[I 2025-05-11 15:16:28,288] Trial 6 finished with value: -0.5362521313095723 and parameters: {'sequence_length': 252}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 3 folds for forecast horizon 5


[I 2025-05-11 15:16:36,206] Trial 7 finished with value: -0.2965009208103131 and parameters: {'sequence_length': 882}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 5 folds for forecast horizon 5


[I 2025-05-11 15:16:43,375] Trial 8 finished with value: -0.4282004979362669 and parameters: {'sequence_length': 441}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 5 folds for forecast horizon 5


[I 2025-05-11 15:16:52,656] Trial 9 finished with value: -0.5139290818125092 and parameters: {'sequence_length': 504}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 2 folds for forecast horizon 5


[I 2025-05-11 15:16:56,163] Trial 10 finished with value: -0.6212090296970934 and parameters: {'sequence_length': 1323}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 2 folds for forecast horizon 5


[I 2025-05-11 15:16:57,668] Trial 11 finished with value: -0.30578512396694213 and parameters: {'sequence_length': 1008}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 2 folds for forecast horizon 5


[I 2025-05-11 15:17:00,307] Trial 12 finished with value: -0.5181881291218327 and parameters: {'sequence_length': 1071}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 3 folds for forecast horizon 5


[I 2025-05-11 15:17:03,998] Trial 13 finished with value: -0.650755279972013 and parameters: {'sequence_length': 756}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 4 folds for forecast horizon 5


[I 2025-05-11 15:17:14,140] Trial 14 finished with value: -0.5955053449951409 and parameters: {'sequence_length': 693}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 4 folds for forecast horizon 5


[I 2025-05-11 15:17:25,551] Trial 15 finished with value: -0.5384539480858044 and parameters: {'sequence_length': 693}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 2 folds for forecast horizon 5


[I 2025-05-11 15:17:29,489] Trial 16 finished with value: -0.6997971466186625 and parameters: {'sequence_length': 1197}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 2 folds for forecast horizon 5


[I 2025-05-11 15:17:34,459] Trial 17 finished with value: -0.48984594741052306 and parameters: {'sequence_length': 1260}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 2 folds for forecast horizon 5


[I 2025-05-11 15:17:37,723] Trial 18 finished with value: -0.6981554202062297 and parameters: {'sequence_length': 1197}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 1 folds for forecast horizon 5


[I 2025-05-11 15:17:39,000] Trial 19 finished with value: -0.5044510385756676 and parameters: {'sequence_length': 1449}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 3 folds for forecast horizon 5


[I 2025-05-11 15:17:46,571] Trial 20 finished with value: -0.455238728305214 and parameters: {'sequence_length': 882}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 2 folds for forecast horizon 5


[I 2025-05-11 15:17:50,808] Trial 21 finished with value: -0.5806926235808244 and parameters: {'sequence_length': 1134}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 2 folds for forecast horizon 5


[I 2025-05-11 15:17:53,330] Trial 22 finished with value: -0.6991008843869879 and parameters: {'sequence_length': 1197}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 2 folds for forecast horizon 5


[I 2025-05-11 15:17:56,360] Trial 23 finished with value: -0.5988087307496375 and parameters: {'sequence_length': 1323}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 3 folds for forecast horizon 5


[I 2025-05-11 15:18:03,897] Trial 24 finished with value: -0.7065499390377966 and parameters: {'sequence_length': 945}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 3 folds for forecast horizon 5


[I 2025-05-11 15:18:11,221] Trial 25 finished with value: -0.4427158551988752 and parameters: {'sequence_length': 945}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 3 folds for forecast horizon 5


[I 2025-05-11 15:18:15,631] Trial 26 finished with value: -0.6262519802215071 and parameters: {'sequence_length': 819}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 2 folds for forecast horizon 5


[I 2025-05-11 15:18:18,397] Trial 27 finished with value: -0.2581699346405229 and parameters: {'sequence_length': 1071}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 4 folds for forecast horizon 5


[I 2025-05-11 15:18:29,945] Trial 28 finished with value: -0.5772242414971529 and parameters: {'sequence_length': 693}. Best is trial 5 with value: -0.7076427083182962.


[INFO] Generated 2 folds for forecast horizon 5


[I 2025-05-11 15:18:32,628] Trial 29 finished with value: -0.6443678011322964 and parameters: {'sequence_length': 1008}. Best is trial 5 with value: -0.7076427083182962.
[I 2025-05-11 15:18:32,639] A new study created in memory with name: no-name-81bcd69f-940d-4208-82c3-9748730ccaae


[RESULT] Horizon 5: Best Params = {'sequence_length': 945}, Best F1 = 0.7076

=== Forecast Horizon: 21 ===
[INFO] Generated 6 folds for forecast horizon 21


[I 2025-05-11 15:18:44,209] Trial 0 finished with value: -0.3613831005546149 and parameters: {'sequence_length': 378}. Best is trial 0 with value: -0.3613831005546149.


[INFO] Generated 5 folds for forecast horizon 21


[I 2025-05-11 15:18:54,732] Trial 1 finished with value: -0.47414570524411914 and parameters: {'sequence_length': 441}. Best is trial 1 with value: -0.47414570524411914.


[INFO] Generated 3 folds for forecast horizon 21


[I 2025-05-11 15:19:01,348] Trial 2 finished with value: -0.6530734972458595 and parameters: {'sequence_length': 819}. Best is trial 2 with value: -0.6530734972458595.


[INFO] Generated 2 folds for forecast horizon 21


[I 2025-05-11 15:19:08,510] Trial 3 finished with value: -0.46643677625294155 and parameters: {'sequence_length': 1197}. Best is trial 2 with value: -0.6530734972458595.


[INFO] Generated 4 folds for forecast horizon 21


[I 2025-05-11 15:19:20,011] Trial 4 finished with value: -0.2912459294655248 and parameters: {'sequence_length': 630}. Best is trial 2 with value: -0.6530734972458595.


[INFO] Generated 4 folds for forecast horizon 21


[I 2025-05-11 15:19:26,319] Trial 5 finished with value: -0.09223300970873786 and parameters: {'sequence_length': 567}. Best is trial 2 with value: -0.6530734972458595.


[INFO] Generated 4 folds for forecast horizon 21


[I 2025-05-11 15:19:35,851] Trial 6 finished with value: -0.26062349229912785 and parameters: {'sequence_length': 630}. Best is trial 2 with value: -0.6530734972458595.


[INFO] Generated 2 folds for forecast horizon 21


[I 2025-05-11 15:19:38,948] Trial 7 finished with value: -0.1027135729559108 and parameters: {'sequence_length': 1071}. Best is trial 2 with value: -0.6530734972458595.


[INFO] Generated 2 folds for forecast horizon 21


[I 2025-05-11 15:19:41,665] Trial 8 finished with value: -0.4125874125874126 and parameters: {'sequence_length': 1134}. Best is trial 2 with value: -0.6530734972458595.


[INFO] Generated 2 folds for forecast horizon 21


[I 2025-05-11 15:19:45,115] Trial 9 finished with value: -0.18813697127838597 and parameters: {'sequence_length': 1260}. Best is trial 2 with value: -0.6530734972458595.


[INFO] Generated 1 folds for forecast horizon 21


[I 2025-05-11 15:19:45,925] Trial 10 finished with value: -0.43962848297213625 and parameters: {'sequence_length': 1512}. Best is trial 2 with value: -0.6530734972458595.


[INFO] Generated 11 folds for forecast horizon 21


[I 2025-05-11 15:20:00,382] Trial 11 finished with value: -0.5175817408445371 and parameters: {'sequence_length': 126}. Best is trial 2 with value: -0.6530734972458595.


[INFO] Generated 8 folds for forecast horizon 21


[I 2025-05-11 15:20:07,924] Trial 12 finished with value: -0.5045219620493387 and parameters: {'sequence_length': 252}. Best is trial 2 with value: -0.6530734972458595.


[INFO] Generated 11 folds for forecast horizon 21


[I 2025-05-11 15:20:24,693] Trial 13 finished with value: -0.6030690222130206 and parameters: {'sequence_length': 126}. Best is trial 2 with value: -0.6530734972458595.


[INFO] Generated 3 folds for forecast horizon 21


[I 2025-05-11 15:20:37,663] Trial 14 finished with value: -0.38947099412215697 and parameters: {'sequence_length': 882}. Best is trial 2 with value: -0.6530734972458595.


[INFO] Generated 2 folds for forecast horizon 21


[I 2025-05-11 15:20:41,142] Trial 15 finished with value: -0.6894307902372419 and parameters: {'sequence_length': 945}. Best is trial 15 with value: -0.6894307902372419.


[INFO] Generated 3 folds for forecast horizon 21


[I 2025-05-11 15:20:49,177] Trial 16 finished with value: -0.41223833504527124 and parameters: {'sequence_length': 882}. Best is trial 15 with value: -0.6894307902372419.


[INFO] Generated 3 folds for forecast horizon 21


[I 2025-05-11 15:20:57,060] Trial 17 finished with value: -0.5396552254236181 and parameters: {'sequence_length': 756}. Best is trial 15 with value: -0.6894307902372419.


[INFO] Generated 2 folds for forecast horizon 21


[I 2025-05-11 15:21:02,075] Trial 18 finished with value: 0.0 and parameters: {'sequence_length': 1008}. Best is trial 15 with value: -0.6894307902372419.


[INFO] Generated 1 folds for forecast horizon 21


[I 2025-05-11 15:21:02,719] Trial 19 finished with value: -0.33663366336633666 and parameters: {'sequence_length': 1449}. Best is trial 15 with value: -0.6894307902372419.


[INFO] Generated 2 folds for forecast horizon 21


[I 2025-05-11 15:21:08,858] Trial 20 finished with value: -0.8217762415349887 and parameters: {'sequence_length': 1323}. Best is trial 20 with value: -0.8217762415349887.


[INFO] Generated 2 folds for forecast horizon 21


[I 2025-05-11 15:21:14,440] Trial 21 finished with value: -0.7211862764101571 and parameters: {'sequence_length': 1323}. Best is trial 20 with value: -0.8217762415349887.


[INFO] Generated 2 folds for forecast horizon 21


[I 2025-05-11 15:21:18,460] Trial 22 finished with value: -0.8042855698931977 and parameters: {'sequence_length': 1323}. Best is trial 20 with value: -0.8217762415349887.


[INFO] Generated 2 folds for forecast horizon 21


[I 2025-05-11 15:21:25,171] Trial 23 finished with value: -0.7505588210944614 and parameters: {'sequence_length': 1323}. Best is trial 20 with value: -0.8217762415349887.


[INFO] Generated 1 folds for forecast horizon 21


[I 2025-05-11 15:21:26,589] Trial 24 finished with value: -0.4114285714285714 and parameters: {'sequence_length': 1386}. Best is trial 20 with value: -0.8217762415349887.


[INFO] Generated 2 folds for forecast horizon 21


[I 2025-05-11 15:21:30,337] Trial 25 finished with value: -0.8071842993862284 and parameters: {'sequence_length': 1323}. Best is trial 20 with value: -0.8217762415349887.


[INFO] Generated 2 folds for forecast horizon 21


[I 2025-05-11 15:21:33,661] Trial 26 finished with value: -0.4287878787878788 and parameters: {'sequence_length': 1197}. Best is trial 20 with value: -0.8217762415349887.


[INFO] Generated 1 folds for forecast horizon 21


[I 2025-05-11 15:21:34,119] Trial 27 finished with value: -0.33663366336633666 and parameters: {'sequence_length': 1449}. Best is trial 20 with value: -0.8217762415349887.


[INFO] Generated 1 folds for forecast horizon 21


[I 2025-05-11 15:21:35,923] Trial 28 finished with value: -0.47435897435897434 and parameters: {'sequence_length': 1512}. Best is trial 20 with value: -0.8217762415349887.


[INFO] Generated 2 folds for forecast horizon 21


[I 2025-05-11 15:21:37,882] Trial 29 finished with value: -0.07575757575757576 and parameters: {'sequence_length': 1071}. Best is trial 20 with value: -0.8217762415349887.
[I 2025-05-11 15:21:37,890] A new study created in memory with name: no-name-bc959923-cd68-4253-a5f1-5d0027cb0826


[RESULT] Horizon 21: Best Params = {'sequence_length': 1323}, Best F1 = 0.8218

=== Forecast Horizon: 63 ===
[INFO] Generated 8 folds for forecast horizon 63


[I 2025-05-11 15:21:45,511] Trial 0 finished with value: -0.3239519958184744 and parameters: {'sequence_length': 189}. Best is trial 0 with value: -0.3239519958184744.


[INFO] Generated 2 folds for forecast horizon 63


[I 2025-05-11 15:21:48,156] Trial 1 finished with value: -0.7216161710543734 and parameters: {'sequence_length': 882}. Best is trial 1 with value: -0.7216161710543734.


[INFO] Generated 1 folds for forecast horizon 63


[I 2025-05-11 15:21:48,854] Trial 2 finished with value: -0.6077348066298343 and parameters: {'sequence_length': 1323}. Best is trial 1 with value: -0.7216161710543734.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[INFO] Generated 3 folds for forecast horizon 63


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-05-11 15:21:55,055] Trial 3 finished with value: -0.3251329188068847 and parameters: {'sequence_length': 819}. Best is trial 1 with value: -0.7216161710543734.


[INFO] Generated 3 folds for forecast horizon 63


[I 2025-05-11 15:22:01,770] Trial 4 finished with value: -0.6338214704658115 and parameters: {'sequence_length': 756}. Best is trial 1 with value: -0.7216161710543734.


[INFO] Generated 2 folds for forecast horizon 63


[I 2025-05-11 15:22:04,257] Trial 5 finished with value: -0.695875063387424 and parameters: {'sequence_length': 882}. Best is trial 1 with value: -0.7216161710543734.


[INFO] Generated 9 folds for forecast horizon 63


[I 2025-05-11 15:22:13,292] Trial 6 finished with value: -0.6144638219194994 and parameters: {'sequence_length': 126}. Best is trial 1 with value: -0.7216161710543734.


[INFO] Generated 4 folds for forecast horizon 63


[I 2025-05-11 15:22:17,952] Trial 7 finished with value: -0.4697335732855009 and parameters: {'sequence_length': 504}. Best is trial 1 with value: -0.7216161710543734.


[INFO] Generated 5 folds for forecast horizon 63


[I 2025-05-11 15:22:26,068] Trial 8 finished with value: -0.7307860429318718 and parameters: {'sequence_length': 441}. Best is trial 8 with value: -0.7307860429318718.


[INFO] Generated 2 folds for forecast horizon 63


[I 2025-05-11 15:22:31,568] Trial 9 finished with value: -0.365168921215417 and parameters: {'sequence_length': 1197}. Best is trial 8 with value: -0.7307860429318718.


[INFO] Generated 5 folds for forecast horizon 63


[I 2025-05-11 15:22:37,716] Trial 10 finished with value: -0.7464539601877374 and parameters: {'sequence_length': 441}. Best is trial 10 with value: -0.7464539601877374.


[INFO] Generated 5 folds for forecast horizon 63


[I 2025-05-11 15:22:45,064] Trial 11 finished with value: -0.7109620969295426 and parameters: {'sequence_length': 441}. Best is trial 10 with value: -0.7464539601877374.


[INFO] Generated 4 folds for forecast horizon 63


[I 2025-05-11 15:22:49,355] Trial 12 finished with value: -0.42021259628049057 and parameters: {'sequence_length': 504}. Best is trial 10 with value: -0.7464539601877374.


[INFO] Generated 6 folds for forecast horizon 63


[I 2025-05-11 15:22:54,491] Trial 13 finished with value: -0.48691776958164174 and parameters: {'sequence_length': 315}. Best is trial 10 with value: -0.7464539601877374.


[INFO] Generated 4 folds for forecast horizon 63


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-05-11 15:23:05,069] Trial 14 finished with value: -0.37143263111241487 and parameters: {'sequence_length': 630}. Best is trial 10 with value: -0.7464539601877374.


[INFO] Generated 2 folds for forecast horizon 63


[I 2025-05-11 15:23:08,879] Trial 15 finished with value: -0.7217647058823529 and parameters: {'sequence_length': 1071}. Best is trial 10 with value: -0.7464539601877374.


[INFO] Generated 6 folds for forecast horizon 63


[I 2025-05-11 15:23:14,552] Trial 16 finished with value: -0.40679272696661006 and parameters: {'sequence_length': 315}. Best is trial 10 with value: -0.7464539601877374.


[INFO] Generated 4 folds for forecast horizon 63


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-05-11 15:23:23,347] Trial 17 finished with value: -0.3728085327955567 and parameters: {'sequence_length': 630}. Best is trial 10 with value: -0.7464539601877374.


[INFO] Generated 6 folds for forecast horizon 63


[I 2025-05-11 15:23:32,725] Trial 18 finished with value: -0.6021337164254955 and parameters: {'sequence_length': 315}. Best is trial 10 with value: -0.7464539601877374.


[INFO] Generated 1 folds for forecast horizon 63


[I 2025-05-11 15:23:35,120] Trial 19 finished with value: -0.2 and parameters: {'sequence_length': 1449}. Best is trial 10 with value: -0.7464539601877374.


[INFO] Generated 4 folds for forecast horizon 63


[I 2025-05-11 15:23:41,539] Trial 20 finished with value: -0.37143263111241487 and parameters: {'sequence_length': 630}. Best is trial 10 with value: -0.7464539601877374.


[INFO] Generated 2 folds for forecast horizon 63


[I 2025-05-11 15:23:45,872] Trial 21 finished with value: -0.8958935145851034 and parameters: {'sequence_length': 1134}. Best is trial 21 with value: -0.8958935145851034.


[INFO] Generated 2 folds for forecast horizon 63


[I 2025-05-11 15:23:48,504] Trial 22 finished with value: -0.16487475372924287 and parameters: {'sequence_length': 1008}. Best is trial 21 with value: -0.8958935145851034.


[INFO] Generated 2 folds for forecast horizon 63


[I 2025-05-11 15:23:52,559] Trial 23 finished with value: -0.7690841399851079 and parameters: {'sequence_length': 1071}. Best is trial 21 with value: -0.8958935145851034.


[INFO] Generated 2 folds for forecast horizon 63


[I 2025-05-11 15:23:56,794] Trial 24 finished with value: -0.8480519480519481 and parameters: {'sequence_length': 1134}. Best is trial 21 with value: -0.8958935145851034.


[INFO] Generated 2 folds for forecast horizon 63


[I 2025-05-11 15:24:02,531] Trial 25 finished with value: -0.3728303471444569 and parameters: {'sequence_length': 1197}. Best is trial 21 with value: -0.8958935145851034.


[INFO] Generated 2 folds for forecast horizon 63


[I 2025-05-11 15:24:05,246] Trial 26 finished with value: -0.7690841399851079 and parameters: {'sequence_length': 1071}. Best is trial 21 with value: -0.8958935145851034.


[INFO] Generated 1 folds for forecast horizon 63


[I 2025-05-11 15:24:05,694] Trial 27 finished with value: -0.5681818181818182 and parameters: {'sequence_length': 1512}. Best is trial 21 with value: -0.8958935145851034.


[INFO] Generated 2 folds for forecast horizon 63


[I 2025-05-11 15:24:10,885] Trial 28 finished with value: -0.9056603773584906 and parameters: {'sequence_length': 1260}. Best is trial 28 with value: -0.9056603773584906.


[INFO] Generated 1 folds for forecast horizon 63


[I 2025-05-11 15:24:11,705] Trial 29 finished with value: -0.6077348066298343 and parameters: {'sequence_length': 1323}. Best is trial 28 with value: -0.9056603773584906.
[I 2025-05-11 15:24:11,710] A new study created in memory with name: no-name-1254f035-4d12-472d-910c-d71caca60c2b


[RESULT] Horizon 63: Best Params = {'sequence_length': 1260}, Best F1 = 0.9057

=== Forecast Horizon: 252 ===
[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:12,297] Trial 0 finished with value: -0.656 and parameters: {'sequence_length': 1386}. Best is trial 0 with value: -0.656.


[INFO] Generated 5 folds for forecast horizon 252


[I 2025-05-11 15:24:16,941] Trial 1 finished with value: -0.5817908407382092 and parameters: {'sequence_length': 189}. Best is trial 0 with value: -0.656.


[INFO] Generated 2 folds for forecast horizon 252


[I 2025-05-11 15:24:19,759] Trial 2 finished with value: -0.3561253561253561 and parameters: {'sequence_length': 819}. Best is trial 0 with value: -0.656.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:20,503] Trial 3 finished with value: -0.30837004405286345 and parameters: {'sequence_length': 1386}. Best is trial 0 with value: -0.656.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:21,086] Trial 4 finished with value: 0.0 and parameters: {'sequence_length': 1134}. Best is trial 0 with value: -0.656.


[INFO] Generated 5 folds for forecast horizon 252


[I 2025-05-11 15:24:25,210] Trial 5 finished with value: -0.5746337322807911 and parameters: {'sequence_length': 189}. Best is trial 0 with value: -0.656.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:26,021] Trial 6 finished with value: -0.6976744186046512 and parameters: {'sequence_length': 1323}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 3 folds for forecast horizon 252


[I 2025-05-11 15:24:30,326] Trial 7 finished with value: -0.4878380706287683 and parameters: {'sequence_length': 504}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 2 folds for forecast horizon 252


[I 2025-05-11 15:24:33,745] Trial 8 finished with value: -0.3851063829787234 and parameters: {'sequence_length': 945}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 2 folds for forecast horizon 252


[I 2025-05-11 15:24:35,551] Trial 9 finished with value: -0.6608353808353808 and parameters: {'sequence_length': 693}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:36,222] Trial 10 finished with value: -0.6666666666666666 and parameters: {'sequence_length': 1512}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:37,768] Trial 11 finished with value: -0.32 and parameters: {'sequence_length': 1197}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:39,482] Trial 12 finished with value: -0.6666666666666666 and parameters: {'sequence_length': 1512}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:40,021] Trial 13 finished with value: -0.32 and parameters: {'sequence_length': 1197}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:40,885] Trial 14 finished with value: -0.6287425149700598 and parameters: {'sequence_length': 1512}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 2 folds for forecast horizon 252


[I 2025-05-11 15:24:43,360] Trial 15 finished with value: -0.5079681274900398 and parameters: {'sequence_length': 1008}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:44,193] Trial 16 finished with value: -0.6976744186046512 and parameters: {'sequence_length': 1323}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:44,872] Trial 17 finished with value: -0.6976744186046512 and parameters: {'sequence_length': 1323}. Best is trial 6 with value: -0.6976744186046512.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[INFO] Generated 3 folds for forecast horizon 252


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-05-11 15:24:47,650] Trial 18 finished with value: -0.5161702127659574 and parameters: {'sequence_length': 504}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 2 folds for forecast horizon 252


[I 2025-05-11 15:24:51,465] Trial 19 finished with value: -0.14954712522971908 and parameters: {'sequence_length': 1008}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:52,201] Trial 20 finished with value: -0.576271186440678 and parameters: {'sequence_length': 1260}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:52,914] Trial 21 finished with value: -0.6976744186046512 and parameters: {'sequence_length': 1323}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:53,728] Trial 22 finished with value: -0.2222222222222222 and parameters: {'sequence_length': 1071}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:54,326] Trial 23 finished with value: -0.6265060240963856 and parameters: {'sequence_length': 1386}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 2 folds for forecast horizon 252


[I 2025-05-11 15:24:56,692] Trial 24 finished with value: -0.013422818791946308 and parameters: {'sequence_length': 819}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:57,458] Trial 25 finished with value: -0.576271186440678 and parameters: {'sequence_length': 1260}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:57,914] Trial 26 finished with value: 0.0 and parameters: {'sequence_length': 1134}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:58,671] Trial 27 finished with value: 0.0 and parameters: {'sequence_length': 1386}. Best is trial 6 with value: -0.6976744186046512.


[INFO] Generated 1 folds for forecast horizon 252


[I 2025-05-11 15:24:59,449] Trial 28 finished with value: -0.6994818652849741 and parameters: {'sequence_length': 1323}. Best is trial 28 with value: -0.6994818652849741.


[INFO] Generated 2 folds for forecast horizon 252


[I 2025-05-11 15:25:02,114] Trial 29 finished with value: -0.5061449798639078 and parameters: {'sequence_length': 945}. Best is trial 28 with value: -0.6994818652849741.


[RESULT] Horizon 252: Best Params = {'sequence_length': 1323}, Best F1 = 0.6995


In [None]:
# ---------------------- Fold Debugging Script ---------------------- #
# Load feature data
X_df = pd.read_csv(r'C:\Users\azorb\PycharmProjects\Predicting the Yield Curve\Data Processing\Output\Independent\X_df.csv', index_col=0, parse_dates=True)
X_df.index = pd.to_datetime(X_df.index)

# Configs
sequence_length = 252
holdout_base = 756
forecast_horizons = [1, 5, 21, 63, 252]

# Run diagnostics for each forecast horizon
for h in forecast_horizons:
    print(f"\n=== Forecast Horizon: {h} ===")

    # Load directional labels for the current horizon
    Y_df = pd.read_csv(fr'C:\Users\azorb\PycharmProjects\Predicting the Yield Curve\Data Processing\Output\Dependent\Classification\Y_df_change_dir_{h}.csv', index_col=0, parse_dates=True)
    Y_df.index = pd.to_datetime(Y_df.index)

    # Shift and align
    X_shifted = shift_X_by_horizon(X_df, h)
    common_idx = X_shifted.index.intersection(Y_df.index)
    X_aligned = X_shifted.loc[common_idx]
    Y_aligned = Y_df.loc[common_idx]

    assert X_aligned.index.equals(Y_aligned.index), "Index mismatch after aligning!"

    # Generate folds
    val_window = val_window = sequence_length + h + 252 - 1  # or 1008 + forecast_horizon
    folds, last_val_end, holdout_days = get_expanding_folds(X_aligned, Y_aligned, h, sequence_length, val_window, holdout_base)

    # First fold shapes
    X_train, Y_train = folds[0]["X_train"], folds[0]["Y_train"]
    X_val, Y_val = folds[0]["X_val"], folds[0]["Y_val"]
    print(f"[DEBUG] First fold train X: {X_train.shape}, Y: {Y_train.shape}")
    print(f"[DEBUG] First fold val X: {X_val.shape}, Y: {Y_val.shape}")

    # Standardize
    X_train_std, X_val_std = standardize_fold(X_train, X_val)

    # Sequences
    X_train_seq, Y_train_seq = create_sequences(X_train_std, Y_train, sequence_length, h)
    X_val_seq, Y_val_seq = create_sequences(X_val_std, Y_val, sequence_length, h)

    print(f"[DEBUG] Train sequences X: {X_train_seq.shape}, Y: {Y_train_seq.shape}")
    print(f"[DEBUG] Val sequences X: {X_val_seq.shape}, Y: {Y_val_seq.shape}")

    # Check correspondence of sequence and target alignment
    if len(X_train_seq) > 0:
        print(f"[INFO] First target index: {Y_train.index[sequence_length + h - 1]}")
        print(f"[INFO] Expected Y index from X: {X_train.index[0]} to {X_train.index[sequence_length - 1]}")

In [5]:
results

{1: {'best_params': {'sequence_length': 1512}, 'best_f1': 0.5688622754491018},
 5: {'best_params': {'sequence_length': 945}, 'best_f1': 0.7076427083182962},
 21: {'best_params': {'sequence_length': 1323}, 'best_f1': 0.8217762415349887},
 63: {'best_params': {'sequence_length': 1260}, 'best_f1': 0.9056603773584906},
 252: {'best_params': {'sequence_length': 1323},
  'best_f1': 0.6994818652849741}}

In [31]:
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
import warnings

# ---------------------- Evaluate on Holdout Test Set ---------------------- #
def evaluate_on_holdout(X_df, Y_df_dict, best_params_dict):
    holdout_results = {}
    for h in forecast_horizons:
        print(f"\n[TEST] Forecast Horizon: {h}")
        params = best_params_dict[h]['best_params']
        hidden_dim = int(params['hidden_dim'])
        num_layers = int(params['num_layers'])
        dropout = float(params['dropout'])
        learning_rate = float(params['learning_rate'])
        batch_size = int(params['batch_size'])

        Y_df = Y_df_dict[h]
        X_shifted = shift_X_by_horizon(X_df, h)
        Y_aligned = Y_df.loc[X_shifted.index]
        X_final, Y_final = X_shifted, Y_aligned

        folds, last_val_end, _ = get_expanding_folds(X_final, Y_final, h, sequence_length, val_window_num_sequences, holdout_base)
        X_train, Y_train = X_final.iloc[:last_val_end], Y_final.iloc[:last_val_end]
        X_test, Y_test = X_final.iloc[last_val_end:], Y_final.iloc[last_val_end:]

        print(f"[INFO] Holdout Label Distribution (0s/1s): {np.bincount(Y_test.values.astype(int).flatten())}")

        scaler = StandardScaler()
        X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), index=X_train.index, columns=X_train.columns)
        X_test_scaled = pd.DataFrame(scaler.transform(X_test), index=X_test.index, columns=X_test.columns)

        X_train_seq, Y_train_seq = create_sequences(X_train_scaled, Y_train, sequence_length, h)
        X_test_seq, Y_test_seq = create_sequences(X_test_scaled, Y_test, sequence_length, h)

        X_train_tensor = torch.tensor(X_train_seq, dtype=torch.float32).to(device)
        Y_train_tensor = torch.tensor(Y_train_seq, dtype=torch.float32).to(device)
        X_test_tensor = torch.tensor(X_test_seq, dtype=torch.float32).to(device)
        Y_test_tensor = torch.tensor(Y_test_seq, dtype=torch.float32).to(device)

        model = LSTMClassifier(X_train_seq.shape[2], hidden_dim, num_layers, Y_train_seq.shape[1], dropout).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        criterion = nn.BCEWithLogitsLoss()

        train_loader = DataLoader(TensorDataset(X_train_tensor, Y_train_tensor), batch_size=batch_size, shuffle=True)

        for epoch in range(num_epochs):
            model.train()
            for xb, yb in train_loader:
                optimizer.zero_grad()
                loss = criterion(model(xb), yb)
                loss.backward()
                optimizer.step()

        model.eval()
        preds = []
        with torch.no_grad():
            for xb in DataLoader(X_test_tensor, batch_size=batch_size):
                preds.append(torch.sigmoid(model(xb)))

        pred_tensor = torch.cat(preds, dim=0).squeeze()
        pred_bin = (pred_tensor > 0.5).int().cpu().numpy()
        y_true = Y_test_tensor.int().cpu().numpy()

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            acc = accuracy_score(y_true, pred_bin)
            f1 = f1_score(y_true, pred_bin, average='macro', zero_division=0)
            precision = precision_score(y_true, pred_bin, average='macro', zero_division=0)
            recall = recall_score(y_true, pred_bin, average='macro', zero_division=0)

        holdout_results[h] = {
            "accuracy": acc,
            "f1": f1,
            "precision": precision,
            "recall": recall
        }
        print(f"[RESULT] Horizon {h}: Accuracy = {acc:.4f}, F1 = {f1:.4f}, Precision = {precision:.4f}, Recall = {recall:.4f}")

    return holdout_results

In [32]:
holdout_results = evaluate_on_holdout(X_df, Y_df_dict, results)


[TEST] Forecast Horizon: 1
[INFO] Generated 8 folds for forecast horizon 1
[INFO] Holdout Label Distribution (0s/1s): [3620 2728]
[RESULT] Horizon 1: Accuracy = 0.6092, F1 = 0.4465, Precision = 0.4627, Recall = 0.4816

[TEST] Forecast Horizon: 5
[INFO] Generated 8 folds for forecast horizon 5
[INFO] Holdout Label Distribution (0s/1s): [2856 3252]
[RESULT] Horizon 5: Accuracy = 0.5984, F1 = 0.5961, Precision = 0.6124, Recall = 0.6073

[TEST] Forecast Horizon: 21
[INFO] Generated 8 folds for forecast horizon 21
[INFO] Holdout Label Distribution (0s/1s): [2072 3076]
[RESULT] Horizon 21: Accuracy = 0.9010, F1 = 0.4740, Precision = 0.4505, Recall = 0.5000

[TEST] Forecast Horizon: 63
[INFO] Generated 7 folds for forecast horizon 63
[INFO] Holdout Label Distribution (0s/1s): [1979 4045]
[RESULT] Horizon 63: Accuracy = 0.6493, F1 = 0.6218, Precision = 0.6311, Recall = 0.6641

[TEST] Forecast Horizon: 252
[INFO] Generated 5 folds for forecast horizon 252
[INFO] Holdout Label Distribution (0s/