# 05 Hybrid Models (GARCH + RNN)

This notebook trains both hybrid architectures with `MSE` training loss:
- Hybrid LSTM
- Hybrid GRU

Evaluation remains on both `MSE` and `QLIKE`.
Hybrid inputs are Pure-RNN features plus `garch_cond_var` from rolling GARCH(1,1)-t forecasts.
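Predictions, training logs, and gate values are checkpointed to disk after each split, so an interrupted run can be resumed with `resume=True` (the cells below pass `resume=False` and retrain every split).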


In [None]:
from __future__ import annotations

from pathlib import Path
import sys

import pandas as pd
import matplotlib.pyplot as plt

# Resolve the project root: use the working directory when it contains a
# `src` folder, otherwise fall back to its parent (e.g. when the notebook is
# launched from a subfolder of the project).
_cwd = Path.cwd().resolve()
PROJECT_ROOT = _cwd if (_cwd / "src").exists() else _cwd.parent

# Make the `src` package importable, without adding a duplicate sys.path entry.
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    sys.path.append(_root_entry)

from src.evaluation import evaluate_forecasts
from src.models.garch import add_garch_feature
from src.models.rnn import RNNTrainingConfig, run_rolling_experiment
from src.utils import set_seed

# Seed via the project's `set_seed` helper before any training happens.
# NOTE(review): presumably seeds every RNG backend used by the models —
# confirm in src.utils.
set_seed(42)
# Allow pandas to display up to 100 columns before truncating wide frames.
pd.set_option("display.max_columns", 100)


In [None]:
# Inputs: the processed return series and the rolling split definitions.
processed_dir = PROJECT_ROOT / "data" / "processed"
data_path = processed_dir / "sp500_log_returns.csv"
splits_path = processed_dir / "rolling_splits.csv"

# Every split boundary column is parsed as a datetime on load.
split_date_cols = [
    "train_start_date",
    "train_end_date",
    "val_start_date",
    "val_end_date",
    "test_start_date",
    "test_end_date",
]

base_df = pd.read_csv(data_path, parse_dates=["date"])
splits_df = pd.read_csv(splits_path, parse_dates=split_date_cols)

# Outputs: one predictions / train-log / gate-value CSV per architecture.
pred_dir = PROJECT_ROOT / "reports" / "predictions"
pred_dir.mkdir(parents=True, exist_ok=True)

lstm_pred_path, lstm_log_path, lstm_gate_path = (
    pred_dir / "hybrid_lstm_predictions.csv",
    pred_dir / "hybrid_lstm_train_logs.csv",
    pred_dir / "hybrid_lstm_gate_values.csv",
)

gru_pred_path, gru_log_path, gru_gate_path = (
    pred_dir / "hybrid_gru_predictions.csv",
    pred_dir / "hybrid_gru_train_logs.csv",
    pred_dir / "hybrid_gru_gate_values.csv",
)

# Preview the first rows of the return series.
base_df.head()


In [None]:
# Destination for the dataset once the GARCH feature has been attached.
hybrid_data_path = PROJECT_ROOT / "data" / "processed" / "sp500_log_returns_with_garch.csv"

# Rolling GARCH forecasts are computed without lookahead and added as a feature.
hybrid_df = add_garch_feature(
    base_df,
    return_col="log_return",
    out_col="garch_cond_var",
    min_train_size=756,
    refit_every=21,
)

# Persist the augmented dataset.
hybrid_df.to_csv(hybrid_data_path, index=False)
print(f"Saved hybrid feature dataset: {hybrid_data_path}")

# Show the most recent rows with the new feature next to the raw returns.
preview_cols = ["date", "log_return", "garch_cond_var"]
hybrid_df[preview_cols].tail()


In [None]:
# Training hyperparameters; the same config object is passed to both the
# LSTM and the GRU run in this notebook.
rnn_hyperparams = {
    "lookback": 21,
    "hidden_units": 8,
    "dropout": 0.10,
    "learning_rate": 1e-3,
    "batch_size": 64,
    "epochs": 35,
    "patience": 6,
    "seed": 42,
}
cfg = RNNTrainingConfig(**rnn_hyperparams)
cfg


In [None]:
# Run the hybrid LSTM over the rolling splits. Output paths for predictions,
# training logs and gate values are passed in; resume is disabled, and the
# training history of the last split is requested for the diagnostics below.
lstm_run = run_rolling_experiment(
    df=hybrid_df,
    splits_df=splits_df,
    architecture="lstm",
    variant="hybrid",
    cfg=cfg,
    verbose_fit=0,
    capture_gates=True,
    predictions_path=lstm_pred_path,
    train_logs_path=lstm_log_path,
    gates_path=lstm_gate_path,
    resume=False,
    collect_last_history=True,
)
(
    hybrid_lstm_pred,
    hybrid_lstm_logs,
    hybrid_lstm_gates,
    hybrid_lstm_last_history,
) = lstm_run

# Score the LSTM forecasts, grouped by model identity columns.
metric_group_cols = ["variant", "architecture", "train_loss"]
hybrid_lstm_metrics = evaluate_forecasts(hybrid_lstm_pred, group_cols=metric_group_cols)
hybrid_lstm_metrics


In [None]:
# Run the hybrid GRU over the rolling splits. Output paths for predictions,
# training logs and gate values are passed in; resume is disabled, and the
# training history of the last split is requested for the diagnostics below.
gru_run = run_rolling_experiment(
    df=hybrid_df,
    splits_df=splits_df,
    architecture="gru",
    variant="hybrid",
    cfg=cfg,
    verbose_fit=0,
    capture_gates=True,
    predictions_path=gru_pred_path,
    train_logs_path=gru_log_path,
    gates_path=gru_gate_path,
    resume=False,
    collect_last_history=True,
)
(
    hybrid_gru_pred,
    hybrid_gru_logs,
    hybrid_gru_gates,
    hybrid_gru_last_history,
) = gru_run

# Score the GRU forecasts, grouped by model identity columns.
metric_group_cols = ["variant", "architecture", "train_loss"]
hybrid_gru_metrics = evaluate_forecasts(hybrid_gru_pred, group_cols=metric_group_cols)
hybrid_gru_metrics


In [None]:
# Summarise where the artefacts of both runs were written.
print("Saved hybrid prediction, training log, and gate-value files in reports/predictions")

lstm_files = ", ".join(str(p) for p in (lstm_pred_path, lstm_log_path, lstm_gate_path))
print(f"LSTM files: {lstm_files}")

gru_files = ", ".join(str(p) for p in (gru_pred_path, gru_log_path, gru_gate_path))
print(f"GRU files: {gru_files}")


In [None]:
# Overfitting diagnostics: train vs validation loss across rolling splits.

# Tag each model's training log with its name and stack them into one frame.
log_frames = [
    logs.assign(model_name=name)
    for name, logs in (("hybrid_lstm", hybrid_lstm_logs), ("hybrid_gru", hybrid_gru_logs))
]
log_plot = pd.concat(log_frames, ignore_index=True)

# Columns the per-split summary plots depend on.
required_cols = {
    "split_id",
    "model_name",
    "best_train_loss",
    "best_val_loss",
    "best_gap_val_minus_train",
}
missing = sorted(required_cols - set(log_plot.columns))

if not missing:
    log_plot = log_plot.sort_values(["model_name", "split_id"]).reset_index(drop=True)

    fig, (loss_ax, gap_ax) = plt.subplots(2, 1, figsize=(14, 9), sharex=True)

    # Top panel: best train/val loss per split; bottom panel: their gap.
    for model_name, d in log_plot.groupby("model_name"):
        split_ids = d["split_id"]
        loss_ax.plot(split_ids, d["best_train_loss"], linewidth=1.1, label=f"{model_name} train")
        loss_ax.plot(split_ids, d["best_val_loss"], linewidth=1.1, linestyle="--", label=f"{model_name} val")
        gap_ax.plot(split_ids, d["best_gap_val_minus_train"], linewidth=1.2, label=model_name)

    loss_ax.set(ylabel="Loss", title="Best Train/Val Loss by Rolling Split")
    loss_ax.grid(alpha=0.2)
    loss_ax.legend(ncol=2)

    # Zero line marks the point where validation and train loss are equal.
    gap_ax.axhline(0.0, color="gray", linestyle="--", linewidth=1.0)
    gap_ax.set(xlabel="Split ID", ylabel="Gap", title="Overfit Gap by Rolling Split (Best Epoch)")
    gap_ax.grid(alpha=0.2)
    gap_ax.legend()

    plt.tight_layout()
    plt.show()
else:
    # Without the aggregate columns only the per-epoch curves below are drawn.
    print(
        "Train log is missing aggregate diagnostics columns. "
        "Using last trained split histories instead. "
        f"Missing: {missing}"
    )

# Per-epoch loss curves for the last split trained by each model.
for model_name, hist in (
    ("hybrid_lstm", hybrid_lstm_last_history),
    ("hybrid_gru", hybrid_gru_last_history),
):
    if hist is None:
        print(
            f"No last split history for {model_name}. "
            "If resume=True and no new split was trained, set resume=False (or clear outputs) and rerun."
        )
        continue

    n_epochs = len(hist["loss"])
    hist_df = pd.DataFrame(
        {
            "epoch": range(1, n_epochs + 1),
            "train_loss": hist["loss"],
            "val_loss": hist["val_loss"],
        }
    )

    fig, ax = plt.subplots(figsize=(9, 5))
    for column, curve_label in (("train_loss", "Train Loss"), ("val_loss", "Validation Loss")):
        ax.plot(hist_df["epoch"], hist_df[column], label=curve_label, linewidth=1.4)
    ax.set_title(f"{model_name} Last Split Loss Curves (split_id={hist['split_id']})")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.grid(alpha=0.2)
    ax.legend()
    plt.tight_layout()
    plt.show()
