# 05a Hybrid Residual Models (GARCH + RNN on Residuals)

This notebook fits residual hybrids:
- GARCH(1,1)-t baseline variance forecast
- Residual target: `residual_var = sq_return - garch_cond_var`
- Train LSTM/GRU to predict residuals
- Final variance forecast: `max(garch_cond_var + predicted_residual, 1e-12)` (the sum is floored so the forecast stays strictly positive)

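Training is checkpointed split by split. The training cells below pass `resume=False`, so each run retrains from scratch; switch to `resume=True` to continue from the existing checkpoint files.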


In [None]:
from __future__ import annotations

from pathlib import Path
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Resolve the repository root: the working directory when it contains ``src``,
# otherwise its parent directory.
_cwd = Path.cwd().resolve()
PROJECT_ROOT = _cwd if (_cwd / "src").exists() else _cwd.parent

# Make ``src.*`` importable; skip the append when the root is already listed.
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    sys.path.append(_root_entry)

from src.evaluation import evaluate_forecasts
from src.models.garch import add_garch_feature
from src.models.rnn import RNNTrainingConfig, run_rolling_experiment
from src.utils import set_seed

# Fix the random seed through the project helper so repeated runs are
# comparable (which RNG backends it seeds is defined in src.utils — not
# visible from this notebook).
set_seed(42)
# Let pandas show up to 120 columns before truncating wide frames.
pd.set_option("display.max_columns", 120)


In [None]:
# Inputs: processed log returns and the rolling train/val/test split table.
processed_dir = PROJECT_ROOT / "data" / "processed"
data_path = processed_dir / "sp500_log_returns.csv"
splits_path = processed_dir / "rolling_splits.csv"

# Every split boundary column is parsed as a datetime on load.
split_date_cols = [
    "train_start_date",
    "train_end_date",
    "val_start_date",
    "val_end_date",
    "test_start_date",
    "test_end_date",
]

base_df = pd.read_csv(data_path, parse_dates=["date"])
splits_df = pd.read_csv(splits_path, parse_dates=split_date_cols)

# Output directory for predictions, training logs and gate values.
pred_dir = PROJECT_ROOT / "reports" / "predictions"
pred_dir.mkdir(parents=True, exist_ok=True)

# LSTM artifacts: raw residual predictions, reconstructed variance
# predictions, training logs, gate values.
lstm_raw_path, lstm_pred_path, lstm_log_path, lstm_gate_path = (
    pred_dir / "hybrid_residual_lstm_raw_predictions.csv",
    pred_dir / "hybrid_residual_lstm_predictions.csv",
    pred_dir / "hybrid_residual_lstm_train_logs.csv",
    pred_dir / "hybrid_residual_lstm_gate_values.csv",
)

# GRU artifacts, same layout as the LSTM ones.
gru_raw_path, gru_pred_path, gru_log_path, gru_gate_path = (
    pred_dir / "hybrid_residual_gru_raw_predictions.csv",
    pred_dir / "hybrid_residual_gru_predictions.csv",
    pred_dir / "hybrid_residual_gru_train_logs.csv",
    pred_dir / "hybrid_residual_gru_gate_values.csv",
)

# Quick look at the loaded returns.
base_df.head()


In [None]:
# Add the GARCH conditional variance to the returns and derive the residual
# target from it. The feature is intended to be free of lookahead; that
# guarantee lives in src.models.garch.add_garch_feature and is not visible
# here. NOTE(review): min_train_size / refit_every are presumably counted in
# rows (trading days) — confirm against the helper.
garch_kwargs = {
    "return_col": "log_return",
    "out_col": "garch_cond_var",
    "min_train_size": 756,
    "refit_every": 21,
}
residual_df = add_garch_feature(base_df, **garch_kwargs)

# Signed residual: realised squared return minus the GARCH variance.
residual_df["residual_var"] = residual_df["sq_return"] - residual_df["garch_cond_var"]

# Persist the augmented dataset next to the other processed files.
residual_data_path = PROJECT_ROOT.joinpath(
    "data", "processed", "sp500_log_returns_with_garch_residual.csv"
)
residual_df.to_csv(residual_data_path, index=False)
print(f"Saved residual dataset: {residual_data_path}")

# Show the most recent rows of the columns that define the target.
preview_cols = ["date", "sq_return", "garch_cond_var", "residual_var"]
residual_df[preview_cols].tail()


In [None]:
# Training configuration shared by the residual LSTM and GRU runs below.
cfg = RNNTrainingConfig(
    lookback=21,
    hidden_units=8,
    dropout=0.10,
    learning_rate=1e-3,
    batch_size=64,
    epochs=35,
    patience=6,
    seed=42,
    scale_features=True,
    scale_target=True,
    target_transform="standardize",
    log_garch_features=True,
    eps=1e-8,
    force_linear_output=True,
)

# The residual target is signed (squared return minus GARCH variance), and
# this notebook only supports the "standardize" target transform for it.
# Fail fast if the config is edited to anything else.
print("target_transform:", cfg.target_transform)
if cfg.target_transform != "standardize":
    raise ValueError("05a requires target_transform='standardize' for residual_var.")

# A notebook renders only the final expression of a cell, so the config
# display has to come last; placed mid-cell it was silently discarded.
cfg


In [None]:
# Step 1: fit the LSTM on the residual target across all rolling splits.
# The output layer is linear because residuals can take either sign.
# Predictions, training logs and gate values are written to the given paths.
lstm_raw_pred, lstm_logs, lstm_gates, hybrid_resid_lstm_last_history = run_rolling_experiment(
    df=residual_df,
    splits_df=splits_df,
    cfg=cfg,
    architecture="lstm",
    variant="hybrid_residual",
    target_col="residual_var",
    output_activation="linear",
    capture_gates=True,
    collect_last_history=True,
    resume=False,
    verbose_fit=0,
    predictions_path=lstm_raw_path,
    train_logs_path=lstm_log_path,
    gates_path=lstm_gate_path,
)

# Step 2: rebuild variance forecasts as GARCH baseline + predicted residual.
# map_df carries the realised squared return and the baseline per date; the
# GRU cell reuses it.
map_df = residual_df[["date", "sq_return", "garch_cond_var"]].copy()

lstm_output_cols = [
    "date",
    "split_id",
    "variant",
    "architecture",
    "train_loss",
    "y_true_var",
    "y_pred_var",
    "garch_cond_var",
    "y_true_residual",
    "y_pred_residual",
]

lstm_pred = (
    lstm_raw_pred.merge(map_df, on="date", how="left")
    # The raw run stores the residual target/prediction in the *_var columns;
    # keep them under explicit residual names before overwriting.
    .assign(
        y_true_residual=lambda frame: frame["y_true_var"],
        y_pred_residual=lambda frame: frame["y_pred_var"],
    )
    # Swap in the variance-level truth and the reconstructed forecast,
    # floored at 1e-12 so it stays strictly positive.
    .assign(
        y_true_var=lambda frame: frame["sq_return"],
        y_pred_var=lambda frame: np.clip(
            frame["garch_cond_var"] + frame["y_pred_residual"], 1e-12, None
        ),
        variant="hybrid_residual",
        architecture="lstm",
    )
)[lstm_output_cols]

# Score the reconstructed forecasts per variant / architecture / loss.
lstm_metrics = evaluate_forecasts(lstm_pred, group_cols=["variant", "architecture", "train_loss"])
lstm_metrics


In [None]:
# Step 1: fit the GRU on the residual target across all rolling splits.
# The output layer is linear because residuals can take either sign.
# Predictions, training logs and gate values are written to the given paths.
gru_raw_pred, gru_logs, gru_gates, hybrid_resid_gru_last_history = run_rolling_experiment(
    df=residual_df,
    splits_df=splits_df,
    cfg=cfg,
    architecture="gru",
    variant="hybrid_residual",
    target_col="residual_var",
    output_activation="linear",
    capture_gates=True,
    collect_last_history=True,
    resume=False,
    verbose_fit=0,
    predictions_path=gru_raw_path,
    train_logs_path=gru_log_path,
    gates_path=gru_gate_path,
)

# Step 2: rebuild variance forecasts as GARCH baseline + predicted residual,
# using the per-date lookup (map_df) built in the LSTM cell.
gru_output_cols = [
    "date",
    "split_id",
    "variant",
    "architecture",
    "train_loss",
    "y_true_var",
    "y_pred_var",
    "garch_cond_var",
    "y_true_residual",
    "y_pred_residual",
]

gru_pred = (
    gru_raw_pred.merge(map_df, on="date", how="left")
    # The raw run stores the residual target/prediction in the *_var columns;
    # keep them under explicit residual names before overwriting.
    .assign(
        y_true_residual=lambda frame: frame["y_true_var"],
        y_pred_residual=lambda frame: frame["y_pred_var"],
    )
    # Swap in the variance-level truth and the reconstructed forecast,
    # floored at 1e-12 so it stays strictly positive.
    .assign(
        y_true_var=lambda frame: frame["sq_return"],
        y_pred_var=lambda frame: np.clip(
            frame["garch_cond_var"] + frame["y_pred_residual"], 1e-12, None
        ),
        variant="hybrid_residual",
        architecture="gru",
    )
)[gru_output_cols]

# Score the reconstructed forecasts per variant / architecture / loss.
gru_metrics = evaluate_forecasts(gru_pred, group_cols=["variant", "architecture", "train_loss"])
gru_metrics


In [None]:
# Write the reconstructed predictions, training logs and gate values to disk.
# The order matches the report below: predictions, then logs, then gates,
# with LSTM before GRU in each pair.
artifact_targets = [
    (lstm_pred, lstm_pred_path),
    (gru_pred, gru_pred_path),
    (lstm_logs, lstm_log_path),
    (gru_logs, gru_log_path),
    (lstm_gates, lstm_gate_path),
    (gru_gates, gru_gate_path),
]
for artifact_frame, artifact_path in artifact_targets:
    artifact_frame.to_csv(artifact_path, index=False)

# The raw prediction paths were handed to run_rolling_experiment in the
# training cells; this cell only reports their location.
print(f"Saved residual LSTM raw predictions: {lstm_raw_path}")
print(f"Saved residual GRU raw predictions: {gru_raw_path}")
print(f"Saved residual LSTM final predictions: {lstm_pred_path}")
print(f"Saved residual GRU final predictions: {gru_pred_path}")
print(f"Saved residual gate files: {lstm_gate_path}, {gru_gate_path}")


In [None]:
# Overfitting diagnostics: compare best train and validation loss per
# rolling split, for both residual models.
log_frames = [
    logs.assign(model_name=model_name)
    for model_name, logs in (
        ("hybrid_residual_lstm", lstm_logs),
        ("hybrid_residual_gru", gru_logs),
    )
]
log_plot = pd.concat(log_frames, ignore_index=True)

# The aggregate plot needs these per-split summary columns in the logs.
required_cols = {
    "split_id",
    "model_name",
    "best_train_loss",
    "best_val_loss",
    "best_gap_val_minus_train",
}
missing = sorted(required_cols - set(log_plot.columns))

if not missing:
    log_plot = log_plot.sort_values(["model_name", "split_id"]).reset_index(drop=True)

    fig, (loss_ax, gap_ax) = plt.subplots(2, 1, figsize=(14, 9), sharex=True)

    # One pass per model: losses on the top panel, val-minus-train gap below.
    for model_name, model_logs in log_plot.groupby("model_name"):
        split_ids = model_logs["split_id"]
        loss_ax.plot(split_ids, model_logs["best_train_loss"], linewidth=1.1, label=f"{model_name} train")
        loss_ax.plot(
            split_ids,
            model_logs["best_val_loss"],
            linewidth=1.1,
            linestyle="--",
            label=f"{model_name} val",
        )
        gap_ax.plot(split_ids, model_logs["best_gap_val_minus_train"], linewidth=1.2, label=model_name)

    loss_ax.set_ylabel("Loss")
    loss_ax.set_title("Best Train/Val Loss by Rolling Split")
    loss_ax.grid(alpha=0.2)
    loss_ax.legend(ncol=2)

    # Zero line: points above it mean validation loss exceeds train loss.
    gap_ax.axhline(0.0, color="gray", linestyle="--", linewidth=1.0)
    gap_ax.set_xlabel("Split ID")
    gap_ax.set_ylabel("Gap")
    gap_ax.set_title("Overfit Gap by Rolling Split (Best Epoch)")
    gap_ax.grid(alpha=0.2)
    gap_ax.legend()

    plt.tight_layout()
    plt.show()
else:
    # Without the summary columns only the per-epoch curves further down
    # can be drawn.
    print(
        "Train log is missing aggregate diagnostics columns. "
        "Using last trained split histories instead. "
        f"Missing: {missing}"
    )

# Per-epoch loss curves for the last trained split of each model.
last_histories = {
    "hybrid_residual_lstm": hybrid_resid_lstm_last_history,
    "hybrid_residual_gru": hybrid_resid_gru_last_history,
}
for model_name, hist in last_histories.items():
    # No history was collected for this model: explain and move on.
    if hist is None:
        print(
            f"No last split history for {model_name}. "
            "If resume=True and no new split was trained, set resume=False (or clear outputs) and rerun."
        )
        continue

    # Epochs are numbered from 1 for display.
    hist_df = pd.DataFrame(
        {
            "epoch": range(1, len(hist["loss"]) + 1),
            "train_loss": hist["loss"],
            "val_loss": hist["val_loss"],
        }
    )

    fig, ax = plt.subplots(figsize=(9, 5))
    for curve_col, curve_label in (("train_loss", "Train Loss"), ("val_loss", "Validation Loss")):
        ax.plot(hist_df["epoch"], hist_df[curve_col], label=curve_label, linewidth=1.4)

    ax.set_title(f"{model_name} Last Split Loss Curves (split_id={hist['split_id']})")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.grid(alpha=0.2)
    ax.legend()
    plt.tight_layout()
    plt.show()
