# Deep Learning Models for Opening Returns Prediction

This notebook demonstrates:
1. **Temporal Convolutional Network (TCN)** - Causal convolutions for time series
2. **GRU with Attention** - Recurrent network with attention mechanism
3. **Model Comparison** - Compare deep learning models with traditional ML baselines

**Features Used:**
- Short-term daily momentum (1d, 10d, 20d) - properly lagged
- HAR-style realized volatility (1d, 5d)
- Garman-Klass volatility (5d, 10d, 20d)
- Volatility ratios (short/long term)
- Previous session levels (high/low/close distances)
- Overnight returns, VWAP distance, intraday range
- Target time returns and volumes
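- Opening-range volatility and bid/ask volume imbalance (proxy-based; skipped if the required columns are missing)
- Cross-asset correlations (optional; only available when supplementary instruments are loaded, which is disabled by default)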

**Target**: Returns during the opening period (session returns)

**Deep Learning Approach**:
- Uses sliding windows of historical features (default: 20 days lookback)
- Temporal models capture sequential dependencies
- Automatic feature extraction through neural network layers

## Setup and Imports

In [None]:
from __future__ import annotations

import sys
from datetime import time, timedelta
from pathlib import Path
from typing import Dict, Tuple

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# CTAFlow imports
from CTAFlow.models.intraday_momentum import IntradayMomentum
from CTAFlow.models.base_models import CTALight
from CTAFlow.models.deep_learning import (
    TCNRegressor,
    GRUAttnRegressor,
    fit,
    evaluate,
    convert_IM,
    TrainConfig,
    default_regression_metrics
)
from CTAFlow.data import make_window_dataset, read_exported_df

# Set plotting style (applies to every figure created later in the notebook)
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

# Check for GPU: `device` is the string passed to the training/evaluation
# helpers and used to move batches in the prediction loop.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"✓ Using device: {device}")
print(f"✓ PyTorch version: {torch.__version__}")
print(f"✓ Imports successful")

## Configuration

In [None]:
# =============================================================================
# SESSION CONFIGURATION
# =============================================================================

# Session timing parameters
SESSION_START = time(hour=8, minute=30)
SESSION_END = time(hour=15, minute=0)
CLOSING_LENGTH = timedelta(minutes=60)
TIMEZONE = "America/New_York"

# Feature engineering time parameters
KEY_RETURN_TIMES = [time(9, 30), time(14, 0)]
OPENING_VOL_PERIOD = timedelta(minutes=60)
TARGET_PERIOD = timedelta(hours=6, minutes=30)  # Full session (08:30 -> 15:00)

# Deep learning specific configuration
LOOKBACK_PERIOD = 20  # Number of days to look back for temporal models
BATCH_SIZE = 32
EPOCHS = 100
LEARNING_RATE = 2e-3
USE_GPU = torch.cuda.is_available()

# Reproducibility: seed every RNG this notebook touches so that weight
# initialisation and any NumPy-based sampling repeat across
# "Restart Kernel -> Run All". (CUDA kernels may still be non-deterministic.)
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Data split (chronological fractions of the aligned sample)
TRAIN_SPLIT = 0.7  # 70% train
VAL_SPLIT = 0.15   # 15% validation
TEST_SPLIT = 0.15  # 15% test

# The split arithmetic in later cells assumes the three fractions cover the
# whole sample; fail here rather than silently dropping or reusing rows.
if abs(TRAIN_SPLIT + VAL_SPLIT + TEST_SPLIT - 1.0) > 1e-9:
    raise ValueError(
        "TRAIN_SPLIT + VAL_SPLIT + TEST_SPLIT must sum to 1.0, got "
        f"{TRAIN_SPLIT + VAL_SPLIT + TEST_SPLIT}"
    )

print("="*70)
print("CONFIGURATION")
print("="*70)
print(f"Session: {SESSION_START} to {SESSION_END}")
print(f"Lookback period: {LOOKBACK_PERIOD} days")
print(f"Batch size: {BATCH_SIZE}")
print(f"Epochs: {EPOCHS}")
print(f"Learning rate: {LEARNING_RATE}")
print(f"Random seed: {SEED}")
print(f"Device: {device}")
print(f"Data split: {TRAIN_SPLIT:.0%} train / {VAL_SPLIT:.0%} val / {TEST_SPLIT:.0%} test")
print("="*70)

## Load Data

In [None]:
# Specify your ticker and data path
TICKER = "ES"  # Change to your ticker
DATA_PATH = Path("path/to/your/data")  # Update this path

# Load intraday data
csv_path = DATA_PATH / f"{TICKER}_5min.csv"
print(f"Loading {TICKER} from {csv_path}")

# DATA_PATH ships as a placeholder, so fail with an actionable message instead
# of whatever error the reader raises for a missing file.
if not csv_path.exists():
    raise FileNotFoundError(
        f"Intraday file not found: {csv_path}. "
        "Update TICKER / DATA_PATH at the top of this cell."
    )

intraday_data = read_exported_df(csv_path)

print(f"\n✓ Loaded {len(intraday_data):,} bars")
print(f"✓ Date range: {intraday_data.index[0].date()} to {intraday_data.index[-1].date()}")
print(f"\nData preview:")
intraday_data.head()

In [None]:
# Optional: Load supplementary data for cross-asset features
# Uncomment and modify as needed. When enabled, the dict is keyed by ticker and
# each value is read with the same loader as the primary instrument.

# SUPPLEMENTARY_TICKERS = ["NQ", "YM"]  # Related instruments
# supplementary_data = {
#     ticker: read_exported_df(DATA_PATH / f"{ticker}_5min.csv")
#     for ticker in SUPPLEMENTARY_TICKERS
# }
# print(f"✓ Loaded {len(supplementary_data)} supplementary instruments")

# Passed as `supplementary_intraday_data` when the IntradayMomentum models are built.
supplementary_data = None  # Set to None if not using

## Helper Functions

In [None]:
def prepare_daily_data(intraday_df: pd.DataFrame) -> pd.DataFrame:
    """Create daily OHLCV data from intraday bars.

    Args:
        intraday_df: Intraday bars with ``Open``, ``High``, ``Low``, ``Close``
            and ``Volume`` columns. The index must be a ``DatetimeIndex`` or
            be convertible with ``pd.to_datetime``.

    Returns:
        One row per calendar day that has at least one bar: first open,
        max high, min low, last close and summed volume. Days without bars
        (NaN OHLC after resampling) are dropped.

    The input frame is never modified; if its index has to be converted, the
    conversion happens on a shallow copy.
    """
    if not isinstance(intraday_df.index, pd.DatetimeIndex):
        # Rebind to a shallow copy so the caller's frame keeps its original
        # index (the data blocks are shared, only the axis is replaced).
        intraday_df = intraday_df.copy(deep=False)
        intraday_df.index = pd.to_datetime(intraday_df.index)

    daily = intraday_df.resample('1D').agg({
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last',
        'Volume': 'sum'
    }).dropna()

    return daily


def build_features(model, daily_df, intraday_data):
    """Populate ``model.training_data`` with the full feature set and return it.

    The feature families match the tree-based models notebook; the resulting
    frame is later windowed for the sequence models.

    Args:
        model: Feature-building model whose ``training_data`` is reset here
            and then filled by its own feature methods.
        daily_df: Daily bars; the index becomes the index of the feature frame.
        intraday_data: Intraday bars passed to the intraday feature methods.

    Returns:
        ``model.training_data`` after all feature methods have run.

    Reads the module-level ``OPENING_VOL_PERIOD``, ``KEY_RETURN_TIMES`` and
    ``TARGET_PERIOD`` settings.

    NOTE(review): the "target time" return/volume features are anchored at
    ``KEY_RETURN_TIMES[-1]`` - confirm the library lags them so they cannot
    overlap the prediction target.
    """
    # Reset to an empty frame indexed like the daily bars.
    model.training_data = pd.DataFrame(index=daily_df.index)

    # --- Daily momentum (lagged) and volatility families ---------------------
    model.add_daily_momentum_features(daily_df, lookbacks=(1, 10, 20))
    model.har_volatility_features(intraday_df=intraday_data, horizons=(1, 5))
    model.opening_range_volatility(
        intraday_df=intraday_data, period_length=OPENING_VOL_PERIOD
    )
    model.gk_vol(intraday_df=intraday_data, lookbacks=(5, 10, 20), add_as_feature=True)
    model.vol_ratios(
        intraday_df=intraday_data,
        short_long_pairs=((5, 20), (10, 60)),
        add_as_feature=True,
    )

    # --- Features anchored at the last key return time -----------------------
    anchor_time = KEY_RETURN_TIMES[-1]
    intraday_kwargs = dict(intraday_df=intraday_data, add_as_feature=True)

    model.prev_hl(target_time=anchor_time, **intraday_kwargs)    # previous session levels
    model.overnight_returns(**intraday_kwargs)                   # overnight returns
    model.vwap_distance(target_time=KEY_RETURN_TIMES, **intraday_kwargs)
    model.intraday_range(target_time=anchor_time, lookback_days=20, **intraday_kwargs)

    # --- Target-time returns, then volumes, each over the same two windows ---
    # Order is kept as: returns (anchor, key times) followed by volumes
    # (anchor, key times).
    windows = (
        (anchor_time, TARGET_PERIOD),
        (KEY_RETURN_TIMES, timedelta(minutes=60)),
    )
    for add_feature in (model.target_time_returns, model.target_time_volume):
        for when, span in windows:
            add_feature(when, period_length=span, add_as_feature=True)

    # --- Bid/ask imbalance (proxy); best-effort when columns are missing -----
    try:
        model.bid_ask_volume_imbalance(
            target_time=anchor_time,
            use_proxy=True,
            **intraday_kwargs,
        )
    except KeyError:
        print("Note: Bid/ask imbalance features skipped")

    features = model.training_data
    print(f"\n✓ Feature engineering complete")
    print(f"  Total features: {len(model.feature_names)}")
    print(f"  Training data shape: {features.shape}")
    return features


# Confirmation that the helper definitions in this cell executed.
print("✓ Helper functions defined")

## Prepare Data for Deep Learning

In [None]:
# Initialize IntradayMomentum model. It is used here for feature engineering and
# as the source of the target series; the deep learning models are built later.
model = IntradayMomentum(
    intraday_data=intraday_data,
    session_open=SESSION_START,
    session_end=SESSION_END,
    closing_length=CLOSING_LENGTH,
    tz=TIMEZONE,
    base_model=CTALight,
    task='regression',
    supplementary_intraday_data=supplementary_data,
    use_gpu=False  # Not needed for feature engineering
)

# Use pre-calculated target returns
# NOTE(review): presumably one value per session, derived from the session
# settings above - confirm which window `target_data` covers.
target_returns = model.target_data

# Prepare daily data
daily_df = prepare_daily_data(intraday_data)

# Quick sanity check of sample sizes and the target distribution.
print(f"✓ Daily data: {len(daily_df)} days")
print(f"✓ Target returns: {len(target_returns)} days")
print(f"\nTarget Statistics:")
print(f"  Mean: {target_returns.mean():.6f}")
print(f"  Std:  {target_returns.std():.6f}")
print(f"  Min:  {target_returns.min():.6f}")
print(f"  Max:  {target_returns.max():.6f}")

In [None]:
# Build features (also stored on `model.training_data` by build_features)
X = build_features(model, daily_df, intraday_data)

# Align features and target: keep only the dates present in both indexes.
# NOTE(review): rows containing NaN features are not dropped here - confirm the
# downstream dataset builders handle missing values.
common_idx = X.index.intersection(target_returns.index)
X = X.loc[common_idx]
y = target_returns.loc[common_idx]

print(f"\n✓ Aligned data: {len(X)} samples, {X.shape[1]} features")

In [None]:
# Convert to window datasets for deep learning
# This creates sliding windows of historical data
#
# NOTE(review): convert_IM receives the whole `model`, not the train+val slice
# of X / y. If it windows all of the model's training data, the train/val
# loaders overlap the held-out test period built below and the test metrics
# are optimistic. Confirm how convert_IM splits before trusting the comparison.
(train_ds, train_dl), (val_ds, val_dl) = convert_IM(
    model,
    lookback_period=LOOKBACK_PERIOD,
    batch_size=BATCH_SIZE,
    val_split=True,
    val_size=VAL_SPLIT / (TRAIN_SPLIT + VAL_SPLIT)  # Adjust for train+val only
)

# Create separate test set: the final TEST_SPLIT fraction of the aligned rows,
# taken chronologically (no shuffling of the row order before slicing).
test_idx = int(len(X) * (TRAIN_SPLIT + VAL_SPLIT))

test_ds, test_dl = make_window_dataset(
    X.iloc[test_idx:],
    y.iloc[test_idx:],
    lookback=LOOKBACK_PERIOD,
    batch_size=BATCH_SIZE
)

print(f"\n✓ Dataset creation complete")
print(f"  Train batches: {len(train_dl)}")
print(f"  Val batches:   {len(val_dl)}")
print(f"  Test batches:  {len(test_dl)}")
print(f"  Input shape per sample: (features={X.shape[1]}, lookback={LOOKBACK_PERIOD})")

## Model 1: Temporal Convolutional Network (TCN)

TCN uses causal (non-leaking) convolutions with dilations to capture long-range dependencies.
- **Pros**: Parallelizable, stable gradients, long effective receptive field
- **Cons**: Less interpretable than tree models, requires more data

In [None]:
print("="*70)
print("MODEL 1: TEMPORAL CONVOLUTIONAL NETWORK (TCN)")
print("="*70)

# Create TCN model; the input channel count must equal the number of feature
# columns in X.
tcn_model = TCNRegressor(
    in_channels=X.shape[1],  # Number of features
    channels=(64, 64, 64),   # Hidden layer sizes
    kernel_size=3,
    dropout=0.2
)

# Show the layer layout and the parameter count (summed over all tensors).
print(f"\nModel architecture:")
print(tcn_model)
print(f"\nTotal parameters: {sum(p.numel() for p in tcn_model.parameters()):,}")

In [None]:
# Training configuration
tcn_config = TrainConfig(
    epochs=EPOCHS,
    lr=LEARNING_RATE,
    weight_decay=1e-3,
    grad_clip=1.0,
    # Automatic mixed precision is only requested when running on CUDA; on a
    # CPU-only machine it gives no speed-up and CUDA autocast/grad scaling is
    # unavailable.
    use_amp=(device == "cuda"),
    device=device,
    log_every=10,
    early_stop_patience=20,
    early_stop_min_delta=1e-5,
    scheduler="plateau",
    plateau_patience=5,
    plateau_factor=0.5
)

# Loss function (also reused by the GRU cells below)
# NOTE(review): with delta=1.0 the Huber loss is quadratic for |error| < 1. If
# the targets are raw returns (typically far below 1 in magnitude) it behaves
# like 0.5 * MSE and gives no outlier robustness - consider a delta on the
# scale of the target's standard deviation.
loss_fn = nn.HuberLoss(delta=1.0)  # Robust to outliers

print("\nTraining TCN model...")
print(f"Configuration: {tcn_config}")

# Train model; `fit` returns the trained model and the per-epoch history that
# the training-history plot reads.
tcn_model, tcn_history = fit(
    model=tcn_model,
    train_loader=train_dl,
    val_loader=val_dl,
    loss_fn=loss_fn,
    cfg=tcn_config,
    metrics_fn=default_regression_metrics
)

print("\n✓ TCN training complete")

In [None]:
# Evaluate TCN on test set, with the same loss and AMP setting used in training.
tcn_metrics = evaluate(
    model=tcn_model,
    loader=test_dl,
    loss_fn=loss_fn,
    device=device,
    metrics_fn=default_regression_metrics,
    use_amp=tcn_config.use_amp
)

# The keys read below ('loss', 'mse', 'mae', 'corr', 'dir_acc') are reused by
# the comparison plots and the summary cell.
print("\nTCN Test Metrics:")
print(f"  Loss (Huber): {tcn_metrics['loss']:.6f}")
print(f"  MSE:          {tcn_metrics['mse']:.6f}")
print(f"  MAE:          {tcn_metrics['mae']:.6f}")
print(f"  Correlation:  {tcn_metrics['corr']:.4f}")
print(f"  Dir Accuracy: {tcn_metrics['dir_acc']:.2%}")

## Model 2: GRU with Attention

A GRU (Gated Recurrent Unit) network with an attention mechanism that lets the model focus on the most informative time steps in the lookback window.
- **Pros**: Captures sequential dependencies, attention provides interpretability
- **Cons**: Sequential processing (slower), can have vanishing gradients

In [None]:
print("="*70)
print("MODEL 2: GRU WITH ATTENTION")
print("="*70)

# Create GRU model; the input channel count must equal the number of feature
# columns in X.
gru_model = GRUAttnRegressor(
    in_channels=X.shape[1],  # Number of features
    hidden=128,              # Hidden state size
    layers=2,                # Number of GRU layers
    dropout=0.2
)

# Show the layer layout and the parameter count (summed over all tensors).
print(f"\nModel architecture:")
print(gru_model)
print(f"\nTotal parameters: {sum(p.numel() for p in gru_model.parameters()):,}")

In [None]:
# Training configuration (same as TCN)
gru_config = TrainConfig(
    epochs=EPOCHS,
    lr=LEARNING_RATE,
    weight_decay=1e-3,
    grad_clip=1.0,
    # Automatic mixed precision is only requested when running on CUDA; on a
    # CPU-only machine it gives no speed-up and CUDA autocast/grad scaling is
    # unavailable.
    use_amp=(device == "cuda"),
    device=device,
    log_every=10,
    early_stop_patience=20,
    early_stop_min_delta=1e-5,
    scheduler="plateau",
    plateau_patience=5,
    plateau_factor=0.5
)

print("\nTraining GRU model...")

# Train model. `loss_fn` is the Huber loss created in the TCN training cell, so
# that cell must have been run first.
gru_model, gru_history = fit(
    model=gru_model,
    train_loader=train_dl,
    val_loader=val_dl,
    loss_fn=loss_fn,
    cfg=gru_config,
    metrics_fn=default_regression_metrics
)

print("\n✓ GRU training complete")

In [None]:
# Evaluate GRU on test set, with the same loss and AMP setting used in training.
gru_metrics = evaluate(
    model=gru_model,
    loader=test_dl,
    loss_fn=loss_fn,
    device=device,
    metrics_fn=default_regression_metrics,
    use_amp=gru_config.use_amp
)

# The keys read below ('loss', 'mse', 'mae', 'corr', 'dir_acc') are reused by
# the comparison plots and the summary cell.
print("\nGRU Test Metrics:")
print(f"  Loss (Huber): {gru_metrics['loss']:.6f}")
print(f"  MSE:          {gru_metrics['mse']:.6f}")
print(f"  MAE:          {gru_metrics['mae']:.6f}")
print(f"  Correlation:  {gru_metrics['corr']:.4f}")
print(f"  Dir Accuracy: {gru_metrics['dir_acc']:.2%}")

## Baseline: LightGBM for Comparison

In [None]:
print("="*70)
print("BASELINE: LIGHTGBM REGRESSOR")
print("="*70)

# Create baseline LightGBM model for comparison. A second IntradayMomentum
# instance is used so the baseline's training data is independent of `model`.
lgbm_model = IntradayMomentum(
    intraday_data=intraday_data,
    session_open=SESSION_START,
    session_end=SESSION_END,
    closing_length=CLOSING_LENGTH,
    tz=TIMEZONE,
    base_model=CTALight,
    task='regression',
    supplementary_intraday_data=supplementary_data,
    use_gpu=False
)

# Build features (same as deep learning models) and align them with the target
# on the shared dates.
X_lgbm = build_features(lgbm_model, daily_df, intraday_data)
common_idx_lgbm = X_lgbm.index.intersection(target_returns.index)
X_lgbm = X_lgbm.loc[common_idx_lgbm]
y_lgbm = target_returns.loc[common_idx_lgbm]

# Use same split as deep learning models: chronological cut points at
# TRAIN_SPLIT and TRAIN_SPLIT + VAL_SPLIT of the aligned rows.
# NOTE(review): the test slice here is row-based, while the deep learning test
# loader is window-based, so the two test sets may not contain exactly the same
# samples - keep this in mind when comparing metrics.
train_val_idx = int(len(X_lgbm) * (TRAIN_SPLIT + VAL_SPLIT))
val_idx = int(len(X_lgbm) * TRAIN_SPLIT)

X_train_lgbm = X_lgbm.iloc[:val_idx]
y_train_lgbm = y_lgbm.iloc[:val_idx]
X_val_lgbm = X_lgbm.iloc[val_idx:train_val_idx]
y_val_lgbm = y_lgbm.iloc[val_idx:train_val_idx]
X_test_lgbm = X_lgbm.iloc[train_val_idx:]
y_test_lgbm = y_lgbm.iloc[train_val_idx:]

print(f"Data split:")
print(f"  Train: {len(X_train_lgbm)} samples")
print(f"  Val:   {len(X_val_lgbm)} samples")
print(f"  Test:  {len(X_test_lgbm)} samples")

# Train LightGBM, using the validation slice for early stopping.
print(f"\nTraining LightGBM...")
lgbm_model.fit(
    X_train_lgbm, y_train_lgbm,
    eval_set=(X_val_lgbm, y_val_lgbm),
    early_stopping_rounds=50,
    num_boost_round=500
)

# Evaluate on the held-out test slice. The metric keys differ from the deep
# learning metrics ('r2', 'rmse', 'mae', 'directional_accuracy').
lgbm_metrics = lgbm_model.evaluate(X_test_lgbm, y_test_lgbm)

print("\nLightGBM Test Metrics:")
print(f"  R²:           {lgbm_metrics['r2']:.4f}")
print(f"  RMSE:         {lgbm_metrics['rmse']:.6f}")
print(f"  MAE:          {lgbm_metrics['mae']:.6f}")
print(f"  Dir Accuracy: {lgbm_metrics['directional_accuracy']:.2%}")

## Training History Visualization

In [None]:
# Plot training and validation loss, one panel per network.
# Each history is read as a dict with 'train' and 'val' lists holding one
# metrics dict per epoch; only the 'loss' entry is plotted.
fig, axes = plt.subplots(1, 2, figsize=(16, 5))

# TCN training history (left panel, default colours)
tcn_train_loss = [m['loss'] for m in tcn_history['train']]
tcn_val_loss = [m['loss'] for m in tcn_history['val']]
axes[0].plot(tcn_train_loss, label='Train Loss', linewidth=2)
axes[0].plot(tcn_val_loss, label='Val Loss', linewidth=2)
axes[0].set_xlabel('Epoch', fontsize=11)
axes[0].set_ylabel('Loss (Huber)', fontsize=11)
axes[0].set_title('TCN Training History', fontsize=12, fontweight='bold')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# GRU training history (right panel)
gru_train_loss = [m['loss'] for m in gru_history['train']]
gru_val_loss = [m['loss'] for m in gru_history['val']]
axes[1].plot(gru_train_loss, label='Train Loss', linewidth=2, color='green')
axes[1].plot(gru_val_loss, label='Val Loss', linewidth=2, color='orange')
axes[1].set_xlabel('Epoch', fontsize=11)
axes[1].set_ylabel('Loss (Huber)', fontsize=11)
axes[1].set_title('GRU Training History', fontsize=12, fontweight='bold')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Model Comparison

In [None]:
# Get predictions from all models
# Switch both networks to inference mode before predicting.
tcn_model.eval()
gru_model.eval()

# Gradients are not needed for inference.
with torch.no_grad():
    tcn_preds = []
    gru_preds = []
    test_targets = []
    
    # Both networks see the same batches, so predictions and targets stay
    # aligned element-for-element.
    # NOTE(review): assumes both models already live on `device` after training.
    for xb, yb in test_dl:
        xb = xb.to(device)
        tcn_preds.append(tcn_model(xb).cpu().numpy())
        gru_preds.append(gru_model(xb).cpu().numpy())
        test_targets.append(yb.numpy())
    
    # Stitch the per-batch arrays back into one array per series.
    tcn_preds = np.concatenate(tcn_preds)
    gru_preds = np.concatenate(gru_preds)
    test_targets = np.concatenate(test_targets)

# LightGBM predicts one value per row of X_test_lgbm, so this array pairs with
# y_test_lgbm and its length may differ from the windowed `test_targets`.
lgbm_preds = lgbm_model.predict(X_test_lgbm).values

print("✓ Predictions generated for all models")

In [None]:
# Comparison visualization: 2x2 grid with predicted-vs-actual scatters on the
# top row and per-model bar charts (correlation, directional accuracy) below.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# TCN scatter plot
axes[0, 0].scatter(test_targets, tcn_preds, alpha=0.6, s=30, label='Predictions', edgecolors='black', linewidth=0.5)
axes[0, 0].plot([test_targets.min(), test_targets.max()], [test_targets.min(), test_targets.max()],
                'r--', lw=2, label='Perfect Prediction')
axes[0, 0].set_xlabel('Actual Returns', fontsize=11)
axes[0, 0].set_ylabel('Predicted Returns', fontsize=11)
axes[0, 0].set_title(f'TCN (Corr={tcn_metrics["corr"]:.4f}, Dir Acc={tcn_metrics["dir_acc"]:.2%})',
                     fontsize=12, fontweight='bold')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# GRU scatter plot
axes[0, 1].scatter(test_targets, gru_preds, alpha=0.6, s=30, color='green', label='Predictions',
                   edgecolors='black', linewidth=0.5)
axes[0, 1].plot([test_targets.min(), test_targets.max()], [test_targets.min(), test_targets.max()],
                'r--', lw=2, label='Perfect Prediction')
axes[0, 1].set_xlabel('Actual Returns', fontsize=11)
axes[0, 1].set_ylabel('Predicted Returns', fontsize=11)
axes[0, 1].set_title(f'GRU (Corr={gru_metrics["corr"]:.4f}, Dir Acc={gru_metrics["dir_acc"]:.2%})',
                     fontsize=12, fontweight='bold')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# Performance comparison bar chart
model_names = ['TCN', 'GRU', 'LightGBM']
# LightGBM predictions pair with y_test_lgbm (one value per test row).
# `test_targets` comes from the windowed loader, so it is a different sample
# set and may have a different length; correlating against it either raises a
# shape error or silently misaligns the series.
lgbm_corr = np.corrcoef(
    np.ravel(np.asarray(y_test_lgbm, dtype=float)),
    np.ravel(np.asarray(lgbm_preds, dtype=float)),
)[0, 1]
correlations = [tcn_metrics['corr'], gru_metrics['corr'], lgbm_corr]
colors = ['steelblue', 'green', 'orange']
bars = axes[1, 0].bar(model_names, correlations, color=colors, alpha=0.7, edgecolor='black', linewidth=1.5)
axes[1, 0].set_ylabel('Correlation', fontsize=11)
axes[1, 0].set_title('Model Performance Comparison (Correlation)', fontsize=12, fontweight='bold')
axes[1, 0].grid(True, alpha=0.3, axis='y')
# Correlations can be negative (or NaN), so derive both limits from the finite
# values instead of assuming a positive maximum. When every value is positive
# this reduces to the usual [0, 1.2 * max].
finite_corrs = [c for c in correlations if np.isfinite(c)] or [0.0]
corr_floor = min(0.0, min(finite_corrs))
corr_ceil = max(0.0, max(finite_corrs))
corr_pad = 0.2 * max(corr_ceil - corr_floor, 1e-3)
axes[1, 0].set_ylim([corr_floor - corr_pad if corr_floor < 0 else 0.0, corr_ceil + corr_pad])
axes[1, 0].axhline(y=0.0, color='black', linewidth=0.8)
for bar, corr in zip(bars, correlations):
    height = bar.get_height()
    # Put the label outside the bar: above positive bars, below negative ones.
    axes[1, 0].text(bar.get_x() + bar.get_width()/2., height,
                    f'{corr:.4f}', ha='center', va='bottom' if height >= 0 else 'top',
                    fontsize=10, fontweight='bold')

# Directional accuracy comparison
dir_accs = [tcn_metrics['dir_acc'], gru_metrics['dir_acc'], lgbm_metrics['directional_accuracy']]
bars2 = axes[1, 1].bar(model_names, dir_accs, color=colors, alpha=0.7, edgecolor='black', linewidth=1.5)
axes[1, 1].set_ylabel('Directional Accuracy', fontsize=11)
axes[1, 1].set_title('Model Performance Comparison (Direction)', fontsize=12, fontweight='bold')
axes[1, 1].grid(True, alpha=0.3, axis='y')
axes[1, 1].set_ylim([0, 1])
axes[1, 1].axhline(y=0.5, color='red', linestyle='--', linewidth=2, label='Random (50%)')
for bar, acc in zip(bars2, dir_accs):
    height = bar.get_height()
    axes[1, 1].text(bar.get_x() + bar.get_width()/2., height,
                    f'{acc:.2%}', ha='center', va='bottom', fontsize=10, fontweight='bold')
axes[1, 1].legend()

plt.tight_layout()
plt.show()

## Summary Statistics

In [None]:
# Text summary of the test metrics computed in the evaluation cells.
print("="*70)
print("MODEL COMPARISON SUMMARY")
print("="*70)

print("\nDEEP LEARNING MODELS:")
print("-" * 70)
print(f"TCN:")
print(f"  MSE:          {tcn_metrics['mse']:.6f}")
print(f"  MAE:          {tcn_metrics['mae']:.6f}")
print(f"  Correlation:  {tcn_metrics['corr']:.4f}")
print(f"  Dir Accuracy: {tcn_metrics['dir_acc']:.2%}")

print(f"\nGRU with Attention:")
print(f"  MSE:          {gru_metrics['mse']:.6f}")
print(f"  MAE:          {gru_metrics['mae']:.6f}")
print(f"  Correlation:  {gru_metrics['corr']:.4f}")
print(f"  Dir Accuracy: {gru_metrics['dir_acc']:.2%}")

# The baseline reports R²/RMSE rather than MSE/correlation, so only MAE and
# directional accuracy are directly comparable with the networks above.
print("\nBASELINE MODEL:")
print("-" * 70)
print(f"LightGBM:")
print(f"  R²:           {lgbm_metrics['r2']:.4f}")
print(f"  RMSE:         {lgbm_metrics['rmse']:.6f}")
print(f"  MAE:          {lgbm_metrics['mae']:.6f}")
print(f"  Dir Accuracy: {lgbm_metrics['directional_accuracy']:.2%}")

# The findings and next steps below are static text; they are not derived from
# the metrics printed above.
print("\n" + "="*70)
print("KEY FINDINGS:")
print("="*70)
print("✓ Deep learning models capture temporal dependencies through sequence modeling")
print("✓ TCN uses causal convolutions - parallelizable and stable")
print("✓ GRU with attention - sequential processing with interpretable attention weights")
print("✓ Both models use sliding windows of historical features (lookback period)")
print("✓ Compare correlation and directional accuracy across all models")
print("✓ Deep learning models may require more data and tuning than tree-based models")
print("✓ Consider ensemble approaches combining deep learning and tree-based models")

print("\n" + "="*70)
print("NEXT STEPS:")
print("="*70)
print("1. Experiment with different architectures (more layers, hidden units)")
print("2. Try different lookback periods (10, 30, 60 days)")
print("3. Add regularization techniques (dropout, weight decay, early stopping)")
print("4. Implement ensemble methods combining multiple models")
print("5. Add feature engineering specific to sequential data (technical indicators)")
print("6. Explore other architectures (LSTM, Transformer, hybrid models)")

## Save Models (Optional)

In [None]:
# Save trained models
# Checkpoints are written to ./saved_models relative to the working directory.
save_path = Path("./saved_models")
save_path.mkdir(exist_ok=True)

# Each checkpoint bundles the weights with the training config, test metrics and
# the feature names of the feature-engineering model.
# NOTE: 'config' is a TrainConfig object, so the file pickles a project class.
# Loading it needs CTAFlow importable, and with recent PyTorch versions (where
# torch.load defaults to weights_only=True) requires weights_only=False or an
# explicit allow-list. Only load checkpoints from trusted sources.
torch.save({
    'model_state_dict': tcn_model.state_dict(),
    'config': tcn_config,
    'metrics': tcn_metrics,
    'feature_names': model.feature_names,
}, save_path / 'tcn_model.pt')

torch.save({
    'model_state_dict': gru_model.state_dict(),
    'config': gru_config,
    'metrics': gru_metrics,
    'feature_names': model.feature_names,
}, save_path / 'gru_model.pt')

print(f"✓ Models saved to {save_path}")