## 1. Setup & Dependencies

In [18]:
# Check if running on Colab
try:
    from google.colab import drive
    IN_COLAB = True
    print("Running on Google Colab")
except ImportError:
    IN_COLAB = False
    print("Running locally")

# Set memory optimization and check GPU
import torch
torch.cuda.empty_cache() if torch.cuda.is_available() else None
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Version: {torch.version.cuda}")
    torch.cuda.set_per_process_memory_fraction(0.95)  # Use 95% of GPU memory

Running locally
Device: cpu


In [19]:
# Install dependencies
!pip install -q pandas numpy scipy pyarrow matplotlib seaborn scikit-learn statsmodels tqdm joblib ipykernel torch

import os
import sys
import json
import pickle
import time
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from tqdm import tqdm
from datetime import datetime
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from multiprocessing import Pool, cpu_count
import threading

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm

print(f"âœ“ All dependencies installed")
print(f"âœ“ CPU cores available: {cpu_count()}")
print(f"âœ“ Threading support enabled")


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.3[0m[39;49m -> [0m[32;49m26.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
âœ“ All dependencies installed
âœ“ CPU cores available: 12
âœ“ Threading support enabled


In [22]:
# Mount Google Drive
BASE_DIR = '..'

# Create output directories
MODELS_DIR = os.path.join(BASE_DIR, 'model_weights')
RESULTS_DIR = os.path.join(BASE_DIR, 'results')
DATA_DIR = os.path.join(BASE_DIR, 'data')

for d in [MODELS_DIR, RESULTS_DIR, DATA_DIR]:
    os.makedirs(d, exist_ok=True)

print(f"Base directory: {BASE_DIR}")
print(f"Models directory: {MODELS_DIR}")
print(f"Results directory: {RESULTS_DIR}")

Base directory: ..
Models directory: ../model_weights
Results directory: ../results


## 2. Data Loading & Preprocessing

In [23]:
def load_and_preprocess_data(raw_dir, ticker='AMZN'):
    """
    Load raw LOBSTER data and apply basic preprocessing.
    Returns preprocessed DataFrame with datetime and normalized prices.
    """
    import glob

    msg_files = sorted(glob.glob(os.path.join(raw_dir, '*message*.csv')))
    all_dfs = []

    for msg_file in tqdm(msg_files, desc='Loading LOBSTER files'):
        # Parse filename
        filename = os.path.basename(msg_file)
        parts = filename.split('_')
        date = parts[1]

        # Load message and orderbook
        df_msg = pd.read_csv(msg_file, header=None,
                            names=['time', 'event_type', 'order_id', 'size', 'price', 'direction'])

        ob_file = msg_file.replace('message', 'orderbook')
        if not os.path.exists(ob_file):
            print(f"Warning: Orderbook file missing for {msg_file}")
            continue

        level = 10
        cols = []
        for i in range(1, level+1):
            cols.extend([f'ask_price_{i}', f'ask_size_{i}', f'bid_price_{i}', f'bid_size_{i}'])

        df_ob = pd.read_csv(ob_file, header=None, names=cols)

        # Merge
        if len(df_msg) != len(df_ob):
            print(f"Length mismatch for {date}: {len(df_msg)} vs {len(df_ob)}, skipping")
            continue

        df = pd.concat([df_msg, df_ob], axis=1)

        # Price normalization (LOBSTER scale is 10,000)
        price_cols = [c for c in df.columns if 'price' in c]
        df[price_cols] = df[price_cols] / 10000.0

        # Time filtering (09:30:00 = 34200, 16:00:00 = 57600)
        df = df[(df['time'] >= 34200) & (df['time'] <= 57600)].copy()

        # Drop crossed markets
        df = df[df['bid_price_1'] < df['ask_price_1']]

        # Add datetime
        base_date = pd.to_datetime(date)
        df['datetime'] = base_date + pd.to_timedelta(df['time'], unit='s')

        all_dfs.append(df)

    if not all_dfs:
        raise ValueError(f"No valid data files found in {raw_dir}")

    return pd.concat(all_dfs, ignore_index=True)

# Check if we have raw data
raw_dir = os.path.join(DATA_DIR, 'raw')
if os.path.exists(raw_dir) and len(os.listdir(raw_dir)) > 0:
    print(f"Loading data from {raw_dir}...")
    df = load_and_preprocess_data(raw_dir)
    print(f"Loaded {len(df)} events")
else:
    print(f"Raw data directory {raw_dir} not found or empty.")
    print("Please upload LOBSTER CSV files to data/raw/ directory.")
    df = None

Loading data from ../data/raw...


Loading LOBSTER files: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1/1 [00:02<00:00,  2.16s/it]

Loaded 269748 events





## 3. Feature Engineering

In [24]:
def compute_ofi(df, max_level=5):
    """Compute single-level and multi-level OFI."""
    result = {}
    cumul = np.zeros(len(df), dtype=np.float64)

    for lvl in range(1, max_level + 1):
        bp = df[f"bid_price_{lvl}"].values
        bs = df[f"bid_size_{lvl}"].values
        ap = df[f"ask_price_{lvl}"].values
        as_ = df[f"ask_size_{lvl}"].values

        n = len(df)
        ofi = np.zeros(n, dtype=np.float64)

        for t in range(1, n):
            delta_bid = bs[t] if bp[t] == bp[t-1] else (bs[t] if bp[t] > bp[t-1] else -bs[t-1])
            delta_ask = as_[t] if ap[t] == ap[t-1] else (-as_[t] if ap[t] < ap[t-1] else as_[t-1])
            ofi[t] = delta_bid - delta_ask

        result[f"ofi_{lvl}"] = ofi
        cumul = cumul + ofi
        result[f"ofi_cumul_{lvl}"] = cumul.copy()

    return pd.DataFrame(result, index=df.index)

def compute_microstructure(df, max_level=5):
    """Compute microstructure features."""
    features = {}

    features['mid_price'] = (df['ask_price_1'] + df['bid_price_1']) / 2.0
    features['spread'] = df['ask_price_1'] - df['bid_price_1']

    total = df['bid_size_1'] + df['ask_size_1']
    features['volume_imbalance'] = (df['bid_size_1'] - df['ask_size_1']) / total.replace(0, np.nan)
    features['volume_imbalance'] = features['volume_imbalance'].fillna(0.0)

    bid_depth = sum(df[f'bid_size_{i}'] for i in range(1, max_level + 1))
    ask_depth = sum(df[f'ask_size_{i}'] for i in range(1, max_level + 1))
    features['bid_depth'] = bid_depth
    features['ask_depth'] = ask_depth
    features['total_depth'] = bid_depth + ask_depth

    return pd.DataFrame(features, index=df.index)

def make_labels(df, horizons=[10, 20, 50, 100], alpha=0.5):
    """Generate regression and 3-class classification labels."""
    m = (df['ask_price_1'].values + df['bid_price_1'].values) / 2.0

    # Regression
    reg_labels = {}
    for k in horizons:
        delta = np.full_like(m, np.nan)
        if k < len(m):
            delta[:len(m)-k] = m[k:] - m[:len(m)-k]
        reg_labels[f'delta_mid_{k}'] = delta

    # Classification (3-class: DOWN=0, STAT=1, UP=2)
    cls_labels = {}
    for k in horizons:
        future_m = np.full_like(m, np.nan)
        if k < len(m):
            # Smooth with k-event moving average
            cumsum = np.cumsum(m)
            # The original line caused a ValueError due to shape mismatch:
            # future_m[:len(m)-k] = (cumsum[k:] - np.concatenate([[0], cumsum[:-k]])) / k
            # Corrected to compute the k-event moving average for future mid-prices:
            future_m[:len(m)-k] = (cumsum[k:] - cumsum[:-k]) / k

        pct_change = (future_m - m) / m
        valid = pct_change[~np.isnan(pct_change)]
        sigma = np.std(valid)
        threshold = alpha * sigma

        labels = np.full(len(m), np.nan)
        labels[pct_change > threshold] = 2   # UP
        labels[pct_change < -threshold] = 0  # DOWN
        mask = (~np.isnan(pct_change)) & np.isnan(labels)
        labels[mask] = 1  # STATIONARY

        cls_labels[f'label_{k}'] = labels

    reg_df = pd.DataFrame(reg_labels, index=df.index)
    cls_df = pd.DataFrame(cls_labels, index=df.index)
    return reg_df, cls_df

if df is not None:
    print("Computing OFI...")
    ofi_df = compute_ofi(df)

    print("Computing microstructure features...")
    micro_df = compute_microstructure(df)

    print("Generating labels...")
    horizons = [10, 20, 50, 100]
    y_reg, y_cls = make_labels(df, horizons=horizons)

    # Combine features
    lob_cols = []
    for lvl in range(1, 6):
        lob_cols.extend([f'ask_price_{lvl}', f'ask_size_{lvl}', f'bid_price_{lvl}', f'bid_size_{lvl}'])

    X = pd.concat([ofi_df, micro_df, df[lob_cols]], axis=1)
    feature_names = list(X.columns)

    # Remove tail rows with NaN labels
    max_horizon = max(horizons)
    valid_end = len(df) - max_horizon

    X_data = X.values[:valid_end].astype(np.float32)
    y_reg_data = y_reg.values[:valid_end].astype(np.float32)
    y_cls_data = y_cls.values[:valid_end].astype(np.float32)

    print(f"âœ“ Features shape: {X_data.shape}")
    print(f"âœ“ Regression labels shape: {y_reg_data.shape}")
    print(f"âœ“ Classification labels shape: {y_cls_data.shape}")
    print(f"Feature names ({len(feature_names)}): {feature_names[:10]}...")

Computing OFI...
Computing microstructure features...
Generating labels...
âœ“ Features shape: (269648, 36)
âœ“ Regression labels shape: (269648, 4)
âœ“ Classification labels shape: (269648, 4)
Feature names (36): ['ofi_1', 'ofi_cumul_1', 'ofi_2', 'ofi_cumul_2', 'ofi_3', 'ofi_cumul_3', 'ofi_4', 'ofi_cumul_4', 'ofi_5', 'ofi_cumul_5']...


## 4. Train/Val/Test Split

In [25]:
if df is not None:
    # Temporal split (no future leakage)
    train_frac, val_frac = 0.6, 0.2

    n = len(X_data)
    t1 = int(n * train_frac)
    t2 = int(n * (train_frac + val_frac))

    X_train, y_reg_train, y_cls_train = X_data[:t1], y_reg_data[:t1], y_cls_data[:t1]
    X_val, y_reg_val, y_cls_val = X_data[t1:t2], y_reg_data[t1:t2], y_cls_data[t1:t2]
    X_test, y_reg_test, y_cls_test = X_data[t2:], y_reg_data[t2:], y_cls_data[t2:]

    # Normalization (fit on train only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    X_test_scaled = scaler.transform(X_test)

    print(f"Train: {X_train_scaled.shape[0]} | Val: {X_val_scaled.shape[0]} | Test: {X_test_scaled.shape[0]}")
    print(f"Features: {X_train_scaled.shape[1]}")

Train: 161788 | Val: 53930 | Test: 53930
Features: 36


## 5. PyTorch Models & Utilities

In [26]:
# PyTorch Dataset
class LOBDataset(Dataset):
    def __init__(self, X, y_reg, y_cls, seq_len=100):
        self.X = X
        self.y_reg = y_reg
        self.y_cls = y_cls
        self.seq_len = seq_len

    def __len__(self):
        return len(self.X) - self.seq_len

    def __getitem__(self, idx):
        x = self.X[idx:idx + self.seq_len]
        target_idx = idx + self.seq_len - 1
        yr = self.y_reg[target_idx]
        yc = self.y_cls[target_idx]

        return {
            'x': torch.tensor(x, dtype=torch.float32),
            'y_reg': torch.tensor(yr, dtype=torch.float32),
            'y_cls': torch.tensor(yc, dtype=torch.long),
        }

class FlatDataset(Dataset):
    def __init__(self, X, y_reg, y_cls):
        self.X = X
        self.y_reg = y_reg
        self.y_cls = y_cls

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return {
            'x': torch.tensor(self.X[idx], dtype=torch.float32),
            'y_reg': torch.tensor(self.y_reg[idx], dtype=torch.float32),
            'y_cls': torch.tensor(self.y_cls[idx], dtype=torch.long),
        }

print("âœ“ Dataset classes defined")

âœ“ Dataset classes defined


In [27]:
class TemporalAttentionSeq2Seq(nn.Module):
    """Seq2Seq with temporal attention + multi-scale encoder (Custom)"""
    def __init__(self, n_features, n_horizons=4, hidden_size=64, dropout=0.3):
        super().__init__()
        
        # Multi-scale encoder
        self.encoder_fine = nn.LSTM(n_features, hidden_size, 2, batch_first=True, dropout=dropout)
        self.encoder_coarse = nn.LSTM(n_features, hidden_size, 1, batch_first=True)
        
        # Temporal attention
        self.attn = BahdanauAttention(hidden_size)
        self.decoder = nn.LSTM(hidden_size * 2, hidden_size, 1, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        
        self.heads = nn.ModuleList([
            nn.Sequential(nn.Linear(hidden_size, 32), nn.ReLU(), nn.Linear(32, 3))
            for _ in range(n_horizons)
        ])
    
    def forward(self, x):
        enc_fine, (h_fine, c_fine) = self.encoder_fine(x)
        enc_coarse, (h_coarse, c_coarse) = self.encoder_coarse(x)
        context, _ = self.attn(h_fine[-1], enc_fine)
        multi_ctx = torch.cat([context, h_coarse[-1]], dim=-1)
        dec_in = multi_ctx.unsqueeze(1)
        # Use only last layer's hidden state for decoder (decoder has 1 layer, encoder_fine has 2)
        dec_out, _ = self.decoder(dec_in, (h_fine[-1:], c_fine[-1:]))
        out_h = self.dropout(dec_out[:, -1, :])
        out = torch.stack([head(out_h) for head in self.heads], dim=1)
        return out

In [28]:
# Training utilities
def compute_metrics(y_true, y_pred, y_true_cls=None, y_pred_cls=None):
    """Compute regression and classification metrics."""
    from sklearn.metrics import accuracy_score, f1_score

    metrics = {}

    # Regression
    if y_true is not None and y_pred is not None:
        mask = ~np.isnan(y_true)
        if mask.sum() > 0:
            residuals = y_true[mask] - y_pred[mask]
            metrics['mse'] = float(np.mean(residuals ** 2))
            metrics['rmse'] = float(np.sqrt(metrics['mse']))
            metrics['mae'] = float(np.mean(np.abs(residuals)))
            ss_res = np.sum(residuals ** 2)
            ss_tot = np.sum((y_true[mask] - np.mean(y_true[mask])) ** 2)
            metrics['r2'] = float(1.0 - ss_res / max(ss_tot, 1e-12))

    # Classification
    if y_true_cls is not None and y_pred_cls is not None:
        mask = ~np.isnan(y_true_cls)
        if mask.sum() > 0:
            metrics['accuracy'] = float(accuracy_score(y_true_cls[mask].astype(int), y_pred_cls[mask]))
            metrics['f1_macro'] = float(f1_score(y_true_cls[mask].astype(int), y_pred_cls[mask], average='macro', zero_division=0))

    return metrics

print("âœ“ Metric computation defined")

âœ“ Metric computation defined


## 6. Model Training & Evaluation Loop

In [29]:
if df is not None:
    # ALL 10 MODEL CONFIGURATIONS
    model_configs = {
        # Linear (parallel: can train all at once)
        'ols': {'type': 'linear', 'parallel': True},
        'ridge': {'type': 'linear', 'parallel': True, 'alpha': 1.0},
        'lasso': {'type': 'linear', 'parallel': True, 'alpha': 0.001},
        
        # Deep (GPU-friendly, train sequentially to avoid OOM, but parallelize internally)
        'mlp': {'type': 'flat', 'epochs': 25, 'batch_size': 1024, 'lr': 1e-3},
        'lstm': {'type': 'seq', 'seq_len': 100, 'epochs': 25, 'batch_size': 512, 'lr': 1e-3},
        'cnn': {'type': 'seq', 'seq_len': 100, 'epochs': 25, 'batch_size': 512, 'lr': 1e-3},
        'deeplob': {'type': 'seq', 'seq_len': 100, 'epochs': 25, 'batch_size': 512, 'lr': 1e-3},
        'seq2seq': {'type': 'seq', 'seq_len': 100, 'epochs': 25, 'batch_size': 512, 'lr': 1e-3},
        'transformer': {'type': 'seq', 'seq_len': 100, 'epochs': 25, 'batch_size': 512, 'lr': 1e-3},
        'temporal_attention': {'type': 'seq', 'seq_len': 100, 'epochs': 25, 'batch_size': 512, 'lr': 1e-3},
    }
    
    all_results = {}
    
    # ===== TRAIN LINEAR MODELS IN PARALLEL =====
    print("\n" + "="*80)
    print("TRAINING LINEAR MODELS (PARALLELIZED)")
    print("="*80)
    
    def train_linear_model(args):
        """Train a single linear model (OLS, Ridge, or Lasso)."""
        model_name, model_type, X_tr, y_reg_tr, X_te, y_reg_te, horizons, config = args
        try:
            models_dict = {}
            for h_idx, h in enumerate(horizons):
                y = y_reg_tr[:, h_idx]
                mask = ~np.isnan(y)
                X_h = X_tr[mask]
                y_h = y[mask]
                
                if model_name == 'ols':
                    X_const = sm.add_constant(X_h)
                    model = sm.OLS(y_h, X_const).fit(disp=0)
                elif model_name == 'ridge':
                    from sklearn.linear_model import Ridge
                    model = Ridge(alpha=config.get('alpha', 1.0))
                    model.fit(X_h, y_h)
                elif model_name == 'lasso':
                    from sklearn.linear_model import Lasso
                    model = Lasso(alpha=config.get('alpha', 0.001), max_iter=5000)
                    model.fit(X_h, y_h)
                
                models_dict[h] = model
            
            # Evaluate
            preds_test = np.zeros((len(X_te), len(horizons)))
            for h_idx, h in enumerate(horizons):
                if model_name == 'ols':
                    X_const = sm.add_constant(X_te)
                    preds_test[:, h_idx] = models_dict[h].predict(X_const)
                else:
                    preds_test[:, h_idx] = models_dict[h].predict(X_te)
            
            metrics = {}
            for h_idx, h in enumerate(horizons):
                m = compute_metrics(y_reg_te[:, h_idx], preds_test[:, h_idx])
                for k, v in m.items():
                    metrics[f'h{h}_{k}'] = v
            
            return model_name, models_dict, metrics
        except Exception as e:
            print(f"âœ— {model_name} failed: {e}")
            return model_name, None, {}
    
    # Prepare linear model training tasks
    linear_tasks = [
        ('ols', 'linear', X_train_scaled, y_reg_train, X_test_scaled, y_reg_test, horizons, model_configs['ols']),
        ('ridge', 'linear', X_train_scaled, y_reg_train, X_test_scaled, y_reg_test, horizons, model_configs['ridge']),
        ('lasso', 'linear', X_train_scaled, y_reg_train, X_test_scaled, y_reg_test, horizons, model_configs['lasso']),
    ]
    
    # Train linear models in parallel
    with ThreadPoolExecutor(max_workers=3) as executor:
        linear_futures = {executor.submit(train_linear_model, task): task[0] for task in linear_tasks}
        
        for future in as_completed(linear_futures):
            model_name, models_dict, metrics = future.result()
            if models_dict is not None:
                all_results[model_name] = metrics
                # Save model
                save_path = os.path.join(MODELS_DIR, f'{model_name}_models.pkl')
                with open(save_path, 'wb') as f:
                    pickle.dump(models_dict, f)
                print(f"âœ“ {model_name:15s} | RÂ² h10={metrics.get('h10_r2', 0):.4f} | Saved")


TRAINING LINEAR MODELS (PARALLELIZED)
âœ“ ridge           | RÂ² h10=0.0379 | Saved
âœ“ lasso           | RÂ² h10=0.0104 | Saved
âœ“ ols             | RÂ² h10=0.0371 | Saved


In [30]:
if df is not None:
    # ===== CONFIGURATION: SKIP ALREADY-TRAINED MODELS =====
    SKIP_TRAINED_MODELS = True  # Set to False to retrain all models
    
    # ===== TRAIN DEEP LEARNING MODELS SEQUENTIALLY (GPU memory management) =====
    print("\n" + "="*80)
    print("TRAINING DEEP LEARNING MODELS (MEMORY-OPTIMIZED SEQUENTIAL)")
    print("="*80)
    
    def train_deep_model(model, train_loader, val_loader, test_loader, model_name, cfg, device):
        """Generic training loop with memory optimization."""
        model = model.to(device)
        
        criterion = nn.CrossEntropyLoss()
        optimizer = Adam(model.parameters(), lr=cfg['lr'], weight_decay=1e-5)
        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
        
        best_val_loss = float('inf')
        patience_counter = 0
        max_patience = 5
        best_model_state = None
        
        epochs = cfg['epochs']
        for epoch in range(epochs):
            # ===== TRAINING =====
            model.train()
            train_loss = 0
            torch.cuda.empty_cache() if torch.cuda.is_available() else None
            
            for batch_idx, batch in enumerate(train_loader):
                x = batch['x'].to(device)
                y_cls = batch['y_cls'].to(device)
                
                logits = model(x)
                loss = 0
                for h_idx in range(logits.shape[1]):
                    loss = loss + criterion(logits[:, h_idx, :], y_cls[:, h_idx])
                loss = loss / logits.shape[1]
                
                optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                
                train_loss += loss.item()
                
                # Clear cache every 5 batches
                if (batch_idx + 1) % 5 == 0:
                    torch.cuda.empty_cache() if torch.cuda.is_available() else None
            
            train_loss /= len(train_loader)
            
            # ===== VALIDATION =====
            model.eval()
            val_loss = 0
            torch.cuda.empty_cache() if torch.cuda.is_available() else None
            
            with torch.no_grad():
                for batch in val_loader:
                    x = batch['x'].to(device)
                    y_cls = batch['y_cls'].to(device)
                    logits = model(x)
                    loss = 0
                    for h_idx in range(logits.shape[1]):
                        loss = loss + criterion(logits[:, h_idx, :], y_cls[:, h_idx])
                    loss = loss / logits.shape[1]
                    val_loss += loss.item()
            
            val_loss /= len(val_loader)
            scheduler.step(val_loss)
            
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
                best_model_state = model.state_dict().copy()
            else:
                patience_counter += 1
                if patience_counter >= max_patience:
                    print(f"  Early stopping at epoch {epoch+1}")
                    break
            
            if (epoch + 1) % max(1, epochs // 5) == 0:
                print(f"  [{model_name}] Epoch {epoch+1:2d}/{epochs} | train_loss={train_loss:.5f} | val_loss={val_loss:.5f}")
        
        # Load best model
        model.load_state_dict(best_model_state)
        model.eval()
        torch.cuda.empty_cache() if torch.cuda.is_available() else None
        
        # ===== EVALUATION =====
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for batch in test_loader:
                x = batch['x'].to(device)
                y_cls = batch['y_cls'].to(device)
                logits = model(x)
                preds = logits.argmax(dim=-1).cpu().numpy()
                all_preds.append(preds)
                all_labels.append(y_cls.cpu().numpy())
        
        all_preds = np.concatenate(all_preds, axis=0)
        all_labels = np.concatenate(all_labels, axis=0)
        
        metrics = {}
        for h_idx, h in enumerate(horizons):
            m = compute_metrics(None, None, all_labels[:, h_idx], all_preds[:, h_idx])
            for k, v in m.items():
                metrics[f'h{h}_{k}'] = v
        
        # Save model
        model_path = os.path.join(MODELS_DIR, f'{model_name}_weights.pt')
        torch.save(model.state_dict(), model_path)
        
        torch.cuda.empty_cache() if torch.cuda.is_available() else None
        return metrics, model
    
    # ===== DEFINE ALL 7 DEEP MODELS =====
    deep_models_def = {
        'mlp': MLPClassifier,
        'lstm': LSTMClassifier,
        'cnn': CNNClassifier,
        'deeplob': DeepLOBNet,
        'seq2seq': Seq2SeqAttention,
        'transformer': TransformerClassifier,
        'temporal_attention': TemporalAttentionSeq2Seq,
    }
    
    # ===== TRAIN EACH DEEP MODEL =====
    deep_models = {}
    deep_model_names = list(deep_models_def.keys())
    
    for idx, model_name in enumerate(deep_model_names, 1):
        # Check if model already trained
        model_path = os.path.join(MODELS_DIR, f'{model_name}_weights.pt')
        if SKIP_TRAINED_MODELS and os.path.exists(model_path):
            print(f"\n[{idx}/{len(deep_models_def)}] {model_name.upper()} - SKIPPED (already trained)")
            # Load existing model
            model = deep_models_def[model_name](n_features=X_train_scaled.shape[1], n_horizons=len(horizons))
            model.load_state_dict(torch.load(model_path, map_location=device))
            deep_models[model_name] = model
            continue
        
        print(f"\n[{idx}/{len(deep_models_def)}] Training {model_name.upper()}...")
        
        try:
            cfg = model_configs[model_name]
            
            # Create datasets
            if cfg['type'] == 'flat':
                ds_train = FlatDataset(X_train_scaled, y_reg_train, y_cls_train)
                ds_val = FlatDataset(X_val_scaled, y_reg_val, y_cls_val)
                ds_test = FlatDataset(X_test_scaled, y_reg_test, y_cls_test)
            else:  # seq
                ds_train = LOBDataset(X_train_scaled, y_reg_train, y_cls_train, seq_len=cfg['seq_len'])
                ds_val = LOBDataset(X_val_scaled, y_reg_val, y_cls_val, seq_len=cfg['seq_len'])
                ds_test = LOBDataset(X_test_scaled, y_reg_test, y_cls_test, seq_len=cfg['seq_len'])
            
            # Create dataloaders with shuffle=True for training
            train_loader = DataLoader(ds_train, batch_size=cfg['batch_size'], shuffle=True, num_workers=0)
            val_loader = DataLoader(ds_val, batch_size=cfg['batch_size'], shuffle=False, num_workers=0)
            test_loader = DataLoader(ds_test, batch_size=cfg['batch_size'], shuffle=False, num_workers=0)
            
            # Create model
            model = deep_models_def[model_name](n_features=X_train_scaled.shape[1], n_horizons=len(horizons))
            
            # Train
            metrics, model_obj = train_deep_model(model, train_loader, val_loader, test_loader, model_name, cfg, device)
            
            all_results[model_name] = metrics
            deep_models[model_name] = model_obj
            
            print(f"âœ“ {model_name:20s} | Accuracy h10={metrics.get('h10_accuracy', 0):.4f} | F1={metrics.get('h10_f1_macro', 0):.4f}")
        
        except Exception as e:
            print(f"âœ— {model_name} failed: {e}")
            import traceback
            traceback.print_exc()


TRAINING DEEP LEARNING MODELS (MEMORY-OPTIMIZED SEQUENTIAL)


NameError: name 'MLPClassifier' is not defined

In [13]:
if df is not None:
    # Train LSTM
    print("\n[2/3] Training LSTM...")
    try:
        cfg = model_configs['lstm']
        ds_train = LOBDataset(X_train_scaled, y_reg_train, y_cls_train, seq_len=cfg['seq_len'])
        ds_val = LOBDataset(X_val_scaled, y_reg_val, y_cls_val, seq_len=cfg['seq_len'])
        ds_test = LOBDataset(X_test_scaled, y_reg_test, y_cls_test, seq_len=cfg['seq_len'])
        
        train_loader = DataLoader(ds_train, batch_size=cfg['batch_size'], shuffle=False)
        val_loader = DataLoader(ds_val, batch_size=cfg['batch_size'], shuffle=False)
        test_loader = DataLoader(ds_test, batch_size=cfg['batch_size'], shuffle=False)
        
        lstm_model = LSTMClassifier(n_features=X_train_scaled.shape[1], n_horizons=len(horizons))
        lstm_metrics, lstm_model = train_deep_model(
            lstm_model, train_loader, val_loader, test_loader, 'lstm',
            epochs=cfg['epochs'], lr=cfg['lr']
        )
        all_results['lstm'] = lstm_metrics
        print(f"âœ“ LSTM Accuracy h10={lstm_metrics.get('h10_accuracy', 0):.4f}")
    except Exception as e:
        print(f"âœ— LSTM failed: {e}")
    
    # Train CNN
    print("\n[3/3] Training CNN...")
    try:
        cfg = model_configs['cnn']
        ds_train = LOBDataset(X_train_scaled, y_reg_train, y_cls_train, seq_len=cfg['seq_len'])
        ds_val = LOBDataset(X_val_scaled, y_reg_val, y_cls_val, seq_len=cfg['seq_len'])
        ds_test = LOBDataset(X_test_scaled, y_reg_test, y_cls_test, seq_len=cfg['seq_len'])
        
        train_loader = DataLoader(ds_train, batch_size=cfg['batch_size'], shuffle=False)
        val_loader = DataLoader(ds_val, batch_size=cfg['batch_size'], shuffle=False)
        test_loader = DataLoader(ds_test, batch_size=cfg['batch_size'], shuffle=False)
        
        cnn_model = CNNClassifier(n_features=X_train_scaled.shape[1], n_horizons=len(horizons))
        cnn_metrics, cnn_model = train_deep_model(
            cnn_model, train_loader, val_loader, test_loader, 'cnn',
            epochs=cfg['epochs'], lr=cfg['lr']
        )
        all_results['cnn'] = cnn_metrics
        print(f"âœ“ CNN Accuracy h10={cnn_metrics.get('h10_accuracy', 0):.4f}")
    except Exception as e:
        print(f"âœ— CNN failed: {e}")

## 7. Results Summary & Visualization

In [14]:
if df is not None and all_results:
    # Create comprehensive summary table
    print("\n" + "="*100)
    print("RESULTS SUMMARY - ALL 10 MODELS")
    print("="*100)
    
    # Linear models: RÂ²
    print("\n[LINEAR MODELS] - Regression RÂ² across horizons:")
    print("-" * 80)
    print(f"{'Model':15s} | h10_RÂ²    h20_RÂ²    h50_RÂ²    h100_RÂ²")
    print("-" * 80)
    for model_name in ['ols', 'ridge', 'lasso']:
        if model_name in all_results:
            metrics = all_results[model_name]
            r2_vals = [metrics.get(f'h{h}_r2', 0) for h in horizons]
            print(f"{model_name:15s} | {r2_vals[0]:8.4f} {r2_vals[1]:8.4f} {r2_vals[2]:8.4f} {r2_vals[3]:8.4f}")
    
    # Deep models: Classification Accuracy
    print("\n[DEEP LEARNING MODELS] - Classification Accuracy across horizons:")
    print("-" * 80)
    print(f"{'Model':20s} | h10_Acc   h20_Acc   h50_Acc   h100_Acc  | h10_F1    h20_F1    h50_F1    h100_F1")
    print("-" * 80)
    for model_name in ['mlp', 'lstm', 'cnn', 'deeplob', 'seq2seq', 'transformer', 'temporal_attention']:
        if model_name in all_results:
            metrics = all_results[model_name]
            acc_vals = [metrics.get(f'h{h}_accuracy', 0) for h in horizons]
            f1_vals = [metrics.get(f'h{h}_f1_macro', 0) for h in horizons]
            print(f"{model_name:20s} | {acc_vals[0]:8.4f} {acc_vals[1]:8.4f} {acc_vals[2]:8.4f} {acc_vals[3]:8.4f} | {f1_vals[0]:8.4f} {f1_vals[1]:8.4f} {f1_vals[2]:8.4f} {f1_vals[3]:8.4f}")
    
    # Save detailed results
    results_path = os.path.join(RESULTS_DIR, f'results_all_10_models_{datetime.now().strftime("%Y%m%d_%H%M%S")}.json')
    with open(results_path, 'w') as f:
        json.dump(all_results, f, indent=2, default=str)
    
    print(f"\nâœ“ Detailed results saved to {results_path}")

In [15]:
if df is not None and all_results:
    # Comprehensive visualization
    fig, axes = plt.subplots(2, 2, figsize=(16, 10))
    
    # 1. Linear Models RÂ²
    ax = axes[0, 0]
    for model_name in ['ols', 'ridge', 'lasso']:
        if model_name in all_results:
            metrics = all_results[model_name]
            r2_vals = [metrics.get(f'h{h}_r2', 0) for h in horizons]
            ax.plot(horizons, r2_vals, marker='o', label=model_name, linewidth=2)
    
    ax.set_xlabel('Prediction Horizon (events)', fontsize=11, fontweight='bold')
    ax.set_ylabel('RÂ²', fontsize=11, fontweight='bold')
    ax.set_title('Linear Models: Regression Performance', fontsize=12, fontweight='bold')
    ax.legend(loc='best')
    ax.grid(alpha=0.3)
    
    # 2. Deep Models Accuracy
    ax = axes[0, 1]
    for model_name in ['mlp', 'lstm', 'cnn', 'deeplob', 'seq2seq', 'transformer', 'temporal_attention']:
        if model_name in all_results:
            metrics = all_results[model_name]
            acc_vals = [metrics.get(f'h{h}_accuracy', 0) for h in horizons]
            ax.plot(horizons, acc_vals, marker='s', label=model_name, linewidth=2)
    
    ax.set_xlabel('Prediction Horizon (events)', fontsize=11, fontweight='bold')
    ax.set_ylabel('Accuracy', fontsize=11, fontweight='bold')
    ax.set_title('Deep Learning Models: Classification Accuracy', fontsize=12, fontweight='bold')
    ax.legend(loc='best', fontsize=9)
    ax.grid(alpha=0.3)
    
    # 3. F1 Macro Score (Deep Models)
    ax = axes[1, 0]
    for model_name in ['mlp', 'lstm', 'cnn', 'deeplob', 'seq2seq', 'transformer', 'temporal_attention']:
        if model_name in all_results:
            metrics = all_results[model_name]
            f1_vals = [metrics.get(f'h{h}_f1_macro', 0) for h in horizons]
            ax.plot(horizons, f1_vals, marker='^', label=model_name, linewidth=2)
    
    ax.set_xlabel('Prediction Horizon (events)', fontsize=11, fontweight='bold')
    ax.set_ylabel('F1 Score (Macro)', fontsize=11, fontweight='bold')
    ax.set_title('Deep Learning Models: F1-Macro Score', fontsize=12, fontweight='bold')
    ax.legend(loc='best', fontsize=9)
    ax.grid(alpha=0.3)
    
    # 4. Best Model at Each Horizon (Accuracy)
    ax = axes[1, 1]
    best_models = []
    best_accs = []
    for h in horizons:
        best_acc = -1
        best_model = None
        for model_name in list(all_results.keys()):
            acc = all_results[model_name].get(f'h{h}_accuracy', 0)
            if acc > best_acc:
                best_acc = acc
                best_model = model_name
        best_models.append(best_model)
        best_accs.append(best_acc)
    
    colors = plt.cm.tab20(np.linspace(0, 1, len(set(best_models))))
    color_map = {model: colors[i] for i, model in enumerate(set(best_models))}
    bar_colors = [color_map[m] for m in best_models]
    
    ax.bar([str(h) for h in horizons], best_accs, color=bar_colors)
    ax.set_xlabel('Prediction Horizon', fontsize=11, fontweight='bold')
    ax.set_ylabel('Best Accuracy', fontsize=11, fontweight='bold')
    ax.set_title('Best Model Per Horizon', fontsize=12, fontweight='bold')
    ax.grid(alpha=0.3, axis='y')
    
    # Add model names on bars
    for i, (h, acc, model) in enumerate(zip(horizons, best_accs, best_models)):
        ax.text(i, acc + 0.01, model, ha='center', fontsize=9, fontweight='bold')
    
    plt.tight_layout()
    
    plot_path = os.path.join(RESULTS_DIR, f'performance_all_10_models.png')
    plt.savefig(plot_path, dpi=150, bbox_inches='tight')
    print(f"âœ“ Comparison plot saved to {plot_path}")
    plt.show()

## 8. Model Weights Summary

In [16]:
# List all saved model files
print("\n" + "="*60)
print("SAVED MODEL FILES")
print("="*60)

if os.path.exists(MODELS_DIR):
    model_files = os.listdir(MODELS_DIR)
    if model_files:
        print(f"\nModels saved in: {MODELS_DIR}\n")
        for f in sorted(model_files):
            fpath = os.path.join(MODELS_DIR, f)
            size_mb = os.path.getsize(fpath) / (1024 * 1024)
            print(f"  âœ“ {f:40s} ({size_mb:6.2f} MB)")
    else:
        print("No model files saved yet")
else:
    print(f"Models directory not found: {MODELS_DIR}")


SAVED MODEL FILES
No model files saved yet


## 9. Model Loading & Inference Example

In [17]:
# Load and test all saved models
print("\n" + "="*80)
print("MODEL LOADING & INFERENCE EXAMPLES - ALL 10 MODELS")
print("="*80)

# 1. Linear Models
print("\n[LINEAR MODELS] - Loading & Testing:")
print("-" * 60)

for model_name in ['ols', 'ridge', 'lasso']:
    try:
        model_path = os.path.join(MODELS_DIR, f'{model_name}_models.pkl')
        if os.path.exists(model_path):
            with open(model_path, 'rb') as f:
                models_loaded = pickle.load(f)
            print(f"âœ“ {model_name:15s} - {len(models_loaded)} horizon models loaded")
            
            # Inference on sample
            sample_idx = 0
            sample = X_test_scaled[sample_idx:sample_idx+1]
            for h, model in list(models_loaded.items())[:1]:  # Test first horizon
                if model_name == 'ols':
                    sample_const = sm.add_constant(sample)
                    pred = model.predict(sample_const)[0]
                else:
                    pred = model.predict(sample)[0]
                print(f"   Sample prediction (h={h}): Î”mid = {pred:8.6f}")
    except Exception as e:
        print(f"âœ— {model_name} loading failed: {e}")

# 2. Deep Models
print("\n[DEEP LEARNING MODELS] - Loading & Testing:")
print("-" * 60)

deep_models_def = {
    'mlp': MLPClassifier,
    'lstm': LSTMClassifier,
    'cnn': CNNClassifier,
    'deeplob': DeepLOBNet,
    'seq2seq': Seq2SeqAttention,
    'transformer': TransformerClassifier,
    'temporal_attention': TemporalAttentionSeq2Seq,
}

for model_name in deep_models_def.keys():
    try:
        model_path = os.path.join(MODELS_DIR, f'{model_name}_weights.pt')
        if os.path.exists(model_path):
            model = deep_models_def[model_name](n_features=X_train_scaled.shape[1], n_horizons=len(horizons))
            model.load_state_dict(torch.load(model_path, map_location=device))
            model.to(device)
            model.eval()
            
            print(f"âœ“ {model_name:20s} - Model loaded and moved to {device}")
            
            # Inference on sample sequence/flat
            if model_name == 'mlp':
                sample = torch.tensor(X_test_scaled[0:1], dtype=torch.float32).to(device)
            else:
                sample = torch.tensor(X_test_scaled[100:200], dtype=torch.float32).unsqueeze(0).to(device)
            
            with torch.no_grad():
                logits = model(sample)
            preds = logits.argmax(dim=-1).cpu().numpy()[0]
            
            class_names = {0: 'DOWN', 1: 'STAT', 2: 'UP'}
            print(f"   Predictions: {[class_names[p] for p in preds[:2]]} (first 2 horizons)")
    except Exception as e:
        print(f"âœ— {model_name} loading failed: {e}")


MODEL LOADING & INFERENCE EXAMPLES - ALL 10 MODELS

[LINEAR MODELS] - Loading & Testing:
------------------------------------------------------------

[DEEP LEARNING MODELS] - Loading & Testing:
------------------------------------------------------------


NameError: name 'MLPClassifier' is not defined

## 10. Metadata & Experiment Info

In [None]:
# Save experiment metadata
metadata = {
    'timestamp': datetime.now().isoformat(),
    'device': str(device),
    'data': {
        'n_total': len(df) if df is not None else 0,
        'n_train': len(X_train_scaled) if df is not None else 0,
        'n_val': len(X_val_scaled) if df is not None else 0,
        'n_test': len(X_test_scaled) if df is not None else 0,
        'n_features': X_train_scaled.shape[1] if df is not None else 0,
    },
    'horizons': horizons,
    'models_trained': list(all_results.keys()) if all_results else [],
    'feature_names': feature_names if df is not None else [],
}

metadata_path = os.path.join(RESULTS_DIR, 'metadata.json')
with open(metadata_path, 'w') as f:
    json.dump(metadata, f, indent=2, default=str)

print(f"\nâœ“ Metadata saved to {metadata_path}")
print(f"\nðŸ“Š Experiment Summary:")
print(f"  Training samples: {metadata['data']['n_train']}")
print(f"  Test samples: {metadata['data']['n_test']}")
print(f"  Features: {metadata['data']['n_features']}")
print(f"  Horizons: {metadata['horizons']}")
print(f"  Models: {len(metadata['models_trained'])} completed")
print(f"  Device: {metadata['device']}")