# 🚀 Meme Stock Deep Learning - Colab A100 Optimized

## 📋 Overview
- **Target**: IC ≥ 0.05 for 1-day return prediction
- **Models**: MLP, LSTM, Transformer, Ensemble
- **GPU**: A100 40GB optimized with FP16
- **Data**: 5.4K samples, 47 tabular + 49 sequence features

## 1️⃣ Setup & Installation

In [None]:
#!/usr/bin/env python3
"""
🚀 Meme Stock Deep Learning Pipeline - A100 GPU Optimized
Fixed version with robust data loading and error handling
"""

# Install required packages
import subprocess
import sys

def install_packages():
    """Install the third-party packages this notebook needs via pip.

    Each package is installed with its own ``pip`` subprocess so that one
    failure does not stop the remaining packages from being attempted.
    Failures are reported on stdout and otherwise ignored (best effort).
    """
    packages = [
        "pytorch-tabnet",
        "transformers",
        "optuna",
        "plotly",
        "scikit-learn",
        "scipy"
    ]

    for package in packages:
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", package])
        except subprocess.CalledProcessError as err:
            # pip exits with 0 when a package is already present, so a
            # non-zero exit status is a genuine installation failure.
            print(f"⚠️ {package} installation failed (pip exit code {err.returncode})")
        except OSError as err:
            # Raised when the interpreter/pip process cannot be started at all.
            print(f"⚠️ {package} installation failed: {err}")
        else:
            print(f"✅ {package} installed")

# Detect Google Colab by probing for its runtime package; packages are only
# installed automatically when running there.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
    print("💻 Running locally")
else:
    IN_COLAB = True
    print("🎯 Running in Google Colab")
    install_packages()

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import json
import glob
import warnings
from datetime import datetime
from typing import Tuple, List, Dict, Optional
warnings.filterwarnings('ignore')

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# ML/DL libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.cuda.amp import autocast, GradScaler

# Sklearn
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats import spearmanr, pearsonr

# TabNet
from pytorch_tabnet.tab_model import TabNetRegressor

# Report library and CUDA status so a missing GPU runtime is visible up front
print("✅ All libraries imported successfully")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Only CUDA device index 0 is queried here
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 2️⃣ Data Loading

In [None]:
def load_all_data():
    """Load the tabular splits, the sequence archive and the metadata file.

    The metadata JSON is searched for in the current directory first and
    then in ``data/colab_datasets/``.  Its ``timestamp`` entry selects the
    ``tabular_{train,val,test}_<timestamp>.csv`` and
    ``sequences_<timestamp>.npz`` files, which are looked up in the same two
    locations.

    Returns:
        Tuple ``(train_df, val_df, test_df, sequences_data, metadata)``.
        The three frames have their ``date`` column parsed to datetimes,
        ``sequences_data`` is the object returned by ``np.load`` and
        ``metadata`` is the decoded JSON dictionary.

    Raises:
        FileNotFoundError: If no metadata file exists, or the data files
            cannot be found in any of the searched locations.
    """
    # glob returns matches in arbitrary order; sort them so the same metadata
    # file is chosen on every run when several are present.
    metadata_files = sorted(glob.glob('*metadata*.json'))
    if not metadata_files:
        # Try colab_datasets directory
        metadata_files = sorted(glob.glob('data/colab_datasets/*metadata*.json'))

    if not metadata_files:
        raise FileNotFoundError("❌ No metadata file found!")

    if len(metadata_files) > 1:
        print(f"⚠️ {len(metadata_files)} metadata files found, using {metadata_files[0]}")

    # Load metadata
    with open(metadata_files[0], 'r', encoding='utf-8') as f:
        metadata = json.load(f)

    timestamp = metadata['timestamp']
    print(f"📊 Loading data with timestamp: {timestamp}")

    # Try different paths
    paths_to_try = ['', 'data/colab_datasets/']

    for path in paths_to_try:
        try:
            # Load tabular data
            train_df = pd.read_csv(f'{path}tabular_train_{timestamp}.csv')
            val_df = pd.read_csv(f'{path}tabular_val_{timestamp}.csv')
            test_df = pd.read_csv(f'{path}tabular_test_{timestamp}.csv')

            # Object-dtype arrays in the archive can only be read with
            # allow_pickle=True.
            # NOTE(security): unpickling can execute arbitrary code, so only
            # load archives that come from a trusted source.
            sequences_data = np.load(f'{path}sequences_{timestamp}.npz', allow_pickle=True)
        except FileNotFoundError:
            continue
        else:
            print(f"✅ Data loaded from {path if path else 'current directory'}")
            break
    else:
        # Reached only when no location yielded a complete set of files
        raise FileNotFoundError("❌ Could not find data files!")

    # Convert dates
    for df in [train_df, val_df, test_df]:
        df['date'] = pd.to_datetime(df['date'])

    print(f"\n📈 Data loaded successfully:")
    print(f"   Train: {len(train_df):,} samples")
    print(f"   Val: {len(val_df):,} samples")
    print(f"   Test: {len(test_df):,} samples")
    print(f"   Features: {len(metadata['tabular_features'])}")
    print(f"   Tickers: {metadata['tickers']}")

    return train_df, val_df, test_df, sequences_data, metadata

# Load data: binds the three tabular splits, the sequence archive and the
# metadata dictionary as notebook-level names used by the later cells
train_df, val_df, test_df, sequences_data, metadata = load_all_data()

## 3️⃣ Sequence Data Preparation

In [None]:
def prepare_sequence_data_fixed(sequences_data, metadata, target_features=48,
                                train_end='2023-02-02', val_end='2023-07-15'):
    """Stack the per-ticker sequence arrays and split them by date.

    For every ticker in ``metadata['tickers']`` the entries
    ``<ticker>_sequences``, ``<ticker>_targets_1d`` and ``<ticker>_dates``
    are read from ``sequences_data``.  Tickers without a ``_sequences`` entry
    are skipped.  Object-dtype arrays are reduced to the columns that convert
    to float, NaN/Inf values are replaced by 0 and tickers with fewer than
    ``target_features`` columns are right-padded with zero columns so that
    all tickers can be stacked.

    Args:
        sequences_data: Mapping from key to array (a dict or the object
            returned by ``np.load``).
        metadata: Dictionary with a ``'tickers'`` list.
        target_features: Feature count every ticker is padded up to.
        train_end: Last date (inclusive) assigned to the training split.
        val_end: Last date (inclusive) assigned to the validation split;
            later dates form the test split.

    Returns:
        ``(X_train, X_val, X_test, y_train, y_val, y_test)`` as float32
        arrays, or ``(None, None, None)`` when no ticker produced usable
        sequences (callers check the first element for ``None``).

    Raises:
        ValueError: If a ticker's targets or dates do not have one entry per
            sequence.
    """
    print("🔄 Preparing sequence data...")

    all_sequences = []
    all_targets = []
    all_dates = []

    # Process each ticker
    for ticker in metadata['tickers']:
        seq_key = f'{ticker}_sequences'
        if seq_key not in sequences_data:
            continue

        sequences = sequences_data[seq_key]
        targets = sequences_data[f'{ticker}_targets_1d']
        dates = sequences_data[f'{ticker}_dates']

        # Misaligned targets/dates would silently corrupt the date split
        if len(targets) != len(sequences) or len(dates) != len(sequences):
            raise ValueError(
                f"{ticker}: {len(sequences)} sequences but "
                f"{len(targets)} targets and {len(dates)} dates"
            )

        # Handle object dtype (string columns)
        if sequences.dtype == object:
            print(f"   ⚠️ {ticker}: Cleaning object dtype...")

            # Keep only the columns that convert to float and hold at least
            # one finite value
            numeric_cols = []
            for i in range(sequences.shape[2]):
                try:
                    test_col = sequences[:, :, i].astype(np.float32)
                except (ValueError, TypeError):
                    continue
                if np.any(np.isfinite(test_col)):
                    numeric_cols.append(i)

            if not numeric_cols:
                print(f"   ❌ {ticker}: No numeric columns, skipping")
                continue
            sequences = sequences[:, :, numeric_cols].astype(np.float32)
        else:
            sequences = sequences.astype(np.float32)

        # Clean NaN/Inf
        sequences = np.nan_to_num(sequences, nan=0.0, posinf=0.0, neginf=0.0)

        # Pad narrower tickers with zero columns so every ticker has the
        # same feature count and the arrays can be stacked.
        # NOTE(review): padding appends columns on the right; it assumes the
        # existing columns line up across tickers - confirm against the
        # dataset export.
        n_features = sequences.shape[2]
        if n_features < target_features:
            missing = target_features - n_features
            print(f"   🔧 {ticker}: Adding {missing} zero column(s) "
                  f"({n_features} -> {target_features} features)")
            padding = np.zeros(sequences.shape[:2] + (missing,), dtype=np.float32)
            sequences = np.concatenate([sequences, padding], axis=2)

        all_sequences.append(sequences)
        all_targets.extend(targets)
        all_dates.extend(dates)

        print(f"   ✅ {ticker}: {sequences.shape}")

    if not all_sequences:
        print("❌ No valid sequences found")
        return None, None, None

    # Stack all sequences along the sample axis
    X_seq = np.vstack(all_sequences).astype(np.float32)
    y_seq = np.array(all_targets, dtype=np.float32)

    print(f"\n✅ Sequence data prepared:")
    print(f"   X_seq: {X_seq.shape}")
    print(f"   y_seq: {y_seq.shape}")

    # Split by date
    dates_array = np.array([pd.to_datetime(d) for d in all_dates])

    train_end = pd.to_datetime(train_end)
    val_end = pd.to_datetime(val_end)

    train_mask = dates_array <= train_end
    val_mask = (dates_array > train_end) & (dates_array <= val_end)
    test_mask = dates_array > val_end

    X_train_seq = X_seq[train_mask]
    X_val_seq = X_seq[val_mask]
    X_test_seq = X_seq[test_mask]

    y_train_seq = y_seq[train_mask]
    y_val_seq = y_seq[val_mask]
    y_test_seq = y_seq[test_mask]

    print(f"\n📊 Sequence data split:")
    print(f"   Train: {X_train_seq.shape}")
    print(f"   Val: {X_val_seq.shape}")
    print(f"   Test: {X_test_seq.shape}")

    return (X_train_seq, X_val_seq, X_test_seq,
            y_train_seq, y_val_seq, y_test_seq)

# Prepare sequence data; any failure simply switches the sequence models off
try:
    # Materialise the archive as a plain dict of arrays before processing
    sequences_data_dict = dict(sequences_data)
    seq_data = prepare_sequence_data_fixed(sequences_data_dict, metadata)
    USE_SEQUENCE_MODELS = seq_data[0] is not None
    if USE_SEQUENCE_MODELS:
        (X_train_seq, X_val_seq, X_test_seq,
         y_train_seq, y_val_seq, y_test_seq) = seq_data
        print("✅ Sequence models enabled")
    else:
        print("⚠️ Sequence models disabled")
except Exception as e:
    print(f"⚠️ Sequence preparation failed: {e}")
    USE_SEQUENCE_MODELS = False

In [None]:
def load_all_data():
    """Load the tabular splits, the sequence archive and the metadata file.

    The metadata JSON is searched for in the current directory first and
    then in ``data/colab_datasets/``.  Its ``timestamp`` entry selects the
    ``tabular_{train,val,test}_<timestamp>.csv`` and
    ``sequences_<timestamp>.npz`` files, which are looked up in the same two
    locations.

    Returns:
        Tuple ``(train_df, val_df, test_df, sequences_data, metadata)``.
        The three frames have their ``date`` column parsed to datetimes,
        ``sequences_data`` is the object returned by ``np.load`` and
        ``metadata`` is the decoded JSON dictionary.

    Raises:
        FileNotFoundError: If no metadata file exists, or the data files
            cannot be found in any of the searched locations.
    """
    # glob returns matches in arbitrary order; sort them so the same metadata
    # file is chosen on every run when several are present.
    metadata_files = sorted(glob.glob('*metadata*.json'))
    if not metadata_files:
        # Try colab_datasets directory
        metadata_files = sorted(glob.glob('data/colab_datasets/*metadata*.json'))

    if not metadata_files:
        raise FileNotFoundError("❌ No metadata file found!")

    if len(metadata_files) > 1:
        print(f"⚠️ {len(metadata_files)} metadata files found, using {metadata_files[0]}")

    # Load metadata
    with open(metadata_files[0], 'r', encoding='utf-8') as f:
        metadata = json.load(f)

    timestamp = metadata['timestamp']
    print(f"📊 Loading data with timestamp: {timestamp}")

    # Try different paths
    paths_to_try = ['', 'data/colab_datasets/']

    for path in paths_to_try:
        try:
            # Load tabular data
            train_df = pd.read_csv(f'{path}tabular_train_{timestamp}.csv')
            val_df = pd.read_csv(f'{path}tabular_val_{timestamp}.csv')
            test_df = pd.read_csv(f'{path}tabular_test_{timestamp}.csv')

            # The sequence preparation step handles object-dtype arrays, and
            # those can only be read from the archive with allow_pickle=True.
            # NOTE(security): unpickling can execute arbitrary code, so only
            # load archives that come from a trusted source.
            sequences_data = np.load(f'{path}sequences_{timestamp}.npz', allow_pickle=True)
        except FileNotFoundError:
            continue
        else:
            print(f"✅ Data loaded from {path if path else 'current directory'}")
            break
    else:
        # Reached only when no location yielded a complete set of files
        raise FileNotFoundError("❌ Could not find data files!")

    # Convert dates
    for df in [train_df, val_df, test_df]:
        df['date'] = pd.to_datetime(df['date'])

    print(f"\n📈 Data loaded successfully:")
    print(f"   Train: {len(train_df):,} samples")
    print(f"   Val: {len(val_df):,} samples")
    print(f"   Test: {len(test_df):,} samples")
    print(f"   Features: {len(metadata['tabular_features'])}")
    print(f"   Tickers: {metadata['tickers']}")

    return train_df, val_df, test_df, sequences_data, metadata

# Load data: binds the three tabular splits, the sequence archive and the
# metadata dictionary as notebook-level names
train_df, val_df, test_df, sequences_data, metadata = load_all_data()

## 4️⃣ Data Preparation

In [None]:
def prepare_tabular_data(train_df, val_df, test_df, target='y1d'):
    """Turn the three tabular frames into scaled float32 model inputs.

    Every column of ``train_df`` that is not an identifier or a label column
    is used as a feature.  Missing feature values are filled with 0 and the
    features are scaled with a ``RobustScaler`` fitted on the training split
    only.

    Returns:
        ``(X_train, X_val, X_test, y_train, y_val, y_test, feature_cols,
        scaler)`` where the ``X`` arrays are scaled, the ``y`` arrays are the
        unscaled ``target`` column and ``scaler`` is the fitted scaler.
    """
    # Identifier and label columns that must never be fed to the model
    non_feature_cols = {
        'date', 'ticker', 'ticker_type', 'y1d', 'y5d',
        'alpha_1d', 'alpha_5d', 'direction_1d', 'direction_5d',
    }
    feature_cols = [name for name in train_df.columns if name not in non_feature_cols]

    print(f"📊 Using {len(feature_cols)} features")

    splits = (train_df, val_df, test_df)

    # Feature matrices, with missing values replaced by 0
    X_train, X_val, X_test = (
        frame[feature_cols].fillna(0).values.astype(np.float32) for frame in splits
    )

    # Target vectors
    y_train, y_val, y_test = (
        frame[target].values.astype(np.float32) for frame in splits
    )

    # Fit the scaler on the training split only, then reuse it for val/test
    scaler = RobustScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    X_test_scaled = scaler.transform(X_test)

    print(f"✅ Tabular data prepared")
    print(f"   X_train: {X_train_scaled.shape}")
    print(f"   y_train: {y_train.shape}")

    return (X_train_scaled, X_val_scaled, X_test_scaled,
            y_train, y_val, y_test, feature_cols, scaler)

# Prepare tabular data with the default target ('y1d'); the returned scaler
# and feature list are kept as notebook-level names
X_train, X_val, X_test, y_train, y_val, y_test, feature_cols, scaler = prepare_tabular_data(
    train_df, val_df, test_df
)

In [None]:
def prepare_sequence_data_fixed(sequences_data, metadata, target_features=48,
                                train_end='2023-02-02', val_end='2023-07-15'):
    """Stack the per-ticker sequence arrays and split them by date.

    For every ticker in ``metadata['tickers']`` the entries
    ``<ticker>_sequences``, ``<ticker>_targets_1d`` and ``<ticker>_dates``
    are read from ``sequences_data``.  Tickers without a ``_sequences`` entry
    are skipped.  Object-dtype arrays are reduced to the columns that convert
    to float, NaN/Inf values are replaced by 0 and tickers with fewer than
    ``target_features`` columns are right-padded with zero columns so that
    all tickers can be stacked.

    Args:
        sequences_data: Mapping from key to array (a dict or the object
            returned by ``np.load``).
        metadata: Dictionary with a ``'tickers'`` list.
        target_features: Feature count every ticker is padded up to.
        train_end: Last date (inclusive) assigned to the training split.
        val_end: Last date (inclusive) assigned to the validation split;
            later dates form the test split.

    Returns:
        ``(X_train, X_val, X_test, y_train, y_val, y_test)`` as float32
        arrays, or ``(None, None, None)`` when no ticker produced usable
        sequences (callers check the first element for ``None``).

    Raises:
        ValueError: If a ticker's targets or dates do not have one entry per
            sequence.
    """
    print("🔄 Preparing sequence data...")

    all_sequences = []
    all_targets = []
    all_dates = []

    # Process each ticker
    for ticker in metadata['tickers']:
        seq_key = f'{ticker}_sequences'
        if seq_key not in sequences_data:
            continue

        sequences = sequences_data[seq_key]
        targets = sequences_data[f'{ticker}_targets_1d']
        dates = sequences_data[f'{ticker}_dates']

        # Misaligned targets/dates would silently corrupt the date split
        if len(targets) != len(sequences) or len(dates) != len(sequences):
            raise ValueError(
                f"{ticker}: {len(sequences)} sequences but "
                f"{len(targets)} targets and {len(dates)} dates"
            )

        # Handle object dtype (string columns)
        if sequences.dtype == object:
            print(f"   ⚠️ {ticker}: Cleaning object dtype...")

            # Keep only the columns that convert to float and hold at least
            # one finite value
            numeric_cols = []
            for i in range(sequences.shape[2]):
                try:
                    test_col = sequences[:, :, i].astype(np.float32)
                except (ValueError, TypeError):
                    continue
                if np.any(np.isfinite(test_col)):
                    numeric_cols.append(i)

            if not numeric_cols:
                print(f"   ❌ {ticker}: No numeric columns, skipping")
                continue
            sequences = sequences[:, :, numeric_cols].astype(np.float32)
        else:
            sequences = sequences.astype(np.float32)

        # Clean NaN/Inf
        sequences = np.nan_to_num(sequences, nan=0.0, posinf=0.0, neginf=0.0)

        # Pad narrower tickers with zero columns; without this np.vstack
        # below fails as soon as two tickers differ in feature count.
        # NOTE(review): padding appends columns on the right; it assumes the
        # existing columns line up across tickers - confirm against the
        # dataset export.
        n_features = sequences.shape[2]
        if n_features < target_features:
            missing = target_features - n_features
            print(f"   🔧 {ticker}: Adding {missing} zero column(s) "
                  f"({n_features} -> {target_features} features)")
            padding = np.zeros(sequences.shape[:2] + (missing,), dtype=np.float32)
            sequences = np.concatenate([sequences, padding], axis=2)

        all_sequences.append(sequences)
        all_targets.extend(targets)
        all_dates.extend(dates)

        print(f"   ✅ {ticker}: {sequences.shape}")

    if not all_sequences:
        print("❌ No valid sequences found")
        return None, None, None

    # Stack all sequences along the sample axis
    X_seq = np.vstack(all_sequences).astype(np.float32)
    y_seq = np.array(all_targets, dtype=np.float32)

    print(f"\n✅ Sequence data prepared:")
    print(f"   X_seq: {X_seq.shape}")
    print(f"   y_seq: {y_seq.shape}")

    # Split by date
    dates_array = np.array([pd.to_datetime(d) for d in all_dates])

    train_end = pd.to_datetime(train_end)
    val_end = pd.to_datetime(val_end)

    train_mask = dates_array <= train_end
    val_mask = (dates_array > train_end) & (dates_array <= val_end)
    test_mask = dates_array > val_end

    X_train_seq = X_seq[train_mask]
    X_val_seq = X_seq[val_mask]
    X_test_seq = X_seq[test_mask]

    y_train_seq = y_seq[train_mask]
    y_val_seq = y_seq[val_mask]
    y_test_seq = y_seq[test_mask]

    print(f"\n📊 Sequence data split:")
    print(f"   Train: {X_train_seq.shape}")
    print(f"   Val: {X_val_seq.shape}")
    print(f"   Test: {X_test_seq.shape}")

    return (X_train_seq, X_val_seq, X_test_seq,
            y_train_seq, y_val_seq, y_test_seq)

# Prepare sequence data; any failure simply switches the sequence models off
try:
    seq_data = prepare_sequence_data_fixed(sequences_data, metadata)
    USE_SEQUENCE_MODELS = seq_data[0] is not None
    if USE_SEQUENCE_MODELS:
        (X_train_seq, X_val_seq, X_test_seq,
         y_train_seq, y_val_seq, y_test_seq) = seq_data
        print("✅ Sequence models enabled")
    else:
        print("⚠️ Sequence models disabled")
except Exception as e:
    print(f"⚠️ Sequence preparation failed: {e}")
    USE_SEQUENCE_MODELS = False

## 5️⃣ Model Definitions

In [None]:
class DeepMLP(nn.Module):
    """Feed-forward regressor built from Linear/BatchNorm/ReLU/Dropout stages.

    Args:
        input_dim: Number of input features.
        hidden_dims: Width of each hidden stage, in order.
        dropout: Dropout probability applied after every hidden stage.
    """

    def __init__(self, input_dim, hidden_dims=(512, 256, 128, 64), dropout=0.3):
        # The default is an immutable tuple so it cannot be altered between
        # instantiations; any iterable of ints is still accepted.
        super().__init__()

        layers = []
        prev_dim = input_dim

        for hidden_dim in hidden_dims:
            layers.extend([
                nn.Linear(prev_dim, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout)
            ])
            prev_dim = hidden_dim

        # Output layer: a single regression value per sample
        layers.append(nn.Linear(prev_dim, 1))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for input batch ``x``."""
        return self.network(x)


class LSTMModel(nn.Module):
    """Stacked LSTM whose final-timestep output feeds an MLP regression head."""

    def __init__(self, input_size, hidden_size=256, num_layers=2, dropout=0.2):
        super().__init__()

        # Dropout between LSTM layers is only requested for multi-layer stacks
        inter_layer_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )

        # Regression head: hidden_size -> 128 -> 64 -> 1
        head_layers = [
            nn.Linear(hidden_size, 128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, 1),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the LSTM over ``x`` and regress from its last timestep."""
        sequence_out, _state = self.lstm(x)
        # batch_first=True, so axis 1 is time; keep only the final step
        final_step = sequence_out[:, -1, :]
        return self.fc(final_step)


class TransformerModel(nn.Module):
    """Transformer encoder regressor over a feature sequence.

    The input is projected to ``d_model``, a learned positional parameter is
    added, the result is passed through a ``TransformerEncoder`` and
    mean-pooled over the sequence axis before the regression head.

    Args:
        input_size: Number of features per timestep.
        d_model: Width of the encoder.
        nhead: Number of attention heads.
        num_layers: Number of encoder layers.
        dropout: Dropout probability in the encoder and the head.
        max_seq_len: Longest sequence the positional parameter supports.
    """

    def __init__(self, input_size, d_model=256, nhead=8, num_layers=4, dropout=0.2,
                 max_seq_len=20):
        super().__init__()

        # Input projection
        self.input_projection = nn.Linear(input_size, d_model)

        # Learned positional encoding, one vector per timestep
        self.pos_encoder = nn.Parameter(torch.randn(1, max_seq_len, d_model))

        # Transformer
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=d_model * 4,
            dropout=dropout,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)

        # Output layers
        self.fc = nn.Sequential(
            nn.Linear(d_model, 128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, 1)
        )

    def forward(self, x):
        """Return one regression value per sequence in batch ``x``.

        Raises:
            ValueError: If the sequence is longer than the positional
                parameter created in ``__init__``.
        """
        seq_len = x.size(1)
        max_len = self.pos_encoder.size(1)
        if seq_len > max_len:
            # Slicing would silently return only max_len positions and the
            # addition below would then fail with an opaque shape error.
            raise ValueError(
                f"Sequence length {seq_len} exceeds max_seq_len={max_len}"
            )

        # Project input
        x = self.input_projection(x)

        # Add positional encoding
        x = x + self.pos_encoder[:, :seq_len, :]

        # Transformer encoding
        x = self.transformer(x)

        # Global pooling over the sequence axis
        x = x.mean(dim=1)

        return self.fc(x)

print("✅ Model architectures defined")

## 6️⃣ Training Functions

In [None]:
def calculate_ic_metrics(y_true, y_pred):
    """Compute Information Coefficient metrics for a set of predictions.

    Args:
        y_true: Realised values (any array-like; flattened to 1-D).
        y_pred: Predicted values (any array-like; flattened to 1-D).

    Returns:
        Dictionary of plain Python floats with keys ``'ic'`` (Pearson
        correlation), ``'rank_ic'`` (Spearman correlation) and ``'hit_rate'``
        (fraction of samples whose predicted and realised signs agree).
        Undefined correlations are reported as 0.0; with fewer than two
        finite pairs the result is ``ic=0, rank_ic=0, hit_rate=0.5``.

    Raises:
        ValueError: If the two inputs do not contain the same number of
            values.
    """
    # Flatten both inputs so that (N,) and (N, 1) arrays are compared
    # element-wise instead of being broadcast to an N x N mask.
    y_true = np.asarray(y_true, dtype=np.float64).ravel()
    y_pred = np.asarray(y_pred, dtype=np.float64).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true has {y_true.size} values but y_pred has {y_pred.size}"
        )

    # Remove NaN/Inf values
    mask = np.isfinite(y_true) & np.isfinite(y_pred)
    if mask.sum() < 2:
        return {'ic': 0.0, 'rank_ic': 0.0, 'hit_rate': 0.5}

    y_true_clean = y_true[mask]
    y_pred_clean = y_pred[mask]

    # Correlation is undefined when either side is constant; report 0
    # directly instead of triggering a SciPy warning and a NaN result.
    if np.ptp(y_true_clean) == 0 or np.ptp(y_pred_clean) == 0:
        ic = rank_ic = float('nan')
    else:
        ic, _ = pearsonr(y_pred_clean, y_true_clean)
        rank_ic, _ = spearmanr(y_pred_clean, y_true_clean)

    # Hit rate (directional accuracy)
    hit_rate = np.mean(np.sign(y_pred_clean) == np.sign(y_true_clean))

    # Return built-in floats so the values serialise cleanly to JSON
    return {
        'ic': float(ic) if np.isfinite(ic) else 0.0,
        'rank_ic': float(rank_ic) if np.isfinite(rank_ic) else 0.0,
        'hit_rate': float(hit_rate)
    }


def train_model_with_amp(model, train_loader, val_loader,
                         epochs=100, lr=0.001, device='cuda',
                         model_name="Model", early_stopping_patience=30):
    """Train ``model`` with MSE loss and select the weights with the best validation Rank IC.

    Mixed precision (autocast + gradient scaling) is used when ``device`` is
    a CUDA device and is disabled otherwise.

    Args:
        model: ``nn.Module`` producing one value per sample.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        epochs: Maximum number of epochs.
        lr: Initial AdamW learning rate.
        device: Device name or ``torch.device`` to train on.
        model_name: Label used in progress messages.
        early_stopping_patience: Number of consecutive epochs without a new
            best validation Rank IC after which training stops.

    Returns:
        ``(model, train_losses, val_ics, best_ic)`` where ``model`` holds the
        weights of the best epoch, ``train_losses`` is the mean training loss
        per epoch and ``val_ics`` the validation Rank IC per epoch.
    """
    import copy

    device = torch.device(device)
    # AMP only applies to CUDA; on CPU autocast/GradScaler become no-ops
    use_amp = device.type == 'cuda'

    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
    # The monitored quantity is an IC (higher is better), so the scheduler
    # must run in 'max' mode; the default 'min' mode would cut the learning
    # rate whenever the IC improves.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', patience=10, factor=0.5
    )

    # Mixed precision training
    scaler = GradScaler(enabled=use_amp)

    train_losses = []
    val_ics = []
    best_ic = -float('inf')
    best_model_state = None
    patience_counter = 0

    print(f"\n🎯 Training {model_name}...")

    for epoch in range(epochs):
        # Training
        model.train()
        train_loss = 0.0

        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            optimizer.zero_grad()

            # Mixed precision forward pass
            with autocast(enabled=use_amp):
                # reshape(-1) keeps a 1-D tensor even for a batch of one,
                # where squeeze() would collapse to a 0-d scalar
                outputs = model(batch_X).reshape(-1)
                loss = criterion(outputs, batch_y.reshape(-1))

            # Backward pass
            scaler.scale(loss).backward()

            # Gradients must be unscaled before clipping by norm
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            scaler.step(optimizer)
            scaler.update()

            train_loss += loss.item()

        # Validation
        model.eval()
        val_predictions = []
        val_actuals = []

        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X = batch_X.to(device)
                outputs = model(batch_X).reshape(-1)

                val_predictions.extend(outputs.float().cpu().numpy())
                val_actuals.extend(batch_y.reshape(-1).numpy())

        # Calculate validation IC
        val_metrics = calculate_ic_metrics(
            np.array(val_actuals),
            np.array(val_predictions)
        )
        val_ic = val_metrics['rank_ic']

        train_losses.append(train_loss / len(train_loader))
        val_ics.append(val_ic)

        # Learning rate scheduling
        scheduler.step(val_ic)

        # Track the best epoch
        if val_ic > best_ic:
            best_ic = val_ic
            # state_dict() returns references to the live tensors, so a deep
            # copy is needed to freeze the weights of this epoch.
            best_model_state = copy.deepcopy(model.state_dict())
            patience_counter = 0
        else:
            patience_counter += 1

        # Print progress
        if epoch % 20 == 0:
            print(f"  Epoch {epoch:3d}: Loss={train_loss/len(train_loader):.4f}, "
                  f"Val IC={val_ic:.4f}, Best IC={best_ic:.4f}")

        # Early stopping
        if patience_counter >= early_stopping_patience:
            print(f"  Early stopping at epoch {epoch}")
            break

    # Load best model (absent only when no epoch was run)
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    print(f"✅ {model_name} training completed. Best IC: {best_ic:.4f}")

    return model, train_losses, val_ics, best_ic

print("✅ Training functions defined")

## 7️⃣ Model Training

In [None]:
# Training configuration
# Select the compute device here: every later cell reads `device`, and no
# other visible cell assigns it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# A larger batch is used on GPU, a smaller one on CPU
BATCH_SIZE = 256 if device.type == 'cuda' else 64
EPOCHS = 200
LEARNING_RATE = 0.001

print(f"📊 Training Configuration:")
print(f"   Batch Size: {BATCH_SIZE}")
print(f"   Epochs: {EPOCHS}")
print(f"   Learning Rate: {LEARNING_RATE}")
print(f"   Device: {device}")

In [None]:
# Wrap the tabular arrays as tensor datasets
train_dataset = TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train))
val_dataset = TensorDataset(torch.FloatTensor(X_val), torch.FloatTensor(y_val))

# Only the training loader shuffles; validation keeps the original row order
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"✅ Data loaders created")
print(f"   Train batches: {len(train_loader)}")
print(f"   Val batches: {len(val_loader)}")

In [None]:
# Train MLP on the scaled tabular features; the input width is taken from
# the number of columns in X_train
mlp_model = DeepMLP(X_train.shape[1], hidden_dims=[512, 256, 128, 64])
# Returns the trained model, per-epoch training losses, per-epoch validation
# ICs and the best validation IC
mlp_model, mlp_losses, mlp_ics, mlp_best_ic = train_model_with_amp(
    mlp_model, train_loader, val_loader,
    epochs=EPOCHS, lr=LEARNING_RATE, device=device,
    model_name="MLP"
)

In [None]:
# Train LSTM (only when the sequence data was prepared successfully)
if not USE_SEQUENCE_MODELS:
    print("⚠️ LSTM training skipped (no sequence data)")
else:
    # Sequence datasets and loaders; these loaders are reused by later cells
    train_seq_dataset = TensorDataset(
        torch.FloatTensor(X_train_seq), torch.FloatTensor(y_train_seq)
    )
    val_seq_dataset = TensorDataset(
        torch.FloatTensor(X_val_seq), torch.FloatTensor(y_val_seq)
    )
    train_seq_loader = DataLoader(train_seq_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_seq_loader = DataLoader(val_seq_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # The last axis of X_train_seq is the per-timestep feature count
    lstm_model = LSTMModel(X_train_seq.shape[2], hidden_size=256, num_layers=2)
    lstm_model, lstm_losses, lstm_ics, lstm_best_ic = train_model_with_amp(
        lstm_model, train_seq_loader, val_seq_loader,
        epochs=EPOCHS, lr=LEARNING_RATE, device=device,
        model_name="LSTM"
    )

In [None]:
# Train Transformer (only when the sequence data was prepared successfully)
if not USE_SEQUENCE_MODELS:
    print("⚠️ Transformer training skipped (no sequence data)")
else:
    transformer_model = TransformerModel(
        X_train_seq.shape[2], d_model=256, nhead=8, num_layers=4
    )
    # Trained with half the base learning rate, on the sequence loaders
    # created in the LSTM cell
    transformer_model, trans_losses, trans_ics, trans_best_ic = train_model_with_amp(
        transformer_model, train_seq_loader, val_seq_loader,
        epochs=EPOCHS, lr=LEARNING_RATE*0.5, device=device,
        model_name="Transformer"
    )

In [None]:
# Train TabNet
print("\n🎯 Training TabNet...")

# Estimator configuration. No categorical features are declared
# (cat_idxs/cat_dims are empty); AdamW and ReduceLROnPlateau are passed in as
# the optimizer and scheduler factories.
# NOTE(review): the meaning of the remaining hyperparameters is defined by
# pytorch-tabnet - consult its documentation before tuning them.
tabnet_model = TabNetRegressor(
    n_d=32, n_a=32,
    n_steps=5,
    gamma=1.5,
    cat_idxs=[],
    cat_dims=[],
    cat_emb_dim=1,
    lambda_sparse=1e-4,
    momentum=0.3,
    clip_value=2.0,
    optimizer_fn=torch.optim.AdamW,
    optimizer_params=dict(lr=0.002, weight_decay=1e-5),
    scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
    scheduler_params=dict(patience=10, factor=0.5),
    mask_type="entmax",
    seed=42,
    verbose=0,
    device_name='cuda' if device.type == 'cuda' else 'cpu'
)

# Reshape targets to 2D for TabNet (required format)
y_train_2d = y_train.reshape(-1, 1)
y_val_2d = y_val.reshape(-1, 1)

# Train TabNet, evaluating RMSE on the validation split each epoch; the
# batch size expression mirrors the one used for BATCH_SIZE
tabnet_model.fit(
    X_train, y_train_2d,
    eval_set=[(X_val, y_val_2d)],
    eval_metric=['rmse'],
    max_epochs=100,
    patience=20,
    batch_size=256 if device.type == 'cuda' else 64
)

# Evaluate TabNet on the validation split
tabnet_pred_val = tabnet_model.predict(X_val).flatten()  # Flatten back to 1D
tabnet_metrics = calculate_ic_metrics(y_val, tabnet_pred_val)
print(f"✅ TabNet training completed. Val IC: {tabnet_metrics['rank_ic']:.4f}")

## 8️⃣ Model Evaluation

In [None]:
def evaluate_model(model, X_test, y_test, model_name, device, is_sequence=False):
    """Score ``model`` on a held-out set.

    Models exposing a ``predict`` method (TabNet in this notebook) are
    called through it; any other model is treated as a PyTorch module and
    run in eval mode without gradients on ``device``.  ``is_sequence`` is
    accepted for call-site symmetry and is not read.

    Returns:
        ``(results, y_pred)`` where ``results`` is a dictionary with the
        keys ``model``, ``ic``, ``rank_ic``, ``hit_rate``, ``rmse`` and
        ``mae`` and ``y_pred`` is the 1-D prediction array.
    """
    if not hasattr(model, 'predict'):
        # PyTorch model: the whole test set is pushed through in one batch
        model.eval()
        with torch.no_grad():
            inputs = torch.FloatTensor(X_test).to(device)
            y_pred = model(inputs).cpu().numpy().flatten()
    else:
        # Estimator-style model; make sure the predictions end up 1-D
        y_pred = model.predict(X_test)
        if y_pred.ndim > 1:
            y_pred = y_pred.flatten()

    # The targets must be 1-D as well
    if y_test.ndim > 1:
        y_test = y_test.flatten()

    ic_metrics = calculate_ic_metrics(y_test, y_pred)

    results = {
        'model': model_name,
        'ic': ic_metrics['ic'],
        'rank_ic': ic_metrics['rank_ic'],
        'hit_rate': ic_metrics['hit_rate'],
        'rmse': np.sqrt(mean_squared_error(y_test, y_pred)),
        'mae': mean_absolute_error(y_test, y_pred),
    }

    return results, y_pred

# Score the tabular models on the tabular test split
mlp_results, mlp_predictions = evaluate_model(
    mlp_model, X_test, y_test, 'MLP', device
)
tabnet_results, tabnet_predictions = evaluate_model(
    tabnet_model, X_test, y_test, 'TabNet', device
)
results = [mlp_results, tabnet_results]

# Sequence models are scored on their own test split
if USE_SEQUENCE_MODELS:
    lstm_results, lstm_predictions = evaluate_model(
        lstm_model, X_test_seq, y_test_seq, 'LSTM', device, is_sequence=True
    )
    results.append(lstm_results)

    trans_results, trans_predictions = evaluate_model(
        transformer_model, X_test_seq, y_test_seq, 'Transformer', device, is_sequence=True
    )
    results.append(trans_results)

# Rank the models by Rank IC, best first
results_df = pd.DataFrame(results).sort_values('rank_ic', ascending=False)

print("\n" + "="*60)
print("📊 MODEL EVALUATION RESULTS")
print("="*60)
print(results_df.to_string(index=False))
print("="*60)

## 9️⃣ Ensemble Model

In [None]:
def create_ensemble_predictions(models_predictions, weights=None):
    """Combine several prediction vectors into a weighted sum.

    Args:
        models_predictions: Non-empty sequence of equally shaped array-likes,
            one per model.
        weights: One weight per model; equal weights are used when omitted.
            The weights are applied as given (they are not normalised).

    Returns:
        Floating-point array with the weighted sum of the predictions.

    Raises:
        ValueError: If no predictions are given, if the number of weights
            differs from the number of models, or if the prediction shapes
            differ.
    """
    predictions = [np.asarray(pred) for pred in models_predictions]
    if not predictions:
        raise ValueError("At least one prediction array is required")

    if weights is None:
        # Equal weights
        weights = [1 / len(predictions)] * len(predictions)
    elif len(weights) != len(predictions):
        # zip() would silently drop the surplus models or weights
        raise ValueError(
            f"Got {len(weights)} weights for {len(predictions)} prediction arrays"
        )

    shape = predictions[0].shape
    if any(pred.shape != shape for pred in predictions):
        raise ValueError("All prediction arrays must have the same shape")

    # Accumulate in a floating dtype: float inputs keep their precision,
    # integer inputs are promoted so the in-place addition cannot fail.
    ensemble_pred = np.zeros(shape, dtype=np.result_type(predictions[0].dtype, np.float32))
    for pred, weight in zip(predictions, weights):
        ensemble_pred += pred * weight

    return ensemble_pred

# Create ensemble from the two tabular models only. Sequence-model
# predictions would first have to be aligned with the tabular rows, so they
# are left out whether or not sequence models were trained.
ensemble_predictions = create_ensemble_predictions(
    [mlp_predictions, tabnet_predictions],
    weights=[0.6, 0.4],  # Give more weight to MLP
)

# Evaluate ensemble against the tabular test targets
ensemble_metrics = calculate_ic_metrics(y_test, ensemble_predictions)
ensemble_rmse = np.sqrt(mean_squared_error(y_test, ensemble_predictions))

print("\n🎯 ENSEMBLE RESULTS:")
print(f"   IC: {ensemble_metrics['ic']:.4f}")
print(f"   Rank IC: {ensemble_metrics['rank_ic']:.4f}")
print(f"   Hit Rate: {ensemble_metrics['hit_rate']:.3%}")
print(f"   RMSE: {ensemble_rmse:.4f}")

## 🔟 Visualization

In [None]:
# Plot training history: MLP loss on the left, validation IC per model on
# the right
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# MLP training
axes[0].plot(mlp_losses, label='Train Loss', alpha=0.7)
axes[0].set_title('MLP Training Loss')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# IC evolution (sequence-model curves exist only when those models were trained)
axes[1].plot(mlp_ics, label='MLP', alpha=0.7)
if USE_SEQUENCE_MODELS:
    axes[1].plot(lstm_ics, label='LSTM', alpha=0.7)
    axes[1].plot(trans_ics, label='Transformer', alpha=0.7)
# NOTE(review): the reference line is drawn at 0.03 while the notebook
# overview states a target of IC ≥ 0.05 - confirm which value is intended
axes[1].axhline(y=0.03, color='r', linestyle='--', label='Target IC')
axes[1].set_title('Validation IC Evolution')
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Rank IC')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Predictions vs Actual scatter plot: MLP on the left, ensemble on the right
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# MLP predictions
axes[0].scatter(y_test, mlp_predictions, alpha=0.5, s=10)
# Dashed diagonal (predicted == actual) spanning the range of y_test
axes[0].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
# The value shown in the title is the Rank IC
axes[0].set_title(f'MLP Predictions (IC={mlp_results["rank_ic"]:.4f})')
axes[0].set_xlabel('Actual Returns')
axes[0].set_ylabel('Predicted Returns')
axes[0].grid(True, alpha=0.3)

# Ensemble predictions
axes[1].scatter(y_test, ensemble_predictions, alpha=0.5, s=10)
# Dashed diagonal (predicted == actual) spanning the range of y_test
axes[1].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
# The value shown in the title is the Rank IC
axes[1].set_title(f'Ensemble Predictions (IC={ensemble_metrics["rank_ic"]:.4f})')
axes[1].set_xlabel('Actual Returns')
axes[1].set_ylabel('Predicted Returns')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 📊 Final Summary

In [None]:
# Final summary
print("\n" + "="*60)
print("🏆 FINAL RESULTS SUMMARY")
print("="*60)

# Best single model (results_df is sorted by Rank IC, best first)
best_model = results_df.iloc[0]
print(f"\n📌 Best Single Model: {best_model['model']}")
print(f"   Rank IC: {best_model['rank_ic']:.4f}")
print(f"   IC: {best_model['ic']:.4f}")
print(f"   Hit Rate: {best_model['hit_rate']:.3%}")
print(f"   RMSE: {best_model['rmse']:.4f}")

# Ensemble performance
print(f"\n🎯 Ensemble Model:")
print(f"   Rank IC: {ensemble_metrics['rank_ic']:.4f}")
print(f"   IC: {ensemble_metrics['ic']:.4f}")
print(f"   Hit Rate: {ensemble_metrics['hit_rate']:.3%}")
print(f"   RMSE: {ensemble_rmse:.4f}")

# Go/No-Go decision
# NOTE(review): the notebook overview states a target of IC ≥ 0.05 while the
# gate below uses 0.03 - confirm which value is intended.
IC_THRESHOLD = 0.03
# Cast to built-in types so the values are written to JSON as a number and a
# boolean rather than being stringified by the `default=str` fallback.
best_ic = float(max(best_model['rank_ic'], ensemble_metrics['rank_ic']))
meets_threshold = bool(best_ic >= IC_THRESHOLD)

print("\n" + "="*60)
if meets_threshold:
    print(f"✅ GO DECISION: Model meets success criteria (IC ≥ {IC_THRESHOLD})")
else:
    print(f"⚠️ CONTINUE: Current best IC={best_ic:.4f} < {IC_THRESHOLD} threshold")
print("="*60)


def _to_native(value):
    """Recursively convert NumPy scalars and arrays into plain Python objects."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, dict):
        return {key: _to_native(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_to_native(item) for item in value]
    return value


# Save results
results_dict = _to_native({
    'timestamp': datetime.now().strftime('%Y%m%d_%H%M%S'),
    'models': results,
    'ensemble': {
        'ic': ensemble_metrics['ic'],
        'rank_ic': ensemble_metrics['rank_ic'],
        'hit_rate': ensemble_metrics['hit_rate'],
        'rmse': ensemble_rmse
    },
    'best_ic': best_ic,
    'meets_threshold': meets_threshold
})

# Save to JSON; `default=str` remains only as a last-resort fallback for
# objects that are neither built-in nor NumPy types
import json
with open('deep_learning_results.json', 'w') as f:
    json.dump(results_dict, f, indent=2, default=str)

print("\n✅ Results saved to deep_learning_results.json")