# Deep Learning Trading Model (Stability Optimized)

This notebook implements a deep learning model for trading using:

- LSTM + CNN + Attention architecture
- Multi-symbol training with market regime awareness
- Numerical stability improvements
- Technical indicators and market context
- Custom loss functions with numerical-stability safeguards

In [1]:
# Install required packages
!pip install tensorflow pandas numpy scikit-learn yfinance plotly ta

import tensorflow as tf
import numpy as np
import pandas as pd
import ta
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, LSTM, Dense, Dropout, BatchNormalization,
    Conv1D, LayerNormalization, Activation, Add, Concatenate
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, ReduceLROnPlateau, Callback
from sklearn.preprocessing import RobustScaler
import yfinance as yf
import plotly.graph_objects as go
from datetime import datetime
import pickle

# Set the Keras default float type to float32 for every layer created below.
tf.keras.backend.set_floatx('float32')

# Ask TensorFlow to grow GPU memory on demand instead of reserving it up front.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Memory growth can only be configured before the GPUs have been
        # initialized; re-running this cell in a live kernel lands here.
        print(f"Could not enable memory growth for {gpu.name}: {err}")

print("\nGPU Configuration:")
print("Number of GPUs:", len(gpus))
print("Data Type:", tf.keras.backend.floatx())
print("TensorFlow version:", tf.__version__)

# Report runtime availability separately from build capabilities:
# is_built_with_cuda() only says how the TensorFlow binary was compiled.
print("\nGPU Available:", bool(gpus))
print("Built with CUDA:", tf.test.is_built_with_cuda())
print("GPU Device:", tf.test.gpu_device_name())

In [2]:
def calculate_indicators(df):
    """Return a copy of ``df`` extended with technical-indicator columns.

    Args:
        df: Price frame with at least ``Close``, ``High``, ``Low`` and
            ``Volume`` columns. It is not modified.

    Returns:
        A new DataFrame holding the original columns plus the indicator
        columns (moving averages, momentum, volatility, volume and price
        pattern features). Rows in which any column is NaN or infinite are
        dropped, so the result is usually shorter than the input
        (presumably by the warm-up period of the longest window).
    """
    # Work on a copy so the caller's frame is not silently extended.
    df = df.copy()
    close = df['Close']
    high = df['High']
    low = df['Low']
    volume = df['Volume']

    bb_indicator = ta.volatility.BollingerBands(close)

    # Moving averages
    for window in (20, 50, 200):
        df[f'sma_{window}'] = ta.trend.sma_indicator(close, window=window)
    for window in (10, 20, 50):
        df[f'ema_{window}'] = ta.trend.ema_indicator(close, window=window)

    # Momentum
    df['rsi'] = ta.momentum.rsi(close, window=14)
    df['macd'] = ta.trend.macd_diff(close)
    for days in (1, 5, 10, 20):
        df[f'mom_{days}d'] = close.pct_change(days)

    # Volatility
    df['atr'] = ta.volatility.average_true_range(high, low, close)
    df['atr_pct'] = df['atr'] / close
    df['bb_high'] = bb_indicator.bollinger_hband()
    df['bb_mid'] = bb_indicator.bollinger_mavg()
    df['bb_low'] = bb_indicator.bollinger_lband()
    df['bb_width'] = (df['bb_high'] - df['bb_low']) / df['bb_mid']

    # Volume
    df['volume_ma_20'] = ta.trend.sma_indicator(volume, window=20)
    df['volume_ma_50'] = ta.trend.sma_indicator(volume, window=50)
    df['volume_ratio'] = volume / df['volume_ma_20']
    df['volume_trend'] = volume.pct_change(5)

    # Price patterns (all expressed as a fraction of the close)
    df['high_low_range'] = (high - low) / close
    df['close_to_high'] = (high - close) / close
    df['close_to_low'] = (close - low) / close

    # Trend strength: signed distance of the close from its 50-period SMA.
    # (The previous np.where had two algebraically identical branches.)
    df['trend_strength'] = (close - df['sma_50']) / df['sma_50']

    # Ratios with a zero denominator (e.g. zero volume) yield +/-inf, which
    # dropna() would keep; turn them into NaN so those rows are dropped too.
    df = df.replace([np.inf, -np.inf], np.nan)

    return df.dropna()

In [3]:
def calculate_cross_asset_features(tech_data):
    """Calculate cross-asset relationships between tech stocks.

    Args:
        tech_data: Mapping of symbol -> DataFrame. Each frame must provide
            the columns ``Close``, ``Volume``, ``volume_ma_20``, ``mom_20d``,
            ``atr_pct`` and ``sma_50``.

    Returns:
        Dict of symbol -> DataFrame indexed like that symbol's input frame.
        For every *other* symbol it holds ``corr_<other>`` (20-period rolling
        correlation of returns), ``rel_strength_<other>`` (ratio of 20-period
        price ratios) and ``rel_volume_<other>`` (ratio of volume / 20-period
        volume average), followed by four sector-relative columns. An empty
        mapping yields an empty dict.
    """
    features = {}
    if not tech_data:
        return features

    symbols = list(tech_data)

    # Per-symbol building blocks, computed once and reused for every pair.
    returns = {s: tech_data[s]['Close'].pct_change() for s in symbols}
    strength_20d = {
        s: tech_data[s]['Close'] / tech_data[s]['Close'].shift(20)
        for s in symbols
    }
    volume_ratio = {
        s: tech_data[s]['Volume'] / tech_data[s]['volume_ma_20']
        for s in symbols
    }

    def sector_mean(series_by_symbol):
        # Row-wise mean across symbols; missing values are skipped.
        return pd.concat(series_by_symbol, axis=1).mean(axis=1)

    # Sector-wide aggregates do not depend on the symbol being processed,
    # so they are built once here instead of once per symbol.
    sector_avg_return = sector_mean(returns)
    sector_mom = sector_mean({s: tech_data[s]['mom_20d'] for s in symbols})
    sector_vol = sector_mean({s: tech_data[s]['atr_pct'] for s in symbols})
    above_sma = pd.concat(
        {
            s: (tech_data[s]['Close'] > tech_data[s]['sma_50']).astype(float)
            for s in symbols
        },
        axis=1,
    )
    # Fraction of sector stocks trading above their 50-period SMA.
    sector_sma_strength = above_sma.sum(axis=1) / len(symbols)

    for symbol in symbols:
        frame = pd.DataFrame(index=tech_data[symbol].index)

        # Pairwise features against every other stock.
        for other in symbols:
            if other == symbol:
                continue
            frame[f'corr_{other}'] = \
                returns[symbol].rolling(20).corr(returns[other])
            frame[f'rel_strength_{other}'] = \
                strength_20d[symbol] / strength_20d[other]
            frame[f'rel_volume_{other}'] = \
                volume_ratio[symbol] / volume_ratio[other]

        # Stock versus sector average (assignment aligns on the frame index).
        frame['sector_relative_return'] = returns[symbol] - sector_avg_return
        frame['sector_relative_momentum'] = \
            tech_data[symbol]['mom_20d'] - sector_mom
        frame['sector_relative_volatility'] = \
            tech_data[symbol]['atr_pct'] - sector_vol
        frame['sector_sma_strength'] = sector_sma_strength

        features[symbol] = frame

    return features

In [4]:
def calculate_market_regimes(data):
    """Build the market-context frame from the benchmark ETF data.

    ``data`` must map 'SPY', 'QQQ', 'UVXY', 'SQQQ' and 'TLT' to frames with a
    ``Close`` column; SPY and QQQ additionally need ``mom_20d`` and
    ``atr_pct``. The input frames are left untouched.

    Returns a DataFrame with five quintile-bucketed regime columns
    (categorical labels) followed by six continuous context columns.
    """
    spy, qqq, uvxy, sqqq, tlt = (
        data[ticker] for ticker in ('SPY', 'QQQ', 'UVXY', 'SQQQ', 'TLT')
    )

    # (output column, source frame, look-back in periods, labels low -> high)
    regime_specs = [
        ('market_regime', spy, 20,
         ['strong_down', 'down', 'neutral', 'up', 'strong_up']),
        ('tech_regime', qqq, 20,
         ['tech_strong_down', 'tech_down', 'tech_neutral', 'tech_up', 'tech_strong_up']),
        ('volatility_regime', uvxy, 5,
         ['very_low', 'low', 'normal', 'high', 'very_high']),
        # SQQQ: the lowest quintile of its change is labelled strong_bull.
        ('bear_regime', sqqq, 10,
         ['strong_bull', 'bull', 'neutral', 'bear', 'strong_bear']),
        # TLT: the lowest quintile of its change is labelled rates_up_strong.
        ('rates_regime', tlt, 20,
         ['rates_up_strong', 'rates_up', 'rates_neutral', 'rates_down', 'rates_down_strong']),
    ]

    context = {}
    for column, frame, lookback, labels in regime_specs:
        change = frame['Close'].pct_change(lookback)
        # Quintiles are taken over the whole series passed in.
        context[column] = pd.qcut(change, q=5, labels=labels)

    # Relative strength between markets: 20-period QQQ ratio over SPY ratio
    qqq_ratio = qqq['Close'] / qqq['Close'].shift(20)
    spy_ratio = spy['Close'] / spy['Close'].shift(20)
    context['tech_vs_spy'] = qqq_ratio / spy_ratio

    # Volatility trend: 5-period mean of UVXY's 5-period change
    context['vol_trend'] = uvxy['Close'].pct_change(5).rolling(5).mean()

    # Market breadth
    context['market_momentum'] = spy['mom_20d']
    context['tech_momentum'] = qqq['mom_20d']

    # Risk measures: 10-period mean of ATR as a fraction of price
    context['market_risk'] = spy['atr_pct'].rolling(10).mean()
    context['tech_risk'] = qqq['atr_pct'].rolling(10).mean()

    return pd.DataFrame(context)

In [5]:
def download_and_prepare_data(tech_stocks, market_etfs, start_date='2023-01-01'):
    """Fetch price history for every symbol and assemble one stacked frame.

    Each symbol is downloaded with yfinance from ``start_date`` onwards and
    run through ``calculate_indicators``. Tech stocks additionally receive
    their cross-asset features, and every symbol receives the market-context
    columns. The per-symbol frames are then stacked row-wise, with ``symbol``
    and ``instrument_type`` ('stock' or 'etf') identifying each row.

    NOTE(review): cross-asset columns are named after the *other* symbol and
    exist only for tech stocks, so after stacking every row appears to carry
    at least one NaN feature (a stock has no column about itself, ETFs have
    none at all) — confirm downstream code copes with that.
    """
    symbols = tech_stocks + market_etfs

    # Download and enrich each symbol's history
    frames = {}
    for name in symbols:
        print(f"Processing {name}...")
        history = yf.Ticker(name).history(start=start_date)
        frames[name] = calculate_indicators(history)

    # Cross-asset features are computed among the tech stocks only
    cross_features = calculate_cross_asset_features(
        {name: frames[name] for name in tech_stocks}
    )

    # Market regimes and context come from the full symbol set
    market_context = calculate_market_regimes(frames)

    def assemble(name):
        # Column-wise joins align on the index.
        frame = frames[name].copy()
        is_stock = name in tech_stocks
        frame['instrument_type'] = 'stock' if is_stock else 'etf'
        frame['symbol'] = name
        if is_stock:
            frame = pd.concat([frame, cross_features[name]], axis=1)
        return pd.concat([frame, market_context], axis=1)

    # Stack all symbols into a single frame
    return pd.concat([assemble(name) for name in symbols], axis=0)

# Define instruments.
# tech_stocks are the symbols that receive cross-asset features;
# market_etfs must contain SPY, QQQ, UVXY, SQQQ and TLT because
# calculate_market_regimes looks each of them up by name.
tech_stocks = ['AAPL', 'MSFT', 'GOOGL', 'NVDA', 'AMD', 'META']
market_etfs = ['SPY', 'QQQ', 'UVXY', 'SQQQ', 'TLT']

# Download and prepare data (uses the default start_date of '2023-01-01').
# The result stacks all symbols row-wise; the 'symbol' column tells them apart.
data = download_and_prepare_data(tech_stocks, market_etfs)

# Plot sample data: one closing-price line per tech stock
fig = go.Figure()
for symbol in tech_stocks:
    # Select this symbol's rows from the stacked frame
    symbol_data = data[data['symbol'] == symbol]
    fig.add_trace(go.Scatter(
        x=symbol_data.index,
        y=symbol_data['Close'],
        name=symbol
    ))
fig.update_layout(title='Tech Stock Prices', xaxis_title='Date', yaxis_title='Price')
fig.show()

In [6]:
def prepare_sequences(data, seq_length=60):
    """Turn the stacked feature frame into windowed train/validation datasets.

    Numerical columns are scaled with a RobustScaler, the known categorical
    columns are one-hot encoded, and sliding windows of ``seq_length`` rows
    are cut from the result. Each window is paired with the next-row return
    and its direction.

    A window is kept only if
      * all of its rows (and its target row) belong to the same ``symbol``
        run, so windows never straddle two instruments in the stacked frame;
      * every feature inside the window is finite;
      * the raw ``Close`` values needed for the label are finite and non-zero.

    Args:
        data: DataFrame with a ``Close`` column plus feature columns; an
            optional ``symbol`` column marks which rows belong together.
        seq_length: Number of consecutive rows per input window.

    Returns:
        ``(train_dataset, val_dataset, scaler, n_features)`` where the
        datasets are unbatched ``tf.data.Dataset`` objects yielding
        ``(window, {'return_prediction': r, 'direction_prediction': d})``,
        ``scaler`` is the fitted RobustScaler and ``n_features`` is the width
        of each window row.

    Raises:
        ValueError: if ``seq_length`` is not positive, ``Close`` is missing,
            or no valid window can be built.

    NOTE(review): the scaler is fitted on all rows and the 80/20 split is
    positional over the stacked windows, so validation windows come from the
    symbols stacked last rather than from the most recent dates — confirm
    this is intended.
    """
    if seq_length < 1:
        raise ValueError("seq_length must be a positive integer")
    if 'Close' not in data.columns:
        raise ValueError("data must contain a 'Close' column")

    # Separate numerical and categorical columns
    datetime_cols = ['datetime']
    categorical_cols = ['symbol', 'instrument_type', 'market_regime', 'tech_regime',
                        'volatility_regime', 'bear_regime', 'rates_regime']
    excluded = set(datetime_cols + categorical_cols)
    numerical_cols = [col for col in data.columns if col not in excluded]

    # RobustScaler tolerates NaN but rejects infinities, so map +/-inf to NaN
    # first; the affected rows are then skipped by the finiteness check below.
    numeric_frame = (
        data[numerical_cols].astype(float).replace([np.inf, -np.inf], np.nan)
    )
    scaler = RobustScaler()
    feature_blocks = [scaler.fit_transform(numeric_frame).astype(np.float32)]

    # One-hot encode categoricals as float32 so the combined matrix stays
    # numeric (boolean dummies mixed with floats would give an object array).
    for col in categorical_cols:
        if col in data.columns:
            dummies = pd.get_dummies(data[col], prefix=col, dtype=np.float32)
            feature_blocks.append(dummies.to_numpy(dtype=np.float32))

    features = np.hstack(feature_blocks)
    n_rows = features.shape[0]
    n_windows = n_rows - seq_length
    if n_windows <= 0:
        raise ValueError("No valid sequences created")

    # Label every row with the id of its contiguous same-symbol run.
    if 'symbol' in data.columns:
        codes, _ = pd.factorize(data['symbol'])
    else:
        codes = np.zeros(n_rows, dtype=np.int64)
    run_id = np.concatenate(([0], np.cumsum(codes[1:] != codes[:-1])))

    # bad_before[k] = number of non-finite rows among rows 0..k-1, so a
    # window [i, i + seq_length) is clean iff the count does not change.
    row_ok = np.isfinite(features).all(axis=1)
    bad_before = np.concatenate(([0], np.cumsum(~row_ok)))

    # Labels use raw prices: the scaled Close is centred by the scaler, so a
    # ratio of scaled values would not be a return.
    raw_close = data['Close'].to_numpy(dtype=float)

    starts = np.arange(n_windows)
    last_rows = starts + seq_length - 1
    target_rows = starts + seq_length
    keep = (
        (bad_before[target_rows] == bad_before[starts])
        & (run_id[target_rows] == run_id[starts])
        & np.isfinite(raw_close[last_rows])
        & np.isfinite(raw_close[target_rows])
        & (raw_close[last_rows] != 0)
    )
    starts = starts[keep]
    if starts.size == 0:
        raise ValueError("No valid sequences created")

    # Gather all windows at once: (n_sequences, seq_length, n_features)
    X = features[starts[:, None] + np.arange(seq_length)]

    prev_close = raw_close[starts + seq_length - 1]
    next_close = raw_close[starts + seq_length]
    returns = (next_close - prev_close) / prev_close

    # Clip returns to prevent extreme values
    returns = np.clip(returns, -0.1, 0.1).astype(np.float32)

    # Direction labels: 1.0 for a positive return, else 0.0
    directions = (returns > 0).astype(np.float32)

    # Split train/val (first 80% of windows / remaining 20%)
    train_size = int(len(X) * 0.8)

    train_dataset = tf.data.Dataset.from_tensor_slices(
        (X[:train_size], {
            'return_prediction': returns[:train_size],
            'direction_prediction': directions[:train_size]
        })
    )

    val_dataset = tf.data.Dataset.from_tensor_slices(
        (X[train_size:], {
            'return_prediction': returns[train_size:],
            'direction_prediction': directions[train_size:]
        })
    )

    n_features = int(X.shape[2])
    print(f"Created {len(starts)} valid sequences")
    print(f"Feature dimension: {n_features}")

    return train_dataset, val_dataset, scaler, n_features

# Prepare data for training.
# Uses the default seq_length of 60, which must match the sequence length
# later passed to build_model; n_features is the per-step feature width.
train_dataset, val_dataset, scaler, n_features = prepare_sequences(data)

In [7]:
class StabilityMonitorCallback(Callback):
    """Stop training after several consecutive batches with NaN/Inf metrics.

    After every batch the reported logs are scanned for non-finite values.
    A batch with at least one such value counts as one bad batch, however
    many metrics are affected; a fully finite batch resets the counter.
    Training is stopped once ``max_nans`` bad batches occur in a row.
    """

    def __init__(self):
        super().__init__()
        self.nan_count = 0  # consecutive bad batches seen so far
        self.max_nans = 5  # Maximum number of NaN batches before stopping

    def on_batch_end(self, batch, logs=None):
        """Inspect the batch logs and update the bad-batch counter."""
        logs = logs or {}
        bad_metrics = {
            metric: value
            for metric, value in logs.items()
            if np.isnan(value) or np.isinf(value)
        }

        # Reset counter if batch was good
        if not bad_metrics:
            self.nan_count = 0
            return

        # Count the batch once, not once per affected metric; otherwise a
        # single bad batch with several metrics would exhaust the budget.
        self.nan_count += 1

        history = getattr(self.model, 'history', None)
        for metric, value in bad_metrics.items():
            print(f"❌ {metric} has {value} at batch {batch}")
            # Log recent values for debugging
            if history is not None:
                print(f"Recent values: {history.history.get(metric, [])[-5:]}")

        if self.nan_count >= self.max_nans:
            print("Too many NaN values, stopping training")
            self.model.stop_training = True

In [8]:
def custom_return_loss(y_true, y_pred):
    """Huber loss on returns, with both sides clipped to [-0.1, 0.1].

    Inputs are cast to float32 before clipping. If the resulting loss is
    NaN or infinite, a warning is printed and the constant 0.1 is returned
    instead.
    """
    lower, upper = -0.1, 0.1

    # Cast to float32, then clip targets and predictions to the same range
    target = tf.clip_by_value(tf.cast(y_true, tf.float32), lower, upper)
    estimate = tf.clip_by_value(tf.cast(y_pred, tf.float32), lower, upper)

    # Huber loss for robustness against outliers
    loss = tf.keras.losses.Huber(delta=0.1)(target, estimate)

    # Guard against a non-finite loss value
    has_nan = tf.reduce_any(tf.math.is_nan(loss))
    has_inf = tf.reduce_any(tf.math.is_inf(loss))
    if has_nan or has_inf:
        tf.print("\nWarning: NaN/Inf in return loss", loss)
        return tf.constant(0.1, dtype=tf.float32)  # Fallback value

    return loss

def custom_direction_loss(y_true, y_pred):
    """Mean binary cross-entropy on direction labels with clipped inputs.

    ``y_true`` is used as-is (cast to float32) as the 0/1 label. Predictions
    are clipped to [epsilon, 1 - epsilon] and epsilon is added again inside
    each logarithm. If the mean loss is NaN or infinite, a warning is printed
    and the constant 0.1 is returned instead.
    """
    eps = 1e-7

    labels = tf.cast(y_true, tf.float32)  # Already binary
    # Keep probabilities strictly inside (0, 1)
    probs = tf.clip_by_value(tf.cast(y_pred, tf.float32), eps, 1.0 - eps)

    # Binary cross entropy, split into its two terms
    positive_term = labels * tf.math.log(probs + eps)
    negative_term = (1 - labels) * tf.math.log(1 - probs + eps)
    loss = tf.reduce_mean(-(positive_term + negative_term))

    # Guard against a non-finite loss value
    has_nan = tf.reduce_any(tf.math.is_nan(loss))
    has_inf = tf.reduce_any(tf.math.is_inf(loss))
    if has_nan or has_inf:
        tf.print("\nWarning: NaN/Inf in direction loss", loss)
        return tf.constant(0.1, dtype=tf.float32)  # Fallback value

    return loss

In [9]:
def build_model(seq_length, n_features):
    """Assemble the two-headed LSTM + CNN + attention network.

    The input of shape ``(seq_length, n_features)`` is batch-normalized and
    fed to two branches: a pair of stacked LSTMs followed by self-attention,
    and a pair of residual Conv1D stages. The branch outputs are
    concatenated, average-pooled over time and passed through two dense
    stages. The model has two outputs, in this order: ``return_prediction``
    (linear) and ``direction_prediction`` (sigmoid). It is returned
    uncompiled.
    """
    def l2_penalty():
        # Every layer gets its own regularizer instance.
        return tf.keras.regularizers.l2(1e-5)

    def conv_layer(kernel_size):
        return Conv1D(128, kernel_size, padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2_penalty())

    def recurrent_stage(tensor):
        # LSTM -> layer norm -> dropout, keeping the time dimension
        out = LSTM(128, return_sequences=True,
                   recurrent_initializer='glorot_uniform',
                   kernel_regularizer=l2_penalty(),
                   recurrent_regularizer=l2_penalty())(tensor)
        out = LayerNormalization()(out)
        return Dropout(0.2)(out)

    def residual_stage(tensor, skip):
        # Conv -> layer norm -> ReLU, then add the skip connection
        out = conv_layer(3)(tensor)
        out = LayerNormalization()(out)
        out = Activation('relu')(out)
        return Add()([out, skip])

    def dense_stage(tensor, units, drop_rate):
        # Dense -> batch norm -> ReLU -> dropout
        out = Dense(units, kernel_initializer='he_normal',
                    kernel_regularizer=l2_penalty())(tensor)
        out = BatchNormalization()(out)
        out = Activation('relu')(out)
        return Dropout(drop_rate)(out)

    def output_head(tensor, name, activation=None):
        return Dense(1, activation=activation, name=name,
                     kernel_initializer='glorot_normal',
                     kernel_regularizer=l2_penalty())(tensor)

    # Input layer with batch normalization
    inputs = Input(shape=(seq_length, n_features))
    normalized = BatchNormalization()(inputs)

    # 1x1 convolution projecting the input to 128 channels so it can serve
    # as the skip connection of the first residual stage
    projected = conv_layer(1)(normalized)
    projected = LayerNormalization()(projected)

    # LSTM branch
    recurrent = recurrent_stage(normalized)
    recurrent = recurrent_stage(recurrent)

    # CNN branch with residual connections
    conv_first = residual_stage(normalized, projected)
    conv_second = residual_stage(conv_first, conv_first)

    # Multi-head self-attention over the LSTM branch
    attended = tf.keras.layers.MultiHeadAttention(
        num_heads=8,
        key_dim=32
    )(recurrent, recurrent)
    attended = LayerNormalization()(attended)

    # Combine branches and pool over the time axis
    merged = Concatenate()([conv_second, attended])
    pooled = tf.keras.layers.GlobalAveragePooling1D()(merged)

    # Dense trunk
    hidden = dense_stage(pooled, 256, 0.3)
    hidden = dense_stage(hidden, 128, 0.2)

    # Output heads
    return_pred = output_head(hidden, 'return_prediction')
    direction_pred = output_head(hidden, 'direction_prediction',
                                 activation='sigmoid')

    return Model(inputs=inputs, outputs=[return_pred, direction_pred])

In [None]:
# Build and compile model with stability improvements.
# The first argument (60) is the sequence length; it has to equal the
# seq_length used by prepare_sequences when the datasets were built.
model = build_model(60, n_features)

# Use a lower learning rate and gradient clipping
optimizer = Adam(learning_rate=0.0001, clipnorm=0.5)

# Loss and metric dictionaries are keyed by the names of the two output
# layers; the total loss weights direction (0.7) above return (0.3).
model.compile(
    optimizer=optimizer,
    loss={
        'return_prediction': custom_return_loss,
        'direction_prediction': custom_direction_loss
    },
    loss_weights={
        'return_prediction': 0.3,
        'direction_prediction': 0.7
    },
    metrics={
        'return_prediction': [
            tf.keras.metrics.MeanAbsoluteError(name='mae'),
            tf.keras.metrics.MeanSquaredError(name='mse')
        ],
        'direction_prediction': [
            tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.5)
        ]
    }
)

model.summary()

# Train with stability improvements.
# All validation-driven callbacks monitor the combined 'val_loss'.
callbacks = [
    # Keep only the best model (lowest val_loss) on disk
    ModelCheckpoint(
        'deep_trading_model_a100.keras',
        monitor='val_loss',
        save_best_only=True,
        mode='min',
        verbose=1
    ),
    # Stop when val_loss has not improved for 15 epochs and roll back
    EarlyStopping(
        monitor='val_loss',
        patience=15,  # Increased patience
        restore_best_weights=True,
        verbose=1
    ),
    # Halve the learning rate after 5 epochs without improvement
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=1e-6,
        verbose=1
    ),
    # Abort on repeated NaN/Inf metrics
    StabilityMonitorCallback(),
    # Write logs to a fresh, timestamped directory for each run
    TensorBoard(
        log_dir=f'./logs/{datetime.now().strftime("%Y%m%d-%H%M%S")}',
        histogram_freq=1,
        update_freq='batch'
    )
]

# Use smaller batch size
batch_size = 16  # Reduced from 32
# NOTE: this rebinds the dataset names to their batched versions, so
# re-running this cell without rebuilding the datasets batches them twice.
train_dataset = train_dataset.batch(batch_size)
val_dataset = val_dataset.batch(batch_size)

history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=50,
    callbacks=callbacks,
    verbose=1
)