# Part 2: LSTM Training (Top 10)
Trains an enhanced LSTM on a combined dataset of 10 cryptocurrencies to predict whether each asset's closing price will be higher 24 steps ahead.

### CELL 1: Setup

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import tensorflow as tf
print(f"ðŸ”¥ GPU Available: {tf.config.list_physical_devices('GPU')}")
print(f"   TensorFlow version: {tf.__version__}")

!pip install -q pandas numpy scikit-learn tensorflow

### CELL 2: Load Master Dataset

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Read the combined dataset (all 10 cryptos) from the mounted Drive
df = pd.read_csv('/content/drive/MyDrive/crypto_bot/data/master_top10_2y.csv')

# Summary of what was loaded: size, column count, assets and time span
print(f"ðŸ“Š Loaded master dataset:")

row_count = len(df)
print(f"   Total rows: {row_count:,}")

column_count = len(df.columns)
print(f"   Features: {column_count}")

symbol_list = ', '.join(df['symbol'].unique())
print(f"   Symbols: {symbol_list}")

timestamps = df['timestamp']
print(f"   Date range: {timestamps.min()} to {timestamps.max()}")

### CELL 3: Prepare Sequences (All Symbols)

In [None]:
from tqdm.notebook import tqdm

def create_sequences_multiasset(df, lookback=60, forecast=24):
    """
    Create LSTM sequences from multiple assets.

    For every symbol in ``df['symbol']`` the feature columns are min-max
    scaled independently and sliced into overlapping windows of ``lookback``
    consecutive rows. The window covering rows ``i - lookback .. i - 1`` is
    labelled 1 when the raw ``close`` at row ``i + forecast`` is strictly
    greater than the raw ``close`` at row ``i``, otherwise 0.

    Args:
        df: DataFrame with a 'symbol' column and every column named in
            ``feature_cols``. Rows of a symbol are consumed in the order they
            appear, i.e. row order is treated as time order
            (NOTE(review): confirm the CSV is sorted per symbol).
        lookback: Number of rows in each input window (must be >= 1).
        forecast: Number of rows between the reference close and the future
            close used for the label (must be >= 0).

    Returns:
        Tuple ``(X, y)``: ``X`` is a float array of shape
        ``(samples, lookback, 16)`` and ``y`` an int64 array of shape
        ``(samples,)``. Samples are grouped by symbol in order of first
        appearance; a symbol with ``lookback + forecast`` rows or fewer
        contributes nothing. If no symbol yields a sample, both arrays are
        empty but keep these shapes.

    Raises:
        ValueError: If ``lookback < 1`` or ``forecast < 0``.
    """
    if lookback < 1:
        raise ValueError("lookback must be >= 1")
    if forecast < 0:
        raise ValueError("forecast must be >= 0")

    feature_cols = [
        'close', 'volume', 'rsi', 'macd', 'macd_signal',
        'sma_20', 'sma_50', 'bb_upper', 'bb_lower',
        'atr_percent', 'volume_ratio', 'momentum',
        'stoch_k', 'adx', 'williams_r', 'cci'
    ]

    X_parts = []
    y_parts = []

    # Process each symbol separately so windows never straddle two assets
    for symbol in tqdm(df['symbol'].unique(), desc="Processing symbols"):
        symbol_df = df[df['symbol'] == symbol]

        # Scale features per symbol.
        # NOTE(review): the scaler is fit on the symbol's full history, so
        # every window is scaled with min/max values taken from later rows
        # as well; fit on the training period only if that matters.
        scaler = MinMaxScaler()
        scaled_data = scaler.fit_transform(symbol_df[feature_cols])

        # One sample per i in range(lookback, len - forecast)
        n_samples = len(scaled_data) - lookback - forecast
        if n_samples <= 0:
            continue

        # windows[k] is a zero-copy view of scaled_data[k:k + lookback], laid
        # out as (features, lookback); transpose to (lookback, features).
        windows = np.lib.stride_tricks.sliding_window_view(
            scaled_data, lookback, axis=0
        )
        X_parts.append(windows[:n_samples].transpose(0, 2, 1))

        # Target: binary (close higher `forecast` rows later or not), taken
        # from unscaled prices. Pulling the column out once avoids building a
        # pandas row object for every sample.
        # NOTE(review): the reference close is row i, one row past the end of
        # the window (i - 1) - confirm this alignment is intended.
        closes = symbol_df['close'].to_numpy()
        current_price = closes[lookback:lookback + n_samples]
        future_price = closes[lookback + forecast:lookback + forecast + n_samples]
        y_parts.append((future_price > current_price).astype(np.int64))

    if not X_parts:
        # Keep the rank/feature axis stable even when there is nothing to emit
        return (
            np.empty((0, lookback, len(feature_cols))),
            np.empty((0,), dtype=np.int64),
        )

    return np.concatenate(X_parts), np.concatenate(y_parts)

# Window geometry, shared by sequence creation and the split below
LOOKBACK = 60
FORECAST = 24

print("ðŸ”„ Creating sequences from all 10 cryptocurrencies...")
print("   This will take 5-10 minutes...")

X, y = create_sequences_multiasset(df, lookback=LOOKBACK, forecast=FORECAST)

print(f"\nâœ… Sequences created:")
print(f"   X shape: {X.shape}")  # (samples, 60, 16)
print(f"   y shape: {y.shape}")
print(f"   Positive rate: {y.mean():.1%}")

# Train/val/test split
#
# Neighbouring samples share all but one row of their windows, so a random
# split would place near-duplicates of training samples into the validation
# and test sets and inflate every metric. Instead each symbol's samples are
# split in order (first 70% train, next 15% val, last 15% test), and
# LOOKBACK + FORECAST samples are dropped at each boundary so no row is
# shared between two sets.
#
# create_sequences_multiasset emits, for each symbol in order of first
# appearance, one sample per row index in range(lookback, n_rows - forecast);
# the bookkeeping below mirrors that layout and is checked against len(X).
purge = LOOKBACK + FORECAST
train_parts, val_parts, test_parts = [], [], []
offset = 0
for symbol in df['symbol'].unique():
    n_rows = int((df['symbol'] == symbol).sum())
    n_seq = max(n_rows - LOOKBACK - FORECAST, 0)

    train_end = int(n_seq * 0.70)
    val_end = int(n_seq * 0.85)
    val_start = min(train_end + purge, val_end)
    test_start = min(val_end + purge, n_seq)

    train_parts.append(np.arange(offset, offset + train_end))
    val_parts.append(np.arange(offset + val_start, offset + val_end))
    test_parts.append(np.arange(offset + test_start, offset + n_seq))
    offset += n_seq

if offset != len(X):
    raise RuntimeError(
        "Per-symbol sequence counts do not add up to len(X); "
        "the split no longer matches create_sequences_multiasset."
    )

train_idx = np.concatenate(train_parts)
val_idx = np.concatenate(val_parts)
test_idx = np.concatenate(test_parts)

X_train, y_train = X[train_idx], y[train_idx]
X_val, y_val = X[val_idx], y[val_idx]
X_test, y_test = X[test_idx], y[test_idx]

# Percentages are relative to all samples; they sum to slightly less than
# 100% because of the samples purged at the split boundaries.
print(f"\nðŸ“Š Data splits:")
print(f"   Train: {len(X_train):,} ({len(X_train)/len(X)*100:.1f}%)")
print(f"   Val: {len(X_val):,} ({len(X_val)/len(X)*100:.1f}%)")
print(f"   Test: {len(X_test):,} ({len(X_test)/len(X)*100:.1f}%)")

### CELL 4: Build Enhanced LSTM Model

In [None]:
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.mixed_precision import Policy

# Mixed precision: make 'mixed_float16' the global dtype policy for all
# layers created after this point
policy = Policy(name='mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy=policy)

def build_lstm_top10(input_shape):
    """Build and compile the enhanced LSTM for multi-asset training.

    The network stacks three LSTM layers (256 -> 128 -> 64 units) with
    dropout between them and batch normalisation after the first two,
    followed by a dense head (128 -> 64 -> 32, ReLU) and a single sigmoid
    output unit for binary classification.

    Args:
        input_shape: Shape of one input sequence without the batch axis,
            i.e. ``(timesteps, features)``.

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 0.0005) and
        binary cross-entropy, reporting accuracy, AUC (named 'auc'),
        precision and recall.
    """

    model = models.Sequential([
        # Declare the input explicitly instead of passing `input_shape` to
        # the first LSTM, which Keras discourages for Sequential models.
        layers.Input(shape=input_shape),

        # Deeper architecture for more complex patterns
        layers.LSTM(256, return_sequences=True),
        layers.Dropout(0.3),
        layers.BatchNormalization(),

        layers.LSTM(128, return_sequences=True),
        layers.Dropout(0.3),
        layers.BatchNormalization(),

        # Last recurrent layer returns only its final state
        layers.LSTM(64),
        layers.Dropout(0.3),

        layers.Dense(128, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(32, activation='relu'),

        # Output is pinned to float32 regardless of the global dtype policy
        layers.Dense(1, activation='sigmoid', dtype='float32')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
        loss='binary_crossentropy',
        # Explicit names keep the history/log keys stable when this function
        # is called more than once in a session (unnamed metrics would be
        # auto-numbered, e.g. 'precision_1').
        metrics=[
            'accuracy',
            tf.keras.metrics.AUC(name='auc'),
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
        ]
    )

    return model

# Everything after the batch axis of X_train: (timesteps, features)
sequence_shape = tuple(X_train.shape[1:3])
model = build_lstm_top10(input_shape=sequence_shape)
model.summary()

### CELL 5: Train LSTM Model

In [None]:
from datetime import datetime

checkpoint_path = '/content/drive/MyDrive/crypto_bot/models/lstm_top10_best.h5'

# Stop after 20 epochs without a better validation AUC and restore the
# weights of the best epoch
early_stopping = callbacks.EarlyStopping(
    monitor='val_auc',
    mode='max',
    patience=20,
    restore_best_weights=True
)

# Write the model to Drive only when validation AUC improves
best_checkpoint = callbacks.ModelCheckpoint(
    checkpoint_path,
    monitor='val_auc',
    mode='max',
    save_best_only=True
)

# Halve the learning rate after 7 epochs without a lower validation loss,
# down to a floor of 1e-7
lr_on_plateau = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=7,
    min_lr=1e-7
)

model_callbacks = [early_stopping, best_checkpoint, lr_on_plateau]

print("ðŸš€ Training LSTM on Top 10 Cryptocurrencies...")
print("   Training set: {:,} samples".format(len(X_train)))
print("   This will take 2-3 hours on GPU...\n")

history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=512,  # Large batch for efficiency
    epochs=120,
    verbose=1,
    callbacks=model_callbacks,
    validation_data=(X_val, y_val)
)

### CELL 6: Evaluate & Save

In [None]:
# Evaluate on test set; the result list is the loss followed by the metrics
# in the order they were passed to compile()
results = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy, test_auc, test_precision, test_recall = results[:5]

banner = "="*60
print("\n" + banner)
print("ðŸ“Š LSTM MODEL RESULTS (TOP 10 CRYPTOS)")
print(banner)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.2%}")
print(f"Test AUC: {test_auc:.4f}")
print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall: {test_recall:.4f}")

# Persist the model as it stands after training, next to the best checkpoint
final_path = '/content/drive/MyDrive/crypto_bot/models/lstm_top10_final.h5'
model.save(final_path)

print(f"\nâœ… Model saved to: {final_path}")
print("âœ… LSTM training complete!")
print("\nðŸš€ Next: Run notebook 03_train_xgboost_top10.ipynb")