# Trading Strategy ML - Google Colab Setup

This notebook sets up and runs the Multi-Factor Momentum Trading Strategy with ML Enhancement on Google Colab with GPU support.

## Features
- GPU-accelerated training
- Real-time data collection
- Advanced ML models (CNN+LSTM)
- Comprehensive backtesting
- Performance analysis


## 1. Setup and Installation


In [None]:
# Report the TensorFlow build and which GPUs (if any) it can see
import tensorflow as tf

print("TensorFlow version:", tf.__version__)
gpu_devices = tf.config.list_physical_devices('GPU')
print("GPU available:", gpu_devices)
print("CUDA available:", tf.test.is_built_with_cuda())

# Turn on memory growth for every detected GPU; a RuntimeError from
# TensorFlow is reported instead of aborting the cell
if gpu_devices:
    try:
        for gpu in gpu_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU memory growth enabled")
    except RuntimeError as e:
        print(f"GPU memory growth error: {e}")


In [None]:
# Install required packages
!pip install -q yfinance alpha-vantage pandas-datareader
!pip install -q TA-Lib
!pip install -q tensorflow-gpu
!pip install -q torch torchvision
!pip install -q scikit-learn xgboost optuna lightgbm
!pip install -q backtrader zipline-reloaded arch quantlib
!pip install -q plotly matplotlib seaborn streamlit
!pip install -q empyrical ffn pyfolio
!pip install -q python-dotenv requests tqdm joblib

print("All packages installed successfully!")


In [None]:
# Clone the repository (replace with your GitHub URL)
!git clone https://github.com/CatalinMoldova/trading-strategy-ml.git

# Change to the project directory
%cd trading-strategy-ml

# Install project requirements
!pip install -r requirements_colab.txt

print("Repository cloned and requirements installed!")


## 2. Import Libraries and Setup


In [None]:
# Import necessary libraries
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Add project src to path
sys.path.append('src')

# Import project modules
from data_pipeline.market_data_collector import MarketDataCollector
from data_pipeline.indicator_engine import IndicatorEngine
from data_pipeline.feature_engineer import FeatureEngineer
from ml_models.cnn_lstm_model import CNNLSTMModel
from ml_models.random_forest_model import RandomForestModel
from ml_models.ensemble_predictor import EnsemblePredictor
from strategy.signal_generator import SignalGenerator
from strategy.position_sizer import PositionSizer
from strategy.risk_manager import RiskManager
from backtesting.backtest_engine import BacktestEngine
from backtesting.performance_analyzer import PerformanceAnalyzer

# Confirm the imports succeeded, then echo the TensorFlow/GPU status.
# `tf` is not imported in this cell; it comes from the GPU-check cell.
print("All libraries imported successfully!")
tf_version = tf.__version__
print(f"TensorFlow version: {tf_version}")
detected_gpus = tf.config.list_physical_devices('GPU')
print(f"GPU devices: {detected_gpus}")


## 3. Data Collection and Preparation


In [None]:
# Collector object used for every download below
collector = MarketDataCollector()

# Tickers to fetch
symbols = [
    'AAPL', 'MSFT', 'GOOGL', 'AMZN',
    'TSLA', 'NVDA', 'META', 'NFLX',
]

# Fetch history per ticker (period='2y', interval='1d'). A failure for one
# ticker is reported and skipped so the remaining tickers are still collected.
print("Collecting historical data...")
data = {}
for symbol in symbols:
    try:
        df = collector.get_historical_data(symbol, period='2y', interval='1d')
        data[symbol] = df
        record_count = len(df)
        print(f"✓ {symbol}: {record_count} records")
    except Exception as e:
        print(f"✗ {symbol}: Error - {e}")

collected_count = len(data)
print(f"\nData collection complete! Collected data for {collected_count} symbols.")


## 4. Model Training with GPU


In [None]:
# Build and fit the CNN+LSTM model, then chart its training curves
print("Training CNN+LSTM model...")

# Model wrapper configuration
cnn_lstm = CNNLSTMModel(
    time_steps=60,
    n_features=20,  # Adjust based on your features
    learning_rate=0.001
)

# Construct the network and report its size
model = cnn_lstm.build_model()
param_count = model.count_params()
print(f"Model built with {param_count} parameters")

# Placeholder inputs: random noise shaped (samples, time_steps, n_features).
# This only exercises the pipeline -- swap in the real processed features
# and targets before drawing any conclusions from the curves.
X_train = np.random.randn(1000, 60, 20)  # Example data
y_train = np.random.randn(1000, 1)        # Example targets

# Fit the model, holding out 20% of the samples for validation
history = cnn_lstm.train(
    X_train, y_train,
    epochs=10,  # Reduced for demo
    batch_size=32,
    validation_split=0.2
)

print("CNN+LSTM training complete!")

# One panel per tracked metric, training vs. validation side by side
plt.figure(figsize=(12, 4))
metric_panels = (('loss', 'Loss'), ('mae', 'MAE'))
for panel_index, (metric_key, metric_label) in enumerate(metric_panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.plot(history.history[metric_key], label=f'Training {metric_label}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_label}')
    plt.title(f'Model {metric_label}')
    plt.xlabel('Epoch')
    plt.ylabel(metric_label)
    plt.legend()

plt.tight_layout()
plt.show()


## 5. Save Your Work


In [None]:
# Store the trained model in four places: the Colab disk, Google Drive,
# the GitHub repository, and (via browser download) the user's machine.
import os
import shutil
import subprocess
from datetime import datetime

# 1. Write the model to the Colab working directory
cnn_lstm.save_model('cnn_lstm_model.h5')
print("✓ Model saved locally")

# 2. Copy it to Google Drive
from google.colab import drive

drive.mount('/content/drive')

# Destination folder inside the mounted Drive
drive_folder = '/content/drive/MyDrive/Trading_Strategy_ML'
os.makedirs(drive_folder, exist_ok=True)

# The timestamp makes every saved copy uniquely named
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
drive_model_path = f'{drive_folder}/cnn_lstm_model_{timestamp}.h5'
shutil.copy('cnn_lstm_model.h5', drive_model_path)
print(f"✓ Model saved to Google Drive: {drive_model_path}")

# 3. Commit the model to the repository and push it to GitHub

# Git identity; failures here are tolerated (check=False)
git_identity = (
    ('user.email', 'your-email@example.com'),
    ('user.name', 'CatalinMoldova'),
)
for config_key, config_value in git_identity:
    subprocess.run(['git', 'config', '--global', config_key, config_value], check=False)

# Place a timestamped copy under models/ inside the checkout
os.makedirs('models', exist_ok=True)
git_model_path = f'models/cnn_lstm_model_{timestamp}.h5'
shutil.copy('cnn_lstm_model.h5', git_model_path)

# Each step must succeed (check=True); a failure raises CalledProcessError
# and stops the cell before the download step below
git_commands = (
    ['git', 'add', git_model_path],
    ['git', 'commit', '-m', f'Add trained CNN+LSTM model - {timestamp}'],
    ['git', 'push', 'origin', 'main'],
)
for git_command in git_commands:
    subprocess.run(git_command, check=True)
print(f"✓ Model pushed to GitHub: {git_model_path}")

# 4. Offer the file as a browser download
from google.colab import files

files.download('cnn_lstm_model.h5')
print("✓ Model downloaded to your local machine")

print("\n🎉 Model saved in 4 locations:")
for summary_line in (
    "1. Local Colab environment",
    "2. Google Drive (permanent)",
    "3. GitHub repository (permanent)",
    "4. Your local machine",
):
    print(summary_line)


## 6. Load Saved Models


In [None]:
# Load models from different storage locations

def _latest_h5_model(folder):
    """Return the path of the newest ``.h5`` file in *folder*, or None if there is none.

    ``os.listdir`` returns entries in arbitrary order, so the names are sorted
    first. The save cell names files ``cnn_lstm_model_<YYYYmmdd_HHMMSS>.h5``,
    which makes the lexicographically last name the most recent one.
    """
    candidates = sorted(f for f in os.listdir(folder) if f.endswith('.h5'))
    if not candidates:
        return None
    print(f"Available models: {candidates}")
    return os.path.join(folder, candidates[-1])


def load_model_from_location(location_type, model_path=None):
    """
    Load a trained model from different storage locations

    Args:
        location_type: 'local', 'drive', 'github', or 'url'
        model_path: Path to the model file (optional). For 'url' this is the
            URL to download and is required. For 'drive' and 'github' the
            newest ``.h5`` file in the default folder is used when omitted;
            for 'local' the default is 'cnn_lstm_model.h5'.

    Returns:
        The object returned by ``tf.keras.models.load_model``, or None when
        the location type is invalid, no URL was supplied, or no model file
        could be found (a message is printed in each of those cases).
    """

    if location_type == 'local':
        # Load from local Colab environment
        if model_path is None:
            model_path = 'cnn_lstm_model.h5'
        model = tf.keras.models.load_model(model_path)
        print(f"✓ Model loaded from local: {model_path}")

    elif location_type == 'drive':
        # Load from Google Drive
        from google.colab import drive
        drive.mount('/content/drive')

        if model_path is None:
            drive_folder = '/content/drive/MyDrive/Trading_Strategy_ML'
            if not os.path.isdir(drive_folder):
                print("Trading_Strategy_ML folder not found in Google Drive")
                return None
            model_path = _latest_h5_model(drive_folder)
            if model_path is None:
                print("No models found in Google Drive")
                return None

        model = tf.keras.models.load_model(model_path)
        print(f"✓ Model loaded from Google Drive: {model_path}")

    elif location_type == 'github':
        # Load from the models/ directory of the cloned repository
        if model_path is None:
            models_dir = 'models'
            if not os.path.isdir(models_dir):
                print("Models directory not found")
                return None
            model_path = _latest_h5_model(models_dir)
            if model_path is None:
                print("No models found in GitHub repository")
                return None

        model = tf.keras.models.load_model(model_path)
        print(f"✓ Model loaded from GitHub: {model_path}")

    elif location_type == 'url':
        # Load from URL (if you have a direct link)
        if model_path is None:
            print("Please provide a URL to the model file")
            return None

        import urllib.request
        local_path = 'downloaded_model.h5'
        # NOTE(review): the downloaded file is handed to Keras unchecked;
        # only pass URLs from sources you trust.
        urllib.request.urlretrieve(model_path, local_path)
        model = tf.keras.models.load_model(local_path)
        print(f"✓ Model loaded from URL: {model_path}")

    else:
        print("Invalid location_type. Use 'local', 'drive', 'github', or 'url'")
        return None

    return model

# Example: fetch the most recent model stored in Google Drive
print("Loading model from Google Drive...")
loaded_model = load_model_from_location('drive')

# The loader returns None when nothing could be loaded
if loaded_model is None:
    print("No model found. Train a model first!")
else:
    print("Model summary:")
    loaded_model.summary()


## 7. Model Management and Versioning


In [None]:
# Model management and versioning utilities

def list_saved_models():
    """Print the saved ``.h5`` models found in each storage location.

    Three places are checked: ``cnn_lstm_model.h5`` in the working directory,
    the ``Trading_Strategy_ML`` folder on the mounted Google Drive, and the
    ``models/`` directory of the repository checkout. Any error raised while
    accessing Google Drive is printed rather than propagated. Returns None.
    """

    print("🔍 Searching for saved models...")

    # Working-directory copy
    if os.path.exists('cnn_lstm_model.h5'):
        print("📁 Local: cnn_lstm_model.h5")

    # Google Drive copies
    try:
        from google.colab import drive
        drive.mount('/content/drive')
        drive_folder = '/content/drive/MyDrive/Trading_Strategy_ML'
        if not os.path.exists(drive_folder):
            print("☁️ Google Drive: Trading_Strategy_ML folder not found")
        else:
            drive_models = [name for name in os.listdir(drive_folder) if name.endswith('.h5')]
            if not drive_models:
                print("☁️ Google Drive: No models found")
            else:
                print(f"☁️ Google Drive ({len(drive_models)} models):")
                for name in sorted(drive_models):
                    print(f"   - {name}")
    except Exception as e:
        print(f"☁️ Google Drive: Error accessing - {e}")

    # Copies tracked in the repository checkout
    if not os.path.exists('models'):
        print("🐙 GitHub: models/ directory not found")
        return

    git_models = [name for name in os.listdir('models') if name.endswith('.h5')]
    if not git_models:
        print("🐙 GitHub: No models found in models/ directory")
        return

    print(f"🐙 GitHub ({len(git_models)} models):")
    for name in sorted(git_models):
        print(f"   - {name}")

def create_model_backup():
    """Write a JSON metadata record for the current model.

    Only metadata is written: a file named ``model_metadata_<timestamp>.json``
    is created in the working directory containing the timestamp, the
    TensorFlow version, the model type, an ISO-formatted date and a fixed
    description. No model file is copied by this function. Returns None.
    """

    # Local imports keep this cell runnable even when the "Save Your Work"
    # cell (which imports datetime) has not been executed in this session.
    import json
    from datetime import datetime

    # Read the clock once so the file name, 'timestamp' and 'training_date'
    # all describe the same instant.
    now = datetime.now()
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    metadata_file = f'model_metadata_{timestamp}.json'

    # Create backup with metadata
    backup_info = {
        'timestamp': timestamp,
        'tensorflow_version': tf.__version__,
        'model_type': 'CNN+LSTM',
        # NOTE(review): this is the time the metadata was written, not
        # necessarily when the model was trained.
        'training_date': now.isoformat(),
        'description': 'Trading Strategy ML Model'
    }

    # Save metadata
    with open(metadata_file, 'w', encoding='utf-8') as f:
        json.dump(backup_info, f, indent=2)

    print(f"📋 Model metadata saved: {metadata_file}")
    print(f"📊 TensorFlow version: {tf.__version__}")
    print(f"🕒 Backup timestamp: {timestamp}")

def _prune_h5_models(folder, keep_last_n, kept_where, location_label):
    """Delete all but the *keep_last_n* newest ``.h5`` files in *folder*.

    "Newest" means last in name order; the save cell embeds a
    ``YYYYmmdd_HHMMSS`` timestamp in each file name, so name order matches
    creation order for those files. *kept_where* and *location_label* only
    customise the printed messages.
    """
    models = sorted(
        (f for f in os.listdir(folder) if f.endswith('.h5')),
        reverse=True,  # newest first
    )
    stale = models[keep_last_n:]
    if not stale:
        print(f"✅ {location_label} has {len(models)} models (≤ {keep_last_n}, no cleanup needed)")
        return

    for name in stale:
        os.remove(os.path.join(folder, name))
        print(f"🗑️ Deleted old model: {name}")
    print(f"✅ Kept {keep_last_n} latest models {kept_where}")


def cleanup_old_models(keep_last_n=5):
    """Clean up old models, keeping only the last N versions.

    Prunes ``.h5`` files in the ``Trading_Strategy_ML`` folder on Google Drive
    and in the local ``models/`` directory. Any error while handling Google
    Drive is printed and does not prevent the local cleanup.

    Args:
        keep_last_n: Number of newest models to keep in each location.
            ``0`` deletes every ``.h5`` file found.

    Raises:
        ValueError: If *keep_last_n* is negative. A negative value would
            otherwise be treated as a from-the-end slice index and delete an
            unintended subset of the files.
    """
    if keep_last_n < 0:
        raise ValueError(f"keep_last_n must be >= 0, got {keep_last_n}")

    print(f"🧹 Cleaning up old models (keeping last {keep_last_n})...")

    # Clean Google Drive
    try:
        from google.colab import drive
        drive.mount('/content/drive')
        drive_folder = '/content/drive/MyDrive/Trading_Strategy_ML'

        if os.path.exists(drive_folder):
            _prune_h5_models(drive_folder, keep_last_n, 'in Google Drive', 'Google Drive')
    except Exception as e:
        print(f"❌ Error cleaning Google Drive: {e}")

    # Clean local models directory
    if os.path.exists('models'):
        _prune_h5_models('models', keep_last_n, 'locally', 'Local models directory')

# Dashboard: list the saved models, write a metadata record, then prune old copies
header_rule = "=" * 50
print(header_rule)
print("📊 MODEL MANAGEMENT DASHBOARD")
print(header_rule)

section_rule = "\n" + "=" * 30

list_saved_models()
print(section_rule)
create_model_backup()
print(section_rule)
cleanup_old_models(keep_last_n=3)
