# Trading Strategy ML - Google Colab Setup

This notebook sets up and runs the Multi-Factor Momentum Trading Strategy with ML Enhancement on Google Colab with GPU support.

## Features
- GPU-accelerated training
- Real-time data collection
- Advanced ML models (CNN+LSTM)
- Comprehensive backtesting
- Performance analysis


## 1. Setup and Installation


In [None]:
# Report the TensorFlow build and the accelerators it can see
import tensorflow as tf

print("TensorFlow version:", tf.__version__)
print("GPU available:", tf.config.list_physical_devices('GPU'))
print("CUDA available:", tf.test.is_built_with_cuda())

# Switch every visible GPU to on-demand memory allocation. A RuntimeError from
# TensorFlow is reported rather than allowed to abort the cell.
visible_gpus = tf.config.list_physical_devices('GPU')
if visible_gpus:
    try:
        for device in visible_gpus:
            tf.config.experimental.set_memory_growth(device, True)
    except RuntimeError as e:
        print(f"GPU memory growth error: {e}")
    else:
        print("GPU memory growth enabled")


In [None]:
# Install required packages with error handling and alternatives
import subprocess
import sys

def install_package(package, alternative=None):
    """Pip-install ``package``, optionally retrying with ``alternative``.

    Both attempts run ``pip install -q`` through the current interpreter
    (``sys.executable -m pip``) and print their progress.

    Args:
        package: Requirement specifier to install first.
        alternative: Requirement specifier to try if the first attempt
            fails; ``None`` (or any falsy value) disables the retry.

    Returns:
        ``True`` if either install command exited successfully, else ``False``.
    """
    pip_install = [sys.executable, "-m", "pip", "install", "-q"]

    try:
        print(f"Installing {package}...")
        subprocess.check_call(pip_install + [package])
    except subprocess.CalledProcessError as e:
        print(f"✗ Failed to install {package}: {e}")
    else:
        print(f"✓ {package} installed successfully")
        return True

    # The primary requirement failed; without a fallback there is nothing left to try.
    if not alternative:
        return False

    try:
        print(f"Trying alternative: {alternative}")
        subprocess.check_call(pip_install + [alternative])
    except subprocess.CalledProcessError as e2:
        print(f"✗ Alternative {alternative} also failed: {e2}")
        return False

    print(f"✓ {alternative} installed successfully")
    return True

# Requirement lists consumed by the install loops further down this cell.
# `core_packages` holds plain requirement strings; every other list holds
# (requirement, fallback_requirement_or_None) pairs for install_package().

# Core packages (usually work fine)
core_packages = [
    "pandas>=1.5.0",
    "numpy>=1.21.0",
    "scipy>=1.9.0",
    "matplotlib>=3.5.0",
    "seaborn>=0.11.0",
    "plotly>=5.10.0",
    "requests>=2.28.0",
    "tqdm>=4.64.0",
    "joblib>=1.1.0",
    "python-dotenv>=0.19.0"
]

# Financial data packages (no fallbacks)
financial_packages = [
    ("yfinance>=0.2.0", None),
    ("alpha-vantage>=2.3.0", None),
    ("pandas-datareader>=0.10.0", None)
]

# Technical analysis (problematic package): the second entry is tried when
# the first requirement fails to install.
ta_packages = [
    ("TA-Lib>=0.4.25", "talib-binary>=0.4.19")
]

# ML packages
# NOTE(review): `tensorflow-gpu` is believed to be deprecated upstream in
# favour of plain `tensorflow` - confirm the fallback is still installable.
ml_packages = [
    ("tensorflow>=2.10.0", "tensorflow-gpu>=2.10.0"),
    ("torch>=1.12.0", None),
    ("torchvision>=0.13.0", None),
    ("scikit-learn>=1.1.0", None),
    ("xgboost>=1.6.0", None),
    ("optuna>=3.0.0", None),
    ("lightgbm>=3.3.0", None)
]

# Financial analysis packages
analysis_packages = [
    ("backtrader>=1.9.76", None),
    ("arch>=5.3.0", None),
    ("empyrical>=0.5.5", None),
    ("ffn>=0.3.7", None)
]

# UI packages
ui_packages = [
    ("streamlit>=1.12.0", None),
    ("dash>=2.6.0", None)
]

print("🚀 Starting package installation...")
print("=" * 50)

# Core packages carry no fallback, so each entry is a plain requirement string.
print("\n📦 Installing core packages...")
for package in core_packages:
    install_package(package)

# Every remaining group is a list of (requirement, fallback-or-None) pairs.
print("\n💰 Installing financial data packages...")
for package, alt in financial_packages:
    install_package(package, alt)

# TA-Lib gets special handling: stop at the first entry that installs.
print("\n📊 Installing technical analysis packages...")
ta_success = any(install_package(package, alt) for package, alt in ta_packages)

if not ta_success:
    print("⚠️ TA-Lib installation failed. Using alternative approach...")
    # Last resort: retry with an unpinned requirement. This is still pip, not
    # conda, and it runs through the kernel's own interpreter so the package
    # lands in the environment this notebook imports from.
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "TA-Lib"])
    except (subprocess.CalledProcessError, OSError):
        # Only installer failures are treated as "optional dependency missing";
        # KeyboardInterrupt and programming errors propagate.
        print("⚠️ TA-Lib still failed. Some technical indicators may not work.")
    else:
        print("✓ TA-Lib installed via alternative method")
        ta_success = True

for banner, package_group in (
    ("\n🤖 Installing machine learning packages...", ml_packages),
    ("\n📈 Installing financial analysis packages...", analysis_packages),
    ("\n🖥️ Installing UI packages...", ui_packages),
):
    print(banner)
    for package, alt in package_group:
        install_package(package, alt)

print("\n" + "=" * 50)
print("✅ Package installation complete!")
print("=" * 50)

# Test critical imports
print("\n🧪 Testing critical imports...")
try:
    import pandas as pd
    import numpy as np
    import tensorflow as tf
    import sklearn
    import yfinance as yf
    print("✓ Core packages imported successfully")
except ImportError as e:
    print(f"⚠️ Some packages failed to import: {e}")

# Check GPU availability. TensorFlow is imported again here because the import
# test above may have stopped before binding `tf`; without this guard the
# status lines would raise NameError instead of reporting the problem.
print(f"\n🎮 GPU Status:")
try:
    import tensorflow as tf
except ImportError as e:
    print(f"⚠️ TensorFlow is not importable, skipping GPU check: {e}")
else:
    print(f"TensorFlow version: {tf.__version__}")
    print(f"GPU available: {tf.config.list_physical_devices('GPU')}")
    print(f"CUDA available: {tf.test.is_built_with_cuda()}")


## Alternative: Simplified Installation (if above fails)


In [None]:
# Simplified installation - run this if the above cell fails
# This installs only the essential packages needed to run the trading strategy
import subprocess
import sys

print("🔧 Installing essential packages only...")

# Essential packages that usually work in Colab
essential_packages = [
    "pandas",
    "numpy",
    "matplotlib",
    "seaborn",
    "plotly",
    "yfinance",
    "tensorflow",
    "scikit-learn",
    "requests",
    "tqdm"
]

for package in essential_packages:
    print(f"Installing {package}...")
    # pip is run through subprocess rather than the `!pip` shell escape: a
    # failing shell escape does not raise, so the failure branch could never
    # run and every package was reported as installed.
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", package])
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"✗ {package} failed: {e}")
    else:
        print(f"✓ {package}")

print("\n✅ Essential packages installation complete!")

# Test imports
try:
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    import seaborn as sns
    import plotly.graph_objects as go
    import yfinance as yf
    import tensorflow as tf
    import sklearn
    print("\n🎉 All essential packages imported successfully!")
    print(f"TensorFlow version: {tf.__version__}")
    print(f"GPU available: {tf.config.list_physical_devices('GPU')}")
except ImportError as e:
    print(f"\n⚠️ Some packages failed to import: {e}")
    print("You may need to restart the runtime and try again.")


In [None]:
# Fetch the project sources and install its Colab requirement set.
# The `!` and `%` lines are IPython shell/magic commands, so this cell only
# runs inside a notebook kernel (not as a plain Python script).

# Clone the repository (replace with your GitHub URL)
!git clone https://github.com/CatalinMoldova/trading-strategy-ml.git

# Change to the project directory
%cd trading-strategy-ml

# Install project requirements
!pip install -r requirements_colab.txt

# NOTE: this message is printed unconditionally; nothing above checks whether
# the clone or the install actually succeeded.
print("Repository cloned and requirements installed!")


## 2. Import Libraries and Setup


In [None]:
# Import necessary libraries
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Add project src to path
sys.path.append('src')

# Import project modules with correct class names and error handling
try:
    from data_pipeline.market_data_collector import MarketDataCollector
    from data_pipeline.indicator_engine import TechnicalIndicatorEngine
    from data_pipeline.feature_engineer import FeatureEngineer
    from ml_models.cnn_lstm_model import CNNLSTMModel
    from ml_models.random_forest_model import RandomForestModel
    from ml_models.ensemble_predictor import EnsemblePredictor
    from strategy.signal_generator import MultiFactorSignalGenerator
    from strategy.position_sizer import PositionSizer
    from strategy.risk_manager import RiskManager
    from backtesting.backtest_engine import BacktestEngine
    from backtesting.performance_analyzer import PerformanceAnalyzer
    print("✅ All project modules imported successfully!")
    
except ImportError as e:
    print(f"⚠️ Some project modules failed to import: {e}")
    print("Using improved implementations instead...")
    
    # ============================================================================
    # IMPROVED IMPLEMENTATIONS WITH PROPER TRADING STRATEGY, RISK MANAGEMENT, 
    # FEATURE ENGINEERING, BACKTESTING, AND MARKET REGIME DETECTION
    # ============================================================================
    
    class MarketDataCollector:
        """Fallback price-history source backed by yfinance."""

        def get_historical_data(self, symbol, period='2y', interval='1d'):
            """Fetch price history for ``symbol`` via ``yfinance.Ticker.history``.

            Args:
                symbol: Ticker symbol handed to ``yfinance.Ticker``.
                period: Look-back period forwarded to ``history``.
                interval: Bar interval forwarded to ``history``.

            Returns:
                The DataFrame returned by yfinance, or ``None`` when it is
                empty or when any exception is raised while fetching (a
                message is printed in both cases).
            """
            import yfinance as yf

            try:
                history = yf.Ticker(symbol).history(period=period, interval=interval)
                if not history.empty:
                    return history
                print(f"⚠️ No data for {symbol}")
            except Exception as e:
                print(f"❌ Error fetching data for {symbol}: {e}")
            return None
    
    class TechnicalIndicatorEngine:
        """Adds moving-average, oscillator, band, volume and candlestick columns.

        The input frame is expected to carry ``Open``, ``High``, ``Low``,
        ``Close`` and ``Volume`` columns; those are the only columns read.
        """

        def calculate_all_indicators(self, df):
            """Return a copy of ``df`` extended with every indicator column.

            The caller's frame is left untouched. Rows inside each rolling
            warm-up window hold NaN for the corresponding indicator.
            """
            df = df.copy()
            close = df['Close']

            # Simple and exponential moving averages of the close
            for window in (5, 10, 20, 50):
                df[f'SMA_{window}'] = close.rolling(window=window).mean()
            for span in (12, 26):
                df[f'EMA_{span}'] = close.ewm(span=span).mean()

            # RSI at two look-back lengths
            df['RSI'] = self._calculate_rsi(close, 14)
            df['RSI_6'] = self._calculate_rsi(close, 6)

            # MACD line, its 9-span signal line and their difference
            macd_line = df['EMA_12'] - df['EMA_26']
            macd_signal = macd_line.ewm(span=9).mean()
            df['MACD'] = macd_line
            df['MACD_Signal'] = macd_signal
            df['MACD_Histogram'] = macd_line - macd_signal

            # Bollinger Bands: 20-bar mean +/- two rolling standard deviations
            window_20 = close.rolling(window=20)
            band_offset = window_20.std() * 2
            df['BB_Middle'] = window_20.mean()
            df['BB_Upper'] = df['BB_Middle'] + band_offset
            df['BB_Lower'] = df['BB_Middle'] - band_offset
            band_span = df['BB_Upper'] - df['BB_Lower']
            df['BB_Width'] = band_span / df['BB_Middle']
            df['BB_Position'] = (close - df['BB_Lower']) / band_span

            # Stochastic oscillator (%K and its 3-bar mean %D)
            df['Stoch_K'] = self._calculate_stochastic(df, 14)
            df['Stoch_D'] = df['Stoch_K'].rolling(window=3).mean()

            # Williams %R and Average True Range
            df['Williams_R'] = self._calculate_williams_r(df, 14)
            df['ATR'] = self._calculate_atr(df, 14)

            # Volume relative to its own 20-bar average
            df['Volume_SMA'] = df['Volume'].rolling(window=20).mean()
            df['Volume_Ratio'] = df['Volume'] / df['Volume_SMA']

            # Candlestick pattern flags (boolean columns)
            bar_range = df['High'] - df['Low']
            df['Doji'] = (close - df['Open']).abs() <= bar_range * 0.1
            df['Hammer'] = self._detect_hammer(df)
            df['Shooting_Star'] = self._detect_shooting_star(df)

            return df

        def _calculate_rsi(self, prices, period=14):
            """RSI from simple rolling means of gains and losses over ``period``."""
            change = prices.diff()
            # `where(..., 0)` also zeroes the leading NaN from diff(), so the
            # first full window already yields a value.
            avg_gain = change.where(change > 0, 0).rolling(window=period).mean()
            avg_loss = (-change.where(change < 0, 0)).rolling(window=period).mean()
            relative_strength = avg_gain / avg_loss
            return 100 - (100 / (1 + relative_strength))

        def _rolling_extremes(self, df, period):
            """Return (lowest Low, highest High) over a rolling ``period`` window."""
            floor = df['Low'].rolling(window=period).min()
            ceiling = df['High'].rolling(window=period).max()
            return floor, ceiling

        def _calculate_stochastic(self, df, period=14):
            """Stochastic %K: where the close sits in the rolling range, 0..100."""
            floor, ceiling = self._rolling_extremes(df, period)
            return 100 * (df['Close'] - floor) / (ceiling - floor)

        def _calculate_williams_r(self, df, period=14):
            """Williams %R: distance of the close from the rolling high, -100..0."""
            floor, ceiling = self._rolling_extremes(df, period)
            return -100 * (ceiling - df['Close']) / (ceiling - floor)

        def _calculate_atr(self, df, period=14):
            """Average True Range: rolling mean of the per-bar true range."""
            previous_close = df['Close'].shift()
            intrabar = df['High'] - df['Low']
            gap_up = np.abs(df['High'] - previous_close)
            gap_down = np.abs(df['Low'] - previous_close)

            # np.maximum propagates NaN, so the first bar (which has no
            # previous close) has no true range.
            true_range = np.maximum(intrabar, np.maximum(gap_up, gap_down))
            return true_range.rolling(window=period).mean()

        def _candle_parts(self, df):
            """Return (body, lower shadow, upper shadow) sizes for every bar."""
            body_edges = df[['Open', 'Close']]
            body = abs(df['Close'] - df['Open'])
            lower_shadow = body_edges.min(axis=1) - df['Low']
            upper_shadow = df['High'] - body_edges.max(axis=1)
            return body, lower_shadow, upper_shadow

        def _detect_hammer(self, df):
            """Flag bars with a long lower shadow and a short upper shadow."""
            body, lower_shadow, upper_shadow = self._candle_parts(df)
            return (lower_shadow > 2 * body) & (upper_shadow < body)

        def _detect_shooting_star(self, df):
            """Flag bars with a long upper shadow and a short lower shadow."""
            body, lower_shadow, upper_shadow = self._candle_parts(df)
            return (upper_shadow > 2 * body) & (lower_shadow < body)
    
    class FeatureEngineer:
        """Derives return, volatility, momentum, trend and regime features.

        Besides the raw price/volume columns, ``create_features`` reads
        ``SMA_20``, ``SMA_50``, ``Volume_Ratio``, ``RSI``, ``MACD``,
        ``MACD_Signal`` and ``BB_Width``, so those columns must already be
        present on the input frame.
        """

        def create_features(self, df):
            """Return a copy of ``df`` with the engineered feature columns appended."""
            df = df.copy()
            close = df['Close']
            open_ = df['Open']

            # Price-based features
            df['Returns'] = close.pct_change()
            df['Log_Returns'] = np.log(close / close.shift(1))
            df['Price_Ratio'] = close / open_
            df['High_Low_Ratio'] = df['High'] / df['Low']
            df['Close_Open_Ratio'] = close / open_

            # Rolling volatility of simple returns, short vs. long window
            for window in (5, 20):
                df[f'Volatility_{window}'] = df['Returns'].rolling(window=window).std()
            df['Volatility_Ratio'] = df['Volatility_5'] / df['Volatility_20']

            # Momentum: relative price change over several horizons
            for horizon in (5, 10, 20):
                df[f'Momentum_{horizon}'] = close / close.shift(horizon) - 1

            # Trend: spread between the averages and price distance from each
            sma_20 = df['SMA_20']
            sma_50 = df['SMA_50']
            df['Trend_Strength'] = (sma_20 - sma_50) / sma_50
            df['Price_vs_SMA20'] = (close - sma_20) / sma_20
            df['Price_vs_SMA50'] = (close - sma_50) / sma_50

            # Regime labels need Trend_Strength and Volatility_20 from above
            df = self._detect_market_regime(df)

            # Volume features
            df['Volume_Change'] = df['Volume'].pct_change()
            df['Price_Volume_Trend'] = df['Returns'] * df['Volume_Ratio']

            # Indicator threshold flags (0/1)
            rsi = df['RSI']
            df['RSI_Overbought'] = (rsi > 70).astype(int)
            df['RSI_Oversold'] = (rsi < 30).astype(int)
            macd_gap = (df['MACD'] > df['MACD_Signal'], df['MACD'] < df['MACD_Signal'])
            df['MACD_Bullish'] = macd_gap[0].astype(int)
            df['MACD_Bearish'] = macd_gap[1].astype(int)

            # Band width relative to its own trailing 20-bar distribution
            band_width = df['BB_Width']
            trailing_width = band_width.rolling(20)
            df['BB_Squeeze'] = (band_width < trailing_width.quantile(0.2)).astype(int)
            df['BB_Expansion'] = (band_width > trailing_width.quantile(0.8)).astype(int)

            # Lagged copies of returns and relative volume
            for lag in (1, 2, 3, 5):
                df[f'Returns_Lag_{lag}'] = df['Returns'].shift(lag)
                df[f'Volume_Ratio_Lag_{lag}'] = df['Volume_Ratio'].shift(lag)

            return df

        def _detect_market_regime(self, df):
            """Label each row Bull / Bear / High_Vol / Sideways and one-hot encode it.

            Adds ``Market_Regime`` plus four ``Regime_*`` indicator columns to
            ``df`` in place and returns it. Rows whose smoothed inputs are NaN
            match no rule and fall through to ``'Sideways'``.
            """
            smoothed_trend = df['Trend_Strength'].rolling(window=20).mean()
            smoothed_vol = df['Volatility_20'].rolling(window=20).mean()
            # NOTE(review): the cutoff is the 70th percentile of the whole
            # series, so each row's label depends on later rows as well.
            vol_cutoff = smoothed_vol.quantile(0.7)

            calm = smoothed_vol < vol_cutoff
            rules = [
                ('Bull', (smoothed_trend > 0.02) & calm),
                ('Bear', (smoothed_trend < -0.02) & calm),
                ('High_Vol', smoothed_vol > vol_cutoff),
            ]
            df['Market_Regime'] = np.select(
                [mask for _, mask in rules],
                [label for label, _ in rules],
                default='Sideways',
            )

            # One-hot encode regimes
            for label in ('Bull', 'Bear', 'Sideways', 'High_Vol'):
                df[f'Regime_{label}'] = (df['Market_Regime'] == label).astype(int)

            return df
    
    class CNNLSTMModel:
        """Keras Conv1D + LSTM regressor over (time_steps, n_features) windows.

        ``build_model`` must be called before ``train``, ``predict`` or
        ``save_model``; until then ``self.model`` is ``None``.
        """

        def __init__(self, time_steps=60, n_features=20, learning_rate=0.001):
            self.model = None
            self.time_steps = time_steps
            self.n_features = n_features
            self.learning_rate = learning_rate

        def build_model(self):
            """Create, compile and store the network; returns the Keras model."""
            from tensorflow.keras.layers import BatchNormalization, Conv1D, Dense, Dropout, LSTM, MaxPooling1D
            from tensorflow.keras.models import Sequential
            from tensorflow.keras.optimizers import Adam

            window_shape = (self.time_steps, self.n_features)

            # Convolutional front end for local pattern recognition
            conv_stack = [
                Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=window_shape),
                BatchNormalization(),
                Conv1D(filters=32, kernel_size=3, activation='relu'),
                MaxPooling1D(pool_size=2),
                Dropout(0.3),
            ]
            # Recurrent middle for sequence learning; only the last LSTM
            # collapses the time axis.
            recurrent_stack = [
                LSTM(100, return_sequences=True),
                Dropout(0.3),
                LSTM(50, return_sequences=False),
                Dropout(0.3),
            ]
            # Dense head ending in a single linear output
            dense_head = [
                Dense(50, activation='relu'),
                BatchNormalization(),
                Dropout(0.2),
                Dense(25, activation='relu'),
                Dense(1, activation='linear'),
            ]

            network = Sequential(conv_stack + recurrent_stack + dense_head)

            # Huber loss is used for robustness to outliers
            network.compile(
                optimizer=Adam(learning_rate=self.learning_rate),
                loss='huber',
                metrics=['mae', 'mse'],
            )

            self.model = network
            return network

        def train(self, X_train, y_train, epochs=50, batch_size=32, validation_split=0.2):
            """Fit the model with early stopping and learning-rate reduction.

            Returns whatever ``model.fit`` returns.
            """
            from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

            stop_early = EarlyStopping(patience=10, restore_best_weights=True)
            shrink_lr = ReduceLROnPlateau(factor=0.5, patience=5, min_lr=1e-7)

            fit_options = {
                'epochs': epochs,
                'batch_size': batch_size,
                'validation_split': validation_split,
                'callbacks': [stop_early, shrink_lr],
                'verbose': 1,
            }
            return self.model.fit(X_train, y_train, **fit_options)

        def predict(self, X):
            """Return the model's predictions for ``X``."""
            return self.model.predict(X)

        def save_model(self, path):
            """Persist the model to ``path`` via ``model.save``."""
            self.model.save(path)
    
    class RiskManager:
        """Position sizing and exit rules (stop-loss, take-profit, drawdown).

        Attributes:
            max_position_size: Upper bound on the fraction returned by
                ``calculate_position_size``.
            stop_loss: Adverse move, as a fraction of the entry price, that
                triggers ``'stop_loss'``.
            take_profit: Favourable move, as a fraction of the entry price,
                that triggers ``'take_profit'``.
            max_drawdown: Peak-to-current loss fraction that triggers
                ``'max_drawdown'``.
            current_drawdown: Drawdown computed by the most recent
                ``check_risk_limits`` call that reached the drawdown step.
            peak_value: Highest ``current_value`` seen so far.
        """

        def __init__(self, max_position_size=0.8, stop_loss=0.05, take_profit=0.15, max_drawdown=0.20):
            self.max_position_size = max_position_size
            self.stop_loss = stop_loss
            self.take_profit = take_profit
            self.max_drawdown = max_drawdown
            self.current_drawdown = 0
            self.peak_value = 0

        def calculate_position_size(self, signal_strength, volatility, account_value, market_regime='Sideways'):
            """Calculate position size using Kelly Criterion with regime adjustment.

            Args:
                signal_strength: Signal score; only its magnitude, capped at
                    1.0, is used.
                volatility: Volatility estimate; larger values shrink the size.
                account_value: Accepted for interface compatibility; not used
                    in the calculation.
                market_regime: One of ``'Bull'``, ``'Bear'``, ``'Sideways'``,
                    ``'High_Vol'``; unknown labels use a neutral 1.0 multiplier.

            Returns:
                Fraction in ``[0, max_position_size]``. Returns ``0.0`` when
                ``signal_strength`` or ``volatility`` is NaN or infinite, so a
                missing input never turns into a NaN-sized position.
            """
            import math

            if not (math.isfinite(signal_strength) and math.isfinite(volatility)):
                return 0.0
            # A negative estimate would inflate the size (or divide by zero at
            # -0.1), so it is treated as "no volatility information".
            volatility = max(volatility, 0.0)

            # Base Kelly parameters
            win_rate = 0.55  # Assume 55% win rate
            avg_win = 0.08   # 8% average win
            avg_loss = 0.04  # 4% average loss

            # Kelly fraction, clamped to [0, max_position_size]
            kelly_fraction = (win_rate * avg_win - (1 - win_rate) * avg_loss) / avg_win
            kelly_fraction = max(0, min(kelly_fraction, self.max_position_size))

            # Adjust for volatility
            volatility_adjustment = 1 / (1 + volatility * 10)

            # Adjust for market regime
            regime_adjustments = {
                'Bull': 1.2,
                'Bear': 0.6,
                'Sideways': 1.0,
                'High_Vol': 0.5
            }
            regime_adjustment = regime_adjustments.get(market_regime, 1.0)

            # Adjust for signal strength
            signal_adjustment = min(abs(signal_strength), 1.0)

            # Final position size
            final_position_size = kelly_fraction * volatility_adjustment * regime_adjustment * signal_adjustment

            return min(final_position_size, self.max_position_size)

        def check_risk_limits(self, current_value, entry_price, current_price, position_size):
            """Check if risk limits are breached.

            Args:
                current_value: Current portfolio value, used for drawdown.
                entry_price: Entry price of the open position; a value
                    ``<= 0`` means no position is open.
                current_price: Latest price.
                position_size: Size of the open position. ``0`` means flat
                    and a negative value means short; positive or ``None``
                    is treated as long.

            Returns:
                ``'stop_loss'``, ``'take_profit'``, ``'max_drawdown'`` or
                ``'hold'``. The price rules are skipped while flat: with no
                entry price, any positive price would otherwise look like a
                take-profit.
            """
            is_flat = entry_price <= 0 or position_size == 0
            is_short = position_size is not None and position_size < 0

            if not is_flat:
                if is_short:
                    # For a short, a rising price is the adverse direction.
                    if current_price > entry_price * (1 + self.stop_loss):
                        return 'stop_loss'
                    if current_price < entry_price * (1 - self.take_profit):
                        return 'take_profit'
                else:
                    if current_price < entry_price * (1 - self.stop_loss):
                        return 'stop_loss'
                    if current_price > entry_price * (1 + self.take_profit):
                        return 'take_profit'

            # Drawdown check
            if current_value > self.peak_value:
                self.peak_value = current_value

            # Until a positive peak has been seen there is nothing to draw
            # down from, and dividing by the zero peak would raise.
            if self.peak_value > 0:
                self.current_drawdown = (self.peak_value - current_value) / self.peak_value
            else:
                self.current_drawdown = 0

            if self.current_drawdown > self.max_drawdown:
                return 'max_drawdown'

            return 'hold'
    
    class ImprovedTradingStrategy:
        """Single-asset long/short strategy with risk-managed position handling.

        State kept between bars:
            capital: Cash not committed to the open position.
            position: ``+1`` long, ``-1`` short, ``0`` flat.
            position_size: Fraction of portfolio value committed at entry
                (as returned by the risk manager); ``0`` when flat.
            shares: Number of units held for the open position.
            entry_price: Entry price of the open position; ``0`` when flat.
            portfolio_value: ``capital`` plus the marked-to-market position.
            trades: One dict per closed trade.
        """

        def __init__(self, initial_capital=100000, risk_manager=None):
            self.initial_capital = initial_capital
            self.capital = initial_capital
            self.position = 0
            self.position_size = 0
            self.shares = 0.0
            self.entry_price = 0
            self.portfolio_value = initial_capital
            self.trades = []
            self.risk_manager = RiskManager() if risk_manager is None else risk_manager
            self.market_regime = 'Sideways'

        def generate_signals(self, df, predictions):
            """Walk the bars in order and produce per-bar signals and equity.

            Args:
                df: Frame with ``Close`` plus the indicator columns read by
                    ``_calculate_signal_strength``; ``Volatility_20`` is read
                    when a position is opened and ``Market_Regime`` is
                    optional.
                predictions: Indexable sequence of model outputs aligned with
                    ``df`` by position; bars beyond its length use ``0``.

            Returns:
                DataFrame indexed like ``df`` with columns ``signal`` (1 on a
                long entry, -1 on a short entry, else 0), ``position``
                (fraction committed; 0 when flat or on an exit bar),
                ``portfolio_value`` and ``market_regime``. The first bar is
                never traded.
            """
            n_rows = len(df)
            # Columns are collected in plain lists and attached once at the
            # end; `position` and `portfolio_value` are float from the start
            # so fractional values never land in an integer column.
            signal_col = [0] * n_rows
            position_col = [0.0] * n_rows
            value_col = [float(self.portfolio_value)] * n_rows
            has_regime = 'Market_Regime' in df.columns

            for i in range(1, n_rows):
                current_price = df['Close'].iloc[i]
                prediction = predictions[i] if i < len(predictions) else 0
                self.market_regime = df['Market_Regime'].iloc[i] if has_regime else 'Sideways'

                # Multi-factor signal generation
                signal_strength = self._calculate_signal_strength(df.iloc[i], prediction)

                # Risk management. The size is passed signed (negative for a
                # short) so a direction-aware risk manager can tell the side.
                risk_action = self.risk_manager.check_risk_limits(
                    self.portfolio_value, self.entry_price, current_price,
                    self.position * self.position_size,
                )
                # Price-based exits are meaningless while flat (entry price is
                # 0) and must not block new entries; a portfolio-level
                # 'max_drawdown' still does.
                if self.position == 0 and risk_action in ('stop_loss', 'take_profit'):
                    risk_action = 'hold'

                if risk_action != 'hold':
                    # Close position due to risk management
                    self._close_position(current_price, f"Risk: {risk_action}")
                elif self.position == 0 and abs(signal_strength) > 0.5:
                    # Open a long (positive score) or short (negative score)
                    direction = 'long' if signal_strength > 0 else 'short'
                    size = self.risk_manager.calculate_position_size(
                        abs(signal_strength), df['Volatility_20'].iloc[i],
                        self.portfolio_value, self.market_regime,
                    )
                    if self._open_position(current_price, size, direction):
                        signal_col[i] = self.position
                        position_col[i] = size
                elif abs(signal_strength) < 0.2 and self.position != 0:
                    # Close position due to weak signal
                    self._close_position(current_price, "Weak signal")
                else:
                    # Hold current position
                    position_col[i] = self.position_size

                # Update portfolio value
                self._update_portfolio_value(current_price)
                value_col[i] = self.portfolio_value

            signals = pd.DataFrame(index=df.index)
            signals['signal'] = signal_col
            signals['position'] = position_col
            signals['portfolio_value'] = value_col
            signals['market_regime'] = df.get('Market_Regime', 'Sideways')
            return signals

        def _calculate_signal_strength(self, row, prediction):
            """Calculate multi-factor signal strength.

            Each factor only contributes when it agrees with the sign of
            ``prediction``; the sum is then scaled by a multiplier for the
            current ``self.market_regime``.
            """
            signal_strength = 0

            # Trend following
            if row['Close'] > row['SMA_20'] and prediction > 0:
                signal_strength += 0.3
            elif row['Close'] < row['SMA_20'] and prediction < 0:
                signal_strength -= 0.3

            # Momentum
            if row['RSI'] < 30 and prediction > 0:  # Oversold + bullish prediction
                signal_strength += 0.4
            elif row['RSI'] > 70 and prediction < 0:  # Overbought + bearish prediction
                signal_strength -= 0.4

            # MACD confirmation
            if row['MACD'] > row['MACD_Signal'] and prediction > 0:
                signal_strength += 0.2
            elif row['MACD'] < row['MACD_Signal'] and prediction < 0:
                signal_strength -= 0.2

            # Bollinger Band position
            if row['BB_Position'] < 0.2 and prediction > 0:  # Near lower band + bullish
                signal_strength += 0.2
            elif row['BB_Position'] > 0.8 and prediction < 0:  # Near upper band + bearish
                signal_strength -= 0.2

            # Market regime adjustment
            regime_multipliers = {
                'Bull': 1.2,
                'Bear': 0.8,
                'Sideways': 1.0,
                'High_Vol': 0.6
            }
            signal_strength *= regime_multipliers.get(self.market_regime, 1.0)

            return signal_strength

        def _open_position(self, price, size, direction):
            """Open a new position worth ``size`` (a fraction) of portfolio value.

            The fraction is converted to a unit count at ``price`` and the
            committed notional is moved out of ``capital``; for a short this
            acts as reserved margin.

            Returns:
                ``True`` if the position was opened, ``False`` when ``price``
                or ``size`` is not a positive number (NaN included), in which
                case no state changes.
            """
            if not (price > 0 and size > 0):
                return False

            shares = size * self.portfolio_value / price
            self.position = 1 if direction == 'long' else -1
            self.position_size = size
            self.shares = shares
            self.entry_price = price
            self.capital -= shares * price  # Reduce available capital
            return True

        def _close_position(self, price, reason):
            """Close the current position, book its P&L and record the trade."""
            if self.position != 0:
                pnl = (price - self.entry_price) * self.position * self.shares
                # Return the notional committed at entry plus the realised
                # P&L. Crediting price * size as well would count the gain twice.
                self.capital += self.entry_price * self.shares + pnl

                self.trades.append({
                    'entry_price': self.entry_price,
                    'exit_price': price,
                    'position_size': self.position_size,
                    'shares': self.shares,
                    'direction': 'long' if self.position > 0 else 'short',
                    'pnl': pnl,
                    'reason': reason
                })

                self.position = 0
                self.position_size = 0
                self.shares = 0.0
                self.entry_price = 0

        def _update_portfolio_value(self, current_price):
            """Mark the portfolio to ``current_price``."""
            if self.position != 0:
                pnl = (current_price - self.entry_price) * self.position * self.shares
                # Same accounting as _close_position: entry notional plus
                # unrealised P&L, valid for both long and short.
                self.portfolio_value = self.capital + self.entry_price * self.shares + pnl
            else:
                self.portfolio_value = self.capital
    
    class ImprovedBacktestEngine:
        """Long-only, single-asset backtester applying commission and slippage."""

        def __init__(self, initial_capital=100000, commission=0.001, slippage=0.0005):
            self.initial_capital = initial_capital
            self.commission = commission
            self.slippage = slippage

        def run_backtest(self, price_data, signals):
            """Replay ``signals`` over ``price_data`` and return the results.

            Args:
                price_data: Frame with a ``Close`` column, iterated row by row.
                signals: Frame with ``signal`` and ``position`` columns;
                    bars whose index label is absent here are only
                    marked to market.

            Returns:
                Dict with ``total_return``, ``portfolio_value``, ``trades``,
                ``final_cash``, ``final_shares``, ``returns`` (bar-to-bar
                percentage changes of portfolio value) and
                ``portfolio_values``.
            """
            cash = self.initial_capital
            shares = 0
            portfolio_value = self.initial_capital
            trades = []
            portfolio_values = []

            for date, row in price_data.iterrows():
                if date in signals.index:
                    signal = signals.loc[date, 'signal']
                    position_size = signals.loc[date, 'position']
                    price = row['Close']

                    # Slippage moves the fill against the trade: up for buy
                    # signals, down for everything else.
                    if signal > 0:
                        execution_price = price * (1 + self.slippage)
                    else:
                        execution_price = price * (1 - self.slippage)

                    wants_entry = signal == 1 and shares == 0
                    # NOTE(review): only signal == -1 liquidates; a zero
                    # signal leaves the holding in place.
                    wants_exit = signal == -1 and shares > 0

                    if wants_entry:
                        shares_to_buy = (cash * position_size) / execution_price
                        cost = shares_to_buy * execution_price * (1 + self.commission)

                        # Skip the order when cash cannot cover it with commission
                        if cost <= cash:
                            shares = shares_to_buy
                            cash -= cost
                            trades.append({
                                'date': date,
                                'action': 'BUY',
                                'price': execution_price,
                                'shares': shares,
                                'cost': cost
                            })
                    elif wants_exit:
                        proceeds = shares * execution_price * (1 - self.commission)
                        cash += proceeds
                        trades.append({
                            'date': date,
                            'action': 'SELL',
                            'price': execution_price,
                            'shares': shares,
                            'proceeds': proceeds
                        })
                        shares = 0

                # Mark to market at the unadjusted close
                holdings = shares * row['Close'] if shares > 0 else 0
                portfolio_value = cash + holdings
                portfolio_values.append(portfolio_value)

            equity_curve = pd.Series(portfolio_values)
            returns = equity_curve.pct_change().dropna()
            total_return = (portfolio_value - self.initial_capital) / self.initial_capital

            return {
                'total_return': total_return,
                'portfolio_value': portfolio_value,
                'trades': trades,
                'final_cash': cash,
                'final_shares': shares,
                'returns': returns,
                'portfolio_values': portfolio_values
            }
    
    class PerformanceAnalyzer:
        """Enhanced performance analyzer with comprehensive metrics"""

        def calculate_portfolio_performance(self, backtest_results):
            """Calculate comprehensive performance metrics

            Args:
                backtest_results: Mapping with a 'returns' entry holding a
                    pandas Series of per-period simple returns. Annualisation
                    uses 252 periods per year.

            Returns:
                dict of scalar metrics (total/annualized return, volatility,
                Sharpe, Sortino, max drawdown, Calmar, win rate, average
                win/loss, profit factor) plus the series 'cumulative_returns',
                'drawdown', 'rolling_sharpe' and 'monthly_returns'.
                'monthly_returns' is an empty Series when the returns index
                cannot be resampled by date.
            """
            returns = backtest_results['returns']
            n_periods = len(returns)

            # Basic metrics
            growth = (1 + returns).prod()
            total_return = growth - 1
            # An empty series has nothing to annualise (and would divide by zero)
            annualized_return = growth ** (252 / n_periods) - 1 if n_periods > 0 else 0.0
            volatility = returns.std() * np.sqrt(252)
            sharpe_ratio = annualized_return / volatility if volatility > 0 else 0

            # Risk metrics
            negative_returns = returns[returns < 0]
            downside_volatility = negative_returns.std() * np.sqrt(252)
            # The std is NaN with fewer than two losing periods and 0 when all
            # losses are equal; both cases report 0 instead of NaN/inf.
            sortino_ratio = annualized_return / downside_volatility if downside_volatility > 0 else 0

            # Drawdown analysis
            cumulative = (1 + returns).cumprod()
            running_max = cumulative.expanding().max()
            drawdown = (cumulative - running_max) / running_max
            max_drawdown = drawdown.min()
            # Drawdowns are <= 0; the comparison is also False for NaN (empty input)
            calmar_ratio = annualized_return / abs(max_drawdown) if max_drawdown < 0 else 0

            # Win/Loss analysis
            winning_trades = returns[returns > 0]
            losing_trades = returns[returns < 0]
            win_rate = len(winning_trades) / n_periods if n_periods > 0 else 0
            avg_win = winning_trades.mean() if len(winning_trades) > 0 else 0
            avg_loss = losing_trades.mean() if len(losing_trades) > 0 else 0
            profit_factor = abs(winning_trades.sum() / losing_trades.sum()) if len(losing_trades) > 0 and losing_trades.sum() != 0 else 0

            # Monthly compounding needs a datetime-like index
            try:
                try:
                    monthly_groups = returns.resample('ME')  # month-end alias in newer pandas
                except ValueError:
                    monthly_groups = returns.resample('M')  # older pandas only knows 'M'
                monthly_returns = monthly_groups.apply(lambda x: (1 + x).prod() - 1)
            except TypeError:
                # Positional (non-date) index: no calendar months to aggregate
                monthly_returns = pd.Series(dtype=float)

            return {
                'total_return': total_return,
                'annualized_return': annualized_return,
                'volatility': volatility,
                'sharpe_ratio': sharpe_ratio,
                'sortino_ratio': sortino_ratio,
                'max_drawdown': max_drawdown,
                'calmar_ratio': calmar_ratio,
                'win_rate': win_rate,
                'avg_win': avg_win,
                'avg_loss': avg_loss,
                'profit_factor': profit_factor,
                'cumulative_returns': cumulative,
                'drawdown': drawdown,
                'rolling_sharpe': returns.rolling(window=252).mean() / returns.rolling(window=252).std() * np.sqrt(252),
                'monthly_returns': monthly_returns
            }
    
    # Announce which improved components the definitions above provide
    print("\n".join([
        "✅ Improved implementations created!",
        "📊 Features implemented:",
        "  • Advanced Technical Indicators (RSI, MACD, Bollinger Bands, Stochastic, Williams %R, ATR)",
        "  • Comprehensive Feature Engineering with Market Regime Detection",
        "  • Improved CNN+LSTM Model with CNN layers and regularization",
        "  • Proper Trading Strategy with Position Management",
        "  • Risk Management System with Kelly Criterion and Stop Losses",
        "  • Realistic Backtesting Engine with Commissions and Slippage",
        "  • Enhanced Performance Analysis with Comprehensive Metrics",
    ]))

# Confirm the imports worked and report the TensorFlow build and visible GPUs
print("All libraries imported successfully!")
print("TensorFlow version: {}".format(tf.__version__))
print("GPU devices: {}".format(tf.config.list_physical_devices('GPU')))


## 3. Data Collection and Preparation


In [None]:
# Build the data collector, indicator engine and feature engineer used below
collector = MarketDataCollector()
indicator_engine = TechnicalIndicatorEngine()
feature_engineer = FeatureEngineer()

# Universe of tickers to download
symbols = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'NVDA', 'META', 'NFLX']

# Download two years of daily bars per ticker; a failing ticker is reported
# and skipped so that the remaining tickers are still collected.
print("📊 Collecting historical data with improved error handling...")
data = {}
for symbol in symbols:
    try:
        df = collector.get_historical_data(symbol, period='2y', interval='1d')
        if df is None or len(df) == 0:
            print(f"✗ {symbol}: No data received")
            continue
        data[symbol] = df
        print(f"✓ {symbol}: {len(df)} records")
    except Exception as e:
        print(f"✗ {symbol}: Error - {e}")

print(f"\nData collection complete! Collected data for {len(data)} symbols.")

# Enrich every collected frame with indicators and engineered features,
# then print a summary for the first processed symbol.
if not data:
    print("⚠️ No data collected. Check your internet connection and try again.")
else:
    print("\n🔧 Processing data with advanced technical indicators and feature engineering...")
    processed_data = {}

    for symbol, df in data.items():
        try:
            # Step 1: technical indicators
            print(f"  📈 Calculating technical indicators for {symbol}...")
            df_with_indicators = indicator_engine.calculate_all_indicators(df)

            # Step 2: engineered features (including market regime)
            print(f"  🧠 Creating advanced features for {symbol}...")
            df_with_features = feature_engineer.create_features(df_with_indicators)

            processed_data[symbol] = df_with_features
            print(f"✓ {symbol}: Technical indicators and features calculated")

            # Everything that is not a raw OHLCV column counts as a feature
            feature_count = sum(
                1
                for col in df_with_features.columns
                if col not in ('Open', 'High', 'Low', 'Close', 'Volume')
            )
            print(f"  📊 {symbol}: {feature_count} features created")

        except Exception as e:
            print(f"✗ {symbol}: Error processing - {e}")
            # Keep the raw frame so the symbol is not lost when processing fails
            processed_data[symbol] = df

    print(f"\n✅ Data processing complete! Processed {len(processed_data)} symbols.")

    # Use the first processed symbol as the sample to display
    sample_symbol = next(iter(processed_data))
    sample_data = processed_data[sample_symbol]

    print(f"\n📋 Sample processed data for {sample_symbol}:")
    print("=" * 80)

    # Only show the key indicators that actually exist in the sample frame
    key_indicators = ['Close', 'SMA_20', 'SMA_50', 'RSI', 'MACD', 'BB_Position', 'ATR', 'Market_Regime']
    available_indicators = [col for col in key_indicators if col in sample_data.columns]

    if available_indicators:
        print("Key Technical Indicators:")
        print(sample_data[available_indicators].tail())

    # Column counts grouped by name prefix
    print(f"\n📊 Feature Summary for {sample_symbol}:")
    print(f"Total features: {len(sample_data.columns)}")
    print(f"Technical indicators: {sum(1 for col in sample_data.columns if col.startswith(('SMA', 'EMA', 'RSI', 'MACD', 'BB', 'Stoch', 'Williams', 'ATR')))}")
    print(f"Price features: {sum(1 for col in sample_data.columns if col.startswith(('Returns', 'Momentum', 'Price', 'Volatility')))}")
    print(f"Market regime features: {sum(1 for col in sample_data.columns if col.startswith('Regime'))}")

    # Share of days spent in each market regime
    if 'Market_Regime' in sample_data.columns:
        regime_counts = sample_data['Market_Regime'].value_counts()
        print("\n🎯 Market Regime Distribution:")
        for regime, count in regime_counts.items():
            percentage = (count / len(sample_data)) * 100
            print(f"  {regime}: {count} days ({percentage:.1f}%)")


## 4. Model Training with GPU


In [None]:
# Train improved CNN+LSTM model with GPU and advanced features
print("🤖 Training improved CNN+LSTM model with advanced architecture...")

# Prepare training data from processed data with comprehensive features
if 'processed_data' in locals() and processed_data:
    print("📊 Preparing training data from collected market data with advanced features...")

    # Combine all data for training.
    # NOTE: this tags each frame held in processed_data with a 'symbol' column in place.
    all_data = []
    for symbol, df in processed_data.items():
        df['symbol'] = symbol
        all_data.append(df)

    combined_data = pd.concat(all_data, ignore_index=True)
    print(f"Combined dataset shape: {combined_data.shape}")

    # Select comprehensive features for training
    feature_columns = [
        # Price features
        'Returns', 'Log_Returns', 'Price_Ratio', 'High_Low_Ratio', 'Close_Open_Ratio',
        # Volatility features
        'Volatility_5', 'Volatility_20', 'Volatility_Ratio',
        # Momentum features
        'Momentum_5', 'Momentum_10', 'Momentum_20',
        # Trend features
        'Trend_Strength', 'Price_vs_SMA20', 'Price_vs_SMA50',
        # Technical indicators
        'RSI', 'RSI_6', 'MACD', 'MACD_Signal', 'MACD_Histogram',
        'BB_Position', 'BB_Width', 'Stoch_K', 'Stoch_D', 'Williams_R', 'ATR',
        # Volume features
        'Volume_Ratio', 'Volume_Change', 'Price_Volume_Trend',
        # Technical indicator features
        'RSI_Overbought', 'RSI_Oversold', 'MACD_Bullish', 'MACD_Bearish',
        'BB_Squeeze', 'BB_Expansion',
        # Market regime features
        'Regime_Bull', 'Regime_Bear', 'Regime_Sideways', 'Regime_High_Vol',
        # Lagged features
        'Returns_Lag_1', 'Returns_Lag_2', 'Returns_Lag_3', 'Returns_Lag_5',
        'Volume_Ratio_Lag_1', 'Volume_Ratio_Lag_2', 'Volume_Ratio_Lag_3', 'Volume_Ratio_Lag_5'
    ]

    # Filter available features
    available_features = [col for col in feature_columns if col in combined_data.columns]
    print(f"📈 Available features for training: {len(available_features)}")
    print(f"Features: {available_features[:10]}...")  # Show first 10 features

    if len(available_features) >= 10:  # Need at least 10 features for robust training
        # Initialize improved model with correct number of features
        cnn_lstm = CNNLSTMModel(
            time_steps=60,
            n_features=len(available_features),
            learning_rate=0.0001  # Lower learning rate for better convergence
        )

        # Build improved model
        model = cnn_lstm.build_model()
        print(f"🏗️ Improved model built with {model.count_params():,} parameters")
        print(f"📊 Model expects {len(available_features)} input features")

        # Prepare features, targets and sliding windows ONE SYMBOL AT A TIME.
        # Shifting and windowing the concatenated frame would pair the last
        # rows of one symbol with the first close of the next symbol and let
        # windows straddle two different price series.
        cleaned_features = []
        cleaned_targets = []
        window_blocks = []
        target_blocks = []
        for symbol, df in processed_data.items():
            if any(col not in df.columns for col in available_features):
                # e.g. feature engineering failed and the raw frame was kept
                print(f"⚠️ {symbol}: skipped (engineered features missing)")
                continue

            # Force a float array: mixed bool/float columns would otherwise
            # yield an object array that the validity mask cannot handle.
            symbol_X = df[available_features].to_numpy(dtype=float)
            symbol_y = df['Close'].shift(-1).to_numpy(dtype=float)  # Predict next day's close

            # Remove rows containing NaN or infinite values
            valid_indices = np.isfinite(symbol_X).all(axis=1) & np.isfinite(symbol_y)
            symbol_X = symbol_X[valid_indices]
            symbol_y = symbol_y[valid_indices]
            cleaned_features.append(symbol_X)
            cleaned_targets.append(symbol_y)

            if len(symbol_X) < cnn_lstm.time_steps:
                continue  # too short to form even one window

            # (windows, features, time_steps) -> (windows, time_steps, features)
            windows = np.lib.stride_tricks.sliding_window_view(
                symbol_X, cnn_lstm.time_steps, axis=0
            )
            window_blocks.append(windows.transpose(0, 2, 1))
            # The target of a window is the target of its last row
            target_blocks.append(symbol_y[cnn_lstm.time_steps - 1:])

        if cleaned_features:
            X_data = np.concatenate(cleaned_features)
            y_data = np.concatenate(cleaned_targets)
        else:
            X_data = np.empty((0, len(available_features)))
            y_data = np.empty(0)

        print(f"📊 Data after cleaning: {len(X_data)} samples")

        # Stack per-symbol windows for the model (samples, time_steps, features)
        if window_blocks:
            X_reshaped = np.concatenate(window_blocks)
            y_reshaped = np.concatenate(target_blocks).reshape(-1, 1)
        else:
            X_reshaped = np.empty((0, cnn_lstm.time_steps, len(available_features)))
            y_reshaped = np.empty((0, 1))
        n_samples = len(X_reshaped)

        if n_samples < 2:
            print(f"⚠️ Not enough clean data to build training windows. Need at least {cnn_lstm.time_steps} clean rows per symbol.")
        else:
            # Split data (no shuffling, so the order of the windows is preserved)
            from sklearn.model_selection import train_test_split
            X_train, X_test, y_train, y_test = train_test_split(
                X_reshaped, y_reshaped, test_size=0.2, random_state=42, shuffle=False
            )

            print(f"📊 Training data shape: X={X_train.shape}, y={y_train.shape}")
            print(f"📊 Test data shape: X={X_test.shape}, y={y_test.shape}")

            # Verify dimensions match
            print(f"🔍 Model input shape: (batch_size, {cnn_lstm.time_steps}, {cnn_lstm.n_features})")
            print(f"🔍 Actual data shape: {X_train.shape}")

            if X_train.shape[2] == cnn_lstm.n_features:
                print("✅ Dimensions match! Proceeding with improved training...")

                # Train improved model with early stopping and learning rate reduction
                history = cnn_lstm.train(
                    X_train, y_train,
                    epochs=50,  # More epochs with early stopping
                    batch_size=32,
                    validation_split=0.2
                )

                print("🎉 Improved CNN+LSTM training complete!")

                # Plot comprehensive training history
                plt.figure(figsize=(15, 5))

                plt.subplot(1, 3, 1)
                plt.plot(history.history['loss'], label='Training Loss', color='blue')
                plt.plot(history.history['val_loss'], label='Validation Loss', color='red')
                plt.title('Model Loss (Huber Loss)')
                plt.xlabel('Epoch')
                plt.ylabel('Loss')
                plt.legend()
                plt.grid(True)

                plt.subplot(1, 3, 2)
                plt.plot(history.history['mae'], label='Training MAE', color='blue')
                plt.plot(history.history['val_mae'], label='Validation MAE', color='red')
                plt.title('Model MAE')
                plt.xlabel('Epoch')
                plt.ylabel('MAE')
                plt.legend()
                plt.grid(True)

                plt.subplot(1, 3, 3)
                plt.plot(history.history['mse'], label='Training MSE', color='blue')
                plt.plot(history.history['val_mse'], label='Validation MSE', color='red')
                plt.title('Model MSE')
                plt.xlabel('Epoch')
                plt.ylabel('MSE')
                plt.legend()
                plt.grid(True)

                plt.tight_layout()
                plt.show()

                # Evaluate improved model
                # NOTE(review): assumes the model was compiled with exactly the
                # metrics [mae, mse], in that order — confirm against CNNLSTMModel.
                test_loss, test_mae, test_mse = model.evaluate(X_test, y_test, verbose=0)
                print(f"\n📊 Improved Model Evaluation:")
                print(f"Test Loss (Huber): {test_loss:.6f}")
                print(f"Test MAE: {test_mae:.6f}")
                print(f"Test MSE: {test_mse:.6f}")

                # Calculate R² score
                from sklearn.metrics import r2_score
                predictions = model.predict(X_test)
                r2 = r2_score(y_test, predictions)
                print(f"R² Score: {r2:.4f}")

                # Model quality assessment
                if r2 > 0.7:
                    print("🎉 Excellent model performance (R² > 0.7)")
                elif r2 > 0.5:
                    print("✅ Good model performance (R² > 0.5)")
                elif r2 > 0.3:
                    print("⚠️ Moderate model performance (R² > 0.3)")
                else:
                    print("❌ Poor model performance (R² < 0.3)")

                # Store model and data for later use
                trained_model = model
                model_history = history

            else:
                print(f"❌ Dimension mismatch! Model expects {cnn_lstm.n_features} features but data has {X_train.shape[2]}")

    else:
        print("⚠️ Insufficient features for training. Need at least 10 features.")
        print(f"Available features: {len(available_features)}")

else:
    print("⚠️ No processed data available. Please run the data collection and processing cells first.")


In [None]:
# Clear any cached model saving code and restart
print("🔄 Clearing cached model saving code...")
print("If you get git errors, please:")
print("1. Go to Runtime > Restart Runtime")
print("2. Run all cells from the beginning")
print("3. The model saving now only uses Google Drive (no git operations)")

import importlib
import sys

# Standard-library modules such as os, shutil and subprocess are deliberately
# NOT removed from sys.modules: deleting them frees nothing useful and makes
# the next `import` build a second, independent module object while
# already-imported code keeps using the old one. Refreshing the import
# system's finder caches is the safe equivalent; stale notebook state is
# cleared by restarting the runtime as instructed above.
importlib.invalidate_caches()

print("✅ Cache cleared. Model saving will now only use Google Drive.")


## 5. Save Your Work


In [None]:
# Persist the trained model: a local .keras file first, then a timestamped
# copy on Google Drive so it survives the Colab session.
# Note: the .keras format is used instead of .h5 for better compatibility.

# 1. Local save
cnn_lstm.save_model('cnn_lstm_model.keras')
print("✓ Model saved locally in .keras format")

# 2. Google Drive copy
from google.colab import drive
import shutil
import os
from datetime import datetime

# Make Drive visible under /content/drive
drive.mount('/content/drive')

# Folder that collects the trading models
drive_folder = '/content/drive/MyDrive/Trading_Strategy_ML'
os.makedirs(drive_folder, exist_ok=True)

# The timestamp in the file name keeps earlier saves from being overwritten
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
drive_model_path = '{}/cnn_lstm_model_{}.keras'.format(drive_folder, timestamp)
shutil.copy('cnn_lstm_model.keras', drive_model_path)
print("✓ Model saved to Google Drive: {}".format(drive_model_path))

print("\n".join([
    "\n🎉 Model saved in 2 locations:",
    "1. Local Colab environment (cnn_lstm_model.keras)",
    "2. Google Drive (permanent storage)",
    "\n💡 Note: Using .keras format instead of .h5 for better compatibility",
]))


## 6. Load Saved Models


In [None]:
# Fixed Model Loading Function with Compatibility Handling

def load_model_from_location_fixed(location_type, model_path=None):
    """
    Load a trained model from different storage locations with compatibility handling

    Args:
        location_type: 'local' (current working directory) or 'drive'
            (Google Drive folder Trading_Strategy_ML). Any other value is
            rejected with a message.
        model_path: Path to the model file (optional). When omitted, the
            function looks for cnn_lstm_model.keras / .h5 locally, or picks
            the last model file by sorted name in the Drive folder,
            preferring .keras over .h5.

    Returns:
        The loaded Keras model, or None when no model is found, loading
        fails, or location_type is not supported.
    """

    if location_type == 'local':
        # Load from local Colab environment
        if model_path is None:
            # Try .keras format first, then .h5
            if os.path.exists('cnn_lstm_model.keras'):
                model_path = 'cnn_lstm_model.keras'
            elif os.path.exists('cnn_lstm_model.h5'):
                model_path = 'cnn_lstm_model.h5'
            else:
                print("No model found locally")
                return None

        try:
            model = tf.keras.models.load_model(model_path)
            print(f"✓ Model loaded from local: {model_path}")
        except Exception as e:
            print(f"✗ Error loading model from local: {e}")
            return None

    elif location_type == 'drive':
        # Load from Google Drive
        from google.colab import drive
        drive.mount('/content/drive')

        if model_path is None:
            # List available models in Drive
            drive_folder = '/content/drive/MyDrive/Trading_Strategy_ML'
            if not os.path.exists(drive_folder):
                print("Trading_Strategy_ML folder not found in Google Drive")
                return None

            # os.listdir returns entries in arbitrary order; sorting makes the
            # choice deterministic and, for names ending in a
            # YYYYMMDD_HHMMSS timestamp, puts the newest file last.
            entries = sorted(os.listdir(drive_folder))
            keras_models = [f for f in entries if f.endswith('.keras')]
            h5_models = [f for f in entries if f.endswith('.h5')]

            # Prefer .keras files, but use .h5 if that's all we have
            if keras_models:
                model_path = os.path.join(drive_folder, keras_models[-1])
                print(f"Available .keras models: {keras_models}")
            elif h5_models:
                model_path = os.path.join(drive_folder, h5_models[-1])
                print(f"Available .h5 models: {h5_models}")
                print("⚠️ Loading .h5 model - may have compatibility issues")
            else:
                print("No models found in Google Drive")
                return None

        try:
            # Map legacy metric aliases to callables to handle compatibility
            # issues. Only register the ones this TensorFlow/Keras build
            # actually exposes: a missing attribute would otherwise raise
            # here and abort every load.
            metric_aliases = (
                ('mse', 'mean_squared_error'),
                ('mae', 'mean_absolute_error'),
                ('accuracy', 'accuracy'),
            )
            custom_objects = {
                alias: getattr(tf.keras.metrics, attr)
                for alias, attr in metric_aliases
                if hasattr(tf.keras.metrics, attr)
            }
            model = tf.keras.models.load_model(model_path, custom_objects=custom_objects)
            print(f"✓ Model loaded from Google Drive: {model_path}")
        except Exception as e:
            print(f"✗ Error loading model from Google Drive: {e}")
            print("💡 Try training a new model with the current TensorFlow version")
            return None

    else:
        print("Invalid location_type. Use 'local' or 'drive'")
        return None

    return model

# Example usage: fetch a saved model from Google Drive and describe it
print("Loading model from Google Drive with compatibility handling...")
loaded_model = load_model_from_location_fixed('drive')

if loaded_model is None:
    print("No model found. Train a model first!")
else:
    print("Model summary:")
    loaded_model.summary()


## 8. Technical Benchmarks and Performance Analysis


In [None]:
# Comprehensive Technical Benchmarks and Performance Analysis

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

class TradingBenchmark:
    """Comprehensive trading strategy benchmark and analysis"""

    def __init__(self):
        self.benchmarks = {}
        self.results = {}

    @staticmethod
    def _monthly_returns(returns):
        """Compound per-period returns into calendar-month returns.

        Requires a datetime-like index. Tries the 'ME' month-end alias first
        and falls back to 'M' on pandas versions that do not know 'ME'.
        """
        try:
            monthly_groups = returns.resample('ME')
        except ValueError:
            monthly_groups = returns.resample('M')
        return monthly_groups.apply(lambda x: (1 + x).prod() - 1)

    def calculate_technical_metrics(self, returns, benchmark_returns=None):
        """Calculate comprehensive technical metrics

        Args:
            returns: pandas Series of per-period simple returns; annualised
                with 252 periods per year.
            benchmark_returns: Optional Series of benchmark returns. When
                given, alpha, beta, information ratio and tracking error are
                added.

        Returns:
            dict mapping metric name to value. Ratios whose denominator is
            zero or undefined are reported as 0.
        """
        metrics = {}
        n_periods = len(returns)

        # Basic metrics
        growth = (1 + returns).prod()
        metrics['total_return'] = growth - 1
        # An empty series has nothing to annualise (and would divide by zero)
        metrics['annualized_return'] = growth ** (252 / n_periods) - 1 if n_periods > 0 else 0.0
        metrics['volatility'] = returns.std() * np.sqrt(252)
        metrics['sharpe_ratio'] = metrics['annualized_return'] / metrics['volatility'] if metrics['volatility'] > 0 else 0

        # Risk metrics
        negative_returns = returns[returns < 0]
        downside_volatility = negative_returns.std() * np.sqrt(252)
        # The std is NaN with fewer than two losing periods and 0 when all
        # losses are equal; both cases report 0 instead of NaN/inf.
        metrics['sortino_ratio'] = metrics['annualized_return'] / downside_volatility if downside_volatility > 0 else 0

        # Drawdown analysis
        cumulative = (1 + returns).cumprod()
        running_max = cumulative.expanding().max()
        drawdown = (cumulative - running_max) / running_max
        metrics['max_drawdown'] = drawdown.min()
        # Drawdowns are <= 0; the comparison is also False for NaN (empty input)
        metrics['calmar_ratio'] = metrics['annualized_return'] / abs(metrics['max_drawdown']) if metrics['max_drawdown'] < 0 else 0

        # Win/Loss analysis
        winning_trades = returns[returns > 0]
        losing_trades = returns[returns < 0]
        metrics['win_rate'] = len(winning_trades) / n_periods if n_periods > 0 else 0
        metrics['avg_win'] = winning_trades.mean() if len(winning_trades) > 0 else 0
        metrics['avg_loss'] = losing_trades.mean() if len(losing_trades) > 0 else 0
        metrics['profit_factor'] = abs(winning_trades.sum() / losing_trades.sum()) if len(losing_trades) > 0 and losing_trades.sum() != 0 else 0

        # Benchmark comparison
        if benchmark_returns is not None:
            excess_returns = returns - benchmark_returns
            metrics['alpha'] = excess_returns.mean() * 252
            metrics['beta'] = returns.cov(benchmark_returns) / benchmark_returns.var() if benchmark_returns.var() > 0 else 0
            metrics['information_ratio'] = excess_returns.mean() / excess_returns.std() * np.sqrt(252) if excess_returns.std() > 0 else 0
            metrics['tracking_error'] = excess_returns.std() * np.sqrt(252)

        return metrics

    def create_performance_dashboard(self, strategy_returns, benchmark_returns=None, strategy_name="Trading Strategy"):
        """Create comprehensive performance dashboard

        Args:
            strategy_returns: Series of per-period strategy returns with a
                datetime index (needed for the monthly heatmap).
            benchmark_returns: Optional Series of benchmark returns; enables
                the comparison traces and the alpha/beta rows of the table.
            strategy_name: Label used in trace names and the figure title.

        Returns:
            Tuple ``(fig, strategy_metrics)``: the Plotly figure and the dict
            produced by ``calculate_technical_metrics``.
        """

        # Calculate metrics
        strategy_metrics = self.calculate_technical_metrics(strategy_returns, benchmark_returns)

        # Create subplots with correct specs for table
        fig = make_subplots(
            rows=3, cols=2,
            subplot_titles=[
                'Cumulative Returns Comparison',
                'Rolling Sharpe Ratio',
                'Drawdown Analysis',
                'Monthly Returns Heatmap',
                'Risk-Return Scatter',
                'Performance Metrics Table'
            ],
            specs=[[{"secondary_y": False}, {"secondary_y": False}],
                   [{"secondary_y": False}, {"secondary_y": False}],
                   [{"secondary_y": False}, {"type": "table"}]]
        )

        # 1. Cumulative Returns
        strategy_cumulative = (1 + strategy_returns).cumprod()
        fig.add_trace(
            go.Scatter(x=strategy_returns.index, y=strategy_cumulative.values,
                      name=f'{strategy_name}', line=dict(color='blue', width=2)),
            row=1, col=1
        )

        if benchmark_returns is not None:
            benchmark_cumulative = (1 + benchmark_returns).cumprod()
            fig.add_trace(
                go.Scatter(x=benchmark_returns.index, y=benchmark_cumulative.values,
                          name='Benchmark (S&P 500)', line=dict(color='red', width=2)),
                row=1, col=1
            )

        # 2. Rolling Sharpe Ratio (252-period window, so shorter series plot as NaN)
        rolling_sharpe = strategy_returns.rolling(window=252).mean() / strategy_returns.rolling(window=252).std() * np.sqrt(252)
        fig.add_trace(
            go.Scatter(x=strategy_returns.index, y=rolling_sharpe.values,
                      name='Rolling Sharpe', line=dict(color='green')),
            row=1, col=2
        )

        # 3. Drawdown Analysis
        running_max = strategy_cumulative.expanding().max()
        drawdown = (strategy_cumulative - running_max) / running_max

        fig.add_trace(
            go.Scatter(x=strategy_returns.index, y=drawdown.values,
                      name='Drawdown', fill='tonexty', line=dict(color='red')),
            row=2, col=1
        )

        # 4. Monthly Returns Heatmap (rows = year, columns = month)
        monthly_returns = self._monthly_returns(strategy_returns)
        monthly_pivot = monthly_returns.groupby([monthly_returns.index.year, monthly_returns.index.month]).first().unstack()

        fig.add_trace(
            go.Heatmap(z=monthly_pivot.values,
                      x=monthly_pivot.columns,
                      y=monthly_pivot.index,
                      colorscale='RdYlGn',
                      name='Monthly Returns'),
            row=2, col=2
        )

        # 5. Risk-Return Scatter (only drawn when there is a benchmark to compare with)
        if benchmark_returns is not None:
            fig.add_trace(
                go.Scatter(x=[strategy_metrics['volatility']], y=[strategy_metrics['annualized_return']],
                          mode='markers', marker=dict(size=15, color='blue'),
                          name=f'{strategy_name}'),
                row=3, col=1
            )
            benchmark_metrics = self.calculate_technical_metrics(benchmark_returns)
            fig.add_trace(
                go.Scatter(x=[benchmark_metrics['volatility']], y=[benchmark_metrics['annualized_return']],
                          mode='markers', marker=dict(size=15, color='red'),
                          name='Benchmark'),
                row=3, col=1
            )

        # 6. Performance Metrics Table
        metrics_data = [
            ['Total Return', f"{strategy_metrics['total_return']:.2%}"],
            ['Annualized Return', f"{strategy_metrics['annualized_return']:.2%}"],
            ['Volatility', f"{strategy_metrics['volatility']:.2%}"],
            ['Sharpe Ratio', f"{strategy_metrics['sharpe_ratio']:.2f}"],
            ['Sortino Ratio', f"{strategy_metrics['sortino_ratio']:.2f}"],
            ['Max Drawdown', f"{strategy_metrics['max_drawdown']:.2%}"],
            ['Calmar Ratio', f"{strategy_metrics['calmar_ratio']:.2f}"],
            ['Win Rate', f"{strategy_metrics['win_rate']:.2%}"],
            ['Profit Factor', f"{strategy_metrics['profit_factor']:.2f}"]
        ]

        if benchmark_returns is not None:
            metrics_data.extend([
                ['Alpha', f"{strategy_metrics['alpha']:.2%}"],
                ['Beta', f"{strategy_metrics['beta']:.2f}"],
                ['Information Ratio', f"{strategy_metrics['information_ratio']:.2f}"]
            ])

        fig.add_trace(
            go.Table(
                header=dict(values=['Metric', 'Value'], fill_color='lightblue'),
                # zip(*rows) turns the row list into the column lists go.Table expects
                cells=dict(values=list(zip(*metrics_data)), fill_color='white')
            ),
            row=3, col=2
        )

        # Update layout
        fig.update_layout(
            height=1200,
            title_text=f"{strategy_name} - Performance Dashboard",
            showlegend=True
        )

        return fig, strategy_metrics

    def create_model_performance_analysis(self, model_history, X_test, y_test, model_name="CNN+LSTM"):
        """Create model performance analysis

        Args:
            model_history: Training history object exposing ``.history``
                (with 'loss' and 'val_loss') and ``.model``.
            X_test: Held-out model inputs passed to ``model.predict``.
            y_test: Array of true targets for ``X_test``.
            model_name: Label used in the figure title.

        Returns:
            Tuple ``(fig, metrics)`` where ``metrics`` holds 'mse', 'mae',
            'r2' and 'rmse' computed on the test data.
        """

        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=[
                'Training History',
                'Prediction vs Actual',
                'Residuals Analysis',
                'Model Performance Metrics'
            ],
            specs=[[{"secondary_y": False}, {"secondary_y": False}],
                   [{"secondary_y": False}, {"type": "table"}]]
        )

        # 1. Training History
        fig.add_trace(
            go.Scatter(y=model_history.history['loss'], name='Training Loss', line=dict(color='blue')),
            row=1, col=1
        )
        fig.add_trace(
            go.Scatter(y=model_history.history['val_loss'], name='Validation Loss', line=dict(color='red')),
            row=1, col=1
        )

        # 2. Predictions vs Actual
        predictions = model_history.model.predict(X_test)
        actual_flat = y_test.flatten()
        predicted_flat = predictions.flatten()
        fig.add_trace(
            go.Scatter(x=actual_flat, y=predicted_flat,
                      mode='markers', name='Predictions vs Actual',
                      marker=dict(color='blue', opacity=0.6)),
            row=1, col=2
        )

        # Add perfect prediction line
        min_val = min(y_test.min(), predictions.min())
        max_val = max(y_test.max(), predictions.max())
        fig.add_trace(
            go.Scatter(x=[min_val, max_val], y=[min_val, max_val],
                      mode='lines', name='Perfect Prediction',
                      line=dict(color='red', dash='dash')),
            row=1, col=2
        )

        # 3. Residuals Analysis
        residuals = actual_flat - predicted_flat
        fig.add_trace(
            go.Scatter(x=predicted_flat, y=residuals,
                      mode='markers', name='Residuals',
                      marker=dict(color='green', opacity=0.6)),
            row=2, col=1
        )

        # 4. Model Metrics Table
        from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

        mse = mean_squared_error(y_test, predictions)
        mae = mean_absolute_error(y_test, predictions)
        r2 = r2_score(y_test, predictions)
        rmse = np.sqrt(mse)

        metrics_data = [
            ['MSE', f"{mse:.6f}"],
            ['RMSE', f"{rmse:.6f}"],
            ['MAE', f"{mae:.6f}"],
            ['R² Score', f"{r2:.4f}"],
            # The count comes from X_test, so it is the test-set size
            ['Test Samples', f"{len(X_test):,}"],
            ['Model Parameters', f"{model_history.model.count_params():,}"]
        ]

        fig.add_trace(
            go.Table(
                header=dict(values=['Metric', 'Value'], fill_color='lightgreen'),
                cells=dict(values=list(zip(*metrics_data)), fill_color='white')
            ),
            row=2, col=2
        )

        fig.update_layout(
            height=800,
            title_text=f"{model_name} - Model Performance Analysis",
            showlegend=True
        )

        return fig, {'mse': mse, 'mae': mae, 'r2': r2, 'rmse': rmse}

# Build the analyzer instance that the benchmarking cells call into
benchmark_analyzer = TradingBenchmark()

print("✅ Technical benchmark analyzer initialized!")
print("Available methods:")
for method_name in (
    "calculate_technical_metrics",
    "create_performance_dashboard",
    "create_model_performance_analysis",
):
    print(f"- {method_name}()")


In [None]:
# Generate Sample Trading Strategy Results for Benchmarking
print("📊 Generating sample trading strategy results for benchmarking...")


def compute_rsi_strategy_returns(price_data):
    """Return per-row returns of a simple RSI threshold strategy.

    For every row after the first, the close-to-close price change is
    turned into a strategy return:

    * RSI < 30 (oversold): full long position, ``+price_change``
    * RSI > 70 (overbought): half-size short, ``-price_change * 0.5``
    * otherwise (including a NaN RSI): flat, ``0``

    If the frame has no ``'RSI'`` column a neutral RSI of 50 is used, so
    every row is flat.

    Args:
        price_data: DataFrame with a ``'Close'`` column and optionally an
            ``'RSI'`` column.

    Returns:
        A float Series indexed by ``price_data.index[1:]``.

    NOTE(review): the signal uses the RSI of the same row whose return it
    earns. If RSI is computed from that row's close this is look-ahead
    bias -- confirm against the indicator code before trusting results.
    """
    close = price_data['Close'].to_numpy(dtype=float)
    # Close-to-close change for rows 1..n-1 (row 0 has no predecessor)
    price_change = close[1:] / close[:-1] - 1

    if 'RSI' in price_data.columns:
        rsi = price_data['RSI'].to_numpy(dtype=float)[1:]
    else:
        rsi = np.full(len(price_change), 50.0)

    # NaN RSI fails both comparisons and therefore falls through to "hold"
    returns = np.select(
        [rsi < 30, rsi > 70],
        [price_change, -price_change * 0.5],
        default=0.0,
    )
    return pd.Series(returns, index=price_data.index[1:])


# Create sample strategy returns (simulate a trading strategy)
if 'processed_data' in locals() and processed_data:
    # Use real data if available: take the first symbol in the mapping
    sample_symbol = next(iter(processed_data))
    sample_data = processed_data[sample_symbol]
    strategy_returns = compute_rsi_strategy_returns(sample_data)

else:
    # Generate synthetic strategy returns for demonstration
    np.random.seed(42)
    dates = pd.date_range(start='2022-01-01', end='2024-01-01', freq='D')
    strategy_returns = pd.Series(np.random.normal(0.0005, 0.02, len(dates)), index=dates)

# Generate benchmark returns (S&P 500 simulation).
# The global seed is set on purpose: it makes this draw reproducible when the
# cell is re-run.
np.random.seed(123)
benchmark_dates = strategy_returns.index
benchmark_returns = pd.Series(np.random.normal(0.0003, 0.015, len(benchmark_dates)), index=benchmark_dates)

print(f"✅ Generated strategy returns: {len(strategy_returns)} days")
print(f"✅ Generated benchmark returns: {len(benchmark_returns)} days")
print(f"📈 Strategy total return: {(1 + strategy_returns).prod() - 1:.2%}")
print(f"📈 Benchmark total return: {(1 + benchmark_returns).prod() - 1:.2%}")


In [None]:
# Create Comprehensive Performance Dashboard
print("🎯 Creating comprehensive performance dashboard...")

# Build the dashboard figure and collect the strategy's metrics dictionary
performance_fig, strategy_metrics = benchmark_analyzer.create_performance_dashboard(
    strategy_returns,
    benchmark_returns,
    "CNN+LSTM Trading Strategy",
)
performance_fig.show()

# Report layout: (section heading, [(label, metrics key, format spec), ...])
report_sections = [
    ("📈 Returns:", [
        ("Total Return", 'total_return', ".2%"),
        ("Annualized Return", 'annualized_return', ".2%"),
    ]),
    ("\n📊 Risk Metrics:", [
        ("Volatility", 'volatility', ".2%"),
        ("Sharpe Ratio", 'sharpe_ratio', ".2f"),
        ("Sortino Ratio", 'sortino_ratio', ".2f"),
    ]),
    ("\n📉 Drawdown Analysis:", [
        ("Max Drawdown", 'max_drawdown', ".2%"),
        ("Calmar Ratio", 'calmar_ratio', ".2f"),
    ]),
    ("\n🎯 Trading Performance:", [
        ("Win Rate", 'win_rate', ".2%"),
        ("Average Win", 'avg_win', ".2%"),
        ("Average Loss", 'avg_loss', ".2%"),
        ("Profit Factor", 'profit_factor', ".2f"),
    ]),
]

# Benchmark-relative figures are reported only when they were computed
if 'alpha' in strategy_metrics:
    report_sections.append(("\n📊 Benchmark Comparison:", [
        ("Alpha", 'alpha', ".2%"),
        ("Beta", 'beta', ".2f"),
        ("Information Ratio", 'information_ratio', ".2f"),
        ("Tracking Error", 'tracking_error', ".2%"),
    ]))

# Print detailed metrics
report_rule = "=" * 60
print("\n" + report_rule)
print("📊 DETAILED PERFORMANCE METRICS")
print(report_rule)

for section_heading, section_rows in report_sections:
    print(section_heading)
    for metric_label, metric_key, metric_spec in section_rows:
        print(f"  • {metric_label}: {strategy_metrics[metric_key]:{metric_spec}}")

print(report_rule)


In [None]:
# Model Performance Analysis (runs only when training artifacts are present)
model_artifacts_ready = 'history' in locals() and 'X_test' in locals() and 'y_test' in locals()

if not model_artifacts_ready:
    print("⚠️ No trained model found. Train a model first to see model performance analysis.")
    print("💡 Run the model training cell to generate model performance metrics.")

else:
    print("🤖 Creating model performance analysis...")

    # Build the analysis figure and the error-metric dictionary
    model_fig, model_metrics = benchmark_analyzer.create_model_performance_analysis(
        history, X_test, y_test, "CNN+LSTM Model"
    )
    model_fig.show()

    # Print model metrics
    model_rule = "=" * 60
    print("\n" + model_rule)
    print("🤖 MODEL PERFORMANCE METRICS")
    print(model_rule)
    print("📊 Accuracy Metrics:")
    for metric_label, metric_key, metric_spec in (
        ("MSE", 'mse', ".6f"),
        ("RMSE", 'rmse', ".6f"),
        ("MAE", 'mae', ".6f"),
        ("R² Score", 'r2', ".4f"),
    ):
        print(f"  • {metric_label}: {model_metrics[metric_key]:{metric_spec}}")

    print("\n📈 Model Quality:")
    # Tiers are checked from best to worst; the first floor exceeded wins
    quality_tiers = (
        (0.7, "  ✅ Excellent model performance (R² > 0.7)"),
        (0.5, "  ✅ Good model performance (R² > 0.5)"),
        (0.3, "  ⚠️ Moderate model performance (R² > 0.3)"),
    )
    quality_message = "  ❌ Poor model performance (R² < 0.3)"
    for tier_floor, tier_message in quality_tiers:
        if model_metrics['r2'] > tier_floor:
            quality_message = tier_message
            break
    print(quality_message)

    print(model_rule)


In [None]:
# Additional Technical Analysis and Comparisons
print("📊 Creating additional technical analysis...")

# Return series to compare; the random series is drawn from NumPy's global RNG
random_returns = pd.Series(
    np.random.normal(0.0002, 0.025, len(strategy_returns)),
    index=strategy_returns.index,
)
strategies = {
    'CNN+LSTM Strategy': strategy_returns,
    'Buy & Hold': benchmark_returns,
    'Random Strategy': random_returns,
}

# Metrics for every strategy, each measured against the same benchmark series
comparison_metrics = {
    name: benchmark_analyzer.calculate_technical_metrics(returns, benchmark_returns)
    for name, returns in strategies.items()
}

# One pre-formatted table row per strategy
comparison_data = [
    [
        strategy_name,
        f"{metrics['total_return']:.2%}",
        f"{metrics['annualized_return']:.2%}",
        f"{metrics['volatility']:.2%}",
        f"{metrics['sharpe_ratio']:.2f}",
        f"{metrics['max_drawdown']:.2%}",
        f"{metrics['win_rate']:.2%}",
    ]
    for strategy_name, metrics in comparison_metrics.items()
]

# 2x2 figure: table in the top-left cell, ordinary x/y plots elsewhere
panel_titles = [
    'Strategy Comparison Table',
    'Risk-Return Scatter',
    'Cumulative Returns Comparison',
    'Rolling Sharpe Comparison',
]
panel_specs = [
    [{"type": "table"}, {"secondary_y": False}],
    [{"secondary_y": False}, {"secondary_y": False}],
]
fig = make_subplots(rows=2, cols=2, subplot_titles=panel_titles, specs=panel_specs)

# 1. Comparison Table (rows are transposed into per-column value tuples)
table_header = ['Strategy', 'Total Return', 'Annual Return', 'Volatility', 'Sharpe', 'Max DD', 'Win Rate']
table_columns = list(zip(*comparison_data))
fig.add_trace(
    go.Table(
        header=dict(values=table_header, fill_color='lightblue'),
        cells=dict(values=table_columns, fill_color='white'),
    ),
    row=1, col=1,
)

# One colour per strategy, shared by all three plots
colors = ['blue', 'red', 'green']

# 2. Risk-Return Scatter: a single marker per strategy
for color, (strategy_name, metrics) in zip(colors, comparison_metrics.items()):
    risk_return_point = go.Scatter(
        x=[metrics['volatility']],
        y=[metrics['annualized_return']],
        mode='markers',
        marker=dict(size=15, color=color),
        name=strategy_name,
    )
    fig.add_trace(risk_return_point, row=1, col=2)

# 3. Cumulative Returns Comparison: growth of 1 unit, compounded
for color, (strategy_name, returns) in zip(colors, strategies.items()):
    cumulative = (1 + returns).cumprod()
    growth_line = go.Scatter(
        x=returns.index,
        y=cumulative.values,
        name=strategy_name,
        line=dict(color=color),
    )
    fig.add_trace(growth_line, row=2, col=1)

# 4. Rolling Sharpe Comparison: 252-row window, annualised with sqrt(252)
for color, (strategy_name, returns) in zip(colors, strategies.items()):
    window = returns.rolling(window=252)
    rolling_sharpe = window.mean() / window.std() * np.sqrt(252)
    sharpe_line = go.Scatter(
        x=returns.index,
        y=rolling_sharpe.values,
        name=f'{strategy_name} Sharpe',
        line=dict(color=color),
    )
    fig.add_trace(sharpe_line, row=2, col=2)

fig.update_layout(
    height=1000,
    title_text="Strategy Comparison Analysis",
    showlegend=True,
)

fig.show()

# Print strategy ranking
ranking_rule = "=" * 60
print("\n" + ranking_rule)
print("🏆 STRATEGY RANKING")
print(ranking_rule)

# Rank by Sharpe ratio, best first
ranked_strategies = sorted(comparison_metrics.items(), key=lambda x: x[1]['sharpe_ratio'], reverse=True)

medals = ("🥇", "🥈", "🥉")
for position, (strategy_name, metrics) in enumerate(ranked_strategies):
    # Places beyond third get a generic marker
    rank_emoji = medals[position] if position < len(medals) else "📊"
    print(f"{rank_emoji} #{position + 1}: {strategy_name}")
    print(f"   Sharpe Ratio: {metrics['sharpe_ratio']:.2f}")
    print(f"   Annual Return: {metrics['annualized_return']:.2%}")
    print(f"   Max Drawdown: {metrics['max_drawdown']:.2%}")
    print()

print(ranking_rule)


## 7. Model Management and Versioning


In [None]:
# Model management and versioning utilities

def list_saved_models():
    """Print every saved ``.h5`` model found in the known storage locations.

    Three places are inspected in order: ``cnn_lstm_model.h5`` in the working
    directory, the ``Trading_Strategy_ML`` folder on a mounted Google Drive,
    and the ``models`` directory (reported under the "GitHub" label).
    Any error raised while accessing Google Drive is printed instead of
    propagated. Nothing is returned.
    """
    print("🔍 Searching for saved models...")

    # Working directory: a single well-known file name
    if os.path.exists('cnn_lstm_model.h5'):
        print("📁 Local: cnn_lstm_model.h5")

    # Google Drive: import and mount inside the try so failures are only reported
    try:
        from google.colab import drive
        drive.mount('/content/drive')
        drive_folder = '/content/drive/MyDrive/Trading_Strategy_ML'
        if not os.path.exists(drive_folder):
            print("☁️ Google Drive: Trading_Strategy_ML folder not found")
        else:
            drive_models = sorted(
                entry for entry in os.listdir(drive_folder) if entry.endswith('.h5')
            )
            if not drive_models:
                print("☁️ Google Drive: No models found")
            else:
                print(f"☁️ Google Drive ({len(drive_models)} models):")
                for model_name in drive_models:
                    print(f"   - {model_name}")
    except Exception as e:
        print(f"☁️ Google Drive: Error accessing - {e}")

    # Repository checkout: the models/ directory
    if not os.path.exists('models'):
        print("🐙 GitHub: models/ directory not found")
        return

    git_models = sorted(entry for entry in os.listdir('models') if entry.endswith('.h5'))
    if not git_models:
        print("🐙 GitHub: No models found in models/ directory")
        return

    print(f"🐙 GitHub ({len(git_models)} models):")
    for model_name in git_models:
        print(f"   - {model_name}")

def create_model_backup():
    """Write a timestamped JSON metadata record for the current model.

    Only metadata is written: a ``model_metadata_<timestamp>.json`` file in
    the current working directory holding the timestamp, the TensorFlow
    version, the model type, an ISO-format date and a description. No model
    weights are copied by this function. Nothing is returned.
    """
    import json

    # Read the clock once so the file-name timestamp and 'training_date'
    # always describe the same instant
    now = datetime.now()
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    metadata_path = f'model_metadata_{timestamp}.json'

    # NOTE(review): 'training_date' records when this function ran, not when
    # the model was trained -- confirm that is the intended meaning.
    backup_info = {
        'timestamp': timestamp,
        'tensorflow_version': tf.__version__,
        'model_type': 'CNN+LSTM',
        'training_date': now.isoformat(),
        'description': 'Trading Strategy ML Model'
    }

    # Save metadata
    with open(metadata_path, 'w', encoding='utf-8') as f:
        json.dump(backup_info, f, indent=2)

    print(f"📋 Model metadata saved: {metadata_path}")
    print(f"📊 TensorFlow version: {tf.__version__}")
    print(f"🕒 Backup timestamp: {timestamp}")

def _prune_model_files(folder, keep_last_n, location_label, kept_suffix):
    """Delete all but the first ``keep_last_n`` ``.h5`` files of *folder*, taken in reverse name order."""
    # Reverse name order is treated as "newest first"; that only holds when
    # file names embed a sortable timestamp/version -- TODO confirm naming.
    models = sorted(
        (entry for entry in os.listdir(folder) if entry.endswith('.h5')),
        reverse=True,
    )

    if len(models) <= keep_last_n:
        print(f"✅ {location_label} has {len(models)} models (≤ {keep_last_n}, no cleanup needed)")
        return

    for model in models[keep_last_n:]:
        os.remove(os.path.join(folder, model))
        print(f"🗑️ Deleted old model: {model}")
    print(f"✅ Kept {keep_last_n} latest models {kept_suffix}")


def cleanup_old_models(keep_last_n=5):
    """Clean up old models, keeping only the last N versions.

    Prunes ``.h5`` files first in the ``Trading_Strategy_ML`` folder of a
    mounted Google Drive and then in the ``models`` directory. In each
    location the files are ordered by name, descending, and everything after
    the first ``keep_last_n`` entries is deleted.

    Args:
        keep_last_n: Number of model files to keep per location; must be
            zero or greater.

    Raises:
        ValueError: If ``keep_last_n`` is negative. A negative value would
            otherwise slice from the wrong end and delete the wrong files.
        OSError: If deleting a file in the ``models`` directory fails.
            Errors in the Google Drive step are printed, not raised.
    """
    if keep_last_n < 0:
        raise ValueError("keep_last_n must be non-negative")

    print(f"🧹 Cleaning up old models (keeping last {keep_last_n})...")

    # Clean Google Drive (best effort: any failure here is only reported)
    try:
        from google.colab import drive
        drive.mount('/content/drive')
        drive_folder = '/content/drive/MyDrive/Trading_Strategy_ML'

        if os.path.exists(drive_folder):
            _prune_model_files(drive_folder, keep_last_n, "Google Drive", "in Google Drive")
    except Exception as e:
        print(f"❌ Error cleaning Google Drive: {e}")

    # Clean local models directory
    if os.path.exists('models'):
        _prune_model_files('models', keep_last_n, "Local models directory", "locally")

# Run the model management utilities in sequence: list, back up, then prune
dashboard_rule = "=" * 50
section_break = "\n" + "=" * 30

print(dashboard_rule)
print("📊 MODEL MANAGEMENT DASHBOARD")
print(dashboard_rule)

list_saved_models()
print(section_break)
create_model_backup()
print(section_break)
cleanup_old_models(keep_last_n=3)
