In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
import pickle
import warnings
warnings.filterwarnings('ignore')



In [3]:
np.random.seed(42)

class TradingStrategies:
    def __init__(self, data):
        """
        Initialize with OHLC data
        data: DataFrame with columns ['Date', 'Symbol', 'Open', 'High', 'Low', 'Close', 'Volume', etc.]
        """
        self.data = data.copy()
        self.symbols = data['Symbol'].unique()
        self.dates = sorted(data['Date'].unique())
        
    def prepare_data(self):
        """Prepare data in pivot format for easier calculations"""
        self.close_prices = self.data.pivot(index='Date', columns='Symbol', values='Close')
        self.high_prices = self.data.pivot(index='Date', columns='Symbol', values='High')
        self.low_prices = self.data.pivot(index='Date', columns='Symbol', values='Low')
        self.volume_data = self.data.pivot(index='Date', columns='Symbol', values='Volume')
        
    def task1_Strategy1(self, current_date_idx):
        """
        Strategy 1: Average Weekly Returns
        - Calculate weekly returns over past 50 weeks
        - Top 6 stocks get negative weights (-1 total), bottom 6 get positive weights (+1 total)
        """
        if current_date_idx < 250:  # Need at least 50 weeks of data
            return pd.Series(0, index=self.symbols)
        
        # Get data up to current date (no lookahead bias)
        end_idx = current_date_idx
        start_idx = max(0, end_idx - 250)  # 50 weeks * 5 days
        
        weekly_returns = {}
        
        for symbol in self.symbols:
            prices = self.close_prices.iloc[start_idx:end_idx][symbol].dropna()
            if len(prices) < 250:
                weekly_returns[symbol] = 0
                continue
                
            # Calculate weekly returns for complete weeks only
            weekly_rets = []
            for week_start in range(0, len(prices) - 5, 5):
                if week_start + 5 <= len(prices):
                    week_end_price = prices.iloc[week_start + 4]
                    prev_week_end_price = prices.iloc[week_start - 1] if week_start > 0 else 1.0
                    weekly_ret = (week_end_price - prev_week_end_price) / prev_week_end_price
                    weekly_rets.append(weekly_ret)
            
            weekly_returns[symbol] = np.mean(weekly_rets) if weekly_rets else 0
        
        # Rank and assign weights
        returns_series = pd.Series(weekly_returns)
        ranked_stocks = returns_series.sort_values(ascending=False)
        
        weights = pd.Series(0.0, index=self.symbols)
        
        # Top 6 get negative weights
        top_6 = ranked_stocks.head(6).index
        weights[top_6] = -1/6
        
        # Bottom 6 get positive weights
        bottom_6 = ranked_stocks.tail(6).index
        weights[bottom_6] = 1/6
        
        return weights
    
    def task1_Strategy2(self, current_date_idx):
        """
        Strategy 2: Mean Reversion (SMA vs LMA)
        - Compare 5-day SMA vs 30-day LMA
        - Top 5 get negative weights (-1 total), bottom 5 get positive weights (+1 total)
        """
        if current_date_idx < 30:
            return pd.Series(0, index=self.symbols)
        
        relative_positions = {}
        
        for symbol in self.symbols:
            prices = self.close_prices.iloc[max(0, current_date_idx-30):current_date_idx][symbol].dropna()
            
            if len(prices) < 30:
                relative_positions[symbol] = 0
                continue
                
            lma = prices.tail(30).mean()  # 30-day LMA
            sma = prices.tail(5).mean()   # 5-day SMA
            
            relative_pos = (sma - lma) / lma if lma != 0 else 0
            relative_positions[symbol] = relative_pos
        
        # Rank and assign weights
        rel_pos_series = pd.Series(relative_positions)
        ranked_stocks = rel_pos_series.sort_values(ascending=False)
        
        weights = pd.Series(0.0, index=self.symbols)
        
        # Top 5 get negative weights
        top_5 = ranked_stocks.head(5).index
        weights[top_5] = -1/5
        
        # Bottom 5 get positive weights
        bottom_5 = ranked_stocks.tail(5).index
        weights[bottom_5] = 1/5
        
        return weights
    
    def task1_Strategy3(self, current_date_idx):
        """
        Strategy 3: Rate of Change (ROC)
        - Calculate 7-day ROC
        - Rank based on ROC (implementation needs completion based on weight assignment)
        """
        if current_date_idx < 7:
            return pd.Series(0, index=self.symbols)
        
        roc_values = {}
        
        for symbol in self.symbols:
            current_price = self.close_prices.iloc[current_date_idx-1][symbol]
            price_7_days_ago = self.close_prices.iloc[current_date_idx-8][symbol]
            
            if pd.notna(current_price) and pd.notna(price_7_days_ago) and price_7_days_ago != 0:
                roc = 100 * (current_price - price_7_days_ago) / price_7_days_ago
                roc_values[symbol] = roc
            else:
                roc_values[symbol] = 0
        
        # Rank and assign weights (assuming similar pattern to other strategies)
        roc_series = pd.Series(roc_values)
        ranked_stocks = roc_series.sort_values(ascending=False)
        
        weights = pd.Series(0.0, index=self.symbols)
        
        # Top 5 get negative weights, bottom 5 get positive weights
        top_5 = ranked_stocks.head(5).index
        weights[top_5] = -1/5
        
        bottom_5 = ranked_stocks.tail(5).index
        weights[bottom_5] = 1/5
        
        return weights
    
    def task1_Strategy4(self, current_date_idx):
        """
        Strategy 4: Support and Resistance
        - Calculate support/resistance using 21-day SMA ± 3*std
        - Complex weight assignment based on proximity
        """
        if current_date_idx < 21:
            return pd.Series(0, index=self.symbols)
        
        proximities = {}
        
        for symbol in self.symbols:
            prices = self.close_prices.iloc[max(0, current_date_idx-21):current_date_idx][symbol].dropna()
            
            if len(prices) < 21:
                proximities[symbol] = {'support': 0, 'resistance': 0}
                continue
            
            sma_21 = prices.mean()
            std_21 = prices.std()
            
            resistance = sma_21 + 3 * std_21
            support = sma_21 - 3 * std_21
            current_price = prices.iloc[-1]
            
            prox_resistance = (current_price - resistance) / resistance if resistance != 0 else 0
            prox_support = (current_price - support) / support if support != 0 else 0
            
            proximities[symbol] = {'support': prox_support, 'resistance': prox_resistance}
        
        # Complex weight assignment
        support_prox = {symbol: proximities[symbol]['support'] for symbol in self.symbols}
        resistance_prox = {symbol: proximities[symbol]['resistance'] for symbol in self.symbols}
        
        # Rank by proximity to support (increasing order)
        support_ranked = sorted(support_prox.items(), key=lambda x: x[1])
        top_4_support = [x[0] for x in support_ranked[:4]]
        
        # Remaining stocks ranked by proximity to resistance (decreasing order)
        remaining_stocks = [s for s in self.symbols if s not in top_4_support]
        resistance_remaining = {s: resistance_prox[s] for s in remaining_stocks}
        resistance_ranked = sorted(resistance_remaining.items(), key=lambda x: x[1], reverse=True)
        top_4_resistance = [x[0] for x in resistance_ranked[:4]]
        
        weights = pd.Series(0.0, index=self.symbols)
        
        # Top 4 by support proximity get positive weights
        for symbol in top_4_support:
            weights[symbol] = 1/4
            
        # Top 4 by resistance proximity get negative weights
        for symbol in top_4_resistance:
            weights[symbol] = -1/4
        
        return weights
    
    def task1_Strategy5(self, current_date_idx):
        """
        Strategy 5: Stochastic %K
        - Calculate %K = 100 * (Close - 14day_Low) / (14day_High - 14day_Low)
        - Top 3 %K get negative weights, bottom 3 get positive weights
        """
        if current_date_idx < 14:
            return pd.Series(0, index=self.symbols)
        
        k_values = {}
        
        for symbol in self.symbols:
            highs = self.high_prices.iloc[max(0, current_date_idx-14):current_date_idx][symbol].dropna()
            lows = self.low_prices.iloc[max(0, current_date_idx-14):current_date_idx][symbol].dropna()
            current_close = self.close_prices.iloc[current_date_idx-1][symbol]
            
            if len(highs) < 14 or len(lows) < 14 or pd.isna(current_close):
                k_values[symbol] = 50  # Neutral value
                continue
            
            high_14 = highs.max()
            low_14 = lows.min()
            
            if high_14 != low_14:
                k_percent = 100 * (current_close - low_14) / (high_14 - low_14)
                k_values[symbol] = k_percent
            else:
                k_values[symbol] = 50  # Neutral value
        
        # Rank and assign weights
        k_series = pd.Series(k_values)
        ranked_stocks = k_series.sort_values(ascending=False)
        
        weights = pd.Series(0.0, index=self.symbols)
        
        # Top 3 %K get negative weights
        top_3 = ranked_stocks.head(3).index
        weights[top_3] = -1/3
        
        # Bottom 3 %K get positive weights
        bottom_3 = ranked_stocks.tail(3).index
        weights[bottom_3] = 1/3
        
        return weights


In [26]:
class StrategyBacktester:
    """
    Backtesting engine to evaluate strategy performance
    """
    
    def __init__(self, data, strategies):
        self.data = data
        self.strategies = strategies
        self.results = {}
        
    def calculate_returns(self, weights_df):
        """Calculate returns for given weights"""
        close_prices = self.strategies.close_prices
        
        # Calculate daily returns
        price_returns = close_prices.pct_change().fillna(0)
        
        # Calculate portfolio returns
        portfolio_returns = []
        
        for date in weights_df.index:
            if date in price_returns.index:
                weights = weights_df.loc[date]
                returns = price_returns.loc[date]
                
                # Portfolio return = sum of (weight * stock_return)
                portfolio_return = (weights * returns).sum()
                portfolio_returns.append(portfolio_return)
            else:
                portfolio_returns.append(0)
        
        return pd.Series(portfolio_returns, index=weights_df.index)
    
    def calculate_performance_metrics(self, returns):
        """Calculate performance metrics"""
        total_return = (1 + returns).prod() - 1
        annualized_return = (1 + returns.mean())**252 - 1
        volatility = returns.std() * np.sqrt(252)
        sharpe_ratio = annualized_return / volatility if volatility != 0 else 0
        
        # Maximum drawdown
        cumulative = (1 + returns).cumprod()
        peak = cumulative.expanding().max()
        drawdown = (peak - cumulative) / peak
        max_drawdown = drawdown.max()
        
        return {
            'total_return': total_return,
            'annualized_return': annualized_return,
            'volatility': volatility,
            'sharpe_ratio': sharpe_ratio,
            'max_drawdown': max_drawdown
        }
    
    def backtest_strategy(self, strategy_name, start_idx=250, end_idx=3500):
        """Backtest a single strategy"""
        weights_list = []
        dates_list = []
        
        strategy_func = getattr(self.strategies, f'task1_{strategy_name}')
        
        for i in range(start_idx, min(end_idx, len(self.strategies.dates))):
            date = self.strategies.dates[i]
            weights = strategy_func(i)
            
            weights_list.append(weights.values)
            dates_list.append(date)
        
        weights_df = pd.DataFrame(weights_list, 
                                 index=dates_list, 
                                 columns=self.strategies.symbols)
        
        returns = self.calculate_returns(weights_df)
        performance = self.calculate_performance_metrics(returns)
        
        return weights_df, returns, performance

In [27]:
class EnsembleStrategySelector:
    """
    Ensemble strategy selection using clustering approach
    """
    
    def __init__(self, strategies, backtester):
        self.strategies = strategies
        self.backtester = backtester
        self.strategy_names = ['Strategy1', 'Strategy2', 'Strategy3', 'Strategy4', 'Strategy5']
        self.model = None
        self.scaler = StandardScaler()
        
    def extract_market_features(self, current_date_idx, lookback=20):
        """Extract market features for clustering"""
        if current_date_idx < lookback:
            return np.zeros(10)  # Return neutral features
        
        # Get recent market data
        recent_closes = self.strategies.close_prices.iloc[current_date_idx-lookback:current_date_idx]
        recent_volumes = self.strategies.volume_data.iloc[current_date_idx-lookback:current_date_idx]
        
        features = []
        
        # Market volatility
        market_returns = recent_closes.pct_change().fillna(0)
        avg_volatility = market_returns.std().mean()
        features.append(avg_volatility)
        
        # Market trend
        market_trend = (recent_closes.iloc[-1] / recent_closes.iloc[0] - 1).mean()
        features.append(market_trend)
        
        # Volume trend
        volume_trend = (recent_volumes.iloc[-1] / recent_volumes.iloc[0] - 1).mean()
        features.append(volume_trend)
        
        # Cross-sectional dispersion
        latest_returns = market_returns.iloc[-1]
        dispersion = latest_returns.std()
        features.append(dispersion)
        
        # Momentum (5-day vs 20-day)
        short_ma = recent_closes.tail(5).mean().mean()
        long_ma = recent_closes.mean().mean()
        momentum = (short_ma / long_ma - 1) if long_ma != 0 else 0
        features.append(momentum)
        
        # Additional technical features
        # RSI-like momentum
        gains = market_returns[market_returns > 0].sum().sum()
        losses = abs(market_returns[market_returns < 0].sum().sum())
        rsi = gains / (gains + losses) if (gains + losses) != 0 else 0.5
        features.append(rsi)
        
        # VIX-like fear index (volatility of volatility)
        rolling_vol = market_returns.rolling(5).std()
        vol_of_vol = rolling_vol.std().mean()
        features.append(vol_of_vol)
        
        # Market correlation
        corr_matrix = market_returns.corr()
        avg_correlation = corr_matrix.mean().mean()
        features.append(avg_correlation)
        
        # Skewness and Kurtosis
        skewness = market_returns.skew().mean()
        kurtosis = market_returns.kurtosis().mean()
        features.extend([skewness, kurtosis])
        
        return np.array(features)
    
    def train_ensemble_selector(self, train_start=250, train_end=3500, validation_window=50):
        """
        Train the ensemble selector using clustering approach
        """
        print("Training ensemble strategy selector...")
        
        # Step 1: Evaluate all strategies on rolling windows
        strategy_performances = []
        market_features_list = []
        best_strategies = []
        
        for window_start in range(train_start, train_end - validation_window, validation_window):
            window_end = window_start + validation_window
            
            # Extract market features for this period
            features = self.extract_market_features(window_start)
            market_features_list.append(features)
            
            # Evaluate all strategies in this window
            window_performances = []
            
            for strategy_name in self.strategy_names:
                _, returns, performance = self.backtester.backtest_strategy(
                    strategy_name, window_start, window_end
                )
                
                # Focus on Sharpe ratio as primary metric
                sharpe = performance['sharpe_ratio']
                total_ret = performance['total_return']
                max_dd = performance['max_drawdown']
                
                # Combined score (you can adjust weights)
                combined_score = sharpe * 0.6 + total_ret * 0.3 - abs(max_dd) * 0.1
                window_performances.append(combined_score)
            
            strategy_performances.append(window_performances)
            
            # Find best strategy for this period
            best_strategy_idx = np.argmax(window_performances)
            best_strategies.append(best_strategy_idx)
        
        # Step 2: Create feature matrix
        X = np.array(market_features_list)
        y = np.array(best_strategies)
        
        # Scale features
        X_scaled = self.scaler.fit_transform(X)
        
        # Step 3: Cluster market conditions and map to best strategies
        n_clusters = 5  # One for each strategy
        
        self.model = KMeans(n_clusters=n_clusters, random_state=42)
        clusters = self.model.fit_predict(X_scaled)
        
        # Map each cluster to the most successful strategy
        self.cluster_strategy_map = {}
        
        for cluster_id in range(n_clusters):
            cluster_mask = clusters == cluster_id
            if cluster_mask.sum() > 0:
                cluster_strategies = np.array(best_strategies)[cluster_mask]
                # Most common strategy in this cluster
                most_common_strategy = np.bincount(cluster_strategies).argmax()
                self.cluster_strategy_map[cluster_id] = most_common_strategy
            else:
                self.cluster_strategy_map[cluster_id] = 0  # Default to Strategy1
        
        print(f"Training completed. Cluster-Strategy mapping: {self.cluster_strategy_map}")
        
        return X_scaled, clusters, best_strategies
    
    def predict_best_strategy(self, current_date_idx):
        """Predict best strategy for current market conditions"""
        if self.model is None:
            return 0  # Default to Strategy1
        
        # Extract current market features
        features = self.extract_market_features(current_date_idx)
        features_scaled = self.scaler.transform(features.reshape(1, -1))
        
        # Predict cluster
        cluster = self.model.predict(features_scaled)[0]
        
        # Map to strategy
        strategy_idx = self.cluster_strategy_map.get(cluster, 0)
        
        return strategy_idx

In [38]:
def task1(train_data_path='train_data.csv', test_data_path='test_data.csv'):
    """
    Task 1: Implement and test all 5 strategies on both train and test data
    """
    print("=== TASK 1: Individual Strategy Implementation ===")

    # Load training data for strategy development
    train_data = pd.read_csv(train_data_path)
    train_data = train_data.iloc[:, 1:]
    
    print(f"Training data shape: {train_data.shape}")
    
    # Load test data for backtesting
    test_data = pd.read_csv(test_data_path)
    test_data = test_data.iloc[:, 1:]
    
    print(f"Test data shape: {test_data.shape}")
    
    # Initialize strategies with test data for backtesting
    strategies = TradingStrategies(test_data)
    strategies.prepare_data()
    
    backtester = StrategyBacktester(test_data, strategies)
    
    # Test all strategies on test data
    results = {}
    for strategy_name in ['Strategy1', 'Strategy2', 'Strategy3', 'Strategy4', 'Strategy5']:
        print(f"\nBacktesting {strategy_name} on test data...")
        weights_df, returns, performance = backtester.backtest_strategy(strategy_name)
        results[strategy_name] = {
            'weights': weights_df,
            'returns': returns,
            'performance': performance
        }
        
        print(f"\n{strategy_name} Performance on Test Data:")
        for metric, value in performance.items():
            print(f"  {metric}: {value:.4f}")
    
    return results

In [39]:
def task2(train_data_path='train_data.csv', test_data_path='test_data.csv'):
    """
    Task 2: Train ensemble on training data, backtest on test data
    """
    print("=== TASK 2: Ensemble Strategy Selection ===")
    
    # Load training data
    train_data = pd.read_csv(train_data_path)
    train_data = train_data.iloc[:, 1:]
    
    print(f"Training data shape: {train_data.shape}")
    
    # Load test data
    test_data = pd.read_csv(test_data_path)
    test_data = test_data.iloc[:, 1:]
    
    print(f"Test data shape: {test_data.shape}")
    
    # TRAINING PHASE: Train ensemble selector on training data
    print("\n--- TRAINING PHASE ---")
    train_strategies = TradingStrategies(train_data)
    train_strategies.prepare_data()
    
    train_backtester = StrategyBacktester(train_data, train_strategies)
    ensemble_selector = EnsembleStrategySelector(train_strategies, train_backtester)
    
    # Train ensemble selector on training data only
    print("Training ensemble selector on training data...")
    X_scaled, clusters, best_strategies = ensemble_selector.train_ensemble_selector(
        train_start=250, 
        train_end=len(train_strategies.dates),  # Use all available training data
        validation_window=50
    )
    
    print(f"Training completed. Cluster-Strategy mapping: {ensemble_selector.cluster_strategy_map}")
    
    # TESTING PHASE: Apply trained model to test data
    print("\n--- TESTING PHASE ---")
    test_strategies = TradingStrategies(test_data)
    test_strategies.prepare_data()
    
    test_backtester = StrategyBacktester(test_data, test_strategies)
    
    # Apply trained ensemble selector to test data
    # We need to transfer the trained model to work with test data
    test_ensemble_selector = EnsembleStrategySelector(test_strategies, test_backtester)
    test_ensemble_selector.model = ensemble_selector.model
    test_ensemble_selector.scaler = ensemble_selector.scaler
    test_ensemble_selector.cluster_strategy_map = ensemble_selector.cluster_strategy_map
    test_ensemble_selector.strategy_names = ensemble_selector.strategy_names
    
    # Generate ensemble weights for test period
    ensemble_weights = []
    ensemble_dates = []
    strategy_selections = []
    
    print("Generating predictions on test data...")
    
    test_start = 250 if len(test_strategies.dates) > 250 else 50  # Adjust if test data is shorter
    
    for i in range(test_start, len(test_strategies.dates)):
        date = test_strategies.dates[i]
        
        # Predict best strategy using trained model
        best_strategy_idx = test_ensemble_selector.predict_best_strategy(i)
        strategy_name = test_ensemble_selector.strategy_names[best_strategy_idx]
        
        # Get weights from selected strategy
        strategy_func = getattr(test_strategies, f'task1_{strategy_name}')
        weights = strategy_func(i)
        
        ensemble_weights.append(weights.values)
        ensemble_dates.append(date)
        strategy_selections.append(strategy_name)
        
        if i % 100 == 0:
            print(f"  Processed {i-test_start}/{len(test_strategies.dates)-test_start} test days")
    
    # Create weights DataFrame
    ensemble_weights_df = pd.DataFrame(
        ensemble_weights, 
        index=ensemble_dates, 
        columns=test_strategies.symbols
    )
    
    # Calculate ensemble performance on test data
    ensemble_returns = test_backtester.calculate_returns(ensemble_weights_df)
    ensemble_performance = test_backtester.calculate_performance_metrics(ensemble_returns)
    
    # Save ensemble_performance as CSV
    ensemble_performance_df = pd.DataFrame([ensemble_performance])
    ensemble_performance_df.to_csv('backtest_performance_metrics.csv', index=False)
    
    print("\nEnsemble Strategy Performance on Test Data:")
    for metric, value in ensemble_performance.items():
        print(f"  {metric}: {value:.4f}")
    
    print(f"\nStrategy Selection Distribution on Test Data:")
    selection_counts = pd.Series(strategy_selections).value_counts()
    for strategy, count in selection_counts.items():
        pct = count / len(strategy_selections) * 100
        print(f"  {strategy}: {count} days ({pct:.1f}%)")
    
    # Save outputs
    ensemble_weights_df.to_csv('ensemble_weights.csv')
    
    # Save trained model (this is the model trained on training data)
    model_data = {
        'model': ensemble_selector.model,
        'scaler': ensemble_selector.scaler,
        'cluster_strategy_map': ensemble_selector.cluster_strategy_map,
        'strategy_names': ensemble_selector.strategy_names,
        'training_data_info': {
            'train_data_shape': train_data.shape,
            'test_data_shape': test_data.shape,
            'train_period': (250, len(train_strategies.dates)),
            'test_period': (test_start, len(test_strategies.dates))
        }
    }
    
    with open('ensemble_model.pkl', 'wb') as f:
        pickle.dump(model_data, f)
    
    # Performance CSV (test data results)
    performance_df = pd.DataFrame({
        'Date': ensemble_dates,
        'Returns': ensemble_returns.values,
        'Cumulative_Returns': (1 + ensemble_returns).cumprod().values,
        'Selected_Strategy': strategy_selections
    })
    performance_df.to_csv('ensemble_performance.csv', index=False)
    
    return ensemble_weights_df, ensemble_performance, model_data

In [40]:
def plot_strategy_comparison(results):
    """Plot comparison of strategy performances"""
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    
    strategies = list(results.keys())
    metrics = ['sharpe_ratio', 'total_return', 'volatility', 'max_drawdown']
    
    for i, metric in enumerate(metrics):
        ax = axes[i//2, i%2]
        values = [results[strategy]['performance'][metric] for strategy in strategies]
        ax.bar(strategies, values)
        ax.set_title(f'{metric.replace("_", " ").title()}')
        ax.tick_params(axis='x', rotation=45)
    
    plt.tight_layout()
    plt.show()

In [41]:
def analyze_ensemble_selections(strategy_selections):
    """Analyze which strategies were selected when"""
    selection_counts = pd.Series(strategy_selections).value_counts()
    
    plt.figure(figsize=(10, 6))
    selection_counts.plot(kind='bar')
    plt.title('Strategy Selection Frequency')
    plt.ylabel('Number of Days Selected')
    plt.xticks(rotation=45)
    plt.show()
    
    return selection_counts

In [42]:

def run_complete_pipeline(train_data_path='train_data.csv', test_data_path='test_data.csv'):
    """
    Run complete pipeline: train on training data, test on test data
    """
    print("RUNNING COMPLETE TRADING PIPELINE")
    print("=" * 50)
    
    # Task 1: Individual strategies on test data
    print("\n1. Running individual strategies on test data...")
    task1_results = task1(train_data_path, test_data_path)
    
    # Task 2: Train ensemble on training data, test on test data
    print("\n2. Training ensemble on training data and testing on test data...")
    ensemble_weights_df, ensemble_performance, model_data = task2(train_data_path, test_data_path)
    
    print("\n" + "=" * 50)
    print("PIPELINE COMPLETED SUCCESSFULLY")
    print("=" * 50)
    
    print("\nGenerated Files:")
    print("1. ensemble_weights.csv - Portfolio weights from test data")
    print("2. ensemble_performance.csv - Performance metrics from test data") 
    print("3. backtest_performance_metrics.csv - Summary performance metrics")
    print("4. ensemble_model.pkl - Trained model (trained on training data)")
    
    return task1_results, ensemble_weights_df, ensemble_performance, model_data



if __name__ == "__main__":
    task1_results, ensemble_weights, ensemble_perf, model = run_complete_pipeline(
        train_data_path='train_data.csv',
        test_data_path='test_data.csv'
    )


RUNNING COMPLETE TRADING PIPELINE

1. Running individual strategies on test data...
=== TASK 1: Individual Strategy Implementation ===
Training data shape: (70000, 14)
Test data shape: (11120, 14)

Backtesting Strategy1 on test data...

Strategy1 Performance on Test Data:
  total_return: 0.4139
  annualized_return: 0.3803
  volatility: 0.2763
  sharpe_ratio: 1.3761
  max_drawdown: 0.1682

Backtesting Strategy2 on test data...

Strategy2 Performance on Test Data:
  total_return: -0.0832
  annualized_return: -0.0315
  volatility: 0.2811
  sharpe_ratio: -0.1122
  max_drawdown: 0.2353

Backtesting Strategy3 on test data...

Strategy3 Performance on Test Data:
  total_return: 0.0526
  annualized_return: 0.0825
  volatility: 0.2730
  sharpe_ratio: 0.3021
  max_drawdown: 0.2776

Backtesting Strategy4 on test data...

Strategy4 Performance on Test Data:
  total_return: -0.0458
  annualized_return: -0.0073
  volatility: 0.2500
  sharpe_ratio: -0.0293
  max_drawdown: 0.1940

Backtesting Strategy

In [43]:
! pip install -q hmmlearn

In [45]:
import numpy as np
import pandas as pd
from hmmlearn import hmm
from sklearn.preprocessing import StandardScaler
import pickle
import matplotlib.pyplot as plt

class HMMStrategySelector:
    """
    Hidden Markov Model for trading strategy selection
    """
    
    def __init__(self, n_regimes=3, covariance_type='full'):
        self.n_regimes = n_regimes
        self.model = hmm.GaussianHMM(
            n_components=n_regimes, 
            covariance_type=covariance_type,
            n_iter=100,
            random_state=42
        )
        self.scaler = StandardScaler()
        self.regime_strategy_map = {}
        self.strategy_names = ['Strategy1', 'Strategy2', 'Strategy3', 'Strategy4', 'Strategy5']
        self.regime_names = {
            0: "Bull Market",
            1: "Bear Market", 
            2: "Volatile/Sideways"
        }
        self.is_trained = False
        
    def extract_regime_features(self, strategies, current_idx, lookback=20):
        """
        Extract features that help identify market regimes
        """
        if current_idx < lookback:
            return np.zeros(8)  # Return neutral features for early indices
        
        # Get recent price data
        recent_prices = strategies.close_prices.iloc[current_idx-lookback:current_idx]
        
        features = []
        
        # 1. Average returns
        returns = recent_prices.pct_change().fillna(0)
        avg_return = returns.mean().mean()
        features.append(avg_return)
        
        # 2. Volatility (key regime indicator)
        volatility = returns.std().mean()
        features.append(volatility)
        
        # 3. Trend strength (directional persistence)
        trend_strength = (recent_prices.iloc[-1] / recent_prices.iloc[0] - 1).mean()
        features.append(trend_strength)
        
        # 4. Market correlation (risk-on vs risk-off)
        correlation_matrix = returns.corr().fillna(0)
        avg_correlation = correlation_matrix.values[np.triu_indices_from(correlation_matrix.values, k=1)].mean()
        features.append(avg_correlation)
        
        # 5. Skewness (tail risk indicator)
        skewness = returns.skew().mean()
        if np.isnan(skewness):
            skewness = 0
        features.append(skewness)
        
        # 6. Kurtosis (extreme moves indicator)  
        kurtosis = returns.kurtosis().mean()
        if np.isnan(kurtosis):
            kurtosis = 0
        features.append(kurtosis)
        
        # 7. Volume trend (using price as proxy if volume not available)
        volume_proxy = recent_prices.sum(axis=1)  # Sum of all prices as volume proxy
        volume_trend = (volume_proxy.iloc[-1] / volume_proxy.iloc[0] - 1) if volume_proxy.iloc[0] != 0 else 0
        features.append(volume_trend)
        
        # 8. Volatility of volatility
        rolling_vol = returns.rolling(5).std()
        vol_of_vol = rolling_vol.std().mean()
        if np.isnan(vol_of_vol):
            vol_of_vol = 0
        features.append(vol_of_vol)
        
        return np.array(features)
    
    def train_hmm_selector(self, strategies, backtester, train_start=250, train_end=None, window_size=50):
        """
        Train HMM to detect market regimes and map them to best strategies
        """
        print("Training HMM regime detector...")
        
        if train_end is None:
            train_end = len(strategies.dates) - window_size
        
        # Step 1: Extract features and evaluate strategies over rolling windows
        feature_matrix = []
        best_strategy_sequence = []
        regime_performance_history = []
        
        for window_start in range(train_start, train_end, window_size):
            window_end = min(window_start + window_size, len(strategies.dates))
            
            if window_end - window_start < 20:  # Skip if window too small
                continue
                
            # Extract regime features for this window (use midpoint)
            feature_idx = window_start + (window_end - window_start) // 2
            features = self.extract_regime_features(strategies, feature_idx)
            feature_matrix.append(features)
            
            # Evaluate all strategies in this window
            strategy_performances = {}
            
            for i, strategy_name in enumerate(self.strategy_names):
                try:
                    # Backtest strategy for this window
                    strategy_weights = []
                    strategy_dates = []
                    
                    for j in range(window_start, window_end):
                        if j < len(strategies.dates):
                            strategy_func = getattr(strategies, f'task1_{strategy_name}')
                            weights = strategy_func(j)
                            strategy_weights.append(weights.values)
                            strategy_dates.append(strategies.dates[j])
                    
                    if len(strategy_weights) > 0:
                        weights_df = pd.DataFrame(
                            strategy_weights,
                            index=strategy_dates,
                            columns=strategies.symbols
                        )
                        
                        # Calculate returns and performance
                        returns = backtester.calculate_returns(weights_df)
                        performance = backtester.calculate_performance_metrics(returns)
                        
                        # Combined performance score
                        sharpe = performance.get('sharpe_ratio', 0)
                        total_ret = performance.get('total_return', 0)
                        max_dd = performance.get('max_drawdown', 0)
                        
                        # Weighted score (adjust weights as needed)
                        score = sharpe * 0.5 + total_ret * 0.3 - abs(max_dd) * 0.2
                        strategy_performances[i] = score
                    else:
                        strategy_performances[i] = 0
                        
                except Exception as e:
                    print(f"Error evaluating {strategy_name} in window {window_start}-{window_end}: {e}")
                    strategy_performances[i] = 0
            
            # Find best strategy for this window
            if strategy_performances:
                best_strategy_idx = max(strategy_performances, key=strategy_performances.get)
                best_strategy_sequence.append(best_strategy_idx)
                regime_performance_history.append(strategy_performances)
            
        if len(feature_matrix) == 0:
            raise ValueError("No valid training windows found. Check data length and parameters.")
        
        # Step 2: Scale features
        feature_matrix = np.array(feature_matrix)
        
        # Handle NaN/Inf values
        feature_matrix = np.nan_to_num(feature_matrix, nan=0.0, posinf=1.0, neginf=-1.0)
        
        feature_matrix_scaled = self.scaler.fit_transform(feature_matrix)
        
        # Step 3: Fit HMM to identify regimes
        print(f"Fitting HMM with {self.n_regimes} regimes on {len(feature_matrix)} windows...")
        self.model.fit(feature_matrix_scaled)
        
        # Step 4: Predict regimes for training data
        predicted_regimes = self.model.predict(feature_matrix_scaled)
        
        # Step 5: Map each regime to its most successful strategy
        for regime_id in range(self.n_regimes):
            regime_mask = predicted_regimes == regime_id
            
            if regime_mask.sum() > 0:
                # Get strategies used in this regime
                regime_strategies = np.array(best_strategy_sequence)[regime_mask]
                
                # Find most common strategy in this regime
                strategy_counts = np.bincount(regime_strategies, minlength=len(self.strategy_names))
                most_common_strategy = np.argmax(strategy_counts)
                
                self.regime_strategy_map[regime_id] = most_common_strategy
                
                # Calculate regime statistics
                regime_performances = np.array(regime_performance_history)[regime_mask]
                avg_performance = np.mean([perf[most_common_strategy] for perf in regime_performances])
                
                print(f"Regime {regime_id} ({self.regime_names.get(regime_id, f'Regime_{regime_id}')}):")
                print(f"  Best Strategy: {self.strategy_names[most_common_strategy]}")
                print(f"  Occurrences: {regime_mask.sum()} windows ({regime_mask.sum()/len(predicted_regimes)*100:.1f}%)")
                print(f"  Avg Performance: {avg_performance:.4f}")
            else:
                # Default mapping if regime never occurs
                self.regime_strategy_map[regime_id] = 0
                print(f"Regime {regime_id}: No occurrences, defaulting to {self.strategy_names[0]}")
        
        self.is_trained = True
        
        return feature_matrix_scaled, predicted_regimes, best_strategy_sequence
    
    def predict_current_regime(self, strategies, current_idx):
        """
        Predict current market regime
        """
        if not self.is_trained:
            raise ValueError("Model must be trained first!")
        
        # Extract current features
        current_features = self.extract_regime_features(strategies, current_idx)
        
        # Handle NaN/Inf values
        current_features = np.nan_to_num(current_features, nan=0.0, posinf=1.0, neginf=-1.0)
        
        # Scale features
        current_features_scaled = self.scaler.transform(current_features.reshape(1, -1))
        
        # Predict regime
        predicted_regime = self.model.predict(current_features_scaled)[0]
        
        # Get regime probabilities
        regime_probs = self.model.predict_proba(current_features_scaled)[0]
        
        return predicted_regime, regime_probs
    
    def predict_best_strategy(self, strategies, current_idx):
        """
        Select best strategy based on predicted regime
        """
        predicted_regime, regime_probs = self.predict_current_regime(strategies, current_idx)
        
        # Get mapped strategy for this regime
        selected_strategy_idx = self.regime_strategy_map.get(predicted_regime, 0)
        
        return selected_strategy_idx, predicted_regime, regime_probs

In [46]:
def task2(train_data_path='train_data.csv', test_data_path='test_data.csv'):
    """
    Task 2: Train HMM ensemble on training data, backtest on test data
    """
    print("=== TASK 2: HMM-Based Strategy Selection ===")
    
    # Load training data
    train_data = pd.read_csv(train_data_path)
    train_data = train_data.iloc[:, 1:]
    
    print(f"Training data shape: {train_data.shape}")
    
    # Load test data
    test_data = pd.read_csv(test_data_path)
    test_data = test_data.iloc[:, 1:]
    
    print(f"Test data shape: {test_data.shape}")
    
    # TRAINING PHASE: Train HMM selector on training data
    print("\n--- TRAINING PHASE ---")
    train_strategies = TradingStrategies(train_data)
    train_strategies.prepare_data()
    
    train_backtester = StrategyBacktester(train_data, train_strategies)
    hmm_selector = HMMStrategySelector(n_regimes=3)
    
    # Train HMM selector on training data only
    print("Training HMM selector on training data...")
    features, regimes, best_strategies = hmm_selector.train_hmm_selector(
        train_strategies, 
        train_backtester,
        train_start=250, 
        train_end=len(train_strategies.dates) - 50,  # Leave some buffer
        window_size=50
    )
    
    print(f"Training completed. Regime-Strategy mapping: {hmm_selector.regime_strategy_map}")
    
    # TESTING PHASE: Apply trained model to test data
    print("\n--- TESTING PHASE ---")
    test_strategies = TradingStrategies(test_data)
    test_strategies.prepare_data()
    
    test_backtester = StrategyBacktester(test_data, test_strategies)
    
    # Apply trained HMM selector to test data
    test_hmm_selector = HMMStrategySelector(n_regimes=3)
    test_hmm_selector.model = hmm_selector.model
    test_hmm_selector.scaler = hmm_selector.scaler
    test_hmm_selector.regime_strategy_map = hmm_selector.regime_strategy_map
    test_hmm_selector.strategy_names = hmm_selector.strategy_names
    test_hmm_selector.regime_names = hmm_selector.regime_names
    test_hmm_selector.is_trained = True
    
    # Generate ensemble weights for test period
    ensemble_weights = []
    ensemble_dates = []
    strategy_selections = []
    regime_predictions = []
    regime_confidences = []
    
    print("Generating predictions on test data...")
    
    # Fixed lookback requirement
    LOOKBACK_REQUIRED = 250
    
    if len(test_strategies.dates) < LOOKBACK_REQUIRED:
        raise ValueError(f"Test data insufficient. Need at least {LOOKBACK_REQUIRED} days, got {len(test_strategies.dates)}")
    
    for i in range(LOOKBACK_REQUIRED, len(test_strategies.dates)):
        date = test_strategies.dates[i]
        
        # Predict best strategy using trained HMM model
        best_strategy_idx, predicted_regime, regime_probs = test_hmm_selector.predict_best_strategy(test_strategies, i)
        strategy_name = test_hmm_selector.strategy_names[best_strategy_idx]
        
        # Get weights from selected strategy
        strategy_func = getattr(test_strategies, f'task1_{strategy_name}')
        weights = strategy_func(i)
        
        ensemble_weights.append(weights.values)
        ensemble_dates.append(date)
        strategy_selections.append(strategy_name)
        regime_predictions.append(predicted_regime)
        regime_confidences.append(max(regime_probs))
        
        if i % 100 == 0:
            regime_name = test_hmm_selector.regime_names.get(predicted_regime, f'Regime_{predicted_regime}')
            print(f"  Day {i-LOOKBACK_REQUIRED+1}/{len(test_strategies.dates)-LOOKBACK_REQUIRED}: {regime_name} -> {strategy_name} (conf: {max(regime_probs):.3f})")
    
    # Create weights DataFrame
    ensemble_weights_df = pd.DataFrame(
        ensemble_weights, 
        index=ensemble_dates, 
        columns=test_strategies.symbols
    )
    
    # Calculate ensemble performance on test data
    ensemble_returns = test_backtester.calculate_returns(ensemble_weights_df)
    ensemble_performance = test_backtester.calculate_performance_metrics(ensemble_returns)
    
    # Save ensemble_performance as CSV
    ensemble_performance_df = pd.DataFrame([ensemble_performance])
    ensemble_performance_df.to_csv('backtest_performance_metrics_hmm.csv', index=False)
    
    print("\nHMM Ensemble Strategy Performance on Test Data:")
    for metric, value in ensemble_performance.items():
        print(f"  {metric}: {value:.4f}")
    
    print(f"\nStrategy Selection Distribution on Test Data:")
    selection_counts = pd.Series(strategy_selections).value_counts()
    for strategy, count in selection_counts.items():
        pct = count / len(strategy_selections) * 100
        print(f"  {strategy}: {count} days ({pct:.1f}%)")
    
    print(f"\nRegime Distribution on Test Data:")
    regime_counts = pd.Series(regime_predictions).value_counts()
    for regime_id, count in regime_counts.items():
        pct = count / len(regime_predictions) * 100
        regime_name = test_hmm_selector.regime_names.get(regime_id, f'Regime_{regime_id}')
        print(f"  {regime_name}: {count} days ({pct:.1f}%)")
    
    print(f"\nRegime-Strategy Mapping:")
    for regime_id, strategy_idx in test_hmm_selector.regime_strategy_map.items():
        regime_name = test_hmm_selector.regime_names.get(regime_id, f'Regime_{regime_id}')
        strategy_name = test_hmm_selector.strategy_names[strategy_idx]
        print(f"  {regime_name} -> {strategy_name}")
    
    # Save outputs
    ensemble_weights_df.to_csv('ensemble_weights_hmm.csv')
    
    # Save trained HMM model
    model_data = {
        'hmm_model': hmm_selector.model,
        'scaler': hmm_selector.scaler,
        'regime_strategy_map': hmm_selector.regime_strategy_map,
        'strategy_names': hmm_selector.strategy_names,
        'regime_names': hmm_selector.regime_names,
        'n_regimes': hmm_selector.n_regimes,
        'training_data_info': {
            'train_data_shape': train_data.shape,
            'test_data_shape': test_data.shape,
            'train_period': (250, len(train_strategies.dates)),
            'test_period': (LOOKBACK_REQUIRED, len(test_strategies.dates))
        }
    }
    
    with open('ensemble_model_hmm.pkl', 'wb') as f:
        pickle.dump(model_data, f)
    
    # Enhanced performance CSV with regime information
    performance_df = pd.DataFrame({
        'Date': ensemble_dates,
        'Returns': ensemble_returns.values,
        'Cumulative_Returns': (1 + ensemble_returns).cumprod().values,
        'Selected_Strategy': strategy_selections,
        'Predicted_Regime': regime_predictions,
        'Regime_Confidence': regime_confidences,
        'Regime_Name': [test_hmm_selector.regime_names.get(r, f'Regime_{r}') for r in regime_predictions]
    })
    performance_df.to_csv('ensemble_performance_hmm.csv', index=False)
    
    return ensemble_weights_df, ensemble_performance, model_data




In [47]:
if __name__ == "__main__":
    # Run HMM-based ensemble strategy
    ensemble_weights, ensemble_performance, model_data = task2(
        train_data_path='train_data.csv',
        test_data_path='test_data.csv'
    )
    
    print("\nHMM-based ensemble strategy completed successfully!")

=== TASK 2: HMM-Based Strategy Selection ===
Training data shape: (70000, 14)
Test data shape: (11120, 14)

--- TRAINING PHASE ---
Training HMM selector on training data...
Training HMM regime detector...
Fitting HMM with 3 regimes on 64 windows...
Regime 0 (Bull Market):
  Best Strategy: Strategy2
  Occurrences: 22 windows (34.4%)
  Avg Performance: 1.8946
Regime 1 (Bear Market):
  Best Strategy: Strategy3
  Occurrences: 36 windows (56.2%)
  Avg Performance: 1.6031
Regime 2 (Volatile/Sideways):
  Best Strategy: Strategy2
  Occurrences: 6 windows (9.4%)
  Avg Performance: 3.9760
Training completed. Regime-Strategy mapping: {0: np.int64(1), 1: np.int64(2), 2: np.int64(1)}

--- TESTING PHASE ---
Generating predictions on test data...
  Day 51/306: Bull Market -> Strategy2 (conf: 1.000)
  Day 151/306: Bull Market -> Strategy2 (conf: 1.000)
  Day 251/306: Bull Market -> Strategy2 (conf: 1.000)

HMM Ensemble Strategy Performance on Test Data:
  total_return: -0.0832
  annualized_return: -0.

In [74]:
import numpy as np
import pandas as pd
from collections import defaultdict, deque
import random
import pickle

class MultiAgentRLSelector:
    """
    Multiple RL agents compete and learn from each other
    Each agent has a personality that influences strategy selection
    """
    
    def __init__(self, n_agents=5, learning_rate=0.1, epsilon=0.1):
        self.n_agents = n_agents
        self.lr = learning_rate
        self.epsilon = epsilon
        
        # Strategy-Personality mappings based on analysis
        self.personality_strategy_map = {
            'conservative': [0],    # Strategy 1: Average Weekly Returns
            'contrarian': [1],      # Strategy 2: Mean Reversion
            'momentum': [2],        # Strategy 3: Rate of Change ROC
            'adaptive': [3],        # Strategy 4: Support & Resistance
            'aggressive': [4]       # Strategy 5: Stochastic %K
        }
        
        # Each agent represents a different strategy selection philosophy
        self.agents = {}
        personalities = ['conservative', 'aggressive', 'momentum', 'contrarian', 'adaptive']
        
        for i in range(n_agents):
            personality = personalities[i % len(personalities)]  # Ensure we have all personalities
            self.agents[f'Agent_{i}'] = {
                'q_table': defaultdict(lambda: defaultdict(float)),
                'strategy_preferences': np.random.dirichlet([1]*5),
                'wins': 0,
                'total_games': 0,
                'recent_rewards': deque(maxlen=20),
                'personality': personality,
                'preferred_strategies': self.personality_strategy_map[personality].copy()
            }
    
    def generate_personality(self):
        """Give each agent a unique personality - now integrated into __init__"""
        personalities = [
            'conservative',  # Prefers low-risk strategies
            'aggressive',    # Prefers high-return strategies
            'momentum',      # Follows trends
            'contrarian',    # Goes against trends
            'adaptive'       # Changes based on market
        ]
        return random.choice(personalities)
    
    def get_market_state(self, strategies, current_idx):
        """Discretize market conditions into states"""
        if current_idx < 20:
            return "neutral"
        
        recent_data = strategies.close_prices.iloc[current_idx-20:current_idx]
        returns = recent_data.pct_change().fillna(0)
        
        volatility = returns.std().mean()
        trend = (recent_data.iloc[-1] / recent_data.iloc[0] - 1).mean()
        
        # Create discrete state
        vol_state = "high_vol" if volatility > 0.02 else "low_vol"
        trend_state = "up" if trend > 0.01 else "down" if trend < -0.01 else "flat"
        
        return f"{vol_state}_{trend_state}"
    
    def agent_select_strategy(self, agent_name, market_state):
        """Agent selects strategy based on Q-table and personality with improved mapping"""
        agent = self.agents[agent_name]
        personality = agent['personality']
        
        # Epsilon-greedy with personality-based strategy preference
        if random.random() < self.epsilon:
            # Exploration: 70% from preferred strategies, 30% random
            if random.random() < 0.7:
                # Choose from personality-matched strategies
                strategy = random.choice(agent['preferred_strategies'])
            else:
                # Secondary strategy preferences based on personality traits
                if personality == 'conservative':
                    # May occasionally try other stable strategies
                    strategy = random.choice([0, 1, 3])  # Conservative, contrarian, adaptive
                elif personality == 'aggressive':
                    # May try momentum or high-risk strategies
                    strategy = random.choice([2, 4])  # Momentum, aggressive
                elif personality == 'momentum':
                    # May try aggressive or adaptive strategies
                    strategy = random.choice([2, 3, 4])  # Momentum, adaptive, aggressive
                elif personality == 'contrarian':
                    # May try conservative or adaptive strategies
                    strategy = random.choice([0, 1, 3])  # Conservative, contrarian, adaptive
                else:  # adaptive
                    # Can try any strategy based on market conditions
                    if "high_vol" in market_state:
                        strategy = random.choice([0, 1])  # Conservative or contrarian in high vol
                    elif "up" in market_state:
                        strategy = random.choice([2, 4])  # Momentum or aggressive in uptrend
                    else:
                        strategy = random.choice([1, 3])  # Contrarian or adaptive otherwise
        else:
            # Exploitation: choose best Q-value, but prefer personality-matched strategies
            q_values = agent['q_table'][market_state]
            if q_values:
                # Boost Q-values for preferred strategies
                adjusted_q_values = dict(q_values)
                for preferred_strategy in agent['preferred_strategies']:
                    if preferred_strategy in adjusted_q_values:
                        adjusted_q_values[preferred_strategy] += 0.1  # Personality bias
                
                strategy = max(adjusted_q_values.items(), key=lambda x: x[1])[0]
            else:
                # Default to preferred strategy if no Q-values exist
                strategy = random.choice(agent['preferred_strategies'])
        
        return strategy
    
    def tournament_round(self, strategies, backtester, current_idx):
        """Run tournament between agents with personality-aware evaluation"""
        market_state = self.get_market_state(strategies, current_idx)
        
        # Each agent selects a strategy
        agent_strategies = {}
        for agent_name in self.agents:
            strategy = self.agent_select_strategy(agent_name, market_state)
            agent_strategies[agent_name] = strategy
        
        # Evaluate strategies over short window
        window_size = 10
        start_idx = max(250, current_idx - window_size)
        end_idx = min(current_idx + window_size, len(strategies.dates))
        
        agent_rewards = {}
        
        for agent_name, strategy_idx in agent_strategies.items():
            try:
                strategy_name = f'Strategy{strategy_idx + 1}'
                
                weights_list = []
                dates_list = []
                
                for i in range(start_idx, end_idx):
                    if i < len(strategies.dates):
                        strategy_func = getattr(strategies, f'task1_{strategy_name}')
                        weights = strategy_func(i)
                        weights_list.append(weights.values)
                        dates_list.append(strategies.dates[i])
                
                if weights_list:
                    weights_df = pd.DataFrame(weights_list, index=dates_list, columns=strategies.symbols)
                    returns = backtester.calculate_returns(weights_df)
                    performance = backtester.calculate_performance_metrics(returns)
                    
                    # Personality-adjusted reward calculation
                    base_reward = (
                        performance.get('sharpe_ratio', 0) * 0.5 +
                        performance.get('total_return', 0) * 0.3 +
                        (1 - abs(performance.get('max_drawdown', 0))) * 0.2
                    )
                    
                    # Bonus for using preferred strategy
                    personality_bonus = 0.05 if strategy_idx in self.agents[agent_name]['preferred_strategies'] else 0
                    
                    agent_rewards[agent_name] = base_reward + personality_bonus
                else:
                    agent_rewards[agent_name] = -0.1
            except:
                agent_rewards[agent_name] = -0.1
        
        # Update Q-tables and agent stats
        for agent_name, reward in agent_rewards.items():
            agent = self.agents[agent_name]
            strategy = agent_strategies[agent_name]
            
            # Q-learning update with personality-aware learning rate
            personality_lr_modifier = 1.2 if strategy in agent['preferred_strategies'] else 0.8
            effective_lr = self.lr * personality_lr_modifier
            
            old_q = agent['q_table'][market_state][strategy]
            agent['q_table'][market_state][strategy] = old_q + effective_lr * (reward - old_q)
            
            # Update agent stats
            agent['recent_rewards'].append(reward)
            agent['total_games'] += 1
            
            # Update strategy preferences with personality bias
            decay = 0.95
            agent['strategy_preferences'] *= decay
            
            # Stronger preference update for personality-matched strategies
            preference_boost = 1.5 if strategy in agent['preferred_strategies'] else 1.0
            agent['strategy_preferences'][strategy] += (1 - decay) * max(0, reward) * preference_boost
            agent['strategy_preferences'] /= agent['strategy_preferences'].sum()
        
        # Determine winner
        winner = max(agent_rewards.items(), key=lambda x: x[1])
        self.agents[winner[0]]['wins'] += 1
        
        return winner[0], agent_strategies[winner[0]], winner[1]
    
    def train_tournament(self, strategies, backtester, train_start=250, train_end=None):
        """Train agents through tournaments with personality tracking"""
        print("Training Multi-Agent RL Tournament with Personality-Strategy Mapping...")
        
        if train_end is None:
            train_end = len(strategies.dates) - 50
        
        tournament_results = []
        personality_performance = defaultdict(list)
        
        for i in range(train_start, train_end, 20):
            winner_agent, winning_strategy, reward = self.tournament_round(strategies, backtester, i)
            
            # Track personality performance
            winner_personality = self.agents[winner_agent]['personality']
            personality_performance[winner_personality].append(reward)
            
            tournament_results.append({
                'index': i,
                'winner_agent': winner_agent,
                'winner_personality': winner_personality,
                'winning_strategy': winning_strategy,
                'reward': reward,
                'strategy_personality_match': winning_strategy in self.personality_strategy_map[winner_personality]
            })
            
            if i % 100 == 0:
                match_indicator = "✓" if winning_strategy in self.personality_strategy_map[winner_personality] else "✗"
                print(f"  Tournament at index {i}: {winner_agent} ({winner_personality}) wins with Strategy{winning_strategy+1} {match_indicator} (reward: {reward:.4f})")
        
        # Print final agent and personality statistics
        print("\nFinal Agent Statistics:")
        for agent_name, agent in self.agents.items():
            win_rate = agent['wins'] / agent['total_games'] if agent['total_games'] > 0 else 0
            avg_reward = np.mean(agent['recent_rewards']) if agent['recent_rewards'] else 0
            preferred_strats = [f"S{s+1}" for s in agent['preferred_strategies']]
            print(f"  {agent_name} ({agent['personality']}): {win_rate:.2%} win rate, {avg_reward:.4f} avg reward, prefers {preferred_strats}")
        
        print("\nPersonality Performance Summary:")
        for personality, rewards in personality_performance.items():
            avg_reward = np.mean(rewards)
            win_count = len(rewards)
            preferred_strategy = self.personality_strategy_map[personality][0] + 1
            print(f"  {personality}: {win_count} wins, {avg_reward:.4f} avg reward, prefers Strategy{preferred_strategy}")
        
        # Calculate strategy-personality alignment rate
        matches = sum(1 for result in tournament_results if result['strategy_personality_match'])
        alignment_rate = matches / len(tournament_results) if tournament_results else 0
        print(f"\nStrategy-Personality Alignment Rate: {alignment_rate:.2%}")
        
        self.tournament_results = tournament_results
        self.personality_performance = dict(personality_performance)
    
    def predict_strategy(self, strategies, current_idx):
        """Predict using ensemble of agents with personality weighting"""
        market_state = self.get_market_state(strategies, current_idx)
        
        # Get strategy votes from all agents
        strategy_votes = defaultdict(float)
        personality_confidence = defaultdict(float)
        
        for agent_name, agent in self.agents.items():
            strategy = self.agent_select_strategy(agent_name, market_state)
            
            # Weight vote by agent's recent performance
            base_weight = np.mean(agent['recent_rewards']) if agent['recent_rewards'] else 0
            base_weight = max(0.1, base_weight)  # Minimum weight
            
            # Boost weight if using preferred strategy
            personality_boost = 1.3 if strategy in agent['preferred_strategies'] else 1.0
            final_weight = base_weight * personality_boost
            
            strategy_votes[strategy] += final_weight
            personality_confidence[agent['personality']] += final_weight
        
        # Select strategy with highest weighted votes
        if strategy_votes:
            best_strategy = max(strategy_votes.items(), key=lambda x: x[1])[0]
            confidence = strategy_votes[best_strategy] / sum(strategy_votes.values())
            
            # Determine which personality type is driving the decision
            dominant_personality = max(personality_confidence.items(), key=lambda x: x[1])[0]
            
            return best_strategy, confidence, dominant_personality
        else:
            return 0, 0.2, 'conservative'  # Safe default
    
    def get_personality_insights(self):
        """Get insights about personality performance and strategy preferences"""
        insights = {}
        
        for agent_name, agent in self.agents.items():
            personality = agent['personality']
            preferred_strategies = agent['preferred_strategies']
            
            # Calculate strategy usage distribution
            strategy_usage = defaultdict(int)
            for state, strategies in agent['q_table'].items():
                for strategy, q_value in strategies.items():
                    strategy_usage[strategy] += 1
            
            insights[agent_name] = {
                'personality': personality,
                'preferred_strategies': [f"Strategy{s+1}" for s in preferred_strategies],
                'win_rate': agent['wins'] / agent['total_games'] if agent['total_games'] > 0 else 0,
                'avg_recent_reward': np.mean(agent['recent_rewards']) if agent['recent_rewards'] else 0,
                'strategy_usage': dict(strategy_usage),
                'total_games': agent['total_games']
            }
        
        return insights

In [77]:
def task2(train_data_path='train_data.csv', test_data_path='test_data.csv'):
    """
    Task 2: Multi-Agent RL Tournament Strategy Selection
    """
    print("=== TASK 2: Multi-Agent RL Tournament Strategy Selection ===")
    
    # Load data
    train_data = pd.read_csv(train_data_path)
    train_data = train_data.iloc[:, 1:]
    
    test_data = pd.read_csv(test_data_path)
    test_data = test_data.iloc[:, 1:]
    
    print(f"Training data shape: {train_data.shape}")
    print(f"Test data shape: {test_data.shape}")
    
    # TRAINING PHASE
    print("\n--- TRAINING PHASE ---")
    train_strategies = TradingStrategies(train_data)
    train_strategies.prepare_data()
    
    train_backtester = StrategyBacktester(train_data, train_strategies)
    
    # Initialize Multi-Agent RL selector
    marl_selector = MultiAgentRLSelector(n_agents=5, learning_rate=0.1, epsilon=0.2)
    
    # Train through tournaments
    marl_selector.train_tournament(
        train_strategies, 
        train_backtester,
        train_start=250,
        train_end=len(train_strategies.dates) - 50
    )
    
    # TESTING PHASE
    print("\n--- TESTING PHASE ---")
    test_strategies = TradingStrategies(test_data)
    test_strategies.prepare_data()
    
    test_backtester = StrategyBacktester(test_data, test_strategies)
    
    # Apply trained agents to test data
    ensemble_weights = []
    ensemble_dates = []
    strategy_selections = []
    prediction_confidences = []
    
    print("Generating predictions with trained agents...")
    
    LOOKBACK_REQUIRED = 250
    
    if len(test_strategies.dates) < LOOKBACK_REQUIRED:
        raise ValueError(f"Test data insufficient. Need at least {LOOKBACK_REQUIRED} days")
    
    for i in range(LOOKBACK_REQUIRED, len(test_strategies.dates)):
        date = test_strategies.dates[i]
        
        # Get prediction from agent ensemble - Fixed: now properly unpacking 3 values
        strategy_idx, confidence, dominant_personality = marl_selector.predict_strategy(test_strategies, i)
        strategy_name = f'Strategy{strategy_idx + 1}'
        
        # Get weights
        strategy_func = getattr(test_strategies, f'task1_{strategy_name}')
        weights = strategy_func(i)
        
        ensemble_weights.append(weights.values)
        ensemble_dates.append(date)
        strategy_selections.append(strategy_name)
        prediction_confidences.append(confidence)
        
        if i % 100 == 0:
            print(f"  Day {i-LOOKBACK_REQUIRED+1}: {strategy_name} (confidence: {confidence:.3f})")
    
    # Create results
    ensemble_weights_df = pd.DataFrame(
        ensemble_weights,
        index=ensemble_dates,
        columns=test_strategies.symbols
    )
    
    # Calculate performance
    ensemble_returns = test_backtester.calculate_returns(ensemble_weights_df)
    ensemble_performance = test_backtester.calculate_performance_metrics(ensemble_returns)
    
    print("\nMulti-Agent RL Ensemble Performance:")
    for metric, value in ensemble_performance.items():
        print(f"  {metric}: {value:.4f}")
    
    # Save results
    ensemble_performance_df = pd.DataFrame([ensemble_performance])
    ensemble_performance_df.to_csv('backtest_performance_metrics_rl.csv', index=False)
    
    ensemble_weights_df.to_csv('ensemble_weights_rl.csv')
    
    performance_df = pd.DataFrame({
        'Date': ensemble_dates,
        'Returns': ensemble_returns.values,
        'Cumulative_Returns': (1 + ensemble_returns).cumprod().values,
        'Selected_Strategy': strategy_selections,
        'Prediction_Confidence': prediction_confidences
    })
    performance_df.to_csv('ensemble_performance_rl.csv', index=False)
    
    # Save model
    model_data = {
        'marl_selector': marl_selector,
        'training_results': marl_selector.tournament_results if hasattr(marl_selector, 'tournament_results') else []
    }
    
    # with open('ensemble_model_rl.pkl', 'wb') as f:
    #     pickle.dump(model_data, f)
    
    return ensemble_weights_df, ensemble_performance, model_data

In [78]:
if __name__ == "__main__":
    ensemble_weights, ensemble_performance, model_data = task2(
        train_data_path='train_data.csv',
        test_data_path='test_data.csv'
    )
    
    print("\nRL epsilon greedy-based ensemble strategy completed successfully!")

=== TASK 2: Multi-Agent RL Tournament Strategy Selection ===
Training data shape: (70000, 14)
Test data shape: (11120, 14)

--- TRAINING PHASE ---
Training Multi-Agent RL Tournament with Personality-Strategy Mapping...

Final Agent Statistics:
  Agent_0 (conservative): 13.12% win rate, 1.4105 avg reward, prefers ['S1']
  Agent_1 (aggressive): 26.25% win rate, 3.6005 avg reward, prefers ['S5']
  Agent_2 (momentum): 20.62% win rate, 2.2784 avg reward, prefers ['S3']
  Agent_3 (contrarian): 25.62% win rate, 2.8366 avg reward, prefers ['S2']
  Agent_4 (adaptive): 14.37% win rate, 1.5003 avg reward, prefers ['S4']

Personality Performance Summary:
  adaptive: 23 wins, 5.8157 avg reward, prefers Strategy4
  contrarian: 41 wins, 6.3756 avg reward, prefers Strategy2
  momentum: 33 wins, 5.6666 avg reward, prefers Strategy3
  aggressive: 42 wins, 15.0173 avg reward, prefers Strategy5
  conservative: 21 wins, 4.5703 avg reward, prefers Strategy1

Strategy-Personality Alignment Rate: 98.75%

--- 

In [79]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from collections import deque, defaultdict
import random
import pickle

class DQNNetwork(nn.Module):
    """Deep Q-Network for strategy selection"""
    
    def __init__(self, state_size, action_size, hidden_size=128):
        super(DQNNetwork, self).__init__()
        self.fc1 = nn.Linear(state_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, action_size)
        self.dropout = nn.Dropout(0.2)
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)
        x = torch.relu(self.fc3(x))
        x = self.fc4(x)
        return x

class DQNAgent:
    """DQN Agent with experience replay and target network"""
    
    def __init__(self, state_size, action_size, lr=0.001, gamma=0.95, epsilon=1.0, 
                 epsilon_decay=0.995, epsilon_min=0.01, memory_size=10000, batch_size=32):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=memory_size)
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.batch_size = batch_size
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
        # Neural networks
        self.q_network = DQNNetwork(state_size, action_size).to(self.device)
        self.target_network = DQNNetwork(state_size, action_size).to(self.device)
        self.optimizer = optim.Adam(self.q_network.parameters(), lr=lr)
        
        # Initialize target network
        self.update_target_network()
        
    def update_target_network(self):
        """Copy weights from main network to target network"""
        self.target_network.load_state_dict(self.q_network.state_dict())
    
    def remember(self, state, action, reward, next_state, done):
        """Store experience in replay memory"""
        self.memory.append((state, action, reward, next_state, done))
    
    def act(self, state, personality_bias=None):
        """Choose action using epsilon-greedy policy with personality bias"""
        if np.random.random() <= self.epsilon:
            if personality_bias is not None:
                # Weighted random selection based on personality
                return np.random.choice(self.action_size, p=personality_bias)
            return random.randrange(self.action_size)
        
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        q_values = self.q_network(state_tensor)
        
        # Apply personality bias to Q-values
        if personality_bias is not None:
            bias_tensor = torch.FloatTensor(personality_bias).to(self.device)
            q_values = q_values + bias_tensor * 0.1
        
        return np.argmax(q_values.cpu().data.numpy())
    
    def replay(self):
        """Train the model on a batch of experiences"""
        if len(self.memory) < self.batch_size:
            return
        
        batch = random.sample(self.memory, self.batch_size)
        states = torch.FloatTensor([e[0] for e in batch]).to(self.device)
        actions = torch.LongTensor([e[1] for e in batch]).to(self.device)
        rewards = torch.FloatTensor([e[2] for e in batch]).to(self.device)
        next_states = torch.FloatTensor([e[3] for e in batch]).to(self.device)
        dones = torch.BoolTensor([e[4] for e in batch]).to(self.device)
        
        current_q_values = self.q_network(states).gather(1, actions.unsqueeze(1))
        next_q_values = self.target_network(next_states).max(1)[0].detach()
        target_q_values = rewards + (self.gamma * next_q_values * ~dones)
        
        loss = nn.MSELoss()(current_q_values.squeeze(), target_q_values)
        
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

class AdvancedDQNSelector:
    
    def __init__(self, n_agents=5, state_size=15, action_size=5):
        self.n_agents = n_agents
        self.state_size = state_size
        self.action_size = action_size
        
        # Personality mappings
        self.personality_strategy_map = {
            'conservative': [0],
            'contrarian': [1],
            'momentum': [2],
            'adaptive': [3],
            'aggressive': [4]
        }
        
        # Initialize agents
        self.agents = {}
        personalities = ['conservative', 'aggressive', 'momentum', 'contrarian', 'adaptive']
        
        for i in range(n_agents):
            personality = personalities[i % len(personalities)]
            
            # Create personality bias (probability distribution)
            bias = np.ones(action_size) * 0.1
            preferred_strategies = self.personality_strategy_map[personality]
            for strategy in preferred_strategies:
                bias[strategy] = 0.4
            bias = bias / bias.sum()
            
            self.agents[f'Agent_{i}'] = {
                'dqn': DQNAgent(state_size, action_size),
                'personality': personality,
                'personality_bias': bias,
                'wins': 0,
                'total_games': 0,
                'recent_rewards': deque(maxlen=20)
            }
    
    def get_market_state_vector(self, strategies, current_idx):
        """Convert market conditions to feature vector"""
        if current_idx < 50:
            return np.zeros(self.state_size)
        
        features = []
        
        # Price-based features
        recent_prices = strategies.close_prices.iloc[current_idx-20:current_idx]
        returns = recent_prices.pct_change().fillna(0)
        
        # Returns statistics
        features.extend([
            returns.mean().mean(),
            returns.std().mean(),
            returns.skew().mean(),
            returns.kurt().mean()
        ])
        
        # Trend features
        short_ma = recent_prices.rolling(5).mean().iloc[-1]
        long_ma = recent_prices.rolling(20).mean().iloc[-1]
        trend = ((short_ma / long_ma) - 1).mean()
        features.append(trend)
        
        # Volatility features
        volatility = returns.rolling(5).std().iloc[-1].mean()
        features.append(volatility)
        
        # Momentum features
        momentum_5 = (recent_prices.iloc[-1] / recent_prices.iloc[-5] - 1).mean()
        momentum_10 = (recent_prices.iloc[-1] / recent_prices.iloc[-10] - 1).mean()
        features.extend([momentum_5, momentum_10])
        
        # Volume-based features (if available)
        if hasattr(strategies, 'volume_data'):
            volume_ma = strategies.volume_data.iloc[current_idx-5:current_idx].mean().mean()
            volume_std = strategies.volume_data.iloc[current_idx-5:current_idx].std().mean()
            features.extend([volume_ma, volume_std])
        else:
            features.extend([0, 0])
        
        # RSI-like feature
        gains = returns[returns > 0].mean().mean()
        losses = abs(returns[returns < 0]).mean().mean()
        rsi = gains / (gains + losses) if (gains + losses) > 0 else 0.5
        features.append(rsi)
        
        # Market regime features
        high_vol = 1 if volatility > 0.02 else 0
        uptrend = 1 if trend > 0.01 else 0
        downtrend = 1 if trend < -0.01 else 0
        features.extend([high_vol, uptrend, downtrend])
        
        # Pad or truncate to exact size
        features = features[:self.state_size]
        while len(features) < self.state_size:
            features.append(0)
        
        return np.array(features, dtype=np.float32)
    
    def train_tournament(self, strategies, backtester, train_start=250, train_end=None):
        """Train DQN agents through tournaments"""
        print("Training Advanced DQN Multi-Agent Tournament...")
        
        if train_end is None:
            train_end = len(strategies.dates) - 50
        
        tournament_results = []
        update_target_frequency = 100
        step_count = 0
        
        for i in range(train_start, train_end, 10):
            state = self.get_market_state_vector(strategies, i)
            
            # Each agent selects action
            agent_actions = {}
            for agent_name, agent_data in self.agents.items():
                action = agent_data['dqn'].act(state, agent_data['personality_bias'])
                agent_actions[agent_name] = action
            
            # Evaluate strategies
            agent_rewards = self.evaluate_strategies(strategies, backtester, agent_actions, i)
            
            # Get next state
            next_i = min(i + 10, train_end - 1)
            next_state = self.get_market_state_vector(strategies, next_i)
            done = (next_i >= train_end - 1)
            
            # Store experiences and train
            for agent_name, agent_data in self.agents.items():
                action = agent_actions[agent_name]
                reward = agent_rewards[agent_name]
                
                agent_data['dqn'].remember(state, action, reward, next_state, done)
                agent_data['dqn'].replay()
                
                # Update stats
                agent_data['recent_rewards'].append(reward)
                agent_data['total_games'] += 1
            
            # Update target networks periodically
            step_count += 1
            if step_count % update_target_frequency == 0:
                for agent_data in self.agents.values():
                    agent_data['dqn'].update_target_network()
            
            # Track winner
            winner = max(agent_rewards.items(), key=lambda x: x[1])
            self.agents[winner[0]]['wins'] += 1
            
            tournament_results.append({
                'index': i,
                'winner_agent': winner[0],
                'winning_strategy': agent_actions[winner[0]],
                'reward': winner[1]
            })
            
            if i % 100 == 0:
                avg_reward = np.mean(list(agent_rewards.values()))
                print(f"  Step {i}: Winner {winner[0]}, Strategy {winner[1]}, Avg Reward: {avg_reward:.4f}")
        
        self.tournament_results = tournament_results
        print("DQN Training completed!")
    
    def evaluate_strategies(self, strategies, backtester, agent_actions, current_idx):
        """Evaluate agent strategies and return rewards"""
        window_size = 10
        start_idx = max(250, current_idx - window_size)
        end_idx = min(current_idx + window_size, len(strategies.dates))
        
        agent_rewards = {}
        
        for agent_name, strategy_idx in agent_actions.items():
            try:
                strategy_name = f'Strategy{strategy_idx + 1}'
                
                weights_list = []
                dates_list = []
                
                for i in range(start_idx, end_idx):
                    if i < len(strategies.dates):
                        strategy_func = getattr(strategies, f'task1_{strategy_name}')
                        weights = strategy_func(i)
                        weights_list.append(weights.values)
                        dates_list.append(strategies.dates[i])
                
                if weights_list:
                    weights_df = pd.DataFrame(weights_list, index=dates_list, columns=strategies.symbols)
                    returns = backtester.calculate_returns(weights_df)
                    performance = backtester.calculate_performance_metrics(returns)
                    
                    reward = (
                        performance.get('sharpe_ratio', 0) * 0.5 +
                        performance.get('total_return', 0) * 0.3 +
                        (1 - abs(performance.get('max_drawdown', 0))) * 0.2
                    )
                    agent_rewards[agent_name] = reward
                else:
                    agent_rewards[agent_name] = -0.1
            except:
                agent_rewards[agent_name] = -0.1
        
        return agent_rewards
    
    def predict_strategy(self, strategies, current_idx):
        """Predict using ensemble of DQN agents"""
        state = self.get_market_state_vector(strategies, current_idx)
        
        strategy_votes = defaultdict(float)
        
        for agent_name, agent_data in self.agents.items():
            # Set epsilon to 0 for prediction (no exploration)
            original_epsilon = agent_data['dqn'].epsilon
            agent_data['dqn'].epsilon = 0
            
            action = agent_data['dqn'].act(state, agent_data['personality_bias'])
            
            # Weight by recent performance
            weight = np.mean(agent_data['recent_rewards']) if agent_data['recent_rewards'] else 0
            weight = max(0.1, weight)
            
            strategy_votes[action] += weight
            
            # Restore original epsilon
            agent_data['dqn'].epsilon = original_epsilon
        
        if strategy_votes:
            best_strategy = max(strategy_votes.items(), key=lambda x: x[1])[0]
            confidence = strategy_votes[best_strategy] / sum(strategy_votes.values())
            return best_strategy, confidence
        else:
            return 0, 0.2

In [80]:
def task2_advanced(train_data_path='train_data.csv', test_data_path='test_data.csv', method='DQN'):
    """
    Task 2: Advanced RL Tournament Strategy Selection
    Methods: 'DQN', 'PPO'
    """
    print(f"=== TASK 2: Advanced {method} Multi-Agent Strategy Selection ===")
    
    # Load data
    train_data = pd.read_csv(train_data_path)
    train_data = train_data.iloc[:, 1:]
    
    test_data = pd.read_csv(test_data_path)
    test_data = test_data.iloc[:, 1:]
    
    print(f"Training data shape: {train_data.shape}")
    print(f"Test data shape: {test_data.shape}")
    
    # TRAINING PHASE
    print("\n--- TRAINING PHASE ---")
    train_strategies = TradingStrategies(train_data)
    train_strategies.prepare_data()
    
    train_backtester = StrategyBacktester(train_data, train_strategies)
    
    # Initialize Advanced RL selector based on method
    if method == 'DQN':
        rl_selector = AdvancedDQNSelector(n_agents=5, state_size=15, action_size=5)
    elif method == 'PPO':
        rl_selector = AdvancedPPOSelector(n_agents=5, state_size=15, action_size=5)
    else:
        raise ValueError(f"Unknown method: {method}. Choose 'DQN' or 'PPO'")
    
    # Train through tournaments
    rl_selector.train_tournament(
        train_strategies, 
        train_backtester,
        train_start=250,
        train_end=len(train_strategies.dates) - 50
    )
    
    # TESTING PHASE
    print("\n--- TESTING PHASE ---")
    test_strategies = TradingStrategies(test_data)
    test_strategies.prepare_data()
    
    test_backtester = StrategyBacktester(test_data, test_strategies)
    
    # Apply trained agents to test data
    ensemble_weights = []
    ensemble_dates = []
    strategy_selections = []
    prediction_confidences = []
    
    print("Generating predictions with trained agents...")
    
    LOOKBACK_REQUIRED = 250
    
    if len(test_strategies.dates) < LOOKBACK_REQUIRED:
        raise ValueError(f"Test data insufficient. Need at least {LOOKBACK_REQUIRED} days")
    
    for i in range(LOOKBACK_REQUIRED, len(test_strategies.dates)):
        date = test_strategies.dates[i]
        
        strategy_idx, confidence = rl_selector.predict_strategy(test_strategies, i)
        strategy_name = f'Strategy{strategy_idx + 1}'
        
        # Get weights
        strategy_func = getattr(test_strategies, f'task1_{strategy_name}')
        weights = strategy_func(i)
        
        ensemble_weights.append(weights.values)
        ensemble_dates.append(date)
        strategy_selections.append(strategy_name)
        prediction_confidences.append(confidence)
        
        if i % 100 == 0:
            print(f"  Day {i-LOOKBACK_REQUIRED+1}: {strategy_name} (confidence: {confidence:.3f})")
    
    # Create results
    ensemble_weights_df = pd.DataFrame(
        ensemble_weights,
        index=ensemble_dates,
        columns=test_strategies.symbols
    )
    
    # Calculate performance
    ensemble_returns = test_backtester.calculate_returns(ensemble_weights_df)
    ensemble_performance = test_backtester.calculate_performance_metrics(ensemble_returns)
    
    print(f"\n{method} Ensemble Performance:")
    for metric, value in ensemble_performance.items():
        print(f"  {metric}: {value:.4f}")
    
    # Save results
    ensemble_performance_df = pd.DataFrame([ensemble_performance])
    ensemble_performance_df.to_csv(f'backtest_performance_metrics_{method.lower()}.csv', index=False)
    
    ensemble_weights_df.to_csv(f'ensemble_weights_{method.lower()}.csv')
    
    performance_df = pd.DataFrame({
        'Date': ensemble_dates,
        'Returns': ensemble_returns.values,
        'Cumulative_Returns': (1 + ensemble_returns).cumprod().values,
        'Selected_Strategy': strategy_selections,
        'Prediction_Confidence': prediction_confidences
    })
    performance_df.to_csv(f'ensemble_performance_{method.lower()}.csv', index=False)
    
    # Save model
    model_data = {
        'rl_selector': rl_selector,
        'method': method,
        'training_results': rl_selector.tournament_results if hasattr(rl_selector, 'tournament_results') else []
    }
    
    with open(f'ensemble_model_{method.lower()}.pkl', 'wb') as f:
        pickle.dump(model_data, f)
    
    return ensemble_weights_df, ensemble_performance, model_data

In [88]:
if __name__ == "__main__":
    # Test DQN method
    print("Testing DQN method...")
    ensemble_weights_dqn, ensemble_performance_dqn, model_data_dqn = task2_advanced(
        train_data_path='train_data.csv',
        test_data_path='cross_val_data.csv',
        method='DQN'
    )
    
    print("\n" + "="*80 + "\n")
    
    print("\n=== PERFORMANCE COMPARISON ===")
    print("DQN Performance:")
    for metric, value in ensemble_performance_dqn.items():
        print(f"  {metric}: {value:.4f}")

Testing DQN method...
=== TASK 2: Advanced DQN Multi-Agent Strategy Selection ===
Training data shape: (70000, 14)
Test data shape: (10000, 14)

--- TRAINING PHASE ---
Training Advanced DQN Multi-Agent Tournament...
  Step 300: Winner Agent_2, Strategy 10.69268617515505, Avg Reward: 4.1262
  Step 400: Winner Agent_3, Strategy 19.09849967646336, Avg Reward: 6.9748
  Step 500: Winner Agent_0, Strategy 4.144673029075346, Avg Reward: 1.7357
  Step 600: Winner Agent_3, Strategy 7.980537078421094, Avg Reward: 3.1261
  Step 700: Winner Agent_0, Strategy 3.8746701712487726, Avg Reward: 2.2265
  Step 800: Winner Agent_1, Strategy -0.5330969115133646, Avg Reward: -0.7787
  Step 900: Winner Agent_1, Strategy 6.857081049077467, Avg Reward: 3.3095
  Step 1000: Winner Agent_4, Strategy 16.880271038853493, Avg Reward: 4.2498
  Step 1100: Winner Agent_0, Strategy -1.6601180303126826, Avg Reward: -2.0561
  Step 1200: Winner Agent_0, Strategy 8.080165196510615, Avg Reward: 8.0802
  Step 1300: Winner Age

In [91]:
import pickle
import pandas as pd
import numpy as np
from collections import defaultdict

def load_and_test_model(model_path, test_data_path='cross_val_data.csv', method='DQN'):
    """
    Load pre-trained ensemble model and test on test data only
    
    Args:
        model_path: Path to the saved model pickle file
        test_data_path: Path to test data CSV
        method: Method used ('DQN' or 'PPO')
    """
    print(f"=== LOADING AND TESTING {method} MODEL ===")
    
    # Load the pre-trained model
    print(f"Loading model from: {model_path}")
    try:
        with open(model_path, 'rb') as f:
            model_data = pickle.load(f)
        
        rl_selector = model_data['rl_selector']
        model_method = model_data.get('method', method)
        print(f"Model loaded successfully! Method: {model_method}")
        print(f"Number of agents: {len(rl_selector.agents)}")
        
        # CRITICAL FIX: Set all agents to evaluation mode (no exploration)
        if method == 'DQN':
            for agent_name, agent_info in rl_selector.agents.items():
                # Force epsilon to 0 for pure exploitation during testing
                agent_info['dqn'].epsilon = 0.0
                print(f"  {agent_name}: epsilon set to {agent_info['dqn'].epsilon}")
        
        # Print agent information
        for agent_name, agent_info in rl_selector.agents.items():
            personality = agent_info.get('personality', 'unknown')
            total_games = agent_info.get('total_games', 0)
            wins = agent_info.get('wins', 0)
            win_rate = wins / total_games if total_games > 0 else 0
            print(f"  {agent_name}: {personality} personality, {total_games} games, {win_rate:.2%} win rate")
            
    except Exception as e:
        print(f"Error loading model: {e}")
        return None, None, None
    
    # Load test data
    print(f"\nLoading test data from: {test_data_path}")
    test_data = pd.read_csv(test_data_path)
    test_data = test_data.iloc[:, 1:]  # Remove first column if it's index
    print(f"Test data shape: {test_data.shape}")
    
    # TESTING PHASE ONLY
    print("\n--- TESTING PHASE ---")
    test_strategies = TradingStrategies(test_data)
    test_strategies.prepare_data()
    
    test_backtester = StrategyBacktester(test_data, test_strategies)
    
    # Apply trained agents to test data
    ensemble_weights = []
    ensemble_dates = []
    strategy_selections = []
    prediction_confidences = []
    agent_decisions = []  # Track which agents made decisions
    
    print("Generating predictions with pre-trained agents...")
    
    LOOKBACK_REQUIRED = 250
    
    if len(test_strategies.dates) < LOOKBACK_REQUIRED:
        raise ValueError(f"Test data insufficient. Need at least {LOOKBACK_REQUIRED} days")
    
    for i in range(LOOKBACK_REQUIRED, len(test_strategies.dates)):
        date = test_strategies.dates[i]
        
        # Get prediction from agent ensemble with FIXED prediction method
        strategy_idx, confidence = predict_strategy_deterministic(rl_selector, test_strategies, i, method)
        strategy_name = f'Strategy{strategy_idx + 1}'
        
        # Get detailed agent decisions for this step
        state = rl_selector.get_market_state_vector(test_strategies, i)
        agent_votes = {}
        
        for agent_name, agent_info in rl_selector.agents.items():
            if method == 'DQN':
                # Ensure epsilon is 0 and use deterministic action selection
                agent_info['dqn'].epsilon = 0.0
                action = agent_info['dqn'].act(state, agent_info['personality_bias'])
            else:  # PPO
                action, _, _ = agent_info['ppo'].get_action_and_value(state, agent_info['personality_bias'])
            
            agent_votes[agent_name] = {
                'strategy': action,
                'personality': agent_info['personality']
            }
        
        # Get weights using selected strategy
        strategy_func = getattr(test_strategies, f'task1_{strategy_name}')
        weights = strategy_func(i)
        
        ensemble_weights.append(weights.values)
        ensemble_dates.append(date)
        strategy_selections.append(strategy_name)
        prediction_confidences.append(confidence)
        agent_decisions.append(agent_votes)
        
        if i % 100 == 0:
            day_num = i - LOOKBACK_REQUIRED + 1
            print(f"  Day {day_num}: {strategy_name} (confidence: {confidence:.3f})")
            
            # Show agent consensus for this prediction
            strategy_counts = defaultdict(int)
            for agent_name, decision in agent_votes.items():
                strategy_counts[f"S{decision['strategy']+1}"] += 1
            
            consensus_str = ", ".join([f"{strat}: {count}" for strat, count in strategy_counts.items()])
            print(f"    Agent votes: {consensus_str}")
    
    # Create results DataFrame
    ensemble_weights_df = pd.DataFrame(
        ensemble_weights,
        index=ensemble_dates,
        columns=test_strategies.symbols
    )
    
    # Calculate performance
    ensemble_returns = test_backtester.calculate_returns(ensemble_weights_df)
    ensemble_performance = test_backtester.calculate_performance_metrics(ensemble_returns)
    
    print(f"\n{method} Ensemble Test Performance:")
    print("=" * 50)
    for metric, value in ensemble_performance.items():
        print(f"  {metric}: {value:.4f}")
    
    # Analyze strategy selection patterns
    print(f"\nStrategy Selection Analysis:")
    print("=" * 50)
    strategy_counts = pd.Series(strategy_selections).value_counts()
    for strategy, count in strategy_counts.items():
        percentage = (count / len(strategy_selections)) * 100
        print(f"  {strategy}: {count} times ({percentage:.1f}%)")
    
    # Analyze agent consensus
    print(f"\nAgent Decision Analysis:")
    print("=" * 50)
    personality_decisions = defaultdict(list)
    for decisions in agent_decisions:
        for agent_name, decision in decisions.items():
            personality_decisions[decision['personality']].append(decision['strategy'])
    
    for personality, decisions in personality_decisions.items():
        strategy_dist = pd.Series(decisions).value_counts()
        total = len(decisions)
        print(f"  {personality.title()} agents:")
        for strategy_idx, count in strategy_dist.items():
            percentage = (count / total) * 100
            print(f"    Strategy{strategy_idx+1}: {percentage:.1f}%")
    
    # Save test results
    test_suffix = f"test_{method.lower()}"
    
    ensemble_performance_df = pd.DataFrame([ensemble_performance])
    ensemble_performance_df.to_csv(f'backtest_performance_metrics_{test_suffix}.csv', index=False)
    
    ensemble_weights_df.to_csv(f'ensemble_weights_{test_suffix}.csv')
    
    performance_df = pd.DataFrame({
        'Date': ensemble_dates,
        'Returns': ensemble_returns.values,
        'Cumulative_Returns': (1 + ensemble_returns).cumprod().values,
        'Selected_Strategy': strategy_selections,
        'Prediction_Confidence': prediction_confidences
    })
    performance_df.to_csv(f'ensemble_performance_{test_suffix}.csv', index=False)
    
    # Save detailed agent decisions
    agent_decisions_df = []
    for i, (date, decisions) in enumerate(zip(ensemble_dates, agent_decisions)):
        for agent_name, decision in decisions.items():
            agent_decisions_df.append({
                'Date': date,
                'Day': i + 1,
                'Agent': agent_name,
                'Personality': decision['personality'],
                'Selected_Strategy': f"Strategy{decision['strategy']+1}",
                'Ensemble_Strategy': strategy_selections[i],
                'Confidence': prediction_confidences[i]
            })
    
    agent_decisions_df = pd.DataFrame(agent_decisions_df)
    agent_decisions_df.to_csv(f'agent_decisions_{test_suffix}.csv', index=False)
    
    print(f"\nTest results saved:")
    print(f"  - Performance metrics: backtest_performance_metrics_{test_suffix}.csv")
    print(f"  - Ensemble weights: ensemble_weights_{test_suffix}.csv")
    print(f"  - Daily performance: ensemble_performance_{test_suffix}.csv")
    print(f"  - Agent decisions: agent_decisions_{test_suffix}.csv")
    
    return ensemble_weights_df, ensemble_performance, agent_decisions_df

def predict_strategy_deterministic(rl_selector, strategies, current_idx, method):
    """
    Deterministic prediction method that matches the original training behavior
    """
    state = rl_selector.get_market_state_vector(strategies, current_idx)
    
    strategy_votes = defaultdict(float)
    
    for agent_name, agent_info in rl_selector.agents.items():
        if method == 'DQN':
            # Force deterministic behavior (no exploration)
            agent_info['dqn'].epsilon = 0.0
            action = agent_info['dqn'].act(state, agent_info['personality_bias'])
        else:  # PPO
            action, _, _ = agent_info['ppo'].get_action_and_value(state, agent_info['personality_bias'])
        
        # Weight by recent performance
        weight = np.mean(agent_info['recent_rewards']) if agent_info['recent_rewards'] else 0
        weight = max(0.1, weight)
        
        strategy_votes[action] += weight
    
    if strategy_votes:
        best_strategy = max(strategy_votes.items(), key=lambda x: x[1])[0]
        confidence = strategy_votes[best_strategy] / sum(strategy_votes.values())
        return best_strategy, confidence
    else:
        return 0, 0.2

def fix_dqn_model_for_testing(model_path, output_path=None):
    """
    Load a DQN model and fix it for consistent testing behavior
    """
    if output_path is None:
        output_path = model_path.replace('.pkl', '_fixed.pkl')
    
    print(f"Fixing DQN model: {model_path}")
    
    with open(model_path, 'rb') as f:
        model_data = pickle.load(f)
    
    rl_selector = model_data['rl_selector']
    
    # Fix all DQN agents
    for agent_name, agent_info in rl_selector.agents.items():
        if 'dqn' in agent_info:
            # Set epsilon to 0 for deterministic behavior
            agent_info['dqn'].epsilon = 0.0
            print(f"  Fixed {agent_name}: epsilon = {agent_info['dqn'].epsilon}")
    
    # Save fixed model
    with open(output_path, 'wb') as f:
        pickle.dump(model_data, f)
    
    print(f"Fixed model saved to: {output_path}")
    return output_path

# Usage functions
def test_fixed_model(model_path='ensemble_model_dqn.pkl', method='DQN'):
    """
    Test model with guaranteed consistent behavior
    """
    if method == 'DQN':
        # First fix the model
        fixed_model_path = fix_dqn_model_for_testing(model_path)
        return load_and_test_model(fixed_model_path, method=method)
    else:
        return load_and_test_model(model_path, method=method)

if __name__ == "__main__":
    print("Testing DQN model with fixed epsilon...")
    dqn_weights, dqn_performance, dqn_decisions = test_fixed_model(
        model_path='ensemble_model_dqn.pkl',
        method='DQN'
    )
    
    print("\nFixed DQN Performance:")
    for metric, value in dqn_performance.items():
        print(f"  {metric}: {value:.4f}")



Testing DQN model with fixed epsilon...
Fixing DQN model: ensemble_model_dqn.pkl
  Fixed Agent_0: epsilon = 0.0
  Fixed Agent_1: epsilon = 0.0
  Fixed Agent_2: epsilon = 0.0
  Fixed Agent_3: epsilon = 0.0
  Fixed Agent_4: epsilon = 0.0
Fixed model saved to: ensemble_model_dqn_fixed.pkl
=== LOADING AND TESTING DQN MODEL ===
Loading model from: ensemble_model_dqn_fixed.pkl
Model loaded successfully! Method: DQN
Number of agents: 5
  Agent_0: epsilon set to 0.0
  Agent_1: epsilon set to 0.0
  Agent_2: epsilon set to 0.0
  Agent_3: epsilon set to 0.0
  Agent_4: epsilon set to 0.0
  Agent_0: conservative personality, 15 games, 13.33% win rate
  Agent_1: aggressive personality, 15 games, 40.00% win rate
  Agent_2: momentum personality, 15 games, 6.67% win rate
  Agent_3: contrarian personality, 15 games, 26.67% win rate
  Agent_4: adaptive personality, 15 games, 13.33% win rate

Loading test data from: cross_val_data.csv
Test data shape: (10000, 14)

--- TESTING PHASE ---
Generating predicti