# Portfolio Optimization - Advanced Analysis
## Model Performance and Strategy Deep Dive

This notebook provides advanced analysis of the ML models and portfolio strategies.

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
sys.path.append('..')

from config import Config
from src.evaluation.backtesting import PerformanceMetrics

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides library deprecation warnings — confirm that is intended.
warnings.filterwarnings('ignore')

# Apply one matplotlib style sheet and one seaborn colour palette to all figures below.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

## 1. Model Performance Analysis

In [None]:
# Load the saved model summary (if any) and print one metrics table per model family.
# On success `model_results` holds the parsed JSON; later cells check for that name.
try:
    model_results_path = os.path.join(Config.RESULTS_DIR, 'model_results_summary.json')

    if os.path.exists(model_results_path):
        import json
        # Read as UTF-8 explicitly: the platform default text encoding differs
        # between systems and would garble non-ASCII content on some of them.
        with open(model_results_path, 'r', encoding='utf-8') as f:
            model_results = json.load(f)

        print("üìä ML/DL Model Performance Summary")
        print("=" * 60)

        # Each family is optional in the file. Transposing turns the
        # {model: {metric: value}} mapping into one row per model.
        if 'ml_models' in model_results:
            print("\nü§ñ Traditional ML Models:")
            ml_df = pd.DataFrame(model_results['ml_models']).T
            print(ml_df.round(6))

        if 'dl_models' in model_results:
            print("\nüß† Deep Learning Models:")
            dl_df = pd.DataFrame(model_results['dl_models']).T
            print(dl_df.round(6))

    else:
        print("‚ùå Model results not found. Please run model training first.")

except Exception as e:
    # Best-effort cell: report the problem and let the rest of the notebook run.
    print(f"Error loading model results: {e}")

In [None]:
# Visualize model performance: five comparison panels plus a short "best model" summary.
if 'model_results' in locals():
    # Merge both model families into a single {model name: {metric: value}} mapping.
    all_models = {}
    if 'ml_models' in model_results:
        all_models.update(model_results['ml_models'])
    if 'dl_models' in model_results:
        all_models.update(model_results['dl_models'])

    if all_models:
        # One row per model, one column per metric.
        models_df = pd.DataFrame(all_models).T

        # The transpose produces object dtype whenever the per-model entries mix
        # value types, and pandas cannot plot object columns. Force the two
        # metrics used below to numeric; unparseable values become NaN.
        for metric in ('r2', 'rmse'):
            models_df[metric] = pd.to_numeric(models_df[metric], errors='coerce')

        # Create the figure only once there is something to draw, so an empty
        # results file does not leave a blank canvas in the output.
        plt.figure(figsize=(15, 8))

        # Panel 1: R-squared per model
        plt.subplot(2, 3, 1)
        models_df['r2'].plot(kind='bar', color='skyblue')
        plt.title('Model R¬≤ Score Comparison')
        plt.ylabel('R¬≤ Score')
        plt.xticks(rotation=45)
        plt.grid(True, alpha=0.3)

        # Panel 2: RMSE per model
        plt.subplot(2, 3, 2)
        models_df['rmse'].plot(kind='bar', color='lightcoral')
        plt.title('Model RMSE Comparison')
        plt.ylabel('RMSE')
        plt.xticks(rotation=45)
        plt.grid(True, alpha=0.3)

        # Panel 3: R-squared against RMSE, each point labelled with its model name
        plt.subplot(2, 3, 3)
        plt.scatter(models_df['rmse'], models_df['r2'], s=80, alpha=0.7)

        for model, rmse_value, r2_value in zip(models_df.index, models_df['rmse'], models_df['r2']):
            plt.annotate(model, (rmse_value, r2_value),
                         xytext=(5, 5), textcoords='offset points', fontsize=9)

        plt.xlabel('RMSE')
        plt.ylabel('R¬≤ Score')
        plt.title('Model Performance Trade-off')
        plt.grid(True, alpha=0.3)

        # Panel 4: combined ranking. Rank 1 is the highest R-squared and the
        # lowest RMSE; the two ranks are averaged, so a smaller value is better.
        plt.subplot(2, 3, 4)

        r2_ranks = models_df['r2'].rank(ascending=False)
        rmse_ranks = models_df['rmse'].rank(ascending=True)
        combined_rank = (r2_ranks + rmse_ranks) / 2

        combined_rank.sort_values().plot(kind='barh', color='lightgreen')
        plt.title('Model Ranking (Lower is Better)')
        plt.xlabel('Average Rank')

        # Panel 5: mean R-squared per model family. A model counts as
        # traditional ML when its name appears under 'ml_models'.
        plt.subplot(2, 3, 5)

        ml_names = model_results.get('ml_models', {})
        model_types = [
            'Traditional ML' if model in ml_names else 'Deep Learning'
            for model in models_df.index
        ]

        models_df['type'] = model_types
        type_performance = models_df.groupby('type')['r2'].mean()
        type_performance.plot(kind='bar', color=['orange', 'purple'])
        plt.title('Average R¬≤ by Model Type')
        plt.ylabel('Average R¬≤ Score')
        plt.xticks(rotation=0)

        plt.tight_layout()
        plt.show()

        # Print best models
        print(f"\nüèÜ Best performing models:")
        print(f"   ‚Ä¢ Highest R¬≤: {models_df['r2'].idxmax()} ({models_df['r2'].max():.6f})")
        print(f"   ‚Ä¢ Lowest RMSE: {models_df['rmse'].idxmin()} ({models_df['rmse'].min():.6f})")
        print(f"   ‚Ä¢ Best overall: {combined_rank.idxmin()} (rank: {combined_rank.min():.1f})")

## 2. Portfolio Strategy Analysis

In [None]:
# Advanced portfolio analysis
def analyze_portfolio_performance(returns_series, name, benchmark_returns=None):
    """Summarise one return series as a named Series of performance metrics.

    Every figure is delegated to the matching ``PerformanceMetrics.calculate_*``
    helper; this function only collects the results under readable labels.

    Args:
        returns_series: Return series passed unchanged to each metric helper.
        name: Label stored as the ``name`` of the returned Series.
        benchmark_returns: Optional benchmark series. When given, Beta, Alpha
            and Information Ratio are appended to the result.

    Returns:
        pd.Series indexed by metric label and named ``name``.
    """

    metrics = {
        'Total Return': PerformanceMetrics.calculate_total_return(returns_series),
        'Annualized Return': PerformanceMetrics.calculate_annualized_return(returns_series),
        'Volatility': PerformanceMetrics.calculate_volatility(returns_series),
        # Spelled `calculate_sharpe_ratio`, matching every other call in this
        # notebook; the previous `calculate_sharp_ratio` raised AttributeError.
        'Sharpe Ratio': PerformanceMetrics.calculate_sharpe_ratio(returns_series),
        'Sortino Ratio': PerformanceMetrics.calculate_sortino_ratio(returns_series),
        'Max Drawdown': PerformanceMetrics.calculate_max_drawdown(returns_series),
        'Calmar Ratio': PerformanceMetrics.calculate_calmar_ratio(returns_series)
    }

    # Benchmark-relative metrics only make sense when a benchmark was supplied.
    if benchmark_returns is not None:
        metrics['Beta'] = PerformanceMetrics.calculate_beta(returns_series, benchmark_returns)
        metrics['Alpha'] = PerformanceMetrics.calculate_alpha(returns_series, benchmark_returns)
        metrics['Information Ratio'] = PerformanceMetrics.calculate_information_ratio(returns_series, benchmark_returns)

    return pd.Series(metrics, name=name)

# Load one series per optimisation method into `strategy_returns`, keyed by method name.
# NOTE(review): the files are named `portfolio_values_*`, yet every later cell treats
# the loaded series as periodic returns — confirm what the CSVs actually contain.
strategy_returns = {}
optimization_methods = ['mean_variance', 'risk_parity', 'min_variance', 'max_sharpe', 'cluster_based']

for method in optimization_methods:
    try:
        returns_path = os.path.join(Config.RESULTS_DIR, f'portfolio_values_{method}.csv')
        if os.path.exists(returns_path):
            # `read_csv(..., squeeze=True)` was removed in pandas 2.0. Squeezing the
            # column axis afterwards still collapses a single data column to a Series.
            returns = pd.read_csv(returns_path, index_col=0).squeeze("columns")
            returns.index = pd.to_datetime(returns.index)
            strategy_returns[method] = returns
    except Exception as e:
        # Keep loading the remaining strategies, but report the failure: swallowing
        # it silently made a broken loader look like "no results yet".
        print(f"Could not load strategy '{method}': {e}")
        continue

print(f"‚úÖ Loaded returns for {len(strategy_returns)} strategies")

In [None]:
# Rolling performance analysis: six panels on a 3x2 grid, one series per strategy.
if strategy_returns:
    window = 60  # 60-day rolling window

    def _strategy_lines(slot, transform, title, ylabel):
        """Fill panel `slot` of the 3x2 grid with one transformed line per strategy."""
        plt.subplot(3, 2, slot)
        for key, series in strategy_returns.items():
            curve = transform(series)
            plt.plot(curve.index, curve.values,
                     label=key.replace('_', ' ').title(), linewidth=2)
        plt.title(title)
        plt.ylabel(ylabel)
        plt.legend()
        plt.grid(True, alpha=0.3)

    plt.figure(figsize=(16, 12))

    # Panel 1: rolling mean return, scaled by 252
    _strategy_lines(
        1,
        lambda s: s.rolling(window).mean() * 252,
        f'Rolling {window}-Day Annualized Returns',
        'Annualized Return',
    )

    # Panel 2: rolling standard deviation, scaled by sqrt(252)
    _strategy_lines(
        2,
        lambda s: s.rolling(window).std() * np.sqrt(252),
        f'Rolling {window}-Day Volatility',
        'Annualized Volatility',
    )

    # Panel 3: Sharpe ratio evaluated on each rolling window
    _strategy_lines(
        3,
        lambda s: s.rolling(window).apply(PerformanceMetrics.calculate_sharpe_ratio),
        f'Rolling {window}-Day Sharpe Ratio',
        'Sharpe Ratio',
    )

    # Panel 4: pairwise correlation between the strategies
    plt.subplot(3, 2, 4)
    strategy_df = pd.DataFrame(strategy_returns)
    correlation_matrix = strategy_df.corr()
    sns.heatmap(
        correlation_matrix,
        annot=True,
        cmap='coolwarm',
        center=0,
        square=True,
        fmt='.2f',
        cbar_kws={"shrink": .8},
    )
    plt.title('Strategy Return Correlations')

    # Panel 5: overlaid density histograms of the non-missing observations
    plt.subplot(3, 2, 5)
    for key, series in strategy_returns.items():
        plt.hist(series.dropna(), bins=50, alpha=0.6, density=True,
                 label=key.replace('_', ' ').title())
    plt.title('Return Distribution Comparison')
    plt.xlabel('Daily Return')
    plt.ylabel('Density')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Panel 6: compounded growth, cumprod of (1 + series)
    _strategy_lines(
        6,
        lambda s: (1 + s).cumprod(),
        'Cumulative Performance',
        'Cumulative Return',
    )

    plt.tight_layout()
    plt.show()

## 3. Risk Analysis

In [None]:
# Advanced risk analysis
def calculate_var_cvar(returns, confidence_level=0.05):
    """Return ``(VaR, CVaR)`` for ``returns`` at the given tail probability.

    VaR is the ``confidence_level`` quantile of ``returns``; CVaR is the mean of
    all observations at or below that quantile.
    """
    threshold = returns.quantile(confidence_level)
    in_tail = returns <= threshold
    tail_mean = returns[in_tail].mean()
    return threshold, tail_mean

def calculate_tail_ratio(returns):
    """Return the 95th percentile divided by the absolute value of the 5th percentile."""
    upper_tail = returns.quantile(0.95)
    lower_tail = returns.quantile(0.05)
    return upper_tail / abs(lower_tail)

if strategy_returns:
    def _risk_profile(key, series):
        """Collect the tail-risk and distribution-shape figures for one strategy."""
        var_5, cvar_5 = calculate_var_cvar(series, 0.05)
        var_1, cvar_1 = calculate_var_cvar(series, 0.01)
        # Standard deviation taken over the negative observations only.
        negative_part = series[series < 0]
        return {
            'Strategy': key.replace('_', ' ').title(),
            'VaR (5%)': var_5,
            'CVaR (5%)': cvar_5,
            'VaR (1%)': var_1,
            'CVaR (1%)': cvar_1,
            'Tail Ratio': calculate_tail_ratio(series),
            'Skewness': series.skew(),
            'Kurtosis': series.kurtosis(),
            'Downside Deviation': negative_part.std(),
        }

    # One row per strategy, indexed by its display name.
    risk_analysis = [_risk_profile(key, series) for key, series in strategy_returns.items()]
    risk_df = pd.DataFrame(risk_analysis).set_index('Strategy')

    print("üìâ Advanced Risk Analysis")
    print("=" * 80)
    print(risk_df.round(4))

In [None]:
# Visualize risk metrics: six panels on a 2x3 grid built from `risk_df` and `strategy_returns`.
if 'risk_df' in locals():
    x = np.arange(len(risk_df))
    width = 0.35

    def _paired_bars(slot, measure, ylabel, title):
        """Draw the 5% and 1% columns of `measure` side by side in panel `slot`."""
        plt.subplot(2, 3, slot)
        for shift, level in ((-width / 2, '5%'), (width / 2, '1%')):
            plt.bar(x + shift, risk_df[f'{measure} ({level})'], width,
                    label=f'{measure} {level}', alpha=0.7)
        plt.xlabel('Strategy')
        plt.ylabel(ylabel)
        plt.title(title)
        plt.xticks(x, risk_df.index, rotation=45)
        plt.legend()
        plt.grid(True, alpha=0.3)

    def _single_bars(slot, column, colour, title):
        """Draw one `risk_df` column as a bar chart in panel `slot`; the column name doubles as y-label."""
        plt.subplot(2, 3, slot)
        risk_df[column].plot(kind='bar', color=colour)
        plt.title(title)
        plt.ylabel(column)
        plt.xticks(rotation=45)
        plt.grid(True, alpha=0.3)

    plt.figure(figsize=(15, 10))

    # Panel 1: VaR at both levels
    _paired_bars(1, 'VaR', 'Value at Risk', 'Value at Risk Comparison')

    # Panel 2: skewness against kurtosis, with dashed reference lines through zero
    plt.subplot(2, 3, 2)
    plt.scatter(risk_df['Skewness'], risk_df['Kurtosis'], s=100, alpha=0.7)
    for label, skew_value, kurt_value in zip(risk_df.index, risk_df['Skewness'], risk_df['Kurtosis']):
        plt.annotate(label, (skew_value, kurt_value),
                     xytext=(5, 5), textcoords='offset points', fontsize=9)
    plt.xlabel('Skewness')
    plt.ylabel('Kurtosis')
    plt.title('Return Distribution Shape')
    plt.axhline(y=0, color='r', linestyle='--', alpha=0.5)
    plt.axvline(x=0, color='r', linestyle='--', alpha=0.5)
    plt.grid(True, alpha=0.3)

    # Panels 3 and 4: single-column bar charts
    _single_bars(3, 'Tail Ratio', 'coral', 'Tail Ratio (Upside/Downside)')
    _single_bars(4, 'Downside Deviation', 'lightcoral', 'Downside Deviation')

    # Panel 5: CVaR at both levels
    _paired_bars(5, 'CVaR', 'Conditional VaR', 'Conditional VaR Comparison')

    # Panel 6: risk/return scatter coloured by Sharpe ratio
    plt.subplot(2, 3, 6)

    performance_summary = [
        {
            'Strategy': key.replace('_', ' ').title(),
            'Return': PerformanceMetrics.calculate_annualized_return(series),
            'Risk': PerformanceMetrics.calculate_volatility(series),
            'Sharpe': PerformanceMetrics.calculate_sharpe_ratio(series),
        }
        for key, series in strategy_returns.items()
    ]
    perf_df = pd.DataFrame(performance_summary)

    scatter = plt.scatter(perf_df['Risk'], perf_df['Return'],
                          c=perf_df['Sharpe'], s=100, cmap='viridis', alpha=0.8)
    plt.colorbar(scatter, label='Sharpe Ratio')

    for label, risk_value, return_value in zip(perf_df['Strategy'], perf_df['Risk'], perf_df['Return']):
        plt.annotate(label, (risk_value, return_value),
                     xytext=(5, 5), textcoords='offset points', fontsize=9)

    plt.xlabel('Risk (Volatility)')
    plt.ylabel('Return (Annualized)')
    plt.title('Risk-Return Profile')
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## 4. Final Recommendations

In [None]:
# Generate final recommendations: a per-strategy scoreboard followed by fixed guidance text.
print("üéØ FINAL RECOMMENDATIONS AND INSIGHTS")
print("=" * 80)

if 'strategy_returns' in locals() and strategy_returns:
    # Column label -> metric function, in the column order of the summary table.
    summary_metrics = {
        'Sharpe': PerformanceMetrics.calculate_sharpe_ratio,
        'Return': PerformanceMetrics.calculate_annualized_return,
        'Volatility': PerformanceMetrics.calculate_volatility,
        'Max DD': PerformanceMetrics.calculate_max_drawdown,
        'Calmar': PerformanceMetrics.calculate_calmar_ratio,
    }

    overall_performance = []
    for key, series in strategy_returns.items():
        row = {'Strategy': key.replace('_', ' ').title()}
        for label, compute in summary_metrics.items():
            row[label] = compute(series)
        overall_performance.append(row)

    perf_summary = pd.DataFrame(overall_performance).set_index('Strategy')

    # Strategy holding the maximum of each headline metric.
    best_sharpe = perf_summary['Sharpe'].idxmax()
    best_return = perf_summary['Return'].idxmax()
    best_calmar = perf_summary['Calmar'].idxmax()

    print("üèÜ TOP PERFORMING STRATEGIES:")
    print(f"   ‚Ä¢ Best Risk-Adjusted Return (Sharpe): {best_sharpe}")
    print(f"   ‚Ä¢ Highest Absolute Return: {best_return}")
    print(f"   ‚Ä¢ Best Risk-Adjusted DD (Calmar): {best_calmar}")

    print("\nüìä PERFORMANCE SUMMARY:")
    print(perf_summary.round(4))

# Static closing text: (heading, lines) pairs printed in order.
closing_sections = (
    ("\nüí° KEY INSIGHTS:", (
        "   1. Machine learning models show varying prediction accuracy",
        "   2. Stock clustering provides meaningful risk diversification",
        "   3. Different optimization methods suit different risk preferences",
        "   4. Transaction costs significantly impact real-world performance",
        "   5. Regular rebalancing is crucial for maintaining target allocation",
    )),
    ("\nüöÄ IMPLEMENTATION RECOMMENDATIONS:", (
        "   ‚Ä¢ Conservative Investors: Focus on minimum variance or risk parity",
        "   ‚Ä¢ Aggressive Investors: Consider maximum Sharpe or mean variance",
        "   ‚Ä¢ Diversification Seekers: Use cluster-based optimization",
        "   ‚Ä¢ Model Selection: Ensemble approaches often work best",
        "   ‚Ä¢ Rebalancing: Monthly frequency provides good balance",
    )),
    ("\n‚ö†Ô∏è  IMPORTANT CONSIDERATIONS:", (
        "   ‚Ä¢ Past performance does not guarantee future results",
        "   ‚Ä¢ Models trained on historical data may not capture regime changes",
        "   ‚Ä¢ Market conditions and correlations can shift dramatically",
        "   ‚Ä¢ Always consider transaction costs and market impact",
        "   ‚Ä¢ Regular model retraining and validation is essential",
    )),
    ("\nüî¨ FUTURE ENHANCEMENTS:", (
        "   ‚Ä¢ Incorporate alternative data sources (sentiment, macro indicators)",
        "   ‚Ä¢ Implement regime detection for dynamic model selection",
        "   ‚Ä¢ Add ESG factors and sustainable investing constraints",
        "   ‚Ä¢ Develop real-time rebalancing triggers",
        "   ‚Ä¢ Include options and derivatives for enhanced risk management",
    )),
)

for heading, lines in closing_sections:
    print(heading)
    for line in lines:
        print(line)