## 1. Import Libraries and Setup

In [None]:
# Import comprehensive libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Model loading and metrics
import pickle
import joblib
import json
import tensorflow as tf
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, silhouette_score

import warnings
warnings.filterwarnings('ignore')

# Import custom modules
import sys
import os
# Make the project root importable so the `src.*` imports below resolve.
# Notebooks have no `__file__`, so the root is taken to be the parent of the
# current working directory (the same layout the '../data' and '../models'
# relative paths in this notebook assume). The original spelled this as
# abspath('__file__') with a quoted literal, which resolves against the working
# directory to the same path but reads as if it used the module's location.
project_root = os.path.dirname(os.getcwd())
# Re-running the cell must not keep stacking duplicate entries on sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

from src.exception import CustomException
from src.logger import logging

# Notebook-wide display configuration: show every DataFrame column, then apply
# the matplotlib style sheet followed by the seaborn colour palette.
pd.set_option('display.max_columns', None)
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Banner announcing what this notebook covers, one line per print.
for banner_line in (
    "üöÄ COMPREHENSIVE MODEL COMPARISON ANALYSIS",
    "üìä Evaluating: Traditional ML | Deep Learning ANNs | Customer Segmentation",
    "üéØ Goal: Production-ready model recommendation with business insights",
):
    print(banner_line)

## 2. Load Dataset and All Model Results

In [None]:
# Read the customer dataset and report its size plus basic statistics of the
# regression target column.
df = pd.read_csv('../data/ecommerce_customer.csv')
print(f"üìã Original dataset shape: {df.shape}")

# Pull the target column out once instead of indexing the frame for each statistic.
yearly_spend = df['Yearly Amount Spent']
spend_low = yearly_spend.min()
spend_high = yearly_spend.max()
print(f"üí∞ Target range: ${spend_low:.2f} - ${spend_high:.2f}")

spend_average = yearly_spend.mean()
print(f"üìä Average spending: ${spend_average:.2f}")

# Results registry: one independent, initially empty dict per model family.
# The loaders that follow fill each entry when its results file is present.
all_results = {
    family: {}
    for family in ('traditional_ml', 'deep_learning', 'clustering')
}

# Human-readable names of the families whose results were actually loaded.
model_availability = list()

# Traditional ML: the pickled object is stored as-is under 'traditional_ml'.
# A missing file is not an error here; it only means the regression notebook
# has not been run yet, so a hint is printed and the category stays empty.
# NOTE(review): pickle.load executes code embedded in the file - only load
# artifacts produced by this project.
try:
    with open('../models/regression/model_results.pkl', 'rb') as results_file:
        all_results['traditional_ml'] = pickle.load(results_file)
except FileNotFoundError:
    print("‚ö†Ô∏è  Traditional ML results not found - run notebook 02_Regression_Models.ipynb first")
else:
    print("‚úÖ Traditional ML results loaded")
    model_availability.append('Traditional ML')

# Load Deep Learning results.
# The file is JSON whose 'results' entry holds the per-model metrics. It is
# decoded explicitly as UTF-8 (the encoding JSON interchange uses) instead of
# the platform's locale default, so the same file reads identically on every
# operating system.
# A missing file only means the deep learning notebook has not been run yet:
# a hint is printed and the category stays empty.
try:
    with open('../models/deep_learning/dl_results.json', 'r', encoding='utf-8') as f:
        dl_data = json.load(f)
        all_results['deep_learning'] = dl_data['results']
    print("‚úÖ Deep Learning results loaded")
    model_availability.append('Deep Learning')
except FileNotFoundError:
    print("‚ö†Ô∏è  Deep Learning results not found - run notebook 05_Deep_Learning_Regression.ipynb first")

# Customer segmentation: the pickle holds a mapping whose 'results' entry is
# stored under 'clustering'; the full payload stays available as
# `clustering_data`. A missing file only produces a hint to run the
# segmentation notebook first.
# NOTE(review): pickle.load executes code embedded in the file - only load
# artifacts produced by this project.
try:
    with open('../models/clustering/clustering_results.pkl', 'rb') as results_file:
        clustering_data = pickle.load(results_file)
        all_results['clustering'] = clustering_data['results']
except FileNotFoundError:
    print("‚ö†Ô∏è  Clustering results not found - run notebook 03_Customer_Segmentation.ipynb first")
else:
    print("‚úÖ Clustering results loaded")
    model_availability.append('Customer Segmentation')

# Summarise which model families were found, in the order they were loaded.
available_categories = ', '.join(model_availability)
print(f"\nüìà Available model categories: {available_categories}")

## 3. Performance Comparison Dashboard

In [None]:
# Build one leaderboard row (a plain dict) per regression model. Key order is
# significant: it becomes the column order of the comparison DataFrame.


def _score_columns(scores):
    """Return the metric columns shared by every leaderboard row.

    Missing R-squared values fall back to 0 and missing error metrics to
    infinity.
    """
    return {
        'Test R¬≤': scores.get('test_r2', 0),
        'Test RMSE': scores.get('test_rmse', float('inf')),
        'Test MAE': scores.get('test_mae', float('inf')),
        'Train R¬≤': scores.get('train_r2', 0),
    }


# Traditional ML rows: models with "linear" in their name are rated as highly
# interpretable, all others as medium.
traditional_rows = [
    {
        'Model': name,
        'Category': 'Traditional ML',
        **_score_columns(scores),
        'Interpretability': 'Medium' if 'linear' not in name.lower() else 'High',
        'Training Speed': 'Fast',
        'Prediction Speed': 'Very Fast',
    }
    for name, scores in all_results['traditional_ml'].items()
]

# Deep Learning rows: the qualitative ratings are the same for every network.
deep_learning_rows = [
    {
        'Model': name,
        'Category': 'Deep Learning',
        **_score_columns(scores),
        'Interpretability': 'Low',
        'Training Speed': 'Slow',
        'Prediction Speed': 'Fast',
    }
    for name, scores in all_results['deep_learning'].items()
]

# Traditional models first, then deep learning, matching the load order.
comparison_data = traditional_rows + deep_learning_rows

# Leaderboard: rank every collected regression model by test R-squared and
# report the winner overall and within each model family.


def _score_summary(row):
    """Format the R-squared / RMSE line printed beneath a winner headline."""
    return f"   R¬≤ Score: {row['Test R¬≤']:.4f} | RMSE: {row['Test RMSE']:.2f}"


if not comparison_data:
    # Nothing was loaded for either regression family.
    print("‚ö†Ô∏è  No regression model results available for comparison")
    print("   Run notebooks 02, 05 first to generate model results")
else:
    comparison_df = pd.DataFrame(comparison_data).sort_values('Test R¬≤', ascending=False)

    print("üèÜ MODEL PERFORMANCE LEADERBOARD")
    print("="*60)
    display(comparison_df.round(4))

    # Rows are sorted best-first, so the first row of any slice is its winner.
    best_overall = comparison_df.iloc[0]

    traditional_slice = comparison_df[comparison_df['Category'] == 'Traditional ML']
    deep_learning_slice = comparison_df[comparison_df['Category'] == 'Deep Learning']
    # A family with no rows has no winner; represent that as None.
    best_traditional = None if traditional_slice.empty else traditional_slice.iloc[0]
    best_dl = None if deep_learning_slice.empty else deep_learning_slice.iloc[0]

    print(f"\nü•á BEST OVERALL: {best_overall['Model']} ({best_overall['Category']})")
    print(_score_summary(best_overall))

    if best_traditional is not None:
        print(f"\nüèÖ BEST TRADITIONAL ML: {best_traditional['Model']}")
        print(_score_summary(best_traditional))

    if best_dl is not None:
        print(f"\nü§ñ BEST DEEP LEARNING: {best_dl['Model']}")
        print(_score_summary(best_dl))