## 1. Import Libraries and Setup

In [None]:
# Import comprehensive libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os

import warnings
warnings.filterwarnings('ignore')

# Set plotting style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
pd.set_option('display.max_columns', None)

print("=" * 80)
print("COMPREHENSIVE MODEL COMPARISON ANALYSIS")
print("=" * 80)
print("Comparing: Deep Learning (3 ANNs) vs Ridge Regression Baseline")
print("Goal: Production-ready model recommendation with business insights")
print("=" * 80)

üöÄ COMPREHENSIVE MODEL COMPARISON ANALYSIS
üìä Comparing: Deep Learning (3 ANNs) vs Ridge Regression Baseline
üéØ Goal: Production-ready model recommendation with business insights


## 2. Load Model Comparison Results

Load the results from deep learning training

In [None]:
# Load the original dataset
df = pd.read_csv('../data/ecommerce_customer.csv')
print(f"Dataset: {df.shape[0]} customers, {df.shape[1]} features")
print(f"Spending range: ${df['Yearly Amount Spent'].min():.2f} - ${df['Yearly Amount Spent'].max():.2f}")
print(f"Average spending: ${df['Yearly Amount Spent'].mean():.2f}")
print(f"Std deviation: ${df['Yearly Amount Spent'].std():.2f}\n")

# Load model comparison results from deep learning notebook
results_df = pd.read_csv('../models/deep_learning/model_comparison.csv')

print("=" * 80)
print("MODEL PERFORMANCE SUMMARY")
print("=" * 80)
display(results_df)

# Find best model
best_idx = results_df['Test_R2'].idxmax()
best_model = results_df.loc[best_idx]

print("\n" + "=" * 80)
print(f"BEST MODEL: {best_model['Model']}")
print("=" * 80)
print(f"  Test R¬≤:    {best_model['Test_R2']:.4f} ({best_model['Test_R2']*100:.2f}% variance explained)")
print(f"  Test RMSE:  ${best_model['Test_RMSE']:.2f}")
print(f"  Test MAE:   ${best_model['Test_MAE']:.2f}")
print(f"  Train R¬≤:   {best_model['Train_R2']:.4f}")
print("=" * 80)

üìã Dataset: 500 customers, 8 features
üí∞ Spending range: $256.67 - $765.52
üìä Average spending: $499.31
üìà Std deviation: $79.31

MODEL PERFORMANCE SUMMARY


Unnamed: 0,Model,Train_RMSE,Test_RMSE,Train_MAE,Test_MAE,Train_R2,Test_R2
0,Simple_ANN,11.290036,13.633242,8.894033,11.306932,0.980611,0.962465
1,Deep_ANN,374.011415,374.653273,373.811004,374.368316,-20.27838,-27.346669
2,Wide_Deep,24.134516,25.099529,16.185155,18.097296,0.911397,0.872775
3,Ridge_Regression,9.790767,10.460888,7.720888,8.538494,0.985418,0.977901



üèÜ BEST MODEL: Ridge_Regression
  Test R¬≤:    0.9779 (97.79% variance explained)
  Test RMSE:  $10.46
  Test MAE:   $8.54
  Train R¬≤:   0.9854


## 3. Interactive Performance Comparison

Create comprehensive comparison visualizations

In [18]:
# Create interactive Plotly comparison with adjusted sizing
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('R¬≤ Score (Higher is Better)', 'RMSE (Lower is Better)', 
                    'MAE (Lower is Better)', 'Train vs Test R¬≤'),
    specs=[[{'type': 'bar'}, {'type': 'bar'}],
           [{'type': 'bar'}, {'type': 'scatter'}]],
    vertical_spacing=0.15,
    horizontal_spacing=0.12
)

# Color mapping for models
colors = {
    'Simple_ANN': '#3498db',
    'Deep_ANN': '#e74c3c',
    'Wide_Deep': '#2ecc71',
    'Ridge_Regression': '#f39c12'
}

model_colors = [colors.get(model, '#95a5a6') for model in results_df['Model']]

# R¬≤ Score comparison - add minimal padding for text visibility
min_r2 = results_df['Test_R2'].min()
max_r2 = results_df['Test_R2'].max()
r2_range = [min_r2 - 0.002, max_r2 + 0.002]  # Minimal padding since values are close to 1.0

fig.add_trace(
    go.Bar(x=results_df['Model'], y=results_df['Test_R2'],
           marker_color=model_colors,
           text=[f"{val:.4f}" for val in results_df['Test_R2']],
           textposition='outside',
           textfont=dict(size=10),
           name='Test R¬≤',
           showlegend=False),
    row=1, col=1
)

# RMSE comparison - add range padding for text
max_rmse = results_df['Test_RMSE'].max()
rmse_range = [0, max_rmse * 1.15]

fig.add_trace(
    go.Bar(x=results_df['Model'], y=results_df['Test_RMSE'],
           marker_color=model_colors,
           text=[f"${val:.2f}" for val in results_df['Test_RMSE']],
           textposition='outside',
           textfont=dict(size=11),
           name='Test RMSE',
           showlegend=False),
    row=1, col=2
)

# MAE comparison - add range padding for text
max_mae = results_df['Test_MAE'].max()
mae_range = [0, max_mae * 1.15]

fig.add_trace(
    go.Bar(x=results_df['Model'], y=results_df['Test_MAE'],
           marker_color=model_colors,
           text=[f"${val:.2f}" for val in results_df['Test_MAE']],
           textposition='outside',
           textfont=dict(size=11),
           name='Test MAE',
           showlegend=False),
    row=2, col=1
)

# Train vs Test R¬≤ scatter
fig.add_trace(
    go.Scatter(x=results_df['Train_R2'], y=results_df['Test_R2'],
               mode='markers+text',
               marker=dict(size=15, color=model_colors, line=dict(width=2, color='white')),
               text=results_df['Model'],
               textposition='top center',
               textfont=dict(size=10),
               name='Models',
               showlegend=False),
    row=2, col=2
)

# Add perfect generalization line
fig.add_trace(
    go.Scatter(x=[0.96, 1.0], y=[0.96, 1.0],
               mode='lines',
               line=dict(dash='dash', color='gray', width=2),
               name='Perfect Generalization',
               showlegend=False),
    row=2, col=2
)

# Update axes with better formatting
fig.update_xaxes(title_text="Model", tickangle=45, tickfont=dict(size=10), row=1, col=1)
fig.update_xaxes(title_text="Model", tickangle=45, tickfont=dict(size=10), row=1, col=2)
fig.update_xaxes(title_text="Model", tickangle=45, tickfont=dict(size=10), row=2, col=1)
fig.update_xaxes(title_text="Train R¬≤", row=2, col=2)

fig.update_yaxes(title_text="R¬≤ Score", range=r2_range, row=1, col=1)
fig.update_yaxes(title_text="RMSE ($)", range=rmse_range, row=1, col=2)
fig.update_yaxes(title_text="MAE ($)", range=mae_range, row=2, col=1)
fig.update_yaxes(title_text="Test R¬≤", row=2, col=2)

# Update layout with larger size and better margins
fig.update_layout(
    height=900,
    width=1400,
    title_text="<b>Comprehensive Model Performance Comparison</b>",
    title_x=0.5,
    title_font=dict(size=18),
    showlegend=False,
    margin=dict(t=100, b=80, l=80, r=80)
)

fig.show()

## 4. Production Deployment Recommendation

Multi-criteria model selection scoring

In [None]:
# Create scoring matrix for production readiness
scoring_criteria = {
    'Model': results_df['Model'].tolist(),
    'Accuracy': (results_df['Test_R2'] / results_df['Test_R2'].max() * 100).round(2).tolist(),
    'Error_Min': (results_df['Test_MAE'].min() / results_df['Test_MAE'] * 100).round(2).tolist(),
    'Interpretability': [60, 20, 20, 100],  # Manual scoring (0-100)
    'Speed': [100, 70, 70, 100],  # Inference speed scoring
    'Maintenance': [80, 40, 40, 100],  # Ease of updates and monitoring
}

scoring_df = pd.DataFrame(scoring_criteria)

# Calculate weighted overall score
weights = {
    'Accuracy': 0.35,
    'Error_Min': 0.30,
    'Interpretability': 0.15,
    'Speed': 0.10,
    'Maintenance': 0.10
}

scoring_df['Overall_Score'] = (
    scoring_df['Accuracy'] * weights['Accuracy'] +
    scoring_df['Error_Min'] * weights['Error_Min'] +
    scoring_df['Interpretability'] * weights['Interpretability'] +
    scoring_df['Speed'] * weights['Speed'] +
    scoring_df['Maintenance'] * weights['Maintenance']
).round(2)

scoring_df = scoring_df.sort_values('Overall_Score', ascending=False)

print("=" * 80)
print("PRODUCTION READINESS SCORECARD")
print("=" * 80)
print("Scoring Criteria (0-100 scale):")
print("  ‚Ä¢ Accuracy (35%): Model prediction accuracy (R¬≤)")
print("  ‚Ä¢ Error_Min (30%): Prediction error minimization (MAE)")
print("  ‚Ä¢ Interpretability (15%): Explainability for business stakeholders")
print("  ‚Ä¢ Speed (10%): Training and inference performance")
print("  ‚Ä¢ Maintenance (10%): Ease of updates and monitoring")
print("=" * 80)
display(scoring_df)

# Recommendation
recommended_model = scoring_df.iloc[0]['Model']
recommended_score = scoring_df.iloc[0]['Overall_Score']

print("\n" + "=" * 80)
print("PRODUCTION DEPLOYMENT RECOMMENDATION")
print("=" * 80)
print(f"\nRECOMMENDED MODEL: {recommended_model}")
print(f"   Overall Score: {recommended_score}/100")
print(f"\nJustification:")

if recommended_model == 'Ridge_Regression':
    print("   ‚úì Best overall performance (accuracy + error minimization)")
    print("   ‚úì Highest interpretability - easy to explain to stakeholders")
    print("   ‚úì Fastest inference - suitable for real-time predictions")
    print("   ‚úì Low maintenance overhead")
    print("   ‚úì Proven stability with linear relationships")
elif 'Wide_Deep' in recommended_model:
    print("   ‚úì Best deep learning performance")
    print("   ‚úì Captures both linear and non-linear patterns")
    print("   ‚úì Scalable for future feature additions")
    print("   ‚úì Good generalization (low overfitting)")
elif 'Deep_ANN' in recommended_model:
    print("   ‚úì Stable convergence and training")
    print("   ‚úì Good capacity for complex patterns")
    print("   ‚úì Reliable validation performance")
else:
    print("   ‚úì Fastest training time")
    print("   ‚úì Good baseline performance")
    print("   ‚úì Simple architecture")

print(f"\nAlternative/Backup Model: {scoring_df.iloc[1]['Model']} (Score: {scoring_df.iloc[1]['Overall_Score']}/100)")
print("=" * 80)

PRODUCTION READINESS SCORECARD
Scoring Criteria (0-100 scale):
  ‚Ä¢ Accuracy (35%): Model prediction accuracy (R¬≤)
  ‚Ä¢ Error_Min (30%): Prediction error minimization (MAE)
  ‚Ä¢ Interpretability (15%): Explainability for business stakeholders
  ‚Ä¢ Speed (10%): Training and inference performance
  ‚Ä¢ Maintenance (10%): Ease of updates and monitoring


Unnamed: 0,Model,Accuracy,Error_Min,Interpretability,Speed,Maintenance,Overall_Score
3,Ridge_Regression,100.0,100.0,100,100,100,100.0
0,Simple_ANN,98.42,75.52,60,100,80,84.1
2,Wide_Deep,89.25,47.18,20,70,40,59.39
1,Deep_ANN,-2796.47,2.28,20,70,40,-964.08



üéØ PRODUCTION DEPLOYMENT RECOMMENDATION

‚úÖ RECOMMENDED MODEL: Ridge_Regression
   Overall Score: 100.0/100

üìã Justification:
   ‚úì Best overall performance (accuracy + error minimization)
   ‚úì Highest interpretability - easy to explain to stakeholders
   ‚úì Fastest inference - suitable for real-time predictions
   ‚úì Low maintenance overhead
   ‚úì Proven stability with linear relationships

üí° Alternative/Backup Model: Simple_ANN (Score: 84.1/100)
