In [None]:
# Model Comparison Dashboard
# This notebook creates comprehensive model comparison visualizations

import sys
sys.path.append('..')

import mlflow
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mlflow.tracking import MlflowClient

# Plot appearance: seaborn "whitegrid" theme and a wide default figure size
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

# Dashboard banner: title line followed by a 60-character rule
print("üìä Model Comparison Dashboard", "=" * 60, sep="\n")

# MLflow handles: a tracking client plus the experiment whose runs are compared.
# `experiment` is checked against None further down before it is used.
client = MlflowClient()
experiment = mlflow.get_experiment_by_name("wine_quality_comparison")

# Model comparison dashboard body.
#
# When the experiment exists, this block:
#   1. pulls every run of the experiment from MLflow,
#   2. builds `df_comparison` (one row per run: model type, run name, metrics, run id),
#   3. draws three charts (AUC bars, per-metric horizontal bars, radar chart),
#   4. prints the run with the highest AUC, and
#   5. writes `df_comparison` to model_comparison_report.csv.
#
# Runs that lack a model type or any of the plotted metrics stay in the table
# and the CSV, but are left out of the charts and the best-model selection so
# NaN values cannot break them. If no run is complete, a message is printed
# instead of raising.
if experiment is None:
    print("‚ùå Experiment 'wine_quality_comparison' not found!")
    print("   Please run 02_model_training.ipynb first.")
else:
    from math import floor, pi

    experiment_id = experiment.experiment_id
    print(f"‚úÖ Found experiment: {experiment.name} (ID: {experiment_id})")

    # Get all runs
    runs = mlflow.search_runs(experiment_ids=[experiment_id])
    print(f"   Total runs: {len(runs)}")

    # Single source of truth for the metric columns used by every chart
    metrics_to_plot = ['AUC', 'Accuracy', 'Precision', 'Recall', 'F1 Score']

    # Report column -> column of the search_runs DataFrame
    run_fields = {
        'Model': 'params.model_type',
        'Run Name': 'tags.mlflow.runName',
        'AUC': 'metrics.auc',
        'Accuracy': 'metrics.accuracy',
        'Precision': 'metrics.precision',
        'Recall': 'metrics.recall',
        'F1 Score': 'metrics.f1_score',
        'Run ID': 'run_id',
    }

    # Extract key metrics. `.get` yields None when a column is absent (e.g. no
    # run logged that metric) instead of raising KeyError.
    comparison_data = [
        {label: run.get(column) for label, column in run_fields.items()}
        for _, run in runs.iterrows()
    ]

    # Passing `columns` keeps the expected schema even when there are no runs
    df_comparison = pd.DataFrame(comparison_data, columns=list(run_fields))
    for metric in metrics_to_plot:
        df_comparison[metric] = pd.to_numeric(df_comparison[metric], errors='coerce')

    # Display comparison table (all runs, including incomplete ones)
    print("\nüìã Model Performance Comparison:")
    print("-" * 60)
    display(df_comparison.sort_values('AUC', ascending=False))

    # Only fully populated runs are charted and ranked
    df_complete = (
        df_comparison
        .dropna(subset=['Model'] + metrics_to_plot)
        .reset_index(drop=True)
    )
    skipped_runs = len(df_comparison) - len(df_complete)
    if skipped_runs:
        print(f"   Skipping {skipped_runs} run(s) without a model type or with missing metrics.")

    if df_complete.empty:
        print("   No complete runs to compare - charts and report were not generated.")
        print("   Please run 02_model_training.ipynb first.")
    else:
        # Axis floor: 0.7 as before when every score is at least 0.7, otherwise
        # rounded down to the next tenth so low scores are not clipped away.
        lowest_score = df_complete[metrics_to_plot].min().min()
        axis_min = min(0.7, floor(lowest_score * 10) / 10)

        # 1. AUC Comparison
        print("\nüìä Visualization 1: AUC Score Comparison")
        plt.figure(figsize=(10, 6))
        ax = sns.barplot(data=df_complete.sort_values('AUC', ascending=False),
                         x='Model', y='AUC', palette='viridis')
        plt.title('Model Comparison - AUC Scores', fontsize=16, fontweight='bold')
        plt.xlabel('Model', fontsize=12)
        plt.ylabel('AUC Score', fontsize=12)
        plt.ylim(axis_min, 1.0)

        # Add value labels on bars
        for container in ax.containers:
            ax.bar_label(container, fmt='%.4f', fontsize=10)

        plt.tight_layout()
        plt.show()

        # 2. Multi-metric comparison
        print("\nüìä Visualization 2: Multi-Metric Comparison")

        fig, axes = plt.subplots(1, len(metrics_to_plot), figsize=(20, 4))
        for idx, metric in enumerate(metrics_to_plot):
            ax = axes[idx]
            df_sorted = df_complete.sort_values(metric, ascending=False)

            # Bars are placed by row position, not by model name: runs sharing
            # a model name would otherwise collapse onto one bar while the
            # value labels below are positioned per row.
            positions = list(range(len(df_sorted)))
            ax.barh(positions, df_sorted[metric],
                    color=plt.cm.viridis(idx / len(metrics_to_plot)))
            ax.set_yticks(positions)
            ax.set_yticklabels(df_sorted['Model'])
            ax.set_xlabel(metric, fontsize=10, fontweight='bold')
            ax.set_xlim(axis_min, 1.0)
            ax.grid(axis='x', alpha=0.3)

            # Add value labels
            for i, v in enumerate(df_sorted[metric]):
                ax.text(v + 0.01, i, f'{v:.3f}', va='center', fontsize=9)

        plt.tight_layout()
        plt.suptitle('Comprehensive Model Performance Comparison',
                     fontsize=16, fontweight='bold', y=1.02)
        plt.show()

        # 3. Radar chart for all metrics
        print("\nüìä Visualization 3: Radar Chart - Overall Performance")

        categories = list(metrics_to_plot)
        N = len(categories)

        # One spoke per metric; the first angle is repeated to close the polygon
        angles = [n / float(N) * 2 * pi for n in range(N)]
        angles += angles[:1]

        fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(projection='polar'))

        # Cycle through the Set3 palette so runs beyond its length still get
        # varied colours instead of all mapping to the last entry.
        colors = plt.cm.Set3([i % plt.cm.Set3.N for i in range(len(df_complete))])

        # enumerate() gives the colour position independently of index labels
        for position, (_, row) in enumerate(df_complete.iterrows()):
            values = [row[category] for category in categories]
            values += values[:1]

            ax.plot(angles, values, 'o-', linewidth=2, label=row['Model'],
                    color=colors[position])
            ax.fill(angles, values, alpha=0.15, color=colors[position])

        ax.set_xticks(angles[:-1])
        ax.set_xticklabels(categories, size=12)
        ax.set_ylim(axis_min, 1.0)
        ax.set_title('Model Performance Radar Chart', size=16, fontweight='bold', pad=20)
        ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
        ax.grid(True)

        plt.tight_layout()
        plt.show()

        # 4. Best model summary (highest AUC among complete runs)
        best_model = df_complete.loc[df_complete['AUC'].idxmax()]
        print("\nüèÜ Best Model Summary:")
        print("=" * 60)
        print(f"   Model: {best_model['Model']}")
        print(f"   Run Name: {best_model['Run Name']}")
        print(f"   AUC: {best_model['AUC']:.4f}")
        print(f"   Accuracy: {best_model['Accuracy']:.4f}")
        print(f"   Precision: {best_model['Precision']:.4f}")
        print(f"   Recall: {best_model['Recall']:.4f}")
        print(f"   F1 Score: {best_model['F1 Score']:.4f}")
        print(f"   Run ID: {best_model['Run ID']}")

        # 5. Save comparison report (all runs, as shown in the table above)
        print("\nüíæ Saving comparison report...")
        df_comparison.to_csv('model_comparison_report.csv', index=False)
        print("   ‚úÖ Saved to: model_comparison_report.csv")

        print("\nüéØ Next step: Run 04_model_monitoring.ipynb to monitor the best model")