# Notebook 12: Unified Comparison Dashboard

**Purpose**: Aggregate all results and provide comprehensive analysis across Classical, Neural, and QML paradigms.

**Inputs**:
- `classical_metrics.csv`
- `neural_metrics.csv`
- `qml_metrics.csv`

**Outputs**:
- Final dashboard visualizations
- Summary report

---

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Patch
import json

# Paths
# BASE_DIR is the parent of the current working directory
# (presumably the notebook lives one level below the project root - verify).
BASE_DIR = Path('.').resolve().parent
RESULTS_DIR = BASE_DIR / 'results'
FIGURES_DIR = BASE_DIR / 'figures'

# Figures are written below FIGURES_DIR with plt.savefig(); create the
# directory up front so saving does not fail with FileNotFoundError when the
# folder does not exist yet.
FIGURES_DIR.mkdir(parents=True, exist_ok=True)

# Style
plt.style.use('seaborn-v0_8-whitegrid')
# One fixed colour per model paradigm so all figures share the same coding.
COLORS = {
    'Classical': '#3498db',
    'Neural': '#e74c3c',
    'QML': '#9b59b6',
}

In [None]:
# Read the three per-paradigm metric tables, tagging each with its paradigm
classical_df = pd.read_csv(RESULTS_DIR / 'classical_metrics.csv').assign(category='Classical')
neural_df = pd.read_csv(RESULTS_DIR / 'neural_metrics.csv').assign(category='Neural')
qml_df = pd.read_csv(RESULTS_DIR / 'qml_metrics.csv').assign(category='QML')

# Stack them into one frame with a fresh running index
frames = [classical_df, neural_df, qml_df]
all_df = pd.concat(frames, ignore_index=True)

# Quick sanity check on how many models each paradigm contributed
print(f"Total models: {len(all_df)}")
print(f"  Classical: {len(classical_df)}")
print(f"  Neural: {len(neural_df)}")
print(f"  QML: {len(qml_df)}")

## 1. Complete Metrics Table

In [None]:
# Full results table: identifying columns, quality metrics, then cost,
# ordered so the highest F1 score comes first
display_cols = [
    'model', 'category',
    'accuracy', 'precision', 'recall', 'f1_score', 'roc_auc',
    'train_time',
]
display_df = all_df.loc[:, display_cols].sort_values(by='f1_score', ascending=False)

table_text = display_df.to_string(index=False)

print("\n" + "="*100)
print("COMPLETE MODEL COMPARISON - SORTED BY F1 SCORE")
print("="*100)
print(table_text)

## 2. Metric-wise Bar Plots

In [None]:
# One horizontal bar chart per metric; the sixth (spare) panel holds the legend
metrics = ['accuracy', 'precision', 'recall', 'f1_score', 'roc_auc']

fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.flatten()

for ax, metric in zip(axes, metrics):
    pretty_name = metric.replace('_', ' ').title()

    # Rank the models on this metric and colour each bar by its paradigm
    data = all_df.sort_values(metric, ascending=True)
    colors_list = [COLORS[paradigm] for paradigm in data['category']]

    bars = ax.barh(data['model'], data[metric], color=colors_list)
    ax.set_xlabel(pretty_name)
    ax.set_title(pretty_name, fontsize=12, fontweight='bold')
    ax.set_xlim([0, 1])

    # Write the numeric value next to each bar; missing values get no label
    for bar, val in zip(bars, data[metric]):
        if pd.isna(val):
            continue
        y_mid = bar.get_y() + bar.get_height()/2
        ax.text(val + 0.01, y_mid, f'{val:.3f}', va='center', fontsize=7)

# Colour legend, drawn alone in the last panel with its frame switched off
legend_ax = axes[-1]
legend_elements = [Patch(facecolor=COLORS[name], label=name) for name in COLORS]
legend_ax.legend(handles=legend_elements, loc='center', fontsize=14)
legend_ax.set_title('Legend', fontsize=12, fontweight='bold')
legend_ax.axis('off')

fig.suptitle('Comprehensive Model Comparison', fontsize=16, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig(FIGURES_DIR / 'unified_metrics_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

## 3. ROC-AUC Comparison

In [None]:
# ROC-AUC comparison grouped by category
fig, ax = plt.subplots(figsize=(14, 8))

# Paradigms in display order (this list is also read by later cells)
categories = ['Classical', 'Neural', 'QML']
x_positions = []
x_labels = []
current_x = 0

for cat in categories:
    # Within one paradigm, show the best ROC-AUC first
    cat_data = all_df[all_df['category'] == cat].sort_values('roc_auc', ascending=False)
    positions = range(current_x, current_x + len(cat_data))

    ax.bar(positions, cat_data['roc_auc'], color=COLORS[cat], label=cat, alpha=0.8)

    x_positions.extend(positions)
    x_labels.extend(cat_data['model'])

    current_x += len(cat_data) + 1  # Gap between categories

# Chance-level reference line. It has to exist before ax.legend() is called:
# the legend only collects labelled artists present at call time, so drawing
# the line afterwards would silently drop its 'Random' entry.
ax.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5, label='Random')

ax.set_xticks(x_positions)
ax.set_xticklabels(x_labels, rotation=45, ha='right', fontsize=9)
ax.set_ylabel('ROC-AUC', fontsize=12)
ax.set_title('ROC-AUC Comparison by Category', fontsize=14, fontweight='bold')
ax.set_ylim([0, 1])
ax.legend(fontsize=12)

plt.tight_layout()
plt.savefig(FIGURES_DIR / 'unified_roc_auc.png', dpi=150)
plt.show()

## 4. Runtime Comparison

In [None]:
# Training-time overview on log axes: ranked bars on the left,
# per-category spread on the right
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
ax1, ax2 = axes

# Left panel: one bar per model, sorted by training time
data = all_df.sort_values('train_time', ascending=True)
colors_list = [COLORS[paradigm] for paradigm in data['category']]

ax1.barh(data['model'], data['train_time'], color=colors_list)
ax1.set_xscale('log')
ax1.set_xlabel('Training Time (seconds) - Log Scale')
ax1.set_title('Training Time Comparison', fontsize=12, fontweight='bold')

# Right panel: box plot of training time grouped by category
all_df.boxplot(column='train_time', by='category', ax=ax2)
ax2.set_yscale('log')
ax2.set_ylabel('Training Time (seconds)')
ax2.set_title('Training Time Distribution', fontsize=12, fontweight='bold')
# The grouped boxplot adds its own figure-level title; blank it out
fig.suptitle('')

plt.tight_layout()
plt.savefig(FIGURES_DIR / 'unified_runtime.png', dpi=150)
plt.show()

## 5. Radar Plot (Multi-Metric)

In [None]:
# Radar plot for top models from each category
def radar_plot(ax, metrics_values, labels, title, color):
    """Draw one closed, filled radar polygon on ``ax``.

    Args:
        ax: Axis to draw on; expected to be a polar axis, since the x
            coordinates are angles in radians.
        metrics_values: One value per label, in label order. Any iterable
            works (list, tuple, NumPy array, pandas Series).
        labels: Tick labels, one per spoke; their count sets the number of
            evenly spaced spokes.
        title: Title shown above the axis.
        color: Colour used for both the outline and the translucent fill.
    """
    # Copy into a plain list first: `+` on a tuple raises and on an array
    # broadcasts, so neither would append the closing point.
    values = list(metrics_values)
    angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False).tolist()

    # Repeat the first point at the end so the outline forms a closed loop.
    closed_values = values + values[:1]
    closed_angles = angles + angles[:1]

    ax.plot(closed_angles, closed_values, 'o-', linewidth=2, color=color)
    ax.fill(closed_angles, closed_values, alpha=0.25, color=color)
    # Ticks go on the original spokes only, not on the duplicated closing point.
    ax.set_xticks(angles)
    ax.set_xticklabels(labels)
    ax.set_title(title, fontsize=11, fontweight='bold')
    ax.set_ylim([0, 1])

# For every category keep the full row of its highest-F1 model
best_models = {
    cat: all_df.loc[all_df.loc[all_df['category'] == cat, 'f1_score'].idxmax()]
    for cat in categories
}

# Metric columns and the matching spoke labels, in the same order
radar_metrics = ['accuracy', 'precision', 'recall', 'f1_score', 'roc_auc']
radar_labels = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'ROC-AUC']

# One polar panel per category
fig, axes = plt.subplots(1, 3, figsize=(15, 5), subplot_kw=dict(polar=True))

for polar_ax, (cat, model_data) in zip(axes, best_models.items()):
    values = [model_data[name] for name in radar_metrics]
    panel_title = f'{cat}: {model_data["model"]}'
    radar_plot(polar_ax, values, radar_labels, panel_title, COLORS[cat])

fig.suptitle('Best Model per Category - Multi-Metric Profile', fontsize=14, fontweight='bold', y=1.05)
plt.tight_layout()
plt.savefig(FIGURES_DIR / 'unified_radar.png', dpi=150, bbox_inches='tight')
plt.show()

## 6. Summary Statistics

In [None]:
# Per-category aggregates: mean/std/max for the quality metrics,
# mean/min/max for training time, all rounded to four decimals
agg_spec = {col: ['mean', 'std', 'max'] for col in ('accuracy', 'f1_score', 'roc_auc')}
agg_spec['train_time'] = ['mean', 'min', 'max']

grouped = all_df.groupby('category')
summary_stats = grouped.agg(agg_spec).round(4)

print("\n" + "="*80)
print("CATEGORY-WISE SUMMARY STATISTICS")
print("="*80)
print(summary_stats)

## 7. Best Paradigm per Metric

In [None]:
print("\n" + "="*80)
print("BEST PARADIGM PER METRIC")
print("="*80)

# For each quality metric, report the single highest-scoring model across all
# paradigms. The arrow is written as a real Unicode character; the previous
# text contained the mis-decoded byte sequence instead.
for metric in ['accuracy', 'precision', 'recall', 'f1_score', 'roc_auc']:
    best_idx = all_df[metric].idxmax()
    best_model = all_df.loc[best_idx]
    print(f"\n{metric.replace('_', ' ').title():15} → {best_model['category']:10} | {best_model['model']:25} | {best_model[metric]:.4f}")

# Fastest model: the row with the smallest training time
fastest_idx = all_df['train_time'].idxmin()
fastest = all_df.loc[fastest_idx]
print(f"\n{'Fastest':15} → {fastest['category']:10} | {fastest['model']:25} | {fastest['train_time']:.4f}s")

## 8. Final Analysis & Conclusions

In [None]:
# Rank the categories by their mean score, best first, for the two
# headline metrics
category_rankings = {}
for metric in ['f1_score', 'roc_auc']:
    means = all_df.groupby('category')[metric].mean().sort_values(ascending=False)
    category_rankings[metric] = means.index.tolist()

print("\n" + "="*80)
print("FINAL ANALYSIS & CONCLUSIONS")
print("="*80)

# NOTE: the symbols in the headings below are real Unicode characters; the
# previous text contained their mis-decoded byte sequences.
print("\n📊 PERFORMANCE RANKINGS (by average)")
print("-" * 40)
for metric, ranking in category_rankings.items():
    print(f"{metric.replace('_', ' ').title()}: {' > '.join(ranking)}")

print("\n⚡ COMPUTATIONAL EFFICIENCY")
print("-" * 40)
# Mean training time per category, fastest first
time_means = all_df.groupby('category')['train_time'].mean().sort_values()
time_ranking = ' < '.join(f'{cat} ({val:.2f}s)' for cat, val in time_means.items())
print(f"Training Time (avg): {time_ranking}")

print("\n🎯 KEY FINDINGS")
print("-" * 40)

# Classical analysis: best row of the classical table by F1
classical_best = classical_df.loc[classical_df['f1_score'].idxmax()]
print(f"• Best Classical Model: {classical_best['model']} (F1: {classical_best['f1_score']:.4f})")
print("  - Fast training, interpretable, suitable for production")

# Neural analysis: best row of the neural table by F1
neural_best = neural_df.loc[neural_df['f1_score'].idxmax()]
print(f"\n• Best Neural Model: {neural_best['model']} (F1: {neural_best['f1_score']:.4f})")
print("  - More complex patterns, requires more data for best performance")

# QML analysis: best row of the QML table by F1
qml_best = qml_df.loc[qml_df['f1_score'].idxmax()]
print(f"\n• Best QML Model: {qml_best['model']} (F1: {qml_best['f1_score']:.4f})")
print("  - Experimental, limited by qubit count and circuit depth")
print("  - Shows promise but needs quantum hardware for practical advantage")

In [None]:
print("\n" + "="*80)
print("HONEST QML ASSESSMENT")
print("="*80)

# Static, hand-written assessment text. The check marks, crosses and arrows
# are real Unicode characters; the previous text contained their mis-decoded
# byte sequences.
print("""
STRENGTHS:
✓ Novel approach to feature space exploration
✓ Potential exponential speedup for specific problems
✓ Interesting research direction for complex patterns

LIMITATIONS (Current Study):
✗ Limited qubit count (4 qubits) restricts expressivity
✗ Simulator overhead - no real quantum advantage yet
✗ Small dataset doesn't showcase QML strengths
✗ Training time significantly higher than classical
✗ Hyperparameter tuning is computationally expensive

RECOMMENDATIONS:
→ For production: Use classical ML (Random Forest, SVM, or XGBoost)
→ For research: Explore hybrid architectures with more qubits
→ Future work: Test on larger datasets with real quantum hardware
""")

In [None]:
# Save final summary
# Locate the overall best-F1 row once instead of recomputing idxmax() for
# every field that reads from it.
best_overall_idx = all_df['f1_score'].idxmax()

# Mean F1 / ROC-AUC / training time per category. Each category is filtered
# once and the subset reused; a category without rows yields NaN means.
avg_metrics_by_category = {}
for cat in categories:
    cat_rows = all_df[all_df['category'] == cat]
    avg_metrics_by_category[cat] = {
        name: float(cat_rows[name].mean())
        for name in ('f1_score', 'roc_auc', 'train_time')
    }

final_summary = {
    'total_models': len(all_df),
    'models_by_category': {
        'classical': len(classical_df),
        'neural': len(neural_df),
        'qml': len(qml_df)
    },
    'best_overall': {
        'model': all_df.loc[best_overall_idx, 'model'],
        'category': all_df.loc[best_overall_idx, 'category'],
        'f1_score': float(all_df['f1_score'].max())
    },
    'best_per_category': {
        cat: {
            'model': best_models[cat]['model'],
            # float() turns the pandas/NumPy scalar into a JSON-serialisable value
            'f1_score': float(best_models[cat]['f1_score'])
        } for cat in categories
    },
    'avg_metrics_by_category': avg_metrics_by_category
}

# Explicit encoding so the file content does not depend on the platform default
with open(RESULTS_DIR / 'final_summary.json', 'w', encoding='utf-8') as f:
    json.dump(final_summary, f, indent=2)

print("\n✅ Saved final summary to results/final_summary.json")
print("\n" + "="*80)
print("✅ NOTEBOOK 12 COMPLETE - BENCHMARK PIPELINE FINISHED!")
print("="*80)