In [None]:
# Import Required Libraries
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import json
import pickle
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Setup: run on the GPU when PyTorch can see one, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"‚úì All libraries imported successfully!")
print(f"Device: {device}")

## 1. Load Best Models and Results

In [None]:
# Results directory of each model, keyed by the model's display name
results_dirs = dict([
    ('SimpleNN', 'simplenn_results'),
    ('MLP', 'mlp_results'),
    ('CNN', 'cnn_results'),
    ('Transfer Learning', 'transfer_learning_results'),
])

# Function to load the most recent summary report
def load_latest_summary(results_dir):
    """Load the most recent summary report from a results directory.

    Report files are recognised by the ``summary_report_`` filename prefix.
    "Most recent" means the lexicographically greatest filename, which equals
    chronological order only when the filename suffix is a sortable timestamp
    (presumably ``YYYYMMDD_HHMMSS`` -- confirm against the code that writes
    these files).

    Args:
        results_dir: Path of the directory to search.

    Returns:
        The parsed JSON content of the selected report, or ``None`` when the
        directory cannot be read, contains no report, or the report cannot be
        decoded/parsed. A diagnostic line is printed in each failure case.
    """
    try:
        # Find all summary_report files
        files = [f for f in os.listdir(results_dir) if f.startswith('summary_report_')]
        if not files:
            print(f"  ‚úó No summary reports found in {results_dir}")
            return None

        # max() selects the same file as a full descending sort, without sorting
        latest_file = max(files)

        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with open(os.path.join(results_dir, latest_file), 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing or unreadable directory/file.
        # ValueError: malformed JSON (json.JSONDecodeError) or undecodable
        # bytes (UnicodeDecodeError). Anything else is a programming error
        # and is deliberately allowed to propagate.
        print(f"  ‚úó Error loading summary from {results_dir}: {e}")
        return None

# Load all summaries
# all_summaries maps model display name -> parsed summary report; models whose
# report could not be loaded are simply left out.
print("Loading model results...\n")
all_summaries = {}
for model_name, results_dir in results_dirs.items():
    print(f"  Loading {model_name}...")
    summary = load_latest_summary(results_dir)
    if summary:
        all_summaries[model_name] = summary
        print(f"    ‚úì Val Acc: {summary['best_val_accuracy']:.4f}, Test Acc: {summary['best_test_accuracy']:.4f}")
    else:
        print(f"    ‚úó Failed to load")

# Report against the number of configured models rather than a hard-coded 4,
# so the message stays correct when results_dirs is edited.
print(f"\n‚úì Successfully loaded {len(all_summaries)}/{len(results_dirs)} models")

## 2. Performance Comparison Table

In [None]:
# Create comparison DataFrame
# The column list is passed to the constructor explicitly so the frame keeps
# its schema -- and the sort below keeps working -- even when no summary could
# be loaded (an empty record list would otherwise give a column-less frame and
# sort_values would raise KeyError).
comparison_columns = ['Model', 'Val Accuracy', 'Test Accuracy', 'Tuning Combinations', 'Top 5 Mean Val Acc']

# One record per successfully loaded model
comparison_data = [
    {
        'Model': model_name,
        'Val Accuracy': summary['best_val_accuracy'],
        'Test Accuracy': summary['best_test_accuracy'],
        'Tuning Combinations': summary['total_combinations'],
        'Top 5 Mean Val Acc': np.mean(summary['top5_accuracies']),
    }
    for model_name, summary in all_summaries.items()
]

# Best test accuracy first
comparison_df = pd.DataFrame(comparison_data, columns=comparison_columns).sort_values('Test Accuracy', ascending=False)

print("\n" + "="*80)
print("INTER-MODEL COMPARISON".center(80))
print("="*80)
print("\nPerformance Metrics (sorted by Test Accuracy):")
print(comparison_df.to_string(index=False))
print("="*80)

## 3. Best Hyperparameters Comparison

In [None]:
# Print the winning configuration of every loaded model, one section per model
section_rule = "=" * 80
print("\n" + section_rule)
print("BEST HYPERPARAMETERS BY MODEL".center(80))
print(section_rule)

for model_name in all_summaries:
    summary = all_summaries[model_name]
    # Model heading followed by a dashed underline
    print(f"\n{model_name}:", "-" * 80, sep="\n")
    print(f"  Best Validation Accuracy: {summary['best_val_accuracy']:.4f}")
    print(f"  Best Test Accuracy: {summary['best_test_accuracy']:.4f}")
    print(f"  \n  Hyperparameters:")
    best_params = summary['best_hyperparameters']
    for key in best_params:
        value = best_params[key]
        print(f"    {key}: {value}")

print("\n" + section_rule)

## 4. Accuracy Comparison Visualizations

In [None]:
# Create comparison plots
def _plot_metric_bars(ax, labels, values, bar_colors, ylabel, title, signed=False):
    """Draw one annotated bar panel of the inter-model comparison figure.

    Args:
        ax: Matplotlib axes to draw on.
        labels: Model names, one per bar.
        values: Metric value of each model, in the same order as ``labels``.
        bar_colors: One colour per bar.
        ylabel: Y-axis label.
        title: Panel title.
        signed: When False the y-axis is fixed to [0, 1] and every value label
            sits on top of its bar. When True the y-axis autoscales, a dashed
            zero line is drawn, and the label of a negative bar is anchored
            below the bar end instead of above it.

    Returns:
        The bar container returned by ``ax.bar``.
    """
    bars = ax.bar(labels, values, color=bar_colors, alpha=0.8, edgecolor='black', linewidth=1.5)
    ax.set_ylabel(ylabel, fontweight='bold')
    ax.set_title(title, fontweight='bold')
    if signed:
        ax.axhline(y=0, color='r', linestyle='--', alpha=0.5)
    else:
        ax.set_ylim([0, 1])
    ax.grid(True, alpha=0.3, axis='y')
    # Write each value at the end of its bar
    for bar, value in zip(bars, values):
        ax.text(bar.get_x() + bar.get_width()/2., bar.get_height(),
                f'{value:.4f}', ha='center',
                va='bottom' if (not signed or value >= 0) else 'top', fontweight='bold')
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha='right')
    return bars


fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Inter-Model Performance Comparison', fontsize=16, fontweight='bold')

# Per-model series, all in the order of `models`
models = list(all_summaries.keys())
colors = plt.cm.viridis(np.linspace(0, 1, len(models)))
val_accs = [all_summaries[m]['best_val_accuracy'] for m in models]
test_accs = [all_summaries[m]['best_test_accuracy'] for m in models]
top5_avgs = [np.mean(all_summaries[m]['top5_accuracies']) for m in models]
gaps = [all_summaries[m]['best_val_accuracy'] - all_summaries[m]['best_test_accuracy'] for m in models]

# 1. Validation Accuracy Comparison
bars1 = _plot_metric_bars(axes[0, 0], models, val_accs, colors,
                          'Validation Accuracy', 'Validation Accuracy Comparison')

# 2. Test Accuracy Comparison
bars2 = _plot_metric_bars(axes[0, 1], models, test_accs, colors,
                          'Test Accuracy', 'Test Accuracy Comparison')

# 3. Top 5 Average Validation Accuracy
bars3 = _plot_metric_bars(axes[1, 0], models, top5_avgs, colors,
                          'Average Validation Accuracy', 'Top 5 Average Validation Accuracy')

# 4. Accuracy Gap (Val - Test): can be negative, so use the signed layout
bars4 = _plot_metric_bars(axes[1, 1], models, gaps, colors,
                          'Accuracy Gap (Val - Test)', 'Overfitting Analysis (Gap)', signed=True)

plt.tight_layout()
plt.savefig('inter_model_comparison.png', dpi=300, bbox_inches='tight')
print("‚úì Comparison plot saved as 'inter_model_comparison.png'")
plt.show()

## 5. Loss Curve Comparison

In [None]:
# Function to load training history
def load_best_history(results_dir):
    """Load the best model's training history from a results directory.

    History files are recognised by the ``best_model_history_`` filename
    prefix. When several exist, the lexicographically greatest filename is
    used (the most recent one, provided the suffix is a sortable timestamp --
    TODO confirm against the code that writes these files).

    Args:
        results_dir: Path of the directory to search.

    Returns:
        The unpickled history object, or ``None`` when no history file exists
        or loading fails. A diagnostic line is printed in both failure cases.
    """
    try:
        files = [f for f in os.listdir(results_dir) if f.startswith('best_model_history_')]
        if not files:
            # Say why nothing was loaded, as load_latest_summary does
            print(f"No training history files found in {results_dir}")
            return None
        # max() selects the same file as a full descending sort, without sorting
        latest_file = max(files)
        # SECURITY: pickle.load can execute arbitrary code embedded in the file.
        # Only use this on result directories produced by trusted training runs.
        with open(os.path.join(results_dir, latest_file), 'rb') as f:
            return pickle.load(f)
    except Exception as e:
        # Unpickling can raise almost any exception type, so the handler stays broad
        print(f"Error loading history from {results_dir}: {e}")
        return None

# Load histories for all models
# histories maps model display name -> training history of its best
# configuration; models whose history could not be loaded are left out.
print("\nLoading training histories...")
histories = {}
for model_name, results_dir in results_dirs.items():
    history = load_best_history(results_dir)
    if history:
        histories[model_name] = history
        print(f"  ‚úì {model_name}: {len(history['train_loss'])} epochs")
    else:
        print(f"  ‚úó {model_name}: Failed to load")

# Report against the number of configured models rather than a hard-coded 4
print(f"\n‚úì Successfully loaded {len(histories)}/{len(results_dirs)} histories")

In [None]:
# Plot training loss curves for all models
# Size the grid from the number of loaded histories (two panels per row)
# instead of assuming exactly four: four histories still give the original
# 2x2 / 16x12 layout, fewer leave no blank panels, more no longer overflow.
n_cols = 2
n_rows = max(1, (len(histories) + n_cols - 1) // n_cols)
# squeeze=False keeps `axes` two-dimensional even for a single row
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 6 * n_rows), squeeze=False)
fig.suptitle('Training Loss Curves - All Models (Best Configuration)', fontsize=16, fontweight='bold')

panels = list(axes.flat)

# Individual plots for each model
for ax, (model_name, history) in zip(panels, histories.items()):
    # Epochs are numbered from 1 on the x-axis
    epochs = range(1, len(history['train_loss']) + 1)
    ax.plot(epochs, history['train_loss'], 'b-', label='Training Loss', linewidth=2.5, marker='o', markersize=5)
    ax.plot(epochs, history['val_loss'], 'r-', label='Validation Loss', linewidth=2.5, marker='s', markersize=5)

    ax.set_title(f'{model_name}', fontweight='bold', fontsize=13)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend(loc='upper right', fontsize=11)
    ax.grid(True, alpha=0.3)

# Hide panels that have no model to show
for ax in panels[len(histories):]:
    ax.set_visible(False)

plt.tight_layout()
plt.savefig('loss_curves_all_models.png', dpi=300, bbox_inches='tight')
print("‚úì Loss curves plot saved as 'loss_curves_all_models.png'")
plt.show()

In [None]:
# Overlay every model's loss curve in two side-by-side panels (training, validation)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Fixed colour per model; models not listed here fall back to black
colors_map = {'SimpleNN': 'blue', 'MLP': 'green', 'CNN': 'red', 'Transfer Learning': 'purple'}

# One row per panel: (axes, history key, marker, title, y-axis label)
overlay_panels = [
    (ax1, 'train_loss', 'o', 'Training Loss Comparison (Normalized Epochs)', 'Training Loss'),
    (ax2, 'val_loss', 's', 'Validation Loss Comparison (Normalized Epochs)', 'Validation Loss'),
]

for panel_ax, loss_key, point_marker, panel_title, y_label in overlay_panels:
    for model_name, history in histories.items():
        losses = history[loss_key]
        # Stretch each run onto 0-1 so runs with different epoch counts share an x-axis
        epochs_normalized = np.linspace(0, 1, len(losses))
        panel_ax.plot(epochs_normalized, losses, label=model_name,
                      color=colors_map.get(model_name, 'black'), linewidth=2.5,
                      marker=point_marker, markersize=4)

    panel_ax.set_title(panel_title, fontweight='bold', fontsize=13)
    panel_ax.set_xlabel('Normalized Epoch')
    panel_ax.set_ylabel(y_label)
    panel_ax.legend(fontsize=11, loc='best')
    panel_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('loss_curves_comparison_normalized.png', dpi=300, bbox_inches='tight')
print("‚úì Normalized loss curves plot saved as 'loss_curves_comparison_normalized.png'")
plt.show()

## 6. Final Ranking and Recommendations

In [None]:
# Create final ranking
# Rank 1 is the model with the highest test accuracy.
ranking = comparison_df.sort_values('Test Accuracy', ascending=False).reset_index(drop=True)
if ranking.empty:
    # Fail with an actionable message instead of an IndexError from iloc[0] below
    raise RuntimeError(
        "No model results are available to rank; check that the results "
        "directories contain summary reports."
    )
ranking['Rank'] = range(1, len(ranking) + 1)

print("\n" + "="*80)
print("FINAL RANKING - BEST TO WORST".center(80))
print("="*80)

print("\n" + ranking[['Rank', 'Model', 'Val Accuracy', 'Test Accuracy', 'Top 5 Mean Val Acc']].to_string(index=False))

print("\n" + "="*80)
print("RECOMMENDATIONS".center(80))
print("="*80)

# best_model is also read by the report cell further down
best_model = ranking.iloc[0]
print(f"\nüèÜ BEST OVERALL MODEL: {best_model['Model']}")
print(f"   - Test Accuracy: {best_model['Test Accuracy']:.4f}")
print(f"   - Validation Accuracy: {best_model['Val Accuracy']:.4f}")
print(f"   - Recommendation: Use this model for production deployment")

# Check for overfitting
# A separate name is used so the `gaps` list from the plotting cell is not
# rebound to a DataFrame. The signed gap is sorted ascending, so a model whose
# test accuracy exceeds its validation accuracy (negative gap) ranks first.
gap_df = comparison_df.assign(Gap=comparison_df['Val Accuracy'] - comparison_df['Test Accuracy'])
best_generalization = gap_df.sort_values('Gap').iloc[0]

print(f"\nüí™ BEST GENERALIZATION (Lowest Overfitting): {best_generalization['Model']}")
print(f"   - Val-Test Gap: {best_generalization['Gap']:.4f}")
print(f"   - Recommendation: More robust to unseen data")

# Most efficient: the model with the fewest tuning combinations
efficient = ranking.sort_values('Tuning Combinations').iloc[0]
print(f"\n‚ö° MOST EFFICIENT: {efficient['Model']}")
print(f"   - Tuning Combinations: {int(efficient['Tuning Combinations'])}")
print(f"   - Test Accuracy: {efficient['Test Accuracy']:.4f}")
print(f"   - Recommendation: Faster tuning process")

print("\n" + "="*80)

## 7. Summary Report

In [None]:
# Persist the comparison: a JSON report plus the comparison table as CSV.
# Both filenames carry the same timestamp so the pair can be matched up later.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

report = {
    'timestamp': timestamp,
    'comparison': comparison_df.to_dict('records'),
}

# Details of the top-ranked model; accuracies are converted to plain floats
best_model_name = best_model['Model']
report['best_model'] = {
    'name': best_model_name,
    'test_accuracy': float(best_model['Test Accuracy']),
    'val_accuracy': float(best_model['Val Accuracy']),
    'hyperparameters': all_summaries[best_model_name]['best_hyperparameters'],
}

# Full summary of every loaded model, keyed by model name
report['all_model_details'] = dict(all_summaries)

# default=str stringifies any value json cannot serialise natively
with open(f'inter_model_comparison_report_{timestamp}.json', 'w') as report_file:
    json.dump(report, report_file, indent=4, default=str)

print(f"\n‚úì Comprehensive comparison report saved to: inter_model_comparison_report_{timestamp}.json")

# Save comparison table as CSV
comparison_df.to_csv(f'inter_model_comparison_{timestamp}.csv', index=False)
print(f"‚úì Comparison table saved to: inter_model_comparison_{timestamp}.csv")

print(f"\n{'='*80}")
print("INTER-MODEL COMPARISON COMPLETE!".center(80))
print(f"{'='*80}")