# Enhanced Report Display - Individual Models vs Ensemble

This notebook displays comprehensive results comparing individual feature-based models with ensemble performance.

In [None]:
import json
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
from datetime import datetime
import glob
from tensorflow import keras

In [None]:
# Find the latest ensemble results file.
# Candidates are sorted in reverse so the lexicographically last file name
# comes first. NOTE(review): this is the most recent run only if the name
# suffix sorts chronologically (e.g. a zero-padded timestamp) - confirm.
ensemble_results_files = sorted(glob.glob("ensemble_results/ensemble_results_*.json"), reverse=True)
if not ensemble_results_files:
    raise FileNotFoundError("No ensemble results file found in ensemble_results/ directory.")
ENSEMBLE_RESULTS_PATH = ensemble_results_files[0]
ENSEMBLE_RESULTS_DIR = os.path.dirname(ENSEMBLE_RESULTS_PATH)

# The analysis cells below read the parsed file through `ensemble_data`
# (its 'individual_results', 'instrument_mappings', 'ensemble_results' and
# 'training_config' entries), so the whole document must be bound to that
# name; binding it to `ensemble_results` left `ensemble_data` undefined.
with open(ENSEMBLE_RESULTS_PATH, "r", encoding="utf-8") as file:
    ensemble_data = json.load(file)

print(f"Loaded ensemble results from: {ENSEMBLE_RESULTS_PATH}")

In [None]:
def show_accuracy(accuracy_list, title="Accuracy"):
    """Print the raw values followed by their mean, std, min and max.

    Args:
        accuracy_list: Sequence of numeric scores; converted with ``np.array``.
        title: Label placed at the start of every printed line.
    """
    print(f"{title} List:", accuracy_list)
    values = np.array(accuracy_list)
    # Each statistic is evaluated right before its line is printed, in this
    # fixed order; all are formatted to four decimal places.
    for stat in ("mean", "std", "min", "max"):
        print(f"{title} {stat}: {getattr(values, stat)():.4f}")

In [None]:
def display_classification_reports(classification_reports, mappings, title="Classification Report"):
    """Print each classification report as a table.

    Args:
        classification_reports: Iterable of reports; each one is passed to
            ``pd.DataFrame`` and transposed before printing.
        mappings: Index relabelling handed to ``DataFrame.rename``
            (existing row label -> label to display).
        title: Heading prefix; the 1-based position of the report follows it.
    """
    for number, report in enumerate(classification_reports, start=1):
        table = pd.DataFrame(report).T.rename(index=mappings)
        print(f"\n{title} {number}:")
        print(table)

In [None]:
def plot_history(history, title="Training History"):
    """Draw accuracy and loss curves side by side and show the figure.

    Args:
        history: Mapping with "accuracy", "val_accuracy", "loss" and
            "val_loss" entries; each entry is plotted against its index.
        title: Prefix shared by both subplot titles.
    """
    plt.figure(figsize=(12, 4))

    # (history key, axis label) for the left and the right panel.
    panels = (("accuracy", "Accuracy"), ("loss", "Loss"))
    for position, (metric, label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history[metric])
        plt.plot(history[f"val_{metric}"])
        plt.title(f"{title} - {label}")
        plt.ylabel(label)
        plt.xlabel("Epoch")
        plt.legend(["Train", "Validation"], loc="upper left")

    plt.tight_layout()
    plt.show()

In [None]:
def plot_confusion(confusion_matrices, mappings, title="Confusion Matrix"):
    """Plot one annotated heatmap per confusion matrix.

    Args:
        confusion_matrices: Iterable of matrices (nested lists or arrays);
            one figure is shown for each.
        mappings: Dict keyed by the string form of each class index
            ("0", "1", ...) whose values are used as tick labels.
        title: Prefix of each figure title; " - Cross validation <n>" with
            the 1-based position of the matrix is appended.

    Raises:
        KeyError: If ``mappings`` has no entry for some ``str(index)`` with
            ``index < len(mappings)``.
    """
    # Tick labels depend only on `mappings`, so build them once instead of
    # once per matrix. The comprehension variable also no longer reuses the
    # name of the loop counter below.
    class_labels = [mappings[str(class_index)] for class_index in range(len(mappings))]

    for fold_number, matrix in enumerate(confusion_matrices, start=1):
        matrix = np.array(matrix)
        if matrix.ndim == 1:
            # A flat vector is displayed as a single column.
            matrix = matrix.reshape(-1, 1)

        plt.figure(figsize=(10, 8))
        # fmt="d" formats annotations as integers.
        # NOTE(review): assumes the matrices hold integer counts - confirm.
        sns.heatmap(matrix, annot=True, fmt="d", cmap="Blues",
                    xticklabels=class_labels, yticklabels=class_labels)
        plt.yticks(rotation=45)
        plt.title(f"{title} - Cross validation {fold_number}")
        plt.xlabel("Predicted Labels")
        plt.ylabel("True Labels")
        plt.tight_layout()
        plt.show()

In [None]:
# Individual Model Performance Analysis
section_rule = "=" * 60
print(section_rule)
print("INDIVIDUAL MODEL PERFORMANCE ANALYSIS")
print(section_rule)

individual_results = ensemble_data['individual_results']
instrument_mappings = ensemble_data['instrument_mappings']['name']


def _summarize_feature(feature_type, results):
    """Return one summary row (accuracy and loss statistics) for a feature type."""
    accuracies = results['accuracy_list']
    losses = results['loss_list']
    return {
        'Feature Type': feature_type.replace('_', ' ').title(),
        'Mean Accuracy': np.mean(accuracies),
        'Std Accuracy': np.std(accuracies),
        'Min Accuracy': np.min(accuracies),
        'Max Accuracy': np.max(accuracies),
        'Mean Loss': np.mean(losses),
        'Std Loss': np.std(losses),
    }


# One row per feature type, best mean accuracy first.
performance_data = [
    _summarize_feature(name, fold_results)
    for name, fold_results in individual_results.items()
]
performance_df = pd.DataFrame(performance_data).sort_values('Mean Accuracy', ascending=False)

print("\nIndividual Model Performance Summary:")
print(performance_df.round(4))

# Detailed output per feature type: statistics, reports, matrices, history.
for feature_type, results in individual_results.items():
    sub_rule = '=' * 40
    print(f"\n{sub_rule}")
    print(f"{feature_type.replace('_', ' ').title()} Model")
    print(sub_rule)

    show_accuracy(results['accuracy_list'], f"{feature_type} Accuracy")
    show_accuracy(results['loss_list'], f"{feature_type} Loss")

    display_classification_reports(
        results['classification_reports'],
        instrument_mappings,
        title=f"{feature_type} Classification Report",
    )

    plot_confusion(
        results['confusion_matrices'],
        instrument_mappings,
        title=f"{feature_type} Confusion Matrix",
    )

    # Only the first fold's history is plotted, to keep the output short.
    histories = results['histories']
    if histories:
        plot_history(histories[0], title=f"{feature_type} Training History (Fold 1)")

In [None]:
# Ensemble Performance Analysis
print(f"\n{'=' * 60}")
print("ENSEMBLE PERFORMANCE ANALYSIS")
print('=' * 60)

# From here on `ensemble_results` is the ensemble section of the loaded file.
ensemble_results = ensemble_data['ensemble_results']

print("\nEnsemble Model Performance:")
show_accuracy(ensemble_results['accuracy_list'], title="Ensemble Accuracy")

# Per-fold classification reports first, then the per-fold confusion matrices.
display_classification_reports(
    ensemble_results['classification_reports'],
    instrument_mappings,
    title="Ensemble Classification Report",
)
plot_confusion(
    ensemble_results['confusion_matrices'],
    instrument_mappings,
    title="Ensemble Confusion Matrix",
)

In [None]:
# Comparison Visualizations
print("\n" + "="*60)
print("COMPARISON VISUALIZATIONS")
print("="*60)

# performance_df stores display names (underscores replaced by spaces, title
# case). Map each display name back to its real key in individual_results
# rather than re-deriving the key with lower()/replace(), which only works
# when every key is lowercase snake_case.
display_to_key = {
    key.replace('_', ' ').title(): key for key in individual_results
}
feature_keys = [display_to_key[name] for name in performance_df['Feature Type']]

# Break multi-word names across lines for the tick labels. The display names
# contain spaces, not underscores, so replacing '_' here never had any effect.
feature_names = [name.replace(' ', '\n') for name in performance_df['Feature Type']]
bar_positions = range(len(feature_names))
mean_accuracies = performance_df['Mean Accuracy'].values
std_accuracies = performance_df['Std Accuracy'].values

# Ensemble statistics across folds (also used by the cells that follow).
ensemble_mean = np.mean(ensemble_results['accuracy_list'])
ensemble_std = np.std(ensemble_results['accuracy_list'])

# 1. Performance Comparison Bar Chart
plt.figure(figsize=(15, 8))

# Individual models, with one standard deviation as the error bar
bars1 = plt.bar(bar_positions, mean_accuracies,
                yerr=std_accuracies, capsize=5, alpha=0.8,
                label='Individual Models')

# Ensemble: dashed line at the mean, shaded band of +/- one standard deviation
plt.axhline(y=ensemble_mean, color='red', linestyle='--', linewidth=3,
            label=f'Ensemble ({ensemble_mean:.4f})')
plt.fill_between([-0.5, len(feature_names)-0.5],
                 ensemble_mean - ensemble_std, ensemble_mean + ensemble_std,
                 alpha=0.2, color='red')

plt.xlabel('Feature Types')
plt.ylabel('Accuracy')
plt.title('Individual Models vs Ensemble Performance')
plt.xticks(bar_positions, feature_names, rotation=45)
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# 2. Box Plot of Accuracy Distributions
plt.figure(figsize=(15, 8))

data_for_box = [individual_results[key]['accuracy_list'] for key in feature_keys]
labels = list(feature_names)

box_plot = plt.boxplot(data_for_box)
plt.axhline(y=ensemble_mean, color='red', linestyle='--', linewidth=2,
            label=f'Ensemble Mean ({ensemble_mean:.4f})')
plt.ylabel('Accuracy')
plt.title('Accuracy Distribution Across Folds')
plt.legend()
plt.grid(True, alpha=0.3)
# boxplot draws the boxes at x = 1..n by default. Labelling them through
# xticks avoids boxplot's `labels=` keyword, which Matplotlib 3.9 deprecated
# in favour of `tick_labels=`, so this works on old and new versions alike.
plt.xticks(range(1, len(labels) + 1), labels, rotation=45)
plt.tight_layout()
plt.show()

# 3. Improvement Analysis
plt.figure(figsize=(12, 6))

# Ensemble mean accuracy minus each individual model's mean accuracy
improvements = [
    ensemble_mean - np.mean(individual_results[key]['accuracy_list'])
    for key in feature_keys
]

colors = ['green' if imp > 0 else 'red' for imp in improvements]
bars = plt.bar(bar_positions, improvements, color=colors, alpha=0.7)
plt.axhline(y=0, color='black', linestyle='-', linewidth=1)

plt.xlabel('Feature Types')
plt.ylabel('Improvement (Ensemble - Individual)')
plt.title('Ensemble Improvement Over Individual Models')
plt.xticks(bar_positions, feature_names, rotation=45)
plt.grid(True, alpha=0.3)

# Add value labels just beyond the end of each bar: above a positive bar,
# below a non-positive one (the offset previously always pushed upwards).
for bar, imp in zip(bars, improvements):
    is_gain = imp > 0
    offset = 0.001 if is_gain else -0.001
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + offset,
             f'{imp:.4f}', ha='center', va='bottom' if is_gain else 'top')

plt.tight_layout()
plt.show()

In [None]:
# Statistical Summary
summary_rule = "=" * 60
print("\n" + summary_rule)
print("STATISTICAL SUMMARY")
print(summary_rule)

# performance_df is sorted by mean accuracy (descending), so the first and
# last rows are the best and worst individual models.
best_individual, worst_individual = performance_df.iloc[0], performance_df.iloc[-1]

# Absolute accuracy differences between the ensemble and those two models
improvement_over_best = ensemble_mean - best_individual['Mean Accuracy']
improvement_over_worst = ensemble_mean - worst_individual['Mean Accuracy']

print(f"\nBest Individual Model: {best_individual['Feature Type']}")
print(f"  Accuracy: {best_individual['Mean Accuracy']:.4f} ± {best_individual['Std Accuracy']:.4f}")

print(f"\nWorst Individual Model: {worst_individual['Feature Type']}")
print(f"  Accuracy: {worst_individual['Mean Accuracy']:.4f} ± {worst_individual['Std Accuracy']:.4f}")

print("\nEnsemble Performance:")
print(f"  Accuracy: {ensemble_mean:.4f} ± {ensemble_std:.4f}")

print(f"\nImprovement over best individual: {improvement_over_best:.4f} ({improvement_over_best*100:.2f}%)")
print(f"Improvement over worst individual: {improvement_over_worst:.4f} ({improvement_over_worst*100:.2f}%)")

# Model consistency analysis: rank models by the spread of their fold accuracies
print("\nModel Consistency (lower std = more consistent):")
consistency_ranking = performance_df.sort_values('Std Accuracy')
for _, row in consistency_ranking.iterrows():
    print(f"  {row['Feature Type']}: {row['Std Accuracy']:.4f}")

# Feature type analysis by category
print("\nFeature Type Analysis by Category:")
spectral_features = ['mel_spectrogram', 'stft', 'constant_q', 'harmonic_percussive']
cepstral_features = ['mfcc']
harmonic_features = ['chromagram', 'cqt', 'tonnetz']
other_features = ['spectral_contrast', 'onset_strength']

feature_categories = dict(
    Spectral=spectral_features,
    Cepstral=cepstral_features,
    Harmonic=harmonic_features,
    Other=other_features,
)

for category, features in feature_categories.items():
    # Pool the fold accuracies of every feature in this category that has results.
    category_accuracies = [
        accuracy
        for feature in features
        if feature in individual_results
        for accuracy in individual_results[feature]['accuracy_list']
    ]

    # Categories without any trained feature are not reported.
    if not category_accuracies:
        continue

    category_mean = np.mean(category_accuracies)
    category_std = np.std(category_accuracies)
    print(f"  {category}: {category_mean:.4f} ± {category_std:.4f}")

In [None]:
# Save comprehensive report
def save_comprehensive_report():
    """Assemble the analysis summary, write it as JSON and return it.

    Reads the notebook-level values computed by the earlier cells
    (``instrument_mappings``, ``individual_results``, ``ensemble_results``,
    ``ensemble_data``, ``performance_df``, ``ensemble_mean``,
    ``ensemble_std``, ``best_individual``, ``improvement_over_best``,
    ``improvement_over_worst``) and writes ``comprehensive_analysis.json``
    into ``ENSEMBLE_RESULTS_DIR``, next to the results file that was loaded.

    Returns:
        dict: The report that was written to disk.
    """
    fold_accuracies = ensemble_results['accuracy_list']

    report = {
        'timestamp': datetime.now().isoformat(),
        # Path of the results file this analysis was derived from. The
        # previously referenced ENSEMBLE_REPORT_PATH is not defined anywhere
        # in the notebook; ENSEMBLE_RESULTS_PATH / ENSEMBLE_RESULTS_DIR are.
        'ensemble_path': ENSEMBLE_RESULTS_PATH,
        'summary': {
            'num_instruments': len(instrument_mappings),
            'num_feature_types': len(individual_results),
            'num_folds': len(fold_accuracies),
            'training_config': ensemble_data['training_config']
        },
        'performance': {
            'individual_models': performance_df.to_dict('records'),
            'ensemble': {
                'mean': ensemble_mean,
                'std': ensemble_std,
                'min': np.min(fold_accuracies),
                'max': np.max(fold_accuracies)
            }
        },
        'comparison': {
            'best_individual': best_individual['Feature Type'],
            'best_individual_accuracy': best_individual['Mean Accuracy'],
            'improvement_over_best': improvement_over_best,
            'improvement_over_worst': improvement_over_worst
        },
        'instrument_classes': list(instrument_mappings.values())
    }

    # Save report alongside the loaded results file.
    report_path = os.path.join(ENSEMBLE_RESULTS_DIR, 'comprehensive_analysis.json')
    with open(report_path, 'w', encoding='utf-8') as f:
        # default=str: any value json cannot encode natively is written as
        # its string form instead of raising TypeError.
        json.dump(report, f, indent=2, default=str)

    print(f"\nComprehensive analysis report saved to: {report_path}")

    return report

# Generate and save report
comprehensive_report = save_comprehensive_report()