# Notebook 05: Comparative Analysis

## Objectives
1. Load results from both models
2. Compare performance metrics
3. Test statistical significance
4. Visualize comparisons
5. Generate final report

In [None]:
import sys
sys.path.append('..')

from src.utils.helpers import load_config, load_results
from src.models.random_forest import RandomForestSpeakerClassifier
from src.models.cnn_1d import CNN1DSpeakerClassifier
from src.evaluation.metrics import evaluate_model, compare_models, statistical_significance_test
from src.evaluation.visualization import plot_model_comparison, plot_confusion_matrix
from src.data.dataset import FeatureDataset
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Read the project's YAML configuration (path is relative to the working
# directory); the resulting object is passed to both model constructors.
config = load_config('../config/config.yaml')

In [None]:
# Load test features
# Each model reads its own on-disk representation of the test split:
# a pickle file for the Random Forest and an HDF5 file for the CNN.
RF_TEST_FEATURES_PATH = '../data/processed/test_aggregated.pkl'
CNN_TEST_FEATURES_PATH = '../data/processed/test_sequential.h5'

# Random Forest inputs and labels.
rf_test = FeatureDataset.load_pickle(RF_TEST_FEATURES_PATH)
X_test_rf, y_test = rf_test.get_data()

# CNN inputs and labels (kept separately as y_test_cnn).
cnn_test = FeatureDataset.load_hdf5(CNN_TEST_FEATURES_PATH)
X_test_cnn, y_test_cnn = cnn_test.get_data()

# Report both feature shapes as a quick sanity check.
print(f'RF test features: {X_test_rf.shape}')
print(f'CNN test features: {X_test_cnn.shape}')

In [None]:
# Load models
# The class count is the number of distinct labels in the RF test labels.
# NOTE(review): this presumes every speaker occurs in the test split - confirm.
speaker_labels = set(y_test)
num_speakers = len(speaker_labels)

# Random Forest: construct the wrapper, then load the saved model file.
rf_model = RandomForestSpeakerClassifier(num_speakers, config)
rf_model.load('../models/random_forest_best.pkl')

# CNN: the constructor also receives the feature shape with the first axis
# removed (presumably the per-sample input shape - confirm).
cnn_input_shape = X_test_cnn.shape[1:]
cnn_model = CNN1DSpeakerClassifier(num_speakers, cnn_input_shape, config)
cnn_model.load('../models/cnn_best.h5')

In [None]:
# Evaluate both models
# Each model is scored on its own feature representation and label array.
rf_results = evaluate_model(rf_model, X_test_rf, y_test)
cnn_results = evaluate_model(cnn_model, X_test_cnn, y_test_cnn)

# Print every entry of each result's 'metrics' mapping under a heading.
for heading, model_results in (
    ('Random Forest Results:', rf_results),
    ('\nCNN 1D Results:', cnn_results),
):
    print(heading)
    for metric, value in model_results['metrics'].items():
        print(f'  {metric}: {value:.4f}')

In [None]:
# Compare models
# compare_models receives a single label array for both prediction vectors,
# while the CNN was evaluated against y_test_cnn. Matching label arrays are a
# necessary condition for that to be valid, so fail loudly if they disagree
# instead of reporting CNN metrics against the wrong targets.
if not np.array_equal(np.asarray(y_test), np.asarray(y_test_cnn)):
    raise ValueError(
        'RF and CNN test labels are not identical; the models cannot be '
        'compared against a single label array.'
    )

comparison_df = compare_models(
    rf_results['predictions'],
    cnn_results['predictions'],
    y_test,
    model1_name='Random Forest',
    model2_name='CNN 1D',
)

# Show the comparison table without the DataFrame index column.
print('\nModel Comparison:')
print(comparison_df.to_string(index=False))

In [None]:
# Plot comparison
# Draw the comparison table as a figure; the helper is given a title and a
# path to save the image to.
comparison_plot_options = {
    'title': 'Random Forest vs CNN 1D - Performance Comparison',
    'save_path': '../results/comparison/model_comparison.png',
}
fig = plot_model_comparison(comparison_df, **comparison_plot_options)
plt.show()

In [None]:
# Plot confusion matrices side by side
from pathlib import Path

import seaborn as sns

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# One heatmap per model, left to right, with identical styling so the two
# normalized confusion matrices can be compared directly.
confusion_panels = (
    ('Random Forest', rf_results),
    ('CNN 1D', cnn_results),
)
for panel_ax, (panel_title, panel_results) in zip(axes, confusion_panels):
    sns.heatmap(
        panel_results['confusion_matrix_normalized'],
        annot=True,
        fmt='.2f',
        cmap='Blues',
        ax=panel_ax,
    )
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Predicted')
    panel_ax.set_ylabel('True')

plt.tight_layout()

# savefig does not create missing directories, so make sure the output
# folder exists before writing the image.
confusion_fig_path = Path('../results/comparison/confusion_matrices.png')
confusion_fig_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(confusion_fig_path, dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Statistical significance test
# Per-sample correctness indicators (1.0 = correct, 0.0 = wrong); each model
# is scored against its own label array.
rf_hits = rf_results['predictions'] == y_test
rf_correct = rf_hits.astype(float)
cnn_hits = cnn_results['predictions'] == y_test_cnn
cnn_correct = cnn_hits.astype(float)

# NOTE(review): the two inputs are paired 0/1 outcomes, which assumes both
# test sets list the same samples in the same order. For paired binary
# outcomes McNemar's test is the usual choice - confirm 'wilcoxon' is intended.
significance_options = {'test': 'wilcoxon', 'alpha': 0.05}
stat_test = statistical_significance_test(
    rf_correct, cnn_correct, **significance_options
)

# Report the fields of the returned result.
print('\nStatistical Significance Test:')
print(f"Test: {stat_test['test']}")
print(f"P-value: {stat_test['p_value']:.4f}")
print(f"Significant: {stat_test['is_significant']}")
print(f"\n{stat_test['interpretation']}")

In [None]:
# Per-speaker comparison
# Grouped bar chart: one pair of bars per speaker index, RF on the left and
# CNN on the right of each tick.
fig, ax = plt.subplots(figsize=(12, 6))
x = np.arange(num_speakers)
width = 0.35

# Collect the accuracies in speaker-index order 0..num_speakers-1.
# NOTE(review): assumes 'per_speaker_accuracy' is indexable by those
# integers - confirm against evaluate_model.
rf_accuracy_by_speaker = rf_results['per_speaker_accuracy']
rf_per_speaker = [rf_accuracy_by_speaker[speaker] for speaker in range(num_speakers)]
cnn_accuracy_by_speaker = cnn_results['per_speaker_accuracy']
cnn_per_speaker = [cnn_accuracy_by_speaker[speaker] for speaker in range(num_speakers)]

# Shift each series by half a bar width so the pair straddles its tick.
bar_series = (
    (-width / 2, rf_per_speaker, 'Random Forest'),
    (width / 2, cnn_per_speaker, 'CNN 1D'),
)
for bar_offset, bar_heights, bar_label in bar_series:
    ax.bar(x + bar_offset, bar_heights, width, label=bar_label, alpha=0.7)

ax.set_xlabel('Speaker')
ax.set_ylabel('Accuracy')
ax.set_title('Per-Speaker Accuracy Comparison')
ax.set_xticks(x)
ax.set_xticklabels([f'S{i}' for i in range(num_speakers)])
ax.legend()
# Horizontal grid lines only, kept faint.
ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig(
    '../results/comparison/per_speaker_comparison.png',
    dpi=300,
    bbox_inches='tight',
)
plt.show()

In [None]:
# Final summary
# Print the headline accuracies, the CNN-minus-RF accuracy gap and the outcome
# of the significance test computed earlier.
rf_accuracy = rf_results['metrics']['accuracy']
cnn_accuracy = cnn_results['metrics']['accuracy']

# The difference of two accuracies scaled by 100 is an absolute gap in
# percentage points, not a relative percent change, so label it as such.
# Assumes accuracy is stored as a fraction in [0, 1] - confirm.
accuracy_gain_pp = (cnn_accuracy - rf_accuracy) * 100
banner = '=' * 60

print(banner)
print('FINAL SUMMARY')
print(banner)
print(f'\nRandom Forest Accuracy: {rf_accuracy:.4f}')
print(f'CNN 1D Accuracy: {cnn_accuracy:.4f}')
# Explicit sign: a negative value means the CNN scored below the Random Forest.
print(f'\nImprovement (CNN 1D vs Random Forest): {accuracy_gain_pp:+.2f} percentage points')
print(f'\nStatistical Significance: {"Yes" if stat_test["is_significant"] else "No"}')
print(f'P-value: {stat_test["p_value"]:.4f}')
print('\n' + banner)