# Model Comparison Analysis

This notebook compares the validation F1 scores of the models trained on Day 6, for each code smell: Baseline XGBoost, XGBoost + SMOTE, Tuned XGBoost, Random Forest, and the best ensemble method (Voting or Stacking).

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the per-model result tables. Each CSV is expected to contain at least
# a 'smell' column and a 'val_f1' column (the only two columns read below).
baseline_df = pd.read_csv('../results/baseline_results.csv')
smote_df = pd.read_csv('../results/smote_results.csv')
tuned_df = pd.read_csv('../results/hyperparameter_tuning_results.csv')
rf_df = pd.read_csv('../results/random_forest_results.csv')
ensemble_df = pd.read_csv('../results/ensemble_results.csv')


def _first_val_f1(results_df, smell):
    """Return the first 'val_f1' value recorded for `smell` in `results_df`.

    Returns NaN when `results_df` has no row for `smell`, so that a smell
    missing from one results file shows up as a gap in the comparison table
    instead of aborting the whole cell with an IndexError.
    """
    scores = results_df.loc[results_df['smell'] == smell, 'val_f1']
    return scores.iloc[0] if len(scores) else float('nan')


# Prepare comparison data: one record per smell, one column per model.
# The set of smells is driven by the baseline results; smells that appear
# only in other files are not included.
comparison_data = []

for smell in baseline_df['smell'].unique():
    # Get best ensemble result (Voting or Stacking); .max() of an empty
    # selection is NaN, matching the missing-value behaviour of _first_val_f1.
    best_ensemble = ensemble_df.loc[ensemble_df['smell'] == smell, 'val_f1'].max()

    comparison_data.append({
        'smell': smell,
        'Baseline XGBoost': _first_val_f1(baseline_df, smell),
        'XGBoost + SMOTE': _first_val_f1(smote_df, smell),
        'XGBoost Tuned': _first_val_f1(tuned_df, smell),
        'Random Forest': _first_val_f1(rf_df, smell),
        'Best Ensemble': best_ensemble
    })

# Build the frame once from the list of records, then display it.
comparison_df = pd.DataFrame(comparison_data)
comparison_df

In [None]:
# Grouped bar chart: one cluster of bars per code smell, one bar per model.
fig, ax = plt.subplots(figsize=(14, 8))
comparison_df.set_index('smell').plot.bar(ax=ax, width=0.8)

# Title and axis labels so the figure stands on its own.
ax.set_title('Model Comparison Across All Code Smells', fontsize=14, fontweight='bold')
ax.set_xlabel('Code Smell', fontsize=12)
ax.set_ylabel('Validation F1 Score', fontsize=12)

# Y-range runs slightly past 1.0; light horizontal grid lines; legend bottom-right.
ax.set_ylim(0, 1.1)
ax.grid(axis='y', alpha=0.3)
ax.legend(loc='lower right')

# Tighten the layout, write the PNG next to the result CSVs, then render inline.
fig.tight_layout()
fig.savefig('../results/model_comparison.png', dpi=300)
plt.show()

In [None]:
# Summary statistics: mean validation F1 per model, then each model's
# relative change versus the baseline mean.
separator = "=" * 60

print("AVERAGE F1 SCORES:")
print(separator)
print(comparison_df.mean(numeric_only=True))

print("\nIMPROVEMENT OVER BASELINE:")
print(separator)
baseline_avg = comparison_df['Baseline XGBoost'].mean()

# Every column after the first ('smell') holds one model's scores; the
# baseline column itself is included and therefore prints a +0.00% row.
for model_name, model_scores in comparison_df.iloc[:, 1:].items():
    model_avg = model_scores.mean()
    pct_change = (model_avg - baseline_avg) / baseline_avg * 100
    print(f"{model_name:20s}: {model_avg:.4f} ({pct_change:+.2f}%)")