# Results Visualization

This notebook visualizes and compares the performance of all trained models.

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import warnings
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import (
    confusion_matrix, classification_report,
    roc_curve, auc, RocCurveDisplay,
    precision_recall_curve, f1_score, accuracy_score
)
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import learning_curve

# Plot styling: apply the matplotlib style sheet, then the seaborn palette
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# Feature matrices and encoded targets, read in train/test order
X_train, X_test, y_train, y_test = (
    np.load(npy_path)
    for npy_path in (
        'data/X_train_fe.npy',
        'data/X_test_fe.npy',
        'data/y_train_encoded.npy',
        'data/y_test_encoded.npy',
    )
)

# Label encoder; its classes_ attribute provides the class names used on plots
le = joblib.load('data/label_encoder.joblib')
class_names = le.classes_

# Predictions archive (.npz); later cells index it by model key and read .files
predictions = np.load('data/predictions.npz')

# Result tables for the baseline, comparison and hyperparameter runs
baseline_results, comparison_results, hp_results = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/baseline_results.csv',
        'data/comparison_results.csv',
        'data/hyperparameter_results.csv',
    )
)

# Quick sanity print of what was loaded
print(
    f'Classes: {class_names}',
    f'Test set size: {len(y_test)}',
    sep='\n',
)

## Model Performance Comparison

In [None]:
# Combine the comparison and hyperparameter-tuning results into one table,
# keeping only the columns that are plotted below.
metric_columns = ['Model', 'Test Accuracy', 'Test F1']
all_results = pd.concat(
    [comparison_results[metric_columns], hp_results[metric_columns]],
    ignore_index=True,
)

# Sort ascending by Test F1: barh draws from the bottom up, so the best model
# ends up at the top of each panel.
all_results = all_results.sort_values('Test F1', ascending=True)

# Create figure
fig, axes = plt.subplots(1, 2, figsize=(16, 8))
colors = plt.cm.RdYlGn(np.linspace(0.2, 0.8, len(all_results)))

# Bars are placed at integer positions rather than by model name. Matplotlib's
# categorical axis maps repeated names to a single position, which would
# overlap bars and misplace the value annotations if a model name appears in
# both result tables.
bar_positions = np.arange(len(all_results))
model_labels = all_results['Model'].tolist()

# (metric column, x-axis label, panel title) for each of the two panels
panels = [
    ('Test Accuracy', 'Accuracy', 'Test Accuracy by Model'),
    ('Test F1', 'F1 Score (Weighted)', 'Test F1 Score by Model'),
]

for ax, (column, xlabel, title) in zip(axes, panels):
    values = all_results[column].to_numpy()
    ax.barh(bar_positions, values, color=colors)
    ax.set_yticks(bar_positions)
    ax.set_yticklabels(model_labels)
    ax.set_xlabel(xlabel, fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlim(0, 1)
    # Annotate each bar with its value, just past the end of the bar
    for position, value in zip(bar_positions, values):
        ax.text(value + 0.01, position, f'{value:.3f}', va='center', fontsize=10)

plt.tight_layout()
plt.savefig('data/model_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

print('\nAll Models Ranked by F1 Score:')
print(all_results.sort_values('Test F1', ascending=False).to_string(index=False))

## Confusion Matrices

In [None]:
# Test-set predictions per model, read from the predictions archive.
# NOTE: the dict is named `models` but holds prediction arrays, not estimators.
models = {
    'Random Forest': predictions['rf'],
    'Gradient Boosting': predictions['gradient_boost'],
    'AdaBoost': predictions['adaboost'],
    'Bagging (DT)': predictions['bagging_dt']
}

# XGBoost predictions are optional: include them only if the archive has them
if 'xgboost' in predictions.files:
    models['XGBoost'] = predictions['xgboost']

# Pin the label order so every matrix has exactly one row/column per entry in
# class_names, even if a class is absent from both y_test and a model's
# predictions (otherwise the matrix shrinks and the tick labels misalign).
# assumes the encoded labels are 0..len(class_names)-1 — TODO confirm against the encoder
class_labels = np.arange(len(class_names))

# Two rows of subplots, with enough columns to fit every model
n_models = len(models)
fig, axes = plt.subplots(2, (n_models + 1) // 2, figsize=(16, 12))
axes = axes.flatten()

for ax, (name, preds) in zip(axes, models.items()):
    cm = confusion_matrix(y_test, preds, labels=class_labels)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,
                xticklabels=class_names, yticklabels=class_names)
    ax.set_title(f'{name}', fontsize=12, fontweight='bold')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')

# Hide the unused subplot left over when the number of models is odd
for ax in axes[n_models:]:
    ax.set_visible(False)

plt.suptitle('Confusion Matrices - Top Models', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig('data/confusion_matrices.png', dpi=150, bbox_inches='tight')
plt.show()

## ROC Curves (Multi-class)

In [None]:
# Load the models used for probability estimates.
# NOTE: joblib.load unpickles arbitrary objects; only load artifacts you trust.
rf_model = joblib.load('data/rf_classifier.joblib')
gb_model = joblib.load('data/gradient_boost.joblib')
ada_model = joblib.load('data/adaboost.joblib')

# One-vs-rest indicator matrix for the multi-class ROC. The class list is
# derived from class_names rather than hard-coded to three classes.
# assumes the encoded labels are 0..len(class_names)-1 — TODO confirm against the encoder
class_ids = np.arange(len(class_names))
y_test_bin = label_binarize(y_test, classes=class_ids)
if y_test_bin.shape[1] == 1:
    # label_binarize returns a single column for two classes; expand it so
    # that column k is the indicator for class k.
    y_test_bin = np.hstack([1 - y_test_bin, y_test_bin])
n_classes = y_test_bin.shape[1]

# Class-probability predictions on the test set.
# Column k of each array is assumed to correspond to encoded class k.
model_probs = {
    'Random Forest': rf_model.predict_proba(X_test),
    'Gradient Boosting': gb_model.predict_proba(X_test),
    'AdaBoost': ada_model.predict_proba(X_test)
}

# XGBoost is optional. Skip it when the artifact cannot be loaded or scored,
# but report why instead of silently swallowing every error. Catching
# Exception (not a bare except) lets KeyboardInterrupt through.
try:
    xgb_model = joblib.load('data/xgboost.joblib')
    model_probs['XGBoost'] = xgb_model.predict_proba(X_test)
except Exception as exc:
    print(f'Skipping XGBoost ROC curves: {exc!r}')

# One panel per class; squeeze=False keeps `axes` an array for any class count
fig, axes = plt.subplots(1, n_classes, figsize=(6 * n_classes, 5), squeeze=False)
axes = axes.ravel()

for class_idx, ax in enumerate(axes):
    # One ROC curve per model for this class (one-vs-rest)
    for model_name, y_proba in model_probs.items():
        fpr, tpr, _ = roc_curve(y_test_bin[:, class_idx], y_proba[:, class_idx])
        roc_auc = auc(fpr, tpr)
        ax.plot(fpr, tpr, linewidth=2, label=f'{model_name} (AUC = {roc_auc:.3f})')

    # Diagonal reference line: the ROC of a random classifier
    ax.plot([0, 1], [0, 1], 'k--', linewidth=1)
    ax.set_xlabel('False Positive Rate', fontsize=11)
    ax.set_ylabel('True Positive Rate', fontsize=11)
    ax.set_title(f'ROC Curve - {class_names[class_idx]}', fontsize=12, fontweight='bold')
    ax.legend(loc='lower right', fontsize=9)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1.05])

plt.suptitle('ROC Curves by Class', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig('data/roc_curves.png', dpi=150, bbox_inches='tight')
plt.show()

## Feature Importance

In [None]:
# Feature importances from the Random Forest loaded in the ROC cell
importances = rf_model.feature_importances_

# Load the feature selector and preprocessor to recover the feature names
selector = joblib.load('data/selector.joblib')
preprocessor_fe = joblib.load('data/preprocessor_fe.joblib')

# Names of all preprocessed features, reduced to those the selector kept
feature_names = preprocessor_fe.get_feature_names_out()
selected_mask = selector.get_support()
selected_features = feature_names[selected_mask]

# The names and importances are paired positionally, so their lengths must
# agree; fail early with a message that names both sides.
if len(selected_features) != len(importances):
    raise ValueError(
        f'Feature name/importance mismatch: {len(selected_features)} selected '
        f'feature names vs {len(importances)} Random Forest importances'
    )

# Create importance DataFrame, most important feature first
importance_df = pd.DataFrame({
    'feature': selected_features,
    'importance': importances
}).sort_values('importance', ascending=False)

# Plot the top features (at most 25; fewer if the model has fewer features)
top_25 = importance_df.head(25)
n_top = len(top_25)
y_positions = np.arange(n_top)

fig, ax = plt.subplots(figsize=(12, 10))
colors = plt.cm.viridis(np.linspace(0.2, 0.8, n_top))
ax.barh(y_positions, top_25['importance'].values, color=colors)
ax.set_yticks(y_positions)
ax.set_yticklabels(top_25['feature'].values)
ax.set_xlabel('Importance', fontsize=12)
# The title reports the number of bars actually drawn
ax.set_title(f'Top {n_top} Feature Importances (Random Forest)', fontsize=14, fontweight='bold')
# Invert the y-axis so the most important feature is at the top
ax.invert_yaxis()

plt.tight_layout()
plt.savefig('data/feature_importance.png', dpi=150, bbox_inches='tight')
plt.show()

print('\nTop 10 Most Important Features:')
print(importance_df.head(10).to_string(index=False))

## Learning Curves

In [None]:
# Learning curves for Random Forest and Gradient Boosting.
# NOTE: these are freshly constructed estimators (n_estimators=100,
# random_state=42), not the fitted models loaded from disk; learning_curve
# refits each one on growing subsets of the training data.
learning_curve_models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
}

# One panel per model; squeeze=False keeps `axes` an array for any model count
fig, axes = plt.subplots(1, len(learning_curve_models), figsize=(14, 5), squeeze=False)
axes = axes.ravel()

for ax, (model_name, estimator) in zip(axes, learning_curve_models.items()):
    # 5-fold CV scores at 10 training-set sizes from 10% to 100%
    train_sizes, train_scores, test_scores = learning_curve(
        estimator,
        X_train, y_train,
        cv=5,
        n_jobs=-1,
        train_sizes=np.linspace(0.1, 1.0, 10),
        scoring='f1_weighted'
    )

    # Mean and standard deviation across folds for each training-set size
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    # Shaded bands show +/- one standard deviation around each mean curve
    ax.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, alpha=0.1, color='blue')
    ax.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, alpha=0.1, color='orange')
    ax.plot(train_sizes, train_mean, 'o-', color='blue', label='Training Score')
    ax.plot(train_sizes, test_mean, 'o-', color='orange', label='Cross-validation Score')
    ax.set_xlabel('Training Set Size', fontsize=11)
    ax.set_ylabel('F1 Score', fontsize=11)
    ax.set_title(f'Learning Curve - {model_name}', fontsize=12, fontweight='bold')
    ax.legend(loc='lower right')
    ax.grid(True)

plt.tight_layout()
plt.savefig('data/learning_curves.png', dpi=150, bbox_inches='tight')
plt.show()

## Final Summary

In [None]:
# Final model summary: header banner
banner = '='*70
print(banner, 'FINAL MODEL COMPARISON SUMMARY', banner, sep='\n')

# Rank every model by Test F1 (best first) under a 1-based 'Rank' index
final_ranking = (
    all_results
    .sort_values('Test F1', ascending=False)
    .reset_index(drop=True)
)
final_ranking.index = pd.RangeIndex(1, len(final_ranking) + 1, name='Rank')

print('\nModels Ranked by Test F1 Score:')
print(final_ranking.to_string())

# Highlight the top-ranked row
print('\n' + banner)
best_model = final_ranking.iloc[0]
best_model_lines = (
    f"BEST MODEL: {best_model['Model']}",
    f"  - Test Accuracy: {best_model['Test Accuracy']:.4f}",
    f"  - Test F1 Score: {best_model['Test F1']:.4f}",
    banner,
)
print('\n'.join(best_model_lines))

# Key insights (static text, not computed from the results above)
print('\nKEY INSIGHTS:')
key_insights = (
    '  1. Ensemble methods (Bagging/Boosting) generally outperform simple classifiers',
    '  2. Feature engineering improved model performance across all classifiers',
    '  3. Gradient Boosting and Random Forest show best generalization',
    '  4. Class imbalance affects prediction of "Enrolled" students',
)
print('\n'.join(key_insights))

In [None]:
# Persist the ranking table; the 'Rank' index is written as the first column
final_ranking.to_csv('data/final_model_ranking.csv')

# List every artifact this notebook wrote to the data/ directory
saved_artifact_lines = (
    '\nAll visualizations saved to data/ directory:',
    '  - model_comparison.png',
    '  - confusion_matrices.png',
    '  - roc_curves.png',
    '  - feature_importance.png',
    '  - learning_curves.png',
    '  - final_model_ranking.csv',
)
for artifact_line in saved_artifact_lines:
    print(artifact_line)