# Phase 5: Evaluation

## Fannie Mae 2008Q1 Stress Testing - Credit Default Risk Modeling

---

### CRISP-DM Phase 5: Evaluate and Compare Model Performance

In [None]:
# Import Libraries
# Setup cell: imports, global warning filter, inline plotting and plot style
# shared by every cell below.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# pickle is used to read the Phase 4 artifact and to write the Phase 5 artifact.
import pickle
# NOTE(review): precision_recall_curve and roc_auc_score are imported but not
# referenced in the cells visible here - confirm whether they are still needed.
from sklearn.metrics import (
    classification_report, confusion_matrix, roc_curve, 
    precision_recall_curve, roc_auc_score
)
import warnings
# NOTE(review): this silences every warning category for the rest of the
# session, including any metric-related warnings the evaluation calls below
# might emit - consider narrowing the filter.
warnings.filterwarnings('ignore')

# IPython magic (not valid in a plain .py script): render figures inline.
%matplotlib inline
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require a recent
# matplotlib release - confirm against the pinned environment.
plt.style.use('seaborn-v0_8-whitegrid')

print("Libraries imported successfully!")

## 5.1 Load Results from Phase 4

In [None]:
# Read the Phase 4 artifact and unpack the pieces this notebook works with.
with open('phase4_results.pkl', 'rb') as f:
    data = pickle.load(f)

# One lookup per required entry; a missing entry raises KeyError.
results, comparison_df, best_model_name, y_test, features = (
    data[entry]
    for entry in ('results', 'comparison_df', 'best_model_name', 'y_test', 'features')
)

print(f"Loaded results for {len(results)} models")
print(f"Best model: {best_model_name}")

## 5.2 Model Comparison Summary

In [None]:
# Show the comparison table rounded to 4 decimals, with the per-column
# maximum shaded light green.
print("Model Performance Comparison:")
print("="*60)
comparison_rounded = comparison_df.round(4)
comparison_styled = comparison_rounded.style.highlight_max(axis=0, color='lightgreen')
display(comparison_styled)

## 5.3 ROC Curves Comparison

In [None]:
# Plot ROC curves: one curve per model in `results`, drawn on shared axes and
# saved to Phase5_ROC_Curves.png.
fig, ax = plt.subplots(figsize=(10, 8))

colors = ['#3498db', '#2ecc71', '#e74c3c', '#9b59b6']
auc_target = 0.70

for i, (model_name, res) in enumerate(results.items()):
    fpr, tpr, _ = roc_curve(y_test, res['y_pred_proba'])
    ax.plot(fpr, tpr,
            label=f"{model_name} (AUC={res['auc_roc']:.3f})",
            linewidth=2, color=colors[i % len(colors)])  # palette wraps if there are more models than colors

# Add diagonal line (performance of a random classifier)
ax.plot([0, 1], [0, 1], 'k--', alpha=0.5, label='Random (AUC=0.5)')

# The AUC target is an *area* under the curve, not a true-positive-rate level,
# so a horizontal line at y=0.70 would misrepresent it. Show the target as a
# legend-only entry (no artist is drawn) and compare it against the AUC value
# printed next to each model name.
ax.plot([], [], linestyle='none', label=f'Target AUC={auc_target:.2f}')

ax.set_xlabel('False Positive Rate', fontsize=12)
ax.set_ylabel('True Positive Rate', fontsize=12)
ax.set_title('ROC Curves Comparison - Credit Default Models', fontsize=14, fontweight='bold')
ax.legend(loc='lower right', fontsize=10)
ax.grid(True, alpha=0.3)
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])

plt.tight_layout()
plt.savefig('Phase5_ROC_Curves.png', dpi=150)
plt.show()

print("\n✓ ROC curves saved to Phase5_ROC_Curves.png")

## 5.4 Best Model Analysis

In [None]:
# Confusion matrix for the model Phase 4 selected as best: print the four
# cells, then render and save a heatmap.
best_results = results[best_model_name]
cm = confusion_matrix(y_test, best_results['y_pred'])

print(f"\nConfusion Matrix - {best_model_name}:")
print("="*50)
# Each caption is paired with its (row, column) position in cm; the heatmap
# below labels rows as actual and columns as predicted.
for cm_caption, (cm_row, cm_col) in (
    ("True Negatives (TN):  ", (0, 0)),
    ("False Positives (FP): ", (0, 1)),
    ("False Negatives (FN): ", (1, 0)),
    ("True Positives (TP):  ", (1, 1)),
):
    print(f"{cm_caption}{cm[cm_row, cm_col]:,}")

# Visualize confusion matrix
class_names = ['No Default', 'Default']
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    ax=ax,
    xticklabels=class_names,
    yticklabels=class_names,
    annot_kws={'size': 16},
)
ax.set_xlabel('Predicted', fontsize=12)
ax.set_ylabel('Actual', fontsize=12)
ax.set_title(f'Confusion Matrix - {best_model_name}', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('Phase5_Confusion_Matrix.png', dpi=150)
plt.show()

In [None]:
# Per-class precision / recall / F1 for the best model's hard predictions.
print(f"\nClassification Report - {best_model_name}:")
print("="*60)
report_text = classification_report(
    y_test,
    best_results['y_pred'],
    target_names=['No Default', 'Default'],
)
print(report_text)

## 5.5 Feature Importance

In [None]:
# Feature Importance
# Builds `feat_imp_df` for the best model and saves a bar chart to
# Phase5_Feature_Importance.png. Models exposing `feature_importances_` are
# ranked by importance; models exposing `coef_` are ranked by absolute
# coefficient. Any other model type leaves `feat_imp_df` as None.
model = best_results['model']

if hasattr(model, 'feature_importances_'):
    importances = model.feature_importances_
    # Ascending order so the most important feature ends up at the top of barh.
    feat_imp_df = pd.DataFrame({
        'Feature': features,
        'Importance': importances
    }).sort_values('Importance', ascending=True)

    # Plot
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.barh(feat_imp_df['Feature'], feat_imp_df['Importance'], color='steelblue')
    ax.set_xlabel('Importance', fontsize=12)
    ax.set_title(f'Feature Importance - {best_model_name}', fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3, axis='x')
    plt.tight_layout()
    plt.savefig('Phase5_Feature_Importance.png', dpi=150)
    plt.show()

    print("\nTop 10 Features:")
    display(feat_imp_df.tail(10).sort_values('Importance', ascending=False))

elif hasattr(model, 'coef_'):
    # `coef_` may be 2-D (one row per class/target) or 1-D depending on the
    # estimator. Indexing a 1-D array with [0] would return a single scalar
    # that pandas silently broadcasts to every feature, so normalise to 2-D
    # first and then take the first row (same row the 2-D case always used).
    coefs = np.atleast_2d(model.coef_)[0]
    feat_imp_df = pd.DataFrame({
        'Feature': features,
        'Coefficient': coefs,
        'Abs_Importance': np.abs(coefs)
    }).sort_values('Abs_Importance', ascending=False)

    # Plot signed coefficients so this branch also produces the
    # Phase5_Feature_Importance.png file the tree-based branch writes.
    # Reverse the ranking so the largest |coefficient| is drawn at the top.
    coef_plot_df = feat_imp_df.iloc[::-1]
    coef_bar_colors = [
        'steelblue' if coef >= 0 else 'indianred'
        for coef in coef_plot_df['Coefficient']
    ]
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.barh(coef_plot_df['Feature'], coef_plot_df['Coefficient'], color=coef_bar_colors)
    ax.axvline(0, color='black', linewidth=0.8)  # separates negative from positive effects
    ax.set_xlabel('Coefficient', fontsize=12)
    ax.set_title(f'Feature Importance - {best_model_name}', fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3, axis='x')
    plt.tight_layout()
    plt.savefig('Phase5_Feature_Importance.png', dpi=150)
    plt.show()

    print("\nTop 10 Features (by absolute coefficient):")
    display(feat_imp_df.head(10))
else:
    print("Feature importance not available for this model type.")
    feat_imp_df = None

## 5.6 Model Metrics Comparison Chart

In [None]:
# Bar chart comparison
# Grouped bars: one group per metric, one bar per model in `results`.
# Saved to Phase5_Metrics_Comparison.png.
metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'AUC-ROC']
# Keys in each per-model result dict, in the same order as `metrics`.
metric_keys = ['accuracy', 'precision', 'recall', 'f1_score', 'auc_roc']
x = np.arange(len(metrics))
# Size the bars from the number of models so each group always spans 0.8 of
# its slot (this gives the previous width of 0.2 for four models) instead of
# overlapping neighbouring groups when more models are present.
n_models = max(len(results), 1)
width = 0.8 / n_models
colors = ['#3498db', '#2ecc71', '#e74c3c', '#9b59b6']

fig, ax = plt.subplots(figsize=(12, 6))

for i, (model_name, res) in enumerate(results.items()):
    values = [res[key] for key in metric_keys]
    # Wrap the palette so more than len(colors) models cannot raise IndexError.
    ax.bar(x + i*width, values, width, label=model_name, alpha=0.8,
           color=colors[i % len(colors)])

# Add target line for AUC
ax.axhline(y=0.70, color='red', linestyle='--', alpha=0.7, label='AUC Target (0.70)')

ax.set_xlabel('Metrics', fontsize=12)
ax.set_ylabel('Score', fontsize=12)
ax.set_title('Model Performance Comparison', fontsize=14, fontweight='bold')
# Centre each tick under its group of bars regardless of how many models exist.
ax.set_xticks(x + width * (n_models - 1) / 2)
ax.set_xticklabels(metrics, fontsize=11)
ax.legend(loc='upper left', fontsize=9)
ax.set_ylim(0, 1.1)
ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('Phase5_Metrics_Comparison.png', dpi=150)
plt.show()

In [None]:
# Bundle the evaluation artifacts and pickle them for Phase 6.
evaluation_data = dict(
    comparison_df=comparison_df,
    best_model_name=best_model_name,
    # NOTE(review): this is the highest AUC-ROC in the comparison table; it
    # matches `best_model_name` only if Phase 4 selected the best model by
    # AUC-ROC - confirm.
    best_auc=comparison_df['AUC-ROC'].max(),
    confusion_matrix=cm,
    # Falls back to None when the feature-importance cell has not been run.
    feature_importance_df=globals().get('feat_imp_df'),
    results=results,
    y_test=y_test,
    features=features,
)

with open('phase5_evaluation.pkl', 'wb') as f:
    pickle.dump(evaluation_data, f)

print("\n✓ Evaluation results saved to phase5_evaluation.pkl")

---
## ✅ Phase 5 Complete

**Evaluation Summary**:
- All models evaluated and compared
- ROC curves, confusion matrix, and feature importance generated
- Best model identified with detailed analysis

**Visualizations Created**:
- Phase5_ROC_Curves.png
- Phase5_Confusion_Matrix.png
- Phase5_Feature_Importance.png (only when the best model supports feature-importance plotting)
- Phase5_Metrics_Comparison.png

**Next**: Phase 6 - Deployment