In [40]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                            classification_report, confusion_matrix, roc_curve, 
                            roc_auc_score, precision_recall_curve, average_precision_score,
                            matthews_corrcoef, balanced_accuracy_score)

In [41]:
# Load saved models and data
print("Loading saved models and data...")
import pickle

# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load 'churn_models_and_data.pkl' when it comes from a trusted training
# run; never point this cell at a file of unknown origin.
with open('churn_models_and_data.pkl', 'rb') as f:
    data = pickle.load(f)

# Fail fast with one clear message if the bundle is incomplete, rather than a
# bare KeyError on whichever lookup below happens to come first.
required_keys = ('models', 'X_test', 'y_test', 'feature_names')
missing_keys = [key for key in required_keys if key not in data]
if missing_keys:
    raise KeyError(
        f"'churn_models_and_data.pkl' is missing expected keys: {missing_keys}"
    )

trained_models = data['models']          # iterated later via .items() as name -> model
X_test = data['X_test']                  # features passed to model.predict / predict_proba
y_test = data['y_test']                  # ground-truth labels for the test split
feature_names = data['feature_names']

Loading saved models and data...


In [42]:
import os

# exist_ok=True makes this cell safe to re-run and removes the gap between
# "check whether it exists" and "create it" that the two-step form has.
os.makedirs('evaluation_plots', exist_ok=True)

In [43]:
# MODEL EVALUATION
print("\n=== MODEL EVALUATION ===")

# Results table stored column-wise: one independent empty list per column.
# 'MCC' is the Matthews Correlation Coefficient, which is good for
# imbalanced data.
results = {
    column: []
    for column in (
        'Model',
        'Accuracy',
        'Balanced Accuracy',
        'Precision',
        'Recall',
        'F1 Score',
        'AUC',
        'MCC',
    )
}


=== MODEL EVALUATION ===


In [44]:
# Function to create and save confusion matrix with detailed analysis
def plot_confusion_matrix(y_true, y_pred, model_name, labels=None):
    """Save a two-panel confusion-matrix figure for a binary classifier.

    The left panel is a heatmap of the raw counts. The right panel is a table
    giving, for each of the four cells, its count, its share of all samples
    and a short plain-language description. The figure is written to
    ``evaluation_plots/<model_name>_confusion_matrix.png`` (spaces in
    ``model_name`` replaced by underscores) and then closed. This function
    does not create the ``evaluation_plots`` directory itself.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Predicted class labels.
    model_name : str
        Used in the plot title and in the output file name.
    labels : list, optional
        Forwarded to ``sklearn.metrics.confusion_matrix`` to pin which labels
        index the matrix and in what order (negative class first, positive
        class second). Pass it, e.g. ``[0, 1]``, when the data may contain
        only one of the two classes. The default ``None`` keeps the previous
        behaviour of letting scikit-learn infer the labels from the data.

    Raises
    ------
    ValueError
        If the confusion matrix is not 2x2 (previously this surfaced as an
        opaque tuple-unpacking ValueError).
    """
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    # The TN/FP/FN/TP breakdown below only makes sense for a 2x2 matrix.
    if cm.shape != (2, 2):
        raise ValueError(
            f"plot_confusion_matrix needs a 2x2 confusion matrix but got shape "
            f"{cm.shape} for '{model_name}'; pass labels=[negative, positive] "
            f"if one class is absent from the data."
        )

    # Calculate confusion matrix statistics (row-major: TN, FP, FN, TP)
    tn, fp, fn, tp = cm.ravel()
    total = tn + fp + fn + tp

    # Create figure with two subplots
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

    # Plot standard confusion matrix
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax1)
    ax1.set_title(f'{model_name} Confusion Matrix')
    ax1.set_xlabel('Predicted')
    ax1.set_ylabel('Actual')

    # Create a matrix for the right side with percentages and explanations
    cm_stats = np.array([
        [f"TN: {tn}\n({tn/total:.1%})\nTrue Negatives\n(Correctly predicted\nnon-churned)",
         f"FP: {fp}\n({fp/total:.1%})\nFalse Positives\n(Incorrectly predicted\nas churned)"],
        [f"FN: {fn}\n({fn/total:.1%})\nFalse Negatives\n(Missed actual\nchurned customers)",
         f"TP: {tp}\n({tp/total:.1%})\nTrue Positives\n(Correctly predicted\nchurned)"]
    ])

    # Plot the statistics explanation as a table on an otherwise blank axes
    ax2.axis('off')
    ax2.table(cellText=cm_stats, loc='center', cellLoc='center',
              colLabels=['Predicted NO', 'Predicted YES'],
              rowLabels=['Actual NO', 'Actual YES'])
    ax2.set_title('Confusion Matrix Explanation')

    fig.tight_layout()
    fig.savefig(f'evaluation_plots/{model_name.replace(" ", "_")}_confusion_matrix.png')
    # Close this specific figure so repeated calls in a loop do not accumulate
    # open figures.
    plt.close(fig)

In [45]:
# Function to create and save ROC curve
def plot_roc_curve(y_true, y_proba, model_name):
    """Draw one model's ROC curve, save it as a PNG, and return its AUC.

    The curve is plotted against a dashed diagonal reference line and saved
    to ``evaluation_plots/<model_name>_roc_curve.png`` (spaces in
    ``model_name`` replaced by underscores). The figure is closed afterwards.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, forwarded to ``roc_curve`` / ``roc_auc_score``.
    y_proba : array-like
        Scores for the positive class, forwarded to the same functions.
    model_name : str
        Used in the legend, the title and the output file name.

    Returns
    -------
    The value returned by ``roc_auc_score(y_true, y_proba)``.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_proba)
    area_under_curve = roc_auc_score(y_true, y_proba)

    ax.plot(false_pos_rate, true_pos_rate,
            label=f'{model_name} (AUC = {area_under_curve:.3f})')
    ax.plot([0, 1], [0, 1], 'k--')  # diagonal reference line
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(f'ROC Curve - {model_name}')
    ax.legend()

    fig.savefig(f'evaluation_plots/{model_name.replace(" ", "_")}_roc_curve.png')
    plt.close(fig)
    return area_under_curve

In [46]:
# Function to create and save Precision-Recall curve
def plot_precision_recall_curve(y_true, y_proba, model_name):
    """Draw one model's precision-recall curve and save it as a PNG.

    Recall is on the x-axis and precision on the y-axis; the legend shows the
    average precision (AP). The figure is saved to
    ``evaluation_plots/<model_name>_pr_curve.png`` (spaces in ``model_name``
    replaced by underscores) and then closed. Nothing is returned.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, forwarded to ``precision_recall_curve`` and
        ``average_precision_score``.
    y_proba : array-like
        Scores for the positive class, forwarded to the same functions.
    model_name : str
        Used in the title and the output file name.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    precision_values, recall_values, _thresholds = precision_recall_curve(y_true, y_proba)
    ap_score = average_precision_score(y_true, y_proba)

    ax.plot(recall_values, precision_values, label=f'AP = {ap_score:.3f}')
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_title(f'Precision-Recall Curve - {model_name}')
    ax.legend()

    fig.savefig(f'evaluation_plots/{model_name.replace(" ", "_")}_pr_curve.png')
    plt.close(fig)

In [47]:
# Cast the ground truth to int once, up front: the conversion does not depend
# on the model, so repeating it on every iteration was wasted work.
y_test = pd.Series(y_test).astype(int)

# Empty every column before evaluating so that re-running this cell replaces
# the previous rows instead of appending a second copy of each model. Repeated
# model names would otherwise break the index-based lookups done on the
# results table later in the notebook.
for column_values in results.values():
    column_values.clear()

for name, model in trained_models.items():
    print(f"\n--- Evaluating {name} ---")

    # Hard class predictions, plus the probability of the positive class
    # (column 1 of predict_proba) for the threshold-free metrics and curves.
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]

    y_pred = pd.Series(y_pred).astype(int)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    balanced_acc = balanced_accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)
    mcc = matthews_corrcoef(y_test, y_pred)

    # Store results (one value appended to every column per model)
    results['Model'].append(name)
    results['Accuracy'].append(accuracy)
    results['Balanced Accuracy'].append(balanced_acc)
    results['Precision'].append(precision)
    results['Recall'].append(recall)
    results['F1 Score'].append(f1)
    results['AUC'].append(auc)
    results['MCC'].append(mcc)

    # Print detailed metrics
    print(f"\nDetailed Metrics for {name}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Balanced Accuracy: {balanced_acc:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"AUC: {auc:.4f}")
    print(f"Matthews Correlation Coefficient: {mcc:.4f}")

    # Calculate confusion matrix metrics (ravel order: TN, FP, FN, TP)
    cm = confusion_matrix(y_test, y_pred)
    tn, fp, fn, tp = cm.ravel()
    specificity = tn / (tn + fp)
    false_positive_rate = fp / (fp + tn)
    false_negative_rate = fn / (fn + tp)

    print("\nConfusion Matrix Analysis:")
    print(f"True Negatives: {tn}")
    print(f"False Positives: {fp}")
    print(f"False Negatives: {fn}")
    print(f"True Positives: {tp}")
    print(f"Specificity (True Negative Rate): {specificity:.4f}")
    print(f"False Positive Rate: {false_positive_rate:.4f}")
    print(f"False Negative Rate: {false_negative_rate:.4f}")

    # Print classification report
    print(f"\nClassification Report for {name}:")
    print(classification_report(y_test, y_pred))

    # Save the per-model figures into evaluation_plots/
    plot_confusion_matrix(y_test, y_pred, name)
    plot_roc_curve(y_test, y_proba, name)
    plot_precision_recall_curve(y_test, y_proba, name)


--- Evaluating Logistic Regression ---

Detailed Metrics for Logistic Regression:
Accuracy: 0.9691
Balanced Accuracy: 0.9586
Precision: 0.8268
Recall: 0.9447
F1 Score: 0.8818
AUC: 0.9957
Matthews Correlation Coefficient: 0.8667

Confusion Matrix Analysis:
True Negatives: 19751
False Positives: 558
False Negatives: 156
True Positives: 2663
Specificity (True Negative Rate): 0.9725
False Positive Rate: 0.0275
False Negative Rate: 0.0553

Classification Report for Logistic Regression:
              precision    recall  f1-score   support

           0       0.99      0.97      0.98     20309
           1       0.83      0.94      0.88      2819

    accuracy                           0.97     23128
   macro avg       0.91      0.96      0.93     23128
weighted avg       0.97      0.97      0.97     23128


--- Evaluating Random Forest ---

Detailed Metrics for Random Forest:
Accuracy: 0.9655
Balanced Accuracy: 0.8747
Precision: 0.9525
Recall: 0.7545
F1 Score: 0.8420
AUC: 0.9942
Matthews C

In [None]:
# Overlay every model's ROC curve on one set of axes so they can be compared.
roc_fig, roc_ax = plt.subplots(figsize=(10, 8))
for name, model in trained_models.items():
    y_proba = model.predict_proba(X_test)[:, 1]  # positive-class probability
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    auc = roc_auc_score(y_test, y_proba)
    roc_ax.plot(fpr, tpr, label=f'{name} (AUC = {auc:.3f})')

roc_ax.plot([0, 1], [0, 1], 'k--')  # diagonal reference line
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ROC Curve Comparison')
roc_ax.legend()
roc_fig.savefig('evaluation_plots/roc_curve_comparison.png')
plt.close(roc_fig)

# Create comparison of metrics
results_df = pd.DataFrame(results)

# If the evaluation loop was executed more than once, `results` contains the
# same model several times. Keep only the most recent row per model so the
# index is unique; the later results_df.loc[model, metrics] lookups rely on
# each model name selecting exactly one row.
results_df = results_df.drop_duplicates(subset='Model', keep='last')
results_df = results_df.set_index('Model')

# Save detailed metrics to CSV
results_df.to_csv('../data/model_evaluation_results.csv')

# Create radar chart for model comparison
metrics = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'AUC', 'MCC']
models = results_df.index

# One spoke per metric, evenly spaced around the circle. The first angle is
# repeated at the end so every plotted polygon closes on itself.
angles = np.linspace(0, 2*np.pi, len(metrics), endpoint=False).tolist()
angles.append(angles[0])

# Polar axes for the radar chart
fig, ax = plt.subplots(figsize=(10, 8), subplot_kw={'polar': True})

# One outlined, lightly filled polygon per model
for model in models:
    values = results_df.loc[model, metrics].values.tolist()
    values.append(values[0])  # close the loop, matching `angles`

    ax.plot(angles, values, linewidth=2, linestyle='solid', label=model)
    ax.fill(angles, values, alpha=0.1)

# Label each spoke with its metric name (skip the duplicated closing angle)
ax.set_xticks(angles[:-1])
ax.set_xticklabels(metrics)
ax.set_title('Model Performance Comparison (Radar Chart)', size=15)
ax.legend(loc='upper right')
fig.tight_layout()
fig.savefig('evaluation_plots/radar_chart_comparison.png')
plt.close(fig)

# Plot bar chart comparison of metrics: a 3x2 grid with one panel per metric,
# filled row by row in the order given by `metrics`.
detail_fig, detail_axes = plt.subplots(3, 2, figsize=(14, 10))
for metric, metric_ax in zip(metrics, detail_axes.flat):
    results_df[metric].plot(kind='bar', ax=metric_ax)
    metric_ax.set_title(f'Model Comparison - {metric}')
    metric_ax.set_ylabel('Score')
    metric_ax.set_ylim(0, 1)
    for tick_label in metric_ax.get_xticklabels():
        tick_label.set_rotation(45)
detail_fig.tight_layout()
detail_fig.savefig('evaluation_plots/metrics_comparison_detailed.png')
plt.close(detail_fig)

# Create another bar chart with all metrics together.
# DataFrame.plot opens a brand-new figure unless it is handed an `ax`. The old
# plt.figure(figsize=(14, 8)) call therefore produced a blank figure that was
# never closed (shown as "<Figure size 1400x800 with 0 Axes>"), while the
# chart itself was drawn and saved at the default size. Creating the axes
# explicitly and passing them in draws the chart on the intended 14x8 figure.
summary_fig, summary_ax = plt.subplots(figsize=(14, 8))
results_df[metrics].plot(kind='bar', ax=summary_ax, rot=45)
summary_ax.set_title('Model Performance Comparison')
summary_ax.set_ylabel('Score')
summary_ax.set_ylim(0, 1)
# Anchor the legend just outside the right edge of the axes
summary_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
summary_fig.tight_layout()
summary_fig.savefig('evaluation_plots/metrics_comparison.png')
plt.close(summary_fig)

print("\nEvaluation completed and saved to 'model_evaluation_results.csv'")
print("Evaluation plots have been saved to the 'evaluation_plots' directory")

# Display the best model based on different metrics: for each metric column,
# report the model with the highest score and that score.
print("\nBest models by metric:")
for metric in metrics:
    metric_scores = results_df[metric]
    best_model = metric_scores.idxmax()
    best_score = metric_scores.loc[best_model]
    print(f"Best model by {metric}: {best_model} ({best_score:.4f})")


Evaluation completed and saved to 'model_evaluation_results.csv'
Evaluation plots have been saved to the 'evaluation_plots' directory

Best models by metric:
Best model by Accuracy: Gradient Boosting (0.9866)
Best model by Precision: Random Forest (0.9525)
Best model by Recall: Gradient Boosting (0.9876)
Best model by F1 Score: Gradient Boosting (0.9471)
Best model by AUC: Gradient Boosting (0.9993)
Best model by MCC: Gradient Boosting (0.9404)


<Figure size 1400x800 with 0 Axes>