In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (accuracy_score, precision_score, recall_score, 
                           f1_score, roc_auc_score, confusion_matrix, 
                           classification_report, roc_curve, precision_recall_curve)
import joblib

# Load models
import errno
from pathlib import Path

# Check every required input up front so that a missing artifact is reported
# together with the directory that was searched, rather than failing part-way
# through loading with only a bare file name.
for required_file in ('sgd_best_model.pkl', 'rf_best_model.pkl', 'HR_data.csv'):
    if not Path(required_file).is_file():
        raise FileNotFoundError(
            errno.ENOENT,
            f"Required input not found in '{Path.cwd()}' "
            "(create it or change the working directory before running this comparison)",
            required_file,
        )

# NOTE(review): joblib.load unpickles arbitrary Python objects; only load
# model files that come from a trusted source.
sgd_model = joblib.load('sgd_best_model.pkl')
rf_model = joblib.load('rf_best_model.pkl')

# Load and prepare test data
from hr_analytics_preparation import prepare_data
df = pd.read_csv('HR_data.csv')
# NOTE(review): prepare_data is project code not visible here; presumably it
# reproduces the same train/test split the saved models were trained on.
# TODO confirm, otherwise training rows could end up in this evaluation.
X_train, X_test, y_train, y_test, preprocessor = prepare_data(df)

# Report header.
rule = "=" * 60
print("MODEL COMPARISON ANALYSIS")
print(rule)

# Evaluate both models
# Keys are the display names reused by every table and plot below.
models = {
    'Mini-batch GD Logistic Regression': sgd_model,
    'Random Forest': rf_model,
}

# Metrics computed from hard class predictions, listed in report order.
label_scorers = {
    'Accuracy': accuracy_score,
    'Precision': precision_score,
    'Recall': recall_score,
    'F1-Score': f1_score,
}

results = {}

for name, model in models.items():
    print(f"\nEvaluating {name}...")

    # Hard labels feed the threshold-based metrics; the score in probability
    # column 1 feeds ROC-AUC.
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    scores = {
        metric: scorer(y_test, y_pred)
        for metric, scorer in label_scorers.items()
    }
    scores['ROC-AUC'] = roc_auc_score(y_test, y_pred_proba)
    results[name] = scores

    for metric, value in scores.items():
        print(f"{metric}: {value:.4f}")

# Create comparison dataframe
# Transposed so that each row is a model and each column a metric.
results_df = pd.DataFrame(results).T.round(4)

print(f"\n{rule}")
print("COMPREHENSIVE MODEL COMPARISON")
print(rule)
print(results_df)

# Visualization
# One bar-chart panel per metric on a 2x3 grid; the grid has one more slot
# than there are metrics, so the spare panel is hidden further down.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
fig.suptitle('Model Comparison - HR Analytics Employee Turnover Prediction', fontsize=16)

# 1. Metrics comparison
metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC']
# Concrete list of display names, so bar() is not handed a dict view.
model_names = list(models)
panels = axes.ravel()

for ax, metric in zip(panels, metrics):
    values = [results[model_name][metric] for model_name in model_names]
    bars = ax.bar(model_names, values, alpha=0.7, color=['skyblue', 'lightcoral'])
    ax.set_title(f'{metric} Comparison')
    ax.set_ylabel(metric)
    ax.tick_params(axis='x', rotation=45)
    # Every metric here lies in [0, 1]; a shared range keeps the panels
    # comparable and leaves headroom for the value labels above the bars.
    ax.set_ylim(0, 1.1)

    # Add value labels on bars
    for bar, value in zip(bars, values):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01,
                f'{value:.4f}', ha='center', va='bottom')

# Hide grid slots that received no metric instead of drawing empty axes.
for unused_ax in panels[len(metrics):]:
    unused_ax.set_visible(False)

# Reserve the top strip for the suptitle, then show this figure on its own
# like the other sections do.
fig.tight_layout(rect=(0, 0, 1, 0.96))
plt.show()

# 2. ROC Curve
# One curve per model on a shared axes, plus the chance diagonal for reference.
roc_ax = plt.figure(figsize=(10, 8)).gca()

for name, model in models.items():
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    false_pos_rate, true_pos_rate, _ = roc_curve(y_test, y_pred_proba)
    auc_score = roc_auc_score(y_test, y_pred_proba)
    roc_ax.plot(false_pos_rate, true_pos_rate, label=f'{name} (AUC = {auc_score:.4f})')

roc_ax.plot([0, 1], [0, 1], 'k--', label='Random Classifier')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ROC Curve Comparison')
roc_ax.legend()
roc_ax.grid(True)
plt.show()

# 3. Precision-Recall Curve
# Recall on the x-axis, precision on the y-axis, one curve per model.
pr_ax = plt.figure(figsize=(10, 8)).gca()

for name, model in models.items():
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    precision_values, recall_values, _ = precision_recall_curve(y_test, y_pred_proba)
    pr_ax.plot(recall_values, precision_values, label=f'{name}')

pr_ax.set_xlabel('Recall')
pr_ax.set_ylabel('Precision')
pr_ax.set_title('Precision-Recall Curve Comparison')
pr_ax.legend()
pr_ax.grid(True)
plt.show()

# 4. Confusion Matrices
# One heatmap per model, side by side. The panel count follows the number of
# entries in `models` instead of being fixed at two; with two models the
# figure is the same 15x6 inches as before.
n_models = len(models)
fig, axes = plt.subplots(1, n_models, figsize=(7.5 * n_models, 6))
# With a single model plt.subplots returns a bare Axes; wrap it so `axes`
# is always a 1-D array.
axes = np.atleast_1d(axes)

for ax, (name, model) in zip(axes, models.items()):
    y_pred = model.predict(X_test)
    # Rows are actual classes, columns are predicted classes.
    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(f'Confusion Matrix - {name}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')

plt.tight_layout()
plt.show()

# Print the rationale for the metric ranking that drives the recommendation
# made at the end of the report.
section_rule = "=" * 60
print(f"\n{section_rule}")
print("METRICS PRIORITIZATION EXPLANATION")
print(section_rule)

metrics_rationale = """
For employee turnover prediction, we prioritize the following metrics:

1. RECALL: Most important metric because we want to identify as many employees 
   who might leave as possible (minimize false negatives). Missing an employee 
   who will leave (false negative) is more costly than incorrectly predicting 
   someone will leave (false positive).

2. F1-Score: Balances precision and recall, giving us a single metric to 
   evaluate model performance considering both false positives and false negatives.

3. ROC-AUC: Measures the model's ability to distinguish between classes across 
   all classification thresholds.

4. PRECISION: Important but secondary - we want our predictions of who will 
   leave to be accurate, but it's better to be cautious and flag potential 
   leavers even if we're not 100% sure.

BUSINESS IMPACT:
- High Recall: Prevents surprise resignations, allows proactive retention efforts
- Moderate Precision: Avoids wasting too many resources on false alarms
- Overall: Better to be safe than sorry in employee retention
"""
print(metrics_rationale)

# Final recommendation
# Recall is the deciding metric: pick the row label with the highest
# (already rounded) recall and pull that model's metric row for the summary.
recommended_name = results_df['Recall'].idxmax()
best_model = results_df.loc[recommended_name]

print(f"\nRECOMMENDED MODEL: {recommended_name}")
print(f"Reason: Highest Recall ({best_model['Recall']}) - most important for employee retention")
print(f"Additional strength: F1-Score of {best_model['F1-Score']}")

# Save comparison results
# The index (model names) is written as the first CSV column.
results_df.to_csv('model_comparison_results.csv')
print("\nComparison results saved to 'model_comparison_results.csv'")

FileNotFoundError: [Errno 2] No such file or directory: 'sgd_best_model.pkl'