# Phase 4: Human Escalation System

**Objective:** Design and implement an intelligent escalation system that routes uncertain model predictions to human review.

**Goals:**
- Define escalation criteria based on uncertainty
- Optimize thresholds for cost-benefit
- Evaluate system performance
- Analyze automation vs accuracy trade-offs
- Prepare for comprehensive evaluation

## 1. Setup and Imports

In [None]:
# Import libraries
# Notebook setup: third-party imports, the project-local escalation module,
# and display/plotting configuration used by the cells that follow.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import joblib
import sys
# Suppresses every Python warning for the rest of the session.
# NOTE(review): this also hides numpy/pandas warnings — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Add src to path
# A relative entry is resolved against the current working directory, so this
# must run before the escalation_system import below.
sys.path.append('../src')

# Custom modules
# NOTE(review): simulate_human_review is not referenced in the cells visible here.
from escalation_system import (
    EscalationSystem,
    simulate_human_review,
    analyze_escalation_patterns
)

# Display settings
# Show all DataFrame columns instead of truncating wide frames.
pd.set_option('display.max_columns', None)
# IPython magic: only valid when executed inside a Jupyter/IPython kernel.
%matplotlib inline
sns.set_style('whitegrid')

print("‚úÖ All libraries imported successfully")

## 2. Load Data and Uncertainty Estimates

In [None]:
# Load the validation/test feature splits, apply the same light preprocessing
# as earlier phases, then load the Phase 3 uncertainty estimates.
print("Loading data...")
X_val = pd.read_csv('../data/splits/X_val.csv')
X_test = pd.read_csv('../data/splits/X_test.csv')

# Preprocess data (same as Phase 2 & 3)
from sklearn.preprocessing import LabelEncoder

# Drop free-text columns from each frame independently. errors='ignore' skips
# names a frame does not contain, so a text column that exists in only one
# split neither raises KeyError nor survives in the other split.
text_cols = ['title', 'desc']
existing_text_cols = [
    col for col in text_cols
    if col in X_val.columns or col in X_test.columns
]
X_val = X_val.drop(columns=existing_text_cols, errors='ignore')
X_test = X_test.drop(columns=existing_text_cols, errors='ignore')

# Convert non-numeric columns to integer codes.
# The encoder is fitted on the union of both splits so transform() never meets
# an unseen label.
# NOTE(review): the encoder is refitted here rather than loaded from an earlier
# phase, so the integer codes match Phase 2/3 only if those phases saw the same
# set of values — confirm.
non_numeric_cols = X_val.select_dtypes(include=['object', 'category']).columns
for col in non_numeric_cols:
    le = LabelEncoder()
    all_values = pd.concat([X_val[col], X_test[col]]).astype(str).unique()
    le.fit(all_values)
    X_val[col] = le.transform(X_val[col].astype(str))
    X_test[col] = le.transform(X_test[col].astype(str))

# Handle NaN values
X_val = X_val.fillna(0)
X_test = X_test.fillna(0)

# Load uncertainty estimates from Phase 3.
# joblib.load unpickles the file, which can execute arbitrary code: only load
# artifacts produced by this project's own pipeline.
uncertainty_data = joblib.load('../results/models/uncertainty_estimates.pkl')

# Extract validation data
proba_val = uncertainty_data['val']['proba']
uncertainty_val = uncertainty_data['val']['uncertainty']
y_val = uncertainty_data['val']['y_true']
y_pred_val = uncertainty_data['val']['y_pred']

# Extract test data
proba_test = uncertainty_data['test']['proba']
uncertainty_test = uncertainty_data['test']['uncertainty']
y_test = uncertainty_data['test']['y_true']
y_pred_test = uncertainty_data['test']['y_pred']

print(f"Validation samples: {len(y_val)}")
print(f"Test samples: {len(y_test)}")
print("‚úÖ Data loaded successfully")

## 3. Define Business Costs

In [None]:
# Per-decision business costs handed to the escalation system.
# These are placeholder values: adjust them to the actual business requirements.
print("Defining business costs...\n")

cost_params = dict(
    cost_false_positive=5.0,   # approving a loan that goes on to default
    cost_false_negative=1.0,   # rejecting a good loan (opportunity cost)
    cost_human_review=0.5,     # escalating one case to a human expert
)

# Emit the whole cost report in one call; one line per entry.
cost_report = (
    "Business Cost Parameters:",
    f"  False Positive (approve default): ${cost_params['cost_false_positive']}",
    f"  False Negative (reject good):     ${cost_params['cost_false_negative']}",
    f"  Human Review:                      ${cost_params['cost_human_review']}",
    f"\nCost ratio (FP/FN): {cost_params['cost_false_positive']/cost_params['cost_false_negative']:.1f}:1",
)
print(*cost_report, sep="\n")

## 4. Initialize Escalation System

In [None]:
# Build the escalation system from the starting (pre-optimization) thresholds
# together with the business costs defined earlier in the notebook.
initial_thresholds = dict(uncertainty_threshold=0.1, confidence_threshold=0.7)
escalation_system = EscalationSystem(**initial_thresholds, **cost_params)

# Echo the thresholds as stored on the instance.
print(
    "‚úÖ Escalation system initialized",
    f"\nInitial Configuration:",
    f"  Uncertainty threshold: {escalation_system.uncertainty_threshold}",
    f"  Confidence threshold:  {escalation_system.confidence_threshold}",
    sep="\n",
)

## 5. Baseline Evaluation (Before Optimization)

In [None]:
# Score the system on the validation split using the thresholds it was
# constructed with; this is the reference point for the later optimization.
print("Evaluating with initial thresholds...\n")
metrics_initial = escalation_system.evaluate_system(
    y_val, y_pred_val, proba_val, uncertainty_val
)

# Report automation volume, quality of the automated decisions, and costs.
baseline_report = (
    "Initial System Performance:",
    f"  Automation Rate:      {metrics_initial['automation_rate']:.2%}",
    f"  Samples Automated:    {metrics_initial['n_automated']}",
    f"  Samples Escalated:    {metrics_initial['n_escalated']}",
    f"\nAutomated Decisions:",
    f"  Accuracy:             {metrics_initial['accuracy_automated']:.4f}",
    f"  Precision:            {metrics_initial['precision_automated']:.4f}",
    f"  Recall:               {metrics_initial['recall_automated']:.4f}",
    f"  F1-Score:             {metrics_initial['f1_automated']:.4f}",
    f"\nCosts:",
    f"  Total Cost:           ${metrics_initial['total_cost']:.2f}",
    f"  Baseline Cost:        ${metrics_initial['baseline_cost']:.2f}",
    f"  Cost Savings:         ${metrics_initial['cost_savings']:.2f}",
)
print(*baseline_report, sep="\n")

## 6. Optimize Escalation Thresholds

In [None]:
# Grid-search the uncertainty and confidence thresholds on the validation split.
print("\nOptimizing escalation thresholds using grid search...")
print("This may take a few minutes...\n")

# NOTE(review): proba_val is passed before y_pred_val here, whereas the
# evaluate_system calls in this notebook pass y_pred before proba — confirm the
# order against optimize_thresholds' signature.
best_unc_thresh, best_conf_thresh, optimization_results = escalation_system.optimize_thresholds(
    y_val,
    proba_val,
    y_pred_val,
    uncertainty_val,
    uncertainty_range=(0.05, 0.25),
    confidence_range=(0.55, 0.85),
    n_steps=15
)

# Apply the winning thresholds explicitly. Every later cell evaluates through
# escalation_system, so without this the "optimized" results would silently
# reuse the initial thresholds unless optimize_thresholds mutates the instance
# itself (in which case these assignments are a harmless no-op).
escalation_system.uncertainty_threshold = best_unc_thresh
escalation_system.confidence_threshold = best_conf_thresh

# presumably optimization_results is ordered best-first — verify in optimize_thresholds
print("\nTop 5 Configurations:")
print(optimization_results.head(5).to_string(index=False))

## 7. Visualize Optimization Results

In [None]:
# Four-panel summary of the grid search: cost and accuracy against automation
# rate (top row) and mean total cost per threshold value (bottom row).
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
label_font = dict(fontsize=12, fontweight='bold')
title_font = dict(fontsize=14, fontweight='bold')

# Top-left: total cost against automation rate, coloured by escalation count
ax = axes[0, 0]
ax.scatter(
    optimization_results['automation_rate'],
    optimization_results['total_cost'],
    c=optimization_results['n_escalated'],
    cmap='viridis', s=50, alpha=0.6,
)
ax.set_xlabel('Automation Rate', **label_font)
ax.set_ylabel('Total Cost', **label_font)
ax.set_title('Cost vs Automation Trade-off', **title_font)
ax.grid(alpha=0.3)
cbar = plt.colorbar(ax.collections[0], ax=ax)
cbar.set_label('# Escalated', rotation=270, labelpad=15)

# Top-right: automated accuracy against automation rate, coloured by total cost
ax = axes[0, 1]
ax.scatter(
    optimization_results['automation_rate'],
    optimization_results['accuracy_automated'],
    c=optimization_results['total_cost'],
    cmap='RdYlGn_r', s=50, alpha=0.6,
)
ax.set_xlabel('Automation Rate', **label_font)
ax.set_ylabel('Automated Accuracy', **label_font)
ax.set_title('Accuracy vs Automation', **title_font)
ax.grid(alpha=0.3)
cbar = plt.colorbar(ax.collections[0], ax=ax)
cbar.set_label('Total Cost', rotation=270, labelpad=15)

# Bottom row: mean total cost per threshold value, one panel per threshold.
# Each entry: (axes, grouping column, line format, extra style, x label, title)
threshold_panels = [
    (axes[1, 0], 'uncertainty_threshold', 'o-', {},
     'Uncertainty Threshold', 'Impact of Uncertainty Threshold'),
    (axes[1, 1], 'confidence_threshold', 's-', {'color': 'coral'},
     'Confidence Threshold', 'Impact of Confidence Threshold'),
]
for ax, threshold_col, line_fmt, extra_style, x_label, panel_title in threshold_panels:
    grouped = optimization_results.groupby(threshold_col).agg({
        'total_cost': 'mean',
        'automation_rate': 'mean'
    })
    ax.plot(grouped.index, grouped['total_cost'], line_fmt,
            linewidth=2, markersize=6, **extra_style)
    ax.set_xlabel(x_label, **label_font)
    ax.set_ylabel('Average Total Cost', **label_font)
    ax.set_title(panel_title, **title_font)
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.savefig('../results/figures/threshold_optimization.png', dpi=300, bbox_inches='tight')
plt.show()
print("‚úÖ Optimization visualizations saved")

## 8. Evaluate Optimized System

In [None]:
# Re-score the validation split and compare against the baseline run.
# NOTE(review): this uses whatever thresholds escalation_system currently holds;
# confirm they are the optimized ones at this point, otherwise the numbers
# below simply repeat the baseline evaluation.
print("Evaluating with optimized thresholds...\n")
metrics_optimized = escalation_system.evaluate_system(
    y_val, y_pred_val, proba_val, uncertainty_val
)

optimized_report = (
    "Optimized System Performance:",
    f"  Automation Rate:      {metrics_optimized['automation_rate']:.2%}",
    f"  Samples Automated:    {metrics_optimized['n_automated']}",
    f"  Samples Escalated:    {metrics_optimized['n_escalated']}",
    f"\nAutomated Decisions:",
    f"  Accuracy:             {metrics_optimized['accuracy_automated']:.4f}",
    f"  Precision:            {metrics_optimized['precision_automated']:.4f}",
    f"  Recall:               {metrics_optimized['recall_automated']:.4f}",
    f"  F1-Score:             {metrics_optimized['f1_automated']:.4f}",
    f"\nCosts:",
    f"  Total Cost:           ${metrics_optimized['total_cost']:.2f}",
    f"  Baseline Cost:        ${metrics_optimized['baseline_cost']:.2f}",
    f"  Cost Savings:         ${metrics_optimized['cost_savings']:.2f}",
)
print(*optimized_report, sep="\n")

# Deltas relative to the baseline run: positive cost reduction means cheaper.
improvement_report = (
    f"\nüìä Improvement vs Initial:",
    f"  Cost Reduction:       {(metrics_initial['total_cost'] - metrics_optimized['total_cost']):.2f}",
    f"  Automation Change:    {(metrics_optimized['automation_rate'] - metrics_initial['automation_rate']):.2%}",
)
print(*improvement_report, sep="\n")

## 9. Analyze Escalation Patterns

In [None]:
# Per-sample escalation decisions for the validation split, plus the detail
# table that the pattern analysis and the plots below are built from.
escalate_mask, details = escalation_system.process_predictions(
    proba_val, uncertainty_val, return_details=True
)

# Summarize the escalated cases.
patterns = analyze_escalation_patterns(details, top_n=10)

pattern_report = (
    "Escalation Pattern Analysis:",
    f"\nTotal Escalated: {patterns['total_escalated']}",
    f"Escalation Rate: {patterns['escalation_rate']:.2%}",
    f"\nAverage Metrics (Escalated Cases):",
    f"  Uncertainty:  {patterns['avg_uncertainty']:.4f}",
    f"  Confidence:   {patterns['avg_confidence']:.4f}",
    f"  Probability:  {patterns['avg_probability']:.4f}",
    f"\nTop Escalation Reasons:",
    patterns['top_reasons'].to_string(),
)
print(*pattern_report, sep="\n")

In [None]:
# Compare escalated and automated cases: three overlaid histograms plus a
# probability/uncertainty scatter of the decision space.
escalated = details[details['escalate'].eq(True)]
automated = details[details['escalate'].eq(False)]

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Each entry: (axes, details column, reference-line x, reference-line label,
#              x-axis label, panel title)
hist_panels = [
    (axes[0, 0], 'uncertainty', escalation_system.uncertainty_threshold,
     'Threshold', 'Uncertainty', 'Uncertainty Distribution'),
    (axes[0, 1], 'confidence', escalation_system.confidence_threshold,
     'Threshold', 'Confidence', 'Confidence Distribution'),
    (axes[1, 0], 'probability', 0.5,
     'Decision Boundary', 'Predicted Probability (Default)', 'Probability Distribution'),
]
for ax, column, line_x, line_label, x_label, panel_title in hist_panels:
    ax.hist(automated[column], bins=50, alpha=0.6, label='Automated', color='green')
    ax.hist(escalated[column], bins=50, alpha=0.6, label='Escalated', color='red')
    ax.axvline(line_x, color='black', linestyle='--', label=line_label)
    ax.set_xlabel(x_label, fontsize=12, fontweight='bold')
    ax.set_ylabel('Frequency', fontsize=12, fontweight='bold')
    ax.set_title(panel_title, fontsize=14, fontweight='bold')
    ax.legend(fontsize=11)
    ax.grid(alpha=0.3)

# Bottom-right: decision space, with the uncertainty threshold and the 0.5
# probability boundary drawn as guide lines.
space_ax = axes[1, 1]
space_ax.scatter(automated['probability'], automated['uncertainty'],
                 alpha=0.3, s=10, label='Automated', color='green')
space_ax.scatter(escalated['probability'], escalated['uncertainty'],
                 alpha=0.3, s=10, label='Escalated', color='red')
space_ax.axhline(escalation_system.uncertainty_threshold, color='black', linestyle='--', alpha=0.5)
space_ax.axvline(0.5, color='black', linestyle='--', alpha=0.5)
space_ax.set_xlabel('Predicted Probability (Default)', fontsize=12, fontweight='bold')
space_ax.set_ylabel('Uncertainty', fontsize=12, fontweight='bold')
space_ax.set_title('Escalation Decision Space', fontsize=14, fontweight='bold')
space_ax.legend(fontsize=11)
space_ax.grid(alpha=0.3)

plt.tight_layout()
plt.savefig('../results/figures/escalation_characteristics.png', dpi=300, bbox_inches='tight')
plt.show()
print("‚úÖ Escalation characteristics saved")

## 10. Test Set Evaluation

In [None]:
# Held-out check: score the test split with the thresholds currently stored on
# escalation_system.
# NOTE(review): confirm those are the optimized thresholds at this point.
print("Evaluating on test set...\n")
metrics_test = escalation_system.evaluate_system(
    y_test,
    y_pred_test,
    proba_test,
    uncertainty_test
)

print("Test Set Performance:")
print(f"  Automation Rate:      {metrics_test['automation_rate']:.2%}")
print(f"  Samples Automated:    {metrics_test['n_automated']}")
print(f"  Samples Escalated:    {metrics_test['n_escalated']}")
print(f"\nAutomated Decisions:")
print(f"  Accuracy:             {metrics_test['accuracy_automated']:.4f}")
print(f"  Precision:            {metrics_test['precision_automated']:.4f}")
print(f"  Recall:               {metrics_test['recall_automated']:.4f}")
print(f"  F1-Score:             {metrics_test['f1_automated']:.4f}")
print(f"\nCosts:")
print(f"  Total Cost:           ${metrics_test['total_cost']:.2f}")
print(f"  Baseline Cost:        ${metrics_test['baseline_cost']:.2f}")
print(f"  Cost Savings:         ${metrics_test['cost_savings']:.2f}")
# The savings rate is relative to the baseline cost, so it is undefined when
# the baseline is zero (ZeroDivisionError for plain floats, inf/nan for numpy).
if metrics_test['baseline_cost']:
    print(f"  Savings Rate:         {metrics_test['cost_savings']/metrics_test['baseline_cost']*100:.1f}%")
else:
    print("  Savings Rate:         n/a (baseline cost is 0)")

## 11. Compare Performance Across Sets

In [None]:
# Create comparison dataframe
comparison = pd.DataFrame({
    'Metric': ['Automation Rate', 'Accuracy (Auto)', 'Precision (Auto)', 
               'Recall (Auto)', 'F1-Score (Auto)', 'Total Cost', 'Cost Savings'],
    'Validation': [
        f"{metrics_optimized['automation_rate']:.2%}",
        f"{metrics_optimized['accuracy_automated']:.4f}",
        f"{metrics_optimized['precision_automated']:.4f}",
        f"{metrics_optimized['recall_automated']:.4f}",
        f"{metrics_optimized['f1_automated']:.4f}",
        f"${metrics_optimized['total_cost']:.2f}",
        f"${metrics_optimized['cost_savings']:.2f}"
    ],
    'Test': [
        f"{metrics_test['automation_rate']:.2%}",
        f"{metrics_test['accuracy_automated']:.4f}",
        f"{metrics_test['precision_automated']:.4f}",
        f"{metrics_test['recall_automated']:.4f}",
        f"{metrics_test['f1_automated']:.4f}",
        f"${metrics_test['total_cost']:.2f}",
        f"${metrics_test['cost_savings']:.2f}"
    ]
})

print("\n" + "="*60)
print("PERFORMANCE COMPARISON: VALIDATION VS TEST")
print("="*60)
print(comparison.to_string(index=False))
print("="*60)

## 12. Save Escalation System

In [None]:
# Persist the configured system, the full grid-search table, and one metrics
# row per evaluation run.
print("\nSaving escalation system...")
joblib.dump(escalation_system, '../results/models/escalation_system.pkl')
print("‚úÖ Escalation system saved")

# Grid-search results
optimization_results.to_csv('../results/reports/threshold_optimization_results.csv', index=False)
print("‚úÖ Optimization results saved")

# One row per evaluation run: the 'Set' label followed by that run's metrics.
labelled_runs = [
    ('Validation (Initial)', metrics_initial),
    ('Validation (Optimized)', metrics_optimized),
    ('Test', metrics_test),
]
performance_summary = pd.DataFrame(
    [{'Set': run_label, **run_metrics} for run_label, run_metrics in labelled_runs]
)
performance_summary.to_csv('../results/reports/escalation_performance.csv', index=False)
print("‚úÖ Performance metrics saved")

## 13. Summary and Key Findings

In [None]:
# Closing summary: achievements, headline test-set numbers, success-criteria
# checks against the test metrics, and a pointer to the next phase.
print("="*60)
print("PHASE 4 COMPLETE: HUMAN ESCALATION SYSTEM")
print("="*60)

print("\n‚úÖ Achievements:")
print("   - Escalation system designed and implemented")
print("   - Thresholds optimized using cost-benefit analysis")
print("   - System evaluated on validation and test sets")
print("   - Escalation patterns analyzed")
print("   - All artifacts saved")

print("\nüìä Final Test Set Performance:")
print(f"   Automation Rate:      {metrics_test['automation_rate']:.2%}")
print(f"   Automated Accuracy:   {metrics_test['accuracy_automated']:.4f}")
print(f"   Cost Savings:         ${metrics_test['cost_savings']:.2f}")
# The savings rate is relative to the baseline cost, so it is undefined when
# the baseline is zero (ZeroDivisionError for plain floats, inf/nan for numpy).
if metrics_test['baseline_cost']:
    print(f"   Savings Rate:         {metrics_test['cost_savings']/metrics_test['baseline_cost']*100:.1f}%")
else:
    print("   Savings Rate:         n/a (baseline cost is 0)")

# Success criteria: at least 70% automation, at least 85% accuracy on the
# automated decisions, and strictly positive cost savings.
print("\nüéØ Phase 4 Success Criteria:")
if metrics_test['automation_rate'] >= 0.70:
    print(f"   ‚úÖ Automation rate ‚â•70%: {metrics_test['automation_rate']:.2%}")
else:
    print(f"   ‚ö†Ô∏è  Automation rate: {metrics_test['automation_rate']:.2%}")

if metrics_test['accuracy_automated'] >= 0.85:
    print(f"   ‚úÖ Automated accuracy ‚â•85%: {metrics_test['accuracy_automated']:.4f}")
else:
    print(f"   ‚ö†Ô∏è  Automated accuracy: {metrics_test['accuracy_automated']:.4f}")

if metrics_test['cost_savings'] > 0:
    print(f"   ‚úÖ Positive cost savings: ${metrics_test['cost_savings']:.2f}")
else:
    print(f"   ‚ö†Ô∏è  Cost savings: ${metrics_test['cost_savings']:.2f}")

print("\nüöÄ Next: Phase 5 - Comprehensive Evaluation")
print("   - Full system evaluation with all components")
print("   - Interpretability analysis with SHAP")
print("   - Ablation studies")
print("   - Final visualizations and insights")
print("="*60)