# Customer Churn Prediction - Results Summary

This notebook provides a comprehensive summary of the churn prediction pipeline results.

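**Note**: This notebook only reads and visualizes outputs; no core logic is executed here. Run the pipeline first so that the report files exist in the project's `reports/` directory — sections whose files are missing print a short notice instead of results.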


In [None]:
import json
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import Image, display

# Add project root to path
project_root = Path().resolve().parent
sys.path.insert(0, str(project_root))

# Set style
plt.style.use('seaborn-v0_8')
%matplotlib inline


## 1. Model Performance Comparison


In [None]:
# Load the test-split metrics of every model whose report file exists.
# Produces `models_data` (one dict per model) and, when at least one report was
# found, `comparison_df` (the same records as a DataFrame).
reports_dir = project_root / "reports"

baseline_metrics_path = reports_dir / "baseline_metrics.json"
tree_metrics_path = reports_dir / "random_forest_metrics.json"

# Display name -> metrics file. Insertion order fixes the row order of the table.
metric_sources = {
    "Baseline (Logistic Regression)": baseline_metrics_path,
    "Random Forest": tree_metrics_path,
}
summary_columns = ["model", "accuracy", "precision", "recall", "f1", "roc_auc", "pr_auc"]

models_data = []
for model_name, metrics_path in metric_sources.items():
    if not metrics_path.exists():
        continue
    # JSON is UTF-8 by specification; don't depend on the platform default encoding.
    with open(metrics_path, encoding="utf-8") as f:
        report = json.load(f)
    models_data.append({
        "model": model_name,
        "split": "test",
        **report["splits"]["test"],
    })

if models_data:
    comparison_df = pd.DataFrame(models_data)
    print("Model Performance Comparison (Test Set)")
    print("=" * 60)
    # reindex instead of [[...]]: a metric absent from every report shows up as
    # NaN rather than raising KeyError, matching how the plotting cell skips
    # missing metrics.
    display(comparison_df.reindex(columns=summary_columns).round(4))
else:
    print("No model metrics found. Please run the training pipeline first.")


In [None]:
# Bar charts comparing the loaded models, one panel per metric (2 x 3 grid).
if not models_data:
    print("No data to visualize.")
else:
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))

    metrics_to_plot = ["accuracy", "precision", "recall", "f1", "roc_auc", "pr_auc"]

    # axes.flat walks the grid row by row, i.e. panel k is axes[k // 3, k % 3].
    for ax, metric in zip(axes.flat, metrics_to_plot):
        # Keep only the models that actually report this metric.
        scored = [
            (entry["model"], entry[metric])
            for entry in models_data
            if entry.get(metric) is not None
        ]
        if not scored:
            continue  # leave the panel empty

        labels = [name for name, _ in scored]
        values = [score for _, score in scored]

        ax.bar(labels, values, alpha=0.7)
        ax.set_title(metric.replace("_", " ").title(), fontsize=12, fontweight="bold")
        ax.set_ylabel("Score")
        ax.set_ylim(0, 1)
        ax.grid(True, alpha=0.3)

        # Print each score just above its bar.
        for position, score in enumerate(values):
            ax.text(position, score + 0.01, f"{score:.3f}", ha="center", va="bottom")

    plt.tight_layout()
    plt.show()


## 2. Threshold Analysis


In [None]:
# Summarise the threshold analysis report: print the optimal threshold and its
# metrics, then plot precision/recall and total cost against the threshold.
threshold_analysis_path = reports_dir / "threshold_analysis.json"

if not threshold_analysis_path.exists():
    print("Threshold analysis not found. Run threshold analysis first.")
else:
    with open(threshold_analysis_path) as f:
        threshold_data = json.load(f)

    optimal_threshold = threshold_data["optimal_threshold"]

    print("Threshold Analysis Summary")
    print("=" * 60)
    print(f"Optimal Threshold: {optimal_threshold:.4f}")
    print("\nOptimal Threshold Metrics:")
    optimal_metrics = threshold_data["optimal_threshold_metrics"]
    # Score-type metrics are shown with four decimals ...
    for label, key in (("Precision", "precision"), ("Recall", "recall"), ("F1-Score", "f1")):
        print(f"  {label}: {optimal_metrics[key]:.4f}")
    # ... error counts are printed as stored.
    for label, key in (("False Positives", "false_positives"), ("False Negatives", "false_negatives")):
        print(f"  {label}: {optimal_metrics[key]}")

    # One row per evaluated threshold.
    threshold_df = pd.DataFrame(threshold_data["threshold_analysis"])
    thresholds = threshold_df["threshold"]

    fig, (ax_scores, ax_cost) = plt.subplots(1, 2, figsize=(16, 6))

    # Left panel: precision and recall curves.
    for column in ("precision", "recall"):
        ax_scores.plot(thresholds, threshold_df[column], label=column.title(), linewidth=2)
    ax_scores.axvline(optimal_threshold, color="red", linestyle="--", label="Optimal")
    ax_scores.set_xlabel("Threshold")
    ax_scores.set_ylabel("Score")
    ax_scores.set_title("Precision and Recall vs Threshold", fontsize=14, fontweight="bold")
    ax_scores.legend()
    ax_scores.grid(True, alpha=0.3)

    # Right panel: total cost curve.
    ax_cost.plot(thresholds, threshold_df["total_cost"], label="Total Cost", linewidth=2, color="red")
    ax_cost.axvline(optimal_threshold, color="green", linestyle="--", label="Optimal")
    ax_cost.set_xlabel("Threshold")
    ax_cost.set_ylabel("Total Cost")
    ax_cost.set_title("Total Cost vs Threshold", fontsize=14, fontweight="bold")
    ax_cost.legend()
    ax_cost.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()


## 3. Business Evaluation


In [None]:
# Summarise the business evaluation report: scenario parameters, the best
# threshold's outcome, and a table plus bar charts comparing the baseline,
# optimized and incremental strategies.
business_eval_path = reports_dir / "business_evaluation_default.json"

if business_eval_path.exists():
    # JSON is UTF-8 by specification; don't depend on the platform default encoding.
    with open(business_eval_path, encoding="utf-8") as f:
        business_data = json.load(f)

    print("Business Evaluation Summary")
    print("=" * 60)

    scenario = business_data["scenario"]
    print(f"Scenario: {scenario['name']}")
    print(f"Retention Cost: ${scenario['retention_cost_per_customer']:.2f}")
    print(f"Churn Loss: ${scenario['churn_loss_per_customer']:.2f}")
    # The budget is optional: a missing key, None and 0 all mean "no budget line".
    intervention_budget = scenario.get("intervention_budget")
    if intervention_budget:
        print(f"Budget: ${intervention_budget:,.2f}")

    best_threshold = business_data["best_threshold"]
    print(f"\nBest Threshold: {best_threshold['threshold']:.4f}")
    print(f"Net Gain: ${best_threshold['net_gain']:,.2f}")
    print(f"ROI: {best_threshold['roi_percent']:.2f}%")
    print(f"Interventions: {best_threshold['total_interventions']:,}")
    print(f"Prevented Churns: {best_threshold['prevented_churns']:,}")

    # Comparison table. Stored under its own name so the model-performance
    # `comparison_df` built in section 1 is not overwritten.
    comparison = business_data["comparison"]
    strategy_comparison_df = pd.DataFrame([
        {"Strategy": label, **comparison[key]}
        for label, key in (
            ("Baseline", "baseline"),
            ("Optimized", "optimized"),
            ("Incremental", "incremental"),
        )
    ])

    print("\nComparison Table:")
    display(strategy_comparison_df[["Strategy", "net_gain", "interventions", "investment", "roi_percent"]].round(2))

    # Visualize comparison: one panel per quantity, same bar colours in both.
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    strategies = strategy_comparison_df["Strategy"]
    bar_colors = ["gray", "green", "blue"]
    panels = [
        (axes[0], "net_gain", "Net Gain by Strategy", "Net Gain ($)", "${:,.0f}"),
        (axes[1], "roi_percent", "ROI by Strategy", "ROI (%)", "{:.1f}%"),
    ]

    for ax, column, title, ylabel, label_format in panels:
        bar_values = strategy_comparison_df[column]
        ax.bar(strategies, bar_values, alpha=0.7, color=bar_colors)
        ax.set_title(title, fontsize=14, fontweight="bold")
        ax.set_ylabel(ylabel)
        ax.grid(True, alpha=0.3)
        for position, value in enumerate(bar_values):
            # Anchor the label outside the bar: above positive bars, below
            # negative ones (e.g. a negative incremental gain).
            ax.text(
                position,
                value,
                label_format.format(value),
                ha="center",
                va="bottom" if value >= 0 else "top",
            )

    plt.tight_layout()
    plt.show()
else:
    print("Business evaluation not found. Run business evaluation first.")


## 4. Error Analysis


In [None]:
# Point the reader to the error analysis report and, when available, show the
# accompanying plot. `Image` and `display` come from the notebook's import cell.
error_report_path = reports_dir / "error_analysis.md"

if error_report_path.exists():
    print("Error Analysis Report")
    print("=" * 60)
    print("\nSee error_analysis.md for detailed segment analysis.")

    # The plot is optional; the report may exist without it.
    error_plot_path = reports_dir / "error_analysis.png"
    if error_plot_path.exists():
        display(Image(str(error_plot_path)))
    else:
        print("Error analysis plot not found.")
else:
    print("Error analysis not found. Run error analysis first.")


## 5. Key Conclusions


In [None]:
# Collect one-line takeaways from whichever pipeline outputs are available.
# Relies on `models_data`, `threshold_analysis_path` and `business_eval_path`
# defined by the earlier cells; the JSON reports are re-read here.
print("Key Conclusions")
print("=" * 60)

conclusions = []

# Model performance: the best model is the one with the highest ROC-AUC.
if models_data:
    # A missing or None ROC-AUC ranks as 0 so max() never compares None.
    best_model = max(models_data, key=lambda entry: entry.get("roc_auc") or 0)
    best_roc_auc = best_model.get("roc_auc")
    # Format only real numbers: applying ":.4f" to the "N/A" placeholder (or to
    # None) would raise instead of printing the fallback.
    roc_auc_text = f"{best_roc_auc:.4f}" if best_roc_auc is not None else "N/A"
    conclusions.append(f"✓ Best Model: {best_model['model']} with ROC-AUC of {roc_auc_text}")

# Threshold optimization
if threshold_analysis_path.exists():
    with open(threshold_analysis_path, encoding="utf-8") as f:
        threshold_data = json.load(f)
    optimal = threshold_data["optimal_threshold"]
    conclusions.append(f"✓ Optimal Threshold: {optimal:.4f} (minimizes total cost)")

# Business impact
if business_eval_path.exists():
    with open(business_eval_path, encoding="utf-8") as f:
        business_data = json.load(f)
    incremental = business_data["comparison"]["incremental"]
    conclusions.append(f"✓ Incremental Net Gain: ${incremental['net_gain']:,.2f}")
    conclusions.append(f"✓ Incremental ROI: {incremental['roi_percent']:.2f}%")

if conclusions:
    for conclusion in conclusions:
        print(conclusion)
else:
    print("Run the full pipeline to generate conclusions.")


## 6. Next Steps

Based on the results:

1. **Model Improvement**: Consider hyperparameter tuning if not already done
2. **Feature Engineering**: Review error analysis to identify segments needing better features
3. **Threshold Optimization**: Adjust threshold based on business constraints
4. **Monitoring**: Set up monitoring for model performance in production
5. **A/B Testing**: Test the model in production with controlled experiments
