# ⭐ Model Evaluation - Comprehensive Analysis

This notebook provides a comprehensive evaluation of our predictive models for customer analytics.

## Objectives:
- Evaluate model performance using multiple metrics
- Conduct statistical significance testing
- Analyze feature importance and interpretability
- Assess model robustness and generalization
- Validate business impact and ROI
- Provide deployment recommendations


In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Machine Learning evaluation imports
from sklearn.metrics import (
    classification_report, confusion_matrix, roc_auc_score, roc_curve, 
    precision_recall_curve, f1_score, precision_score, recall_score,
    mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
)
from sklearn.model_selection import cross_val_score, learning_curve, validation_curve
from sklearn.calibration import calibration_curve
from scipy import stats
import shap

import warnings
# Silences every warning category for the rest of the session, including
# scikit-learn's undefined-metric warnings raised by the scoring calls below.
warnings.filterwarnings('ignore')

# Import custom modules
import sys
# Makes the project's `utils` package importable relative to the notebook's
# working directory.
sys.path.append('../src')
# NOTE(review): ModelEvaluator is not referenced in the visible part of this
# notebook - confirm whether the import is still needed.
from utils.evaluation import ModelEvaluator

print("📊 Model evaluation libraries loaded!")

# Set plotting style
plt.style.use('default')
sns.set_palette('husl')
print("🎨 Plotting configuration set")

In [None]:
# Load model results and predictions
# Reads the prediction table and the model summary table from disk; if either
# file is missing, both are replaced by synthetic stand-ins so the rest of the
# notebook can still run.
try:
    predictions_df = pd.read_csv('../reports/analysis/04_model_predictions.csv')
    model_summary = pd.read_csv('../reports/analysis/04_model_performance_summary.csv')
    print(f"✅ Model results loaded successfully")
    print(f"Predictions shape: {predictions_df.shape}")
    display(predictions_df.head())
    print(f"\nModel Summary:")
    display(model_summary)
except FileNotFoundError:
    print("⚠️ Model results not found. Creating synthetic evaluation data...")
    # Create synthetic data for evaluation
    # Fixed seed so the synthetic table is reproducible between runs.
    np.random.seed(42)
    n_samples = 1000
    
    # Every column is drawn independently of the others, so "predictions"
    # carry no information about "actuals": metrics computed on this table
    # reflect chance-level performance, not a real model.
    predictions_df = pd.DataFrame({
        'Actual_Churn': np.random.binomial(1, 0.3, n_samples),
        'Predicted_Churn': np.random.binomial(1, 0.3, n_samples),
        'Churn_Probability': np.random.random(n_samples),
        'Actual_CLV': np.random.exponential(300, n_samples),
        'Predicted_CLV': np.random.exponential(300, n_samples)
    })
    
    # Hard-coded placeholder figures; they are not derived from predictions_df.
    model_summary = pd.DataFrame({
        'Model': ['Churn Prediction', 'CLV Prediction'],
        'Algorithm': ['Random Forest Classifier', 'Random Forest Regressor'],
        'Primary_Metric': [0.85, 0.75],
        'Metric_Name': ['AUC Score', 'R² Score'],
        'Secondary_Metric': [0.78, 45.2],
        'Secondary_Metric_Name': ['Accuracy', 'RMSE']
    })
    
    print("📊 Synthetic evaluation data created")

# Load original data for additional analysis
# Fallback chain: cleaned CSV -> raw CSV -> generated sample data.
try:
    df = pd.read_csv('../data/processed/cleaned_data.csv')
    print(f"✅ Original dataset loaded: {df.shape}")
except FileNotFoundError:
    try:
        df = pd.read_csv('../data/raw/customer_shopping_data.csv')
    except FileNotFoundError:
        # Project helper imported lazily, only when neither CSV exists.
        from utils.common import load_sample_data
        df = load_sample_data(n_customers=2000)
    print(f"Dataset shape: {df.shape}")

## 1. Churn Prediction Model Evaluation

In [None]:
# Comprehensive churn model evaluation
def evaluate_churn_model(predictions_df):
    """Print and return classification metrics for the churn model.

    Args:
        predictions_df: DataFrame with the columns ``Actual_Churn`` (0/1
            ground truth), ``Predicted_Churn`` (0/1 hard prediction) and
            ``Churn_Probability`` (score passed to ``roc_auc_score``).

    Returns:
        dict with keys ``accuracy``, ``precision``, ``recall``, ``f1``,
        ``auc`` and ``confusion_matrix`` (2x2 array, rows = actual,
        columns = predicted, label order ``[0, 1]``).

    Raises:
        ValueError: propagated from ``roc_auc_score`` when ``Actual_Churn``
            contains a single class, because AUC is undefined in that case.
    """

    y_true = predictions_df['Actual_Churn']
    y_pred = predictions_df['Predicted_Churn']
    y_proba = predictions_df['Churn_Probability']

    print("🎯 CHURN MODEL EVALUATION")
    print("=" * 50)

    # Basic metrics
    accuracy = (y_pred == y_true).mean()
    # zero_division=0 makes the "no positive predictions / no positives"
    # case an explicit 0 instead of relying on the default warning path.
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    auc = roc_auc_score(y_true, y_proba)

    print(f"\n📊 CLASSIFICATION METRICS:")
    print(f"  Accuracy: {accuracy:.3f}")
    print(f"  Precision: {precision:.3f}")
    print(f"  Recall: {recall:.3f}")
    print(f"  F1-Score: {f1:.3f}")
    print(f"  AUC Score: {auc:.3f}")

    # Confusion matrix
    # Pin the label order so the matrix is always 2x2; without it a sample
    # where only one class occurs yields a 1x1 matrix and the cm[0, 1]
    # style indexing below raises IndexError.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    print(f"\n📋 CONFUSION MATRIX:")
    print(f"              Predicted")
    print(f"              0    1")
    print(f"Actual   0   {cm[0,0]:4d} {cm[0,1]:4d}")
    print(f"         1   {cm[1,0]:4d} {cm[1,1]:4d}")

    # Business impact metrics
    true_positives = cm[1, 1]
    false_positives = cm[0, 1]
    false_negatives = cm[1, 0]
    true_negatives = cm[0, 0]

    print(f"\n💼 BUSINESS IMPACT ANALYSIS:")
    print(f"  Correctly Identified Churners: {true_positives} (Potential saves)")
    print(f"  Missed Churners: {false_negatives} (Lost customers)")
    print(f"  False Alarms: {false_positives} (Unnecessary interventions)")
    print(f"  Correctly Identified Loyal: {true_negatives} (No action needed)")

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'auc': auc,
        'confusion_matrix': cm
    }

# Evaluate churn model
churn_metrics = evaluate_churn_model(predictions_df)

In [None]:
# Visualize churn model performance
def visualize_churn_evaluation(predictions_df, metrics):
    """Render a 2x3 plotly dashboard describing churn-model performance.

    Panels, in row-major order: confusion-matrix heatmap, ROC curve,
    precision-recall curve, score histograms per actual class, calibration
    curve, and precision/recall/F1 as a function of the decision threshold.

    Args:
        predictions_df: DataFrame with ``Actual_Churn``, ``Predicted_Churn``
            and ``Churn_Probability`` columns.
        metrics: dict providing ``confusion_matrix`` and ``auc``.

    Returns:
        None. The figure is shown via ``fig.show()``.
    """

    actual = predictions_df['Actual_Churn']
    # Looked up only so a missing column fails here; hard predictions are
    # not plotted directly.
    _hard_predictions = predictions_df['Predicted_Churn']
    scores = predictions_df['Churn_Probability']

    def _diagonal(label):
        # Dashed y = x reference line, hidden from the legend.
        return go.Scatter(x=[0, 1], y=[0, 1], mode='lines', line=dict(dash='dash'),
                          name=label, showlegend=False)

    fig = make_subplots(
        rows=2, cols=3,
        subplot_titles=['Confusion Matrix', 'ROC Curve', 'Precision-Recall Curve',
                       'Probability Distribution', 'Calibration Plot', 'Threshold Analysis'],
        specs=[[{'type': 'heatmap'}, {'type': 'scatter'}, {'type': 'scatter'}],
               [{'type': 'histogram'}, {'type': 'scatter'}, {'type': 'scatter'}]]
    )

    # Panel (1, 1): confusion matrix with the counts written in the cells.
    matrix = metrics['confusion_matrix']
    heatmap = go.Heatmap(
        z=matrix,
        x=['Predicted 0', 'Predicted 1'],
        y=['Actual 0', 'Actual 1'],
        colorscale='Blues',
        text=matrix,
        texttemplate="%{text}",
        textfont={'size': 16},
        showscale=False
    )
    fig.add_trace(heatmap, row=1, col=1)

    # Panel (1, 2): ROC curve plus the chance diagonal.
    false_pos_rate, true_pos_rate, _ = roc_curve(actual, scores)
    roc_label = f'ROC (AUC={metrics["auc"]:.3f})'
    fig.add_trace(
        go.Scatter(x=false_pos_rate, y=true_pos_rate, mode='lines', name=roc_label),
        row=1, col=2
    )
    fig.add_trace(_diagonal('Random'), row=1, col=2)

    # Panel (1, 3): precision-recall curve.
    pr_precision, pr_recall, _ = precision_recall_curve(actual, scores)
    fig.add_trace(
        go.Scatter(x=pr_recall, y=pr_precision, mode='lines', name='PR Curve'),
        row=1, col=3
    )

    # Panel (2, 1): score distribution split by the actual class.
    for class_value, class_name in ((0, 'Non-Churners'), (1, 'Churners')):
        fig.add_trace(
            go.Histogram(x=scores[actual == class_value], name=class_name,
                         opacity=0.7, nbinsx=30),
            row=2, col=1
        )

    # Panel (2, 2): calibration curve; any failure is reported and the panel
    # is left empty (including its reference diagonal).
    try:
        observed_rate, mean_score = calibration_curve(actual, scores, n_bins=10)
        fig.add_trace(
            go.Scatter(x=mean_score, y=observed_rate,
                       mode='lines+markers', name='Calibration'),
            row=2, col=2
        )
        fig.add_trace(_diagonal('Perfect'), row=2, col=2)
    except Exception as e:
        print(f"Calibration plot error: {e}")

    # Panel (2, 3): metrics across decision thresholds 0.10, 0.15, ...
    cutoffs = np.arange(0.1, 1.0, 0.05)
    curves = {'Precision': [], 'Recall': [], 'F1-Score': []}

    for cutoff in cutoffs:
        flagged = (scores >= cutoff).astype(int)
        both_classes_predicted = flagged.sum() > 0 and (1 - flagged).sum() > 0
        if not both_classes_predicted:
            # All-positive or all-negative predictions are recorded as zeros.
            point = (0, 0, 0)
        else:
            point = (
                precision_score(actual, flagged),
                recall_score(actual, flagged),
                f1_score(actual, flagged),
            )
        for series, value in zip(curves.values(), point):
            series.append(value)

    for curve_name, curve_values in curves.items():
        fig.add_trace(
            go.Scatter(x=cutoffs, y=curve_values, mode='lines', name=curve_name),
            row=2, col=3
        )

    fig.update_layout(height=800, title_text="Churn Model Performance Analysis")
    fig.show()

# Visualize churn evaluation
visualize_churn_evaluation(predictions_df, churn_metrics)

## 2. Customer Lifetime Value (CLV) Model Evaluation

In [None]:
# Comprehensive CLV model evaluation
def evaluate_clv_model(predictions_df):
    """Print and return regression metrics for the CLV model.

    Args:
        predictions_df: DataFrame with ``Actual_CLV`` and ``Predicted_CLV``
            columns.

    Returns:
        dict with keys ``r2``, ``rmse``, ``mae``, ``mape``,
        ``explained_variance``, ``errors`` (Series of predicted minus
        actual) and ``quartile_performance`` (DataFrame with one row per
        actual-CLV quartile).
    """

    y_true = predictions_df['Actual_CLV']
    y_pred = predictions_df['Predicted_CLV']

    print("💎 CLV MODEL EVALUATION")
    print("=" * 50)

    # Regression metrics
    r2 = r2_score(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    explained_var = explained_variance_score(y_true, y_pred)

    # Additional metrics
    # The 1e-10 term only prevents division by exactly zero; rows whose
    # actual CLV is (near) zero still dominate the resulting percentage.
    mape = np.mean(np.abs((y_true - y_pred) / (y_true + 1e-10))) * 100  # Mean Absolute Percentage Error
    max_error = np.max(np.abs(y_true - y_pred))

    print(f"\n📊 REGRESSION METRICS:")
    print(f"  R² Score: {r2:.3f}")
    print(f"  RMSE: ${rmse:.2f}")
    print(f"  MAE: ${mae:.2f}")
    print(f"  Explained Variance: {explained_var:.3f}")
    print(f"  MAPE: {mape:.2f}%")
    print(f"  Max Error: ${max_error:.2f}")

    # Error analysis (signed: positive means over-prediction)
    errors = y_pred - y_true
    mean_error = np.mean(errors)
    std_error = np.std(errors)

    print(f"\n📈 ERROR ANALYSIS:")
    print(f"  Mean Error (Bias): ${mean_error:.2f}")
    print(f"  Error Std Dev: ${std_error:.2f}")
    print(f"  Error Range: ${errors.min():.2f} to ${errors.max():.2f}")

    # Business impact analysis
    actual_total_clv = y_true.sum()
    predicted_total_clv = y_pred.sum()
    total_error = predicted_total_clv - actual_total_clv
    # A zero total would make the relative error inf/nan; report N/A instead.
    if actual_total_clv != 0:
        total_error_pct = f"{total_error/actual_total_clv*100:.1f}%"
    else:
        total_error_pct = "N/A"

    print(f"\n💼 BUSINESS IMPACT ANALYSIS:")
    print(f"  Actual Total CLV: ${actual_total_clv:,.2f}")
    print(f"  Predicted Total CLV: ${predicted_total_clv:,.2f}")
    print(f"  Total Prediction Error: ${total_error:,.2f} ({total_error_pct})")

    # CLV quartile analysis
    quartile_labels = ['Q1', 'Q2', 'Q3', 'Q4']
    try:
        y_true_quartiles = pd.qcut(y_true, 4, labels=quartile_labels)
    except ValueError:
        # Heavily tied values (e.g. many customers with the same CLV) make
        # the quantile edges collide. Ranking with method='first' breaks
        # ties by row order so four equally sized groups can still be formed.
        y_true_quartiles = pd.qcut(y_true.rank(method='first'), 4, labels=quartile_labels)

    quartile_performance = []
    for quartile in quartile_labels:
        mask = y_true_quartiles == quartile
        q_r2 = r2_score(y_true[mask], y_pred[mask])
        q_mae = mean_absolute_error(y_true[mask], y_pred[mask])
        quartile_performance.append({
            'Quartile': quartile,
            'R2': q_r2,
            'MAE': q_mae,
            'Count': mask.sum(),
            'Avg_Actual': y_true[mask].mean(),
            'Avg_Predicted': y_pred[mask].mean()
        })

    quartile_df = pd.DataFrame(quartile_performance)
    print(f"\n📊 PERFORMANCE BY CLV QUARTILE:")
    display(quartile_df.round(3))

    return {
        'r2': r2,
        'rmse': rmse,
        'mae': mae,
        'mape': mape,
        'explained_variance': explained_var,
        'errors': errors,
        'quartile_performance': quartile_df
    }

# Evaluate CLV model
clv_metrics = evaluate_clv_model(predictions_df)

In [None]:
# Visualize CLV model performance
def visualize_clv_evaluation(predictions_df, metrics):
    """Render a 2x3 plotly dashboard describing CLV-model performance.

    Panels, in row-major order: actual vs predicted scatter, residuals vs
    predicted, error histogram, actual/predicted CLV histograms, R² per
    actual-CLV quartile, and predictions ordered by actual CLV with an
    approximate interval band.

    Args:
        predictions_df: DataFrame with ``Actual_CLV`` and ``Predicted_CLV``
            columns.
        metrics: dict providing ``errors`` (predicted minus actual, assumed
            to be in the same row order as ``predictions_df``) and
            ``quartile_performance`` (DataFrame with ``Quartile`` and ``R2``).

    Returns:
        None. The figure is shown via ``fig.show()``.
    """

    y_true = predictions_df['Actual_CLV']
    y_pred = predictions_df['Predicted_CLV']
    errors = metrics['errors']

    fig = make_subplots(
        rows=2, cols=3,
        subplot_titles=['Actual vs Predicted', 'Residual Plot', 'Error Distribution',
                       'CLV Distribution Comparison', 'Quartile Performance', 'Prediction Intervals']
    )

    # Actual vs Predicted Scatter
    fig.add_trace(
        go.Scatter(x=y_true, y=y_pred, mode='markers', name='Predictions', opacity=0.6),
        row=1, col=1
    )

    # Perfect prediction line
    min_val = min(y_true.min(), y_pred.min())
    max_val = max(y_true.max(), y_pred.max())
    fig.add_trace(
        go.Scatter(x=[min_val, max_val], y=[min_val, max_val], mode='lines',
                  line=dict(dash='dash', color='red'), name='Perfect Prediction'),
        row=1, col=1
    )

    # Residual Plot
    fig.add_trace(
        go.Scatter(x=y_pred, y=errors, mode='markers', name='Residuals', opacity=0.6),
        row=1, col=2
    )
    fig.add_hline(y=0, line_dash="dash", line_color="red", row=1, col=2)

    # Error Distribution
    fig.add_trace(
        go.Histogram(x=errors, name='Error Distribution', nbinsx=30),
        row=1, col=3
    )

    # CLV Distribution Comparison
    fig.add_trace(
        go.Histogram(x=y_true, name='Actual CLV', opacity=0.7, nbinsx=30),
        row=2, col=1
    )
    fig.add_trace(
        go.Histogram(x=y_pred, name='Predicted CLV', opacity=0.7, nbinsx=30),
        row=2, col=1
    )

    # Quartile Performance
    quartile_df = metrics['quartile_performance']
    fig.add_trace(
        go.Bar(x=quartile_df['Quartile'], y=quartile_df['R2'], name='R² by Quartile'),
        row=2, col=2
    )

    # Prediction Intervals (simplified)
    # Order every series by actual CLV. The errors are reordered with the
    # same permutation so the rolling spread describes neighbouring
    # customers on the x-axis rather than arbitrary row neighbours.
    order = np.argsort(y_true.to_numpy())
    sorted_true = y_true.iloc[order]
    sorted_pred = y_pred.iloc[order]
    sorted_errors = pd.Series(np.asarray(errors)[order])

    # Local error spread; windows that are not fully populated (series
    # edges, or fewer rows than the window) fall back to the overall spread.
    window_size = max(50, len(sorted_errors) // 20)
    rolling_std = (
        sorted_errors.rolling(window=window_size, center=True).std()
        .fillna(sorted_errors.std())
    )

    # Rough band of +/- 1.96 local standard deviations around the
    # prediction; this treats the errors as approximately normal.
    half_width = 1.96 * rolling_std.to_numpy()
    band_lower = sorted_pred.to_numpy() - half_width
    band_upper = sorted_pred.to_numpy() + half_width

    fig.add_trace(
        go.Scatter(x=sorted_true, y=band_lower, mode='lines', line=dict(width=0),
                  showlegend=False, hoverinfo='skip'),
        row=2, col=3
    )
    fig.add_trace(
        # fill='tonexty' shades the area between this trace and the
        # previously added lower bound.
        go.Scatter(x=sorted_true, y=band_upper, mode='lines', line=dict(width=0),
                  fill='tonexty', fillcolor='rgba(99, 110, 250, 0.2)',
                  name='Approx. 95% Interval'),
        row=2, col=3
    )
    fig.add_trace(
        go.Scatter(x=sorted_true, y=sorted_pred, mode='lines', name='Predictions'),
        row=2, col=3
    )

    fig.update_layout(height=800, title_text="CLV Model Performance Analysis")
    fig.show()

# Visualize CLV evaluation
visualize_clv_evaluation(predictions_df, clv_metrics)

## 3. Model Robustness and Stability Analysis

In [None]:
# Cross-validation stability analysis
def analyze_model_stability(predictions_df, n_bootstrap=100, random_state=42):
    """Estimate metric stability by bootstrapping the prediction table.

    Rows of ``predictions_df`` are resampled with replacement; churn AUC and
    CLV R² are recomputed on each resample. Percentile confidence intervals
    and standard deviations of those replicates are printed and returned.

    Args:
        predictions_df: DataFrame with ``Actual_Churn``,
            ``Churn_Probability``, ``Actual_CLV`` and ``Predicted_CLV``.
        n_bootstrap: number of bootstrap resamples to draw.
        random_state: seed for the resampling generator.

    Returns:
        dict with ``bootstrap_results`` (lists of replicate metrics),
        ``confidence_intervals`` (2.5th/97.5th percentiles per metric) and
        ``stability`` (``churn_stable`` / ``clv_stable`` flags, true when
        the replicate standard deviation is below 0.05).

    Raises:
        ValueError: if no resample produced a valid metric pair.
    """

    print("🔒 MODEL STABILITY ANALYSIS")
    print("=" * 50)

    # Bootstrap analysis for confidence intervals
    bootstrap_results = {'churn_auc': [], 'clv_r2': []}

    # A local generator reproduces the draws of a seeded global generator
    # without resetting NumPy's global random state for the caller.
    rng = np.random.RandomState(random_state)
    n_rows = len(predictions_df)
    skipped = 0

    for _ in range(n_bootstrap):
        # Sample with replacement
        sample_indices = rng.choice(n_rows, size=n_rows, replace=True)
        sample_df = predictions_df.iloc[sample_indices]

        # Calculate metrics for bootstrap sample
        try:
            churn_auc = roc_auc_score(sample_df['Actual_Churn'], sample_df['Churn_Probability'])
            clv_r2 = r2_score(sample_df['Actual_CLV'], sample_df['Predicted_CLV'])
        except ValueError:
            # e.g. a resample containing a single churn class, for which AUC
            # is undefined. Other errors (missing columns, interrupts) are
            # real problems and are allowed to propagate.
            skipped += 1
            continue

        bootstrap_results['churn_auc'].append(churn_auc)
        bootstrap_results['clv_r2'].append(clv_r2)

    if not bootstrap_results['churn_auc']:
        raise ValueError(
            "No bootstrap sample produced valid metrics; "
            "check that predictions_df is non-empty and contains both churn classes."
        )
    if skipped:
        print(f"  Skipped {skipped} of {n_bootstrap} bootstrap samples with undefined metrics")

    # Calculate confidence intervals
    churn_auc_ci = np.percentile(bootstrap_results['churn_auc'], [2.5, 97.5])
    clv_r2_ci = np.percentile(bootstrap_results['clv_r2'], [2.5, 97.5])

    print(f"\n📊 BOOTSTRAP CONFIDENCE INTERVALS (95%):")
    print(f"  Churn AUC: {np.mean(bootstrap_results['churn_auc']):.3f} [{churn_auc_ci[0]:.3f}, {churn_auc_ci[1]:.3f}]")
    print(f"  CLV R²: {np.mean(bootstrap_results['clv_r2']):.3f} [{clv_r2_ci[0]:.3f}, {clv_r2_ci[1]:.3f}]")

    # Stability metrics
    churn_auc_std = np.std(bootstrap_results['churn_auc'])
    clv_r2_std = np.std(bootstrap_results['clv_r2'])

    print(f"\n📈 STABILITY METRICS:")
    print(f"  Churn AUC Std Dev: {churn_auc_std:.3f} (Lower is more stable)")
    print(f"  CLV R² Std Dev: {clv_r2_std:.3f} (Lower is more stable)")

    # Performance consistency check
    churn_consistency = (churn_auc_std < 0.05)  # AUC should not vary by more than 0.05
    clv_consistency = (clv_r2_std < 0.05)  # R² should not vary by more than 0.05

    print(f"\n✅ CONSISTENCY CHECK:")
    print(f"  Churn Model: {'Stable' if churn_consistency else 'Unstable'}")
    print(f"  CLV Model: {'Stable' if clv_consistency else 'Unstable'}")

    return {
        'bootstrap_results': bootstrap_results,
        'confidence_intervals': {
            'churn_auc': churn_auc_ci,
            'clv_r2': clv_r2_ci
        },
        'stability': {
            'churn_stable': churn_consistency,
            'clv_stable': clv_consistency
        }
    }

# Analyze model stability
stability_results = analyze_model_stability(predictions_df)

# Visualize stability analysis
# Two side-by-side histograms of the bootstrap replicates returned by
# analyze_model_stability: churn AUC on the left, CLV R² on the right.
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=['Churn AUC Bootstrap Distribution', 'CLV R² Bootstrap Distribution']
)

# Churn AUC distribution
fig.add_trace(
    go.Histogram(x=stability_results['bootstrap_results']['churn_auc'], 
                name='Churn AUC', nbinsx=20),
    row=1, col=1
)

# CLV R² distribution
fig.add_trace(
    go.Histogram(x=stability_results['bootstrap_results']['clv_r2'], 
                name='CLV R²', nbinsx=20),
    row=1, col=2
)

fig.update_layout(height=400, title_text="Model Stability Analysis - Bootstrap Distributions")
fig.show()

## 4. Business Impact Assessment

In [None]:
# Business impact assessment
def assess_business_impact(predictions_df, df, *, retention_campaign_cost=20,
                           retention_success_rate=0.3, high_risk_threshold=0.7):
    """Estimate the monetary impact of acting on the model predictions.

    Two scenarios are printed and returned: a retention campaign aimed at
    customers whose churn probability reaches ``high_risk_threshold``, and a
    tiered service-cost allocation driven by predicted CLV quartiles.

    Args:
        predictions_df: DataFrame with ``Actual_Churn``,
            ``Churn_Probability``, ``Actual_CLV`` and ``Predicted_CLV``.
        df: accepted for interface compatibility; not read by this function.
        retention_campaign_cost: assumed cost per targeted customer.
        retention_success_rate: assumed share of targeted true churners
            that the campaign retains.
        high_risk_threshold: churn probability at or above which a customer
            is targeted.

    Returns:
        dict with ``churn_prevention`` (``high_risk_customers``,
        ``campaign_cost``, ``revenue_saved``, ``net_benefit``, ``roi``),
        ``resource_allocation`` (``current_cost``, ``optimal_cost``,
        ``efficiency``, ``savings``) and ``total_impact``. ``roi`` is 0 when
        nobody is targeted.
    """

    print("💼 BUSINESS IMPACT ASSESSMENT")
    print("=" * 50)

    # Churn prevention impact
    y_true_churn = predictions_df['Actual_Churn']
    y_proba_churn = predictions_df['Churn_Probability']

    # Assume average customer value and retention campaign costs
    # NOTE(review): the 200 fallback is only reachable when Actual_CLV is
    # absent, but the allocation section below requires that column anyway.
    avg_customer_value = predictions_df['Actual_CLV'].mean() if 'Actual_CLV' in predictions_df.columns else 200

    # Calculate potential savings
    is_high_risk = y_proba_churn >= high_risk_threshold
    high_risk_customers = is_high_risk.sum()  # High churn probability
    true_churners_identified = (is_high_risk & (y_true_churn == 1)).sum()

    campaign_cost = high_risk_customers * retention_campaign_cost
    customers_saved = true_churners_identified * retention_success_rate
    revenue_saved = customers_saved * avg_customer_value
    net_benefit = revenue_saved - campaign_cost
    # Computed once and reused for both the printout and the return value.
    roi = (net_benefit / campaign_cost) * 100 if campaign_cost > 0 else 0

    print(f"\n🎯 CHURN PREVENTION IMPACT:")
    print(f"  High-risk customers identified: {high_risk_customers:,}")
    print(f"  True churners in high-risk group: {true_churners_identified:,}")
    print(f"  Retention campaign cost: ${campaign_cost:,.2f}")
    print(f"  Customers potentially saved: {customers_saved:.0f}")
    print(f"  Revenue saved: ${revenue_saved:,.2f}")
    print(f"  Net benefit: ${net_benefit:,.2f}")
    print(f"  ROI: {roi:.1f}%" if campaign_cost > 0 else "  ROI: N/A")

    # CLV-based resource allocation
    y_true_clv = predictions_df['Actual_CLV']
    y_pred_clv = predictions_df['Predicted_CLV']

    # Segment customers by predicted and by actual CLV. Both splits are
    # computed once, outside the per-tier aggregation.
    tier_labels = ['Low', 'Medium', 'High', 'Very High']
    predicted_tiers = pd.qcut(y_pred_clv, 4, labels=tier_labels)
    actual_tiers = pd.qcut(y_true_clv, 4, labels=tier_labels)

    # Calculate misallocation costs
    service_costs = {'Low': 10, 'Medium': 25, 'High': 50, 'Very High': 100}  # Service cost per customer

    # NOTE(review): both splits are quartiles of the same customers, so each
    # tier holds the same number of rows (barring ties) and the two totals
    # come out equal. Measuring misallocation would need a per-customer
    # comparison of predicted vs actual tier - confirm the intended metric.
    total_allocation_cost = sum(
        (predicted_tiers == tier).sum() * service_costs[tier] for tier in tier_labels
    )
    optimal_allocation_cost = sum(
        (actual_tiers == tier).sum() * service_costs[tier] for tier in tier_labels
    )

    allocation_efficiency = (optimal_allocation_cost / total_allocation_cost) * 100 if total_allocation_cost > 0 else 100
    allocation_savings = total_allocation_cost - optimal_allocation_cost

    print(f"\n💎 CLV-BASED RESOURCE ALLOCATION:")
    print(f"  Current allocation cost: ${total_allocation_cost:,.2f}")
    print(f"  Optimal allocation cost: ${optimal_allocation_cost:,.2f}")
    print(f"  Allocation efficiency: {allocation_efficiency:.1f}%")
    print(f"  Potential savings: ${allocation_savings:,.2f}")

    # Overall business impact summary
    total_customers = len(predictions_df)
    total_revenue = y_true_clv.sum()
    total_impact = net_benefit + allocation_savings

    print(f"\n📊 OVERALL BUSINESS IMPACT:")
    print(f"  Total customers analyzed: {total_customers:,}")
    print(f"  Total customer lifetime value: ${total_revenue:,.2f}")
    print(f"  Churn prevention net benefit: ${net_benefit:,.2f}")
    print(f"  Resource allocation savings: ${allocation_savings:,.2f}")
    print(f"  Combined annual impact: ${total_impact:,.2f}")

    return {
        'churn_prevention': {
            'high_risk_customers': high_risk_customers,
            'campaign_cost': campaign_cost,
            'revenue_saved': revenue_saved,
            'net_benefit': net_benefit,
            'roi': roi
        },
        'resource_allocation': {
            'current_cost': total_allocation_cost,
            'optimal_cost': optimal_allocation_cost,
            'efficiency': allocation_efficiency,
            'savings': allocation_savings
        },
        'total_impact': total_impact
    }

# Assess business impact
business_impact = assess_business_impact(predictions_df, df)

In [None]:
# Visualize business impact
def visualize_business_impact(business_impact, predictions_df):
    """Render a 2x2 plotly dashboard of the business-impact figures.

    Panels, in row-major order: churn-campaign cost/revenue/net bars,
    current vs optimal allocation cost bars, predicted-CLV box plots per
    churn-risk band, and an overall benefit/cost/net bar chart.

    Args:
        business_impact: dict with ``churn_prevention``,
            ``resource_allocation`` and ``total_impact`` entries.
        predictions_df: DataFrame with ``Churn_Probability`` and
            ``Predicted_CLV`` columns.

    Returns:
        None. The figure is shown via ``fig.show()``.
    """

    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=['Churn Prevention ROI', 'Resource Allocation Efficiency', 
                       'CLV Distribution by Risk Level', 'Cost-Benefit Analysis']
    )

    # Panel (1, 1): retention campaign economics.
    campaign = business_impact['churn_prevention']
    campaign_bars = go.Bar(
        x=['Campaign Cost', 'Revenue Saved', 'Net Benefit'],
        y=[campaign['campaign_cost'], campaign['revenue_saved'], campaign['net_benefit']],
        name='Churn Prevention'
    )
    fig.add_trace(campaign_bars, row=1, col=1)

    # Panel (1, 2): service-cost allocation.
    allocation = business_impact['resource_allocation']
    allocation_bars = go.Bar(
        x=['Current Cost', 'Optimal Cost'],
        y=[allocation['current_cost'], allocation['optimal_cost']],
        name='Allocation Cost'
    )
    fig.add_trace(allocation_bars, row=1, col=2)

    # Panel (2, 1): predicted CLV split into three churn-probability bands
    # (>= 0.7 high, < 0.3 low, everything in between medium).
    churn_score = predictions_df['Churn_Probability']
    predicted_clv = predictions_df['Predicted_CLV']

    is_high = churn_score >= 0.7
    is_low = churn_score < 0.3
    is_medium = (churn_score >= 0.3) & (churn_score < 0.7)

    risk_bands = [
        ('High Risk', predicted_clv[is_high]),
        ('Medium Risk', predicted_clv[is_medium]),
        ('Low Risk', predicted_clv[is_low]),
    ]
    for band_name, band_values in risk_bands:
        fig.add_trace(
            go.Box(y=band_values, name=band_name, boxpoints='outliers'),
            row=2, col=1
        )

    # Panel (2, 2): overall cost-benefit summary.
    benefits = campaign['revenue_saved'] + allocation['savings']
    costs = campaign['campaign_cost']
    overall_bars = go.Bar(
        x=['Total Benefits', 'Total Costs', 'Net Impact'],
        y=[benefits, costs, business_impact['total_impact']],
        name='Overall Impact'
    )
    fig.add_trace(overall_bars, row=2, col=2)

    fig.update_layout(height=800, title_text="Business Impact Analysis Dashboard")
    fig.show()

# Visualize business impact
visualize_business_impact(business_impact, predictions_df)

## 5. Model Deployment Recommendations

In [None]:
# Deployment recommendations and monitoring plan
def generate_deployment_recommendations(churn_metrics, clv_metrics, stability_results, business_impact):
    """Print a deployment report and return the readiness verdicts.

    A model counts as ready when it clears fixed metric thresholds and its
    stability flag is set: churn needs AUC >= 0.75 and precision >= 0.6,
    CLV needs R² >= 0.6 and MAPE <= 30.

    Args:
        churn_metrics: dict with ``auc`` and ``precision``.
        clv_metrics: dict with ``r2``, ``mape`` and ``errors`` (an object
            exposing ``.mean()``).
        stability_results: dict whose ``stability`` entry holds
            ``churn_stable`` and ``clv_stable``.
        business_impact: dict with ``churn_prevention['roi']`` and
            ``total_impact``.

    Returns:
        dict with ``churn_ready``, ``clv_ready``, ``deployment_approach``
        (``full_deployment`` / ``phased_deployment`` / ``pilot_testing``)
        and ``business_value`` (``high`` / ``medium`` / ``limited``).
    """

    def _emit(lines):
        # Print a fixed block of report lines, one per call to print.
        for line in lines:
            print(line)

    print("🚀 DEPLOYMENT RECOMMENDATIONS")
    print("=" * 60)

    # Model readiness assessment
    churn_ready = (
        churn_metrics['auc'] >= 0.75 and 
        churn_metrics['precision'] >= 0.6 and 
        stability_results['stability']['churn_stable']
    )
    clv_ready = (
        clv_metrics['r2'] >= 0.6 and 
        clv_metrics['mape'] <= 30 and 
        stability_results['stability']['clv_stable']
    )

    print(f"\n📋 MODEL READINESS ASSESSMENT:")
    for model_label, is_ready in (("Churn Model", churn_ready), ("CLV Model", clv_ready)):
        verdict = '✅ Ready for Production' if is_ready else '⚠️ Needs Improvement'
        print(f"  {model_label}: {verdict}")

    # Deployment strategy: both ready -> full, one ready -> phased, none -> pilot.
    print(f"\n🎯 RECOMMENDED DEPLOYMENT STRATEGY:")

    if churn_ready and clv_ready:
        deployment_approach = "full_deployment"
    elif churn_ready or clv_ready:
        deployment_approach = "phased_deployment"
    else:
        deployment_approach = "pilot_testing"

    strategy_details = {
        "full_deployment": ("Full Production Deployment", "2-4 weeks", "Low"),
        "phased_deployment": ("Phased Deployment (Ready models first)", "4-6 weeks", "Medium"),
        "pilot_testing": ("Pilot Testing Required", "6-12 weeks", "High"),
    }
    strategy_name, timeline, risk_level = strategy_details[deployment_approach]
    print(f"  Strategy: {strategy_name}")
    print(f"  Timeline: {timeline}")
    print(f"  Risk Level: {risk_level}")

    # Performance thresholds for monitoring, derived from the current metrics.
    auc_floor = churn_metrics['auc'] * 0.95
    precision_floor = churn_metrics['precision'] * 0.95
    print(f"\n📊 PERFORMANCE MONITORING THRESHOLDS:")
    print(f"  Churn Model:")
    print(f"    - AUC Score: Monitor if < {auc_floor:.3f}")
    print(f"    - Precision: Monitor if < {precision_floor:.3f}")
    print(f"    - Prediction Volume: Monitor if churn rate changes by >20%")

    r2_floor = clv_metrics['r2'] * 0.95
    mape_ceiling = clv_metrics['mape'] * 1.1
    bias_limit = abs(clv_metrics['errors'].mean()) * 2
    print(f"  CLV Model:")
    print(f"    - R² Score: Monitor if < {r2_floor:.3f}")
    print(f"    - MAPE: Monitor if > {mape_ceiling:.1f}%")
    print(f"    - Bias: Monitor if mean error > ${bias_limit:.2f}")

    # Business value validation
    print(f"\n💼 BUSINESS VALUE VALIDATION:")
    roi = business_impact['churn_prevention']['roi']
    total_impact = business_impact['total_impact']

    if roi > 200 and total_impact > 10000:
        business_value = 'high'
    elif roi > 100 or total_impact > 5000:
        business_value = 'medium'
    else:
        business_value = 'limited'

    value_messages = {
        'high': ("✅ High Business Value", "Prioritize deployment and scale quickly"),
        'medium': ("⚡ Medium Business Value", "Deploy with careful monitoring"),
        'limited': ("⚠️ Limited Business Value", "Re-evaluate model parameters or business assumptions"),
    }
    headline, advice = value_messages[business_value]
    print(f"  {headline}: ROI = {roi:.1f}%, Total Impact = ${total_impact:,.2f}")
    print(f"  Recommendation: {advice}")

    # Infrastructure recommendations
    _emit([
        "\n🏗️ INFRASTRUCTURE RECOMMENDATIONS:",
        "  - Real-time scoring: Required for churn prevention",
        "  - Batch processing: Suitable for CLV updates (monthly)",
        "  - Model storage: Version control and rollback capability",
        "  - Monitoring: Automated alerts for performance degradation",
        "  - A/B testing: Compare model performance against baseline",
    ])

    # Risk mitigation
    _emit([
        "\n⚠️ RISK MITIGATION STRATEGIES:",
        "  - Gradual rollout: Start with 10% of customers, increase weekly",
        "  - Champion/Challenger: Keep current methods as backup",
        "  - Human oversight: Review high-impact predictions manually",
        "  - Regular retraining: Monthly model updates with new data",
        "  - Performance tracking: Daily dashboard monitoring",
    ])

    # Success metrics
    _emit([
        "\n🎯 SUCCESS METRICS TO TRACK:",
        "  Business Metrics:",
        "    - Customer retention rate improvement",
        "    - Revenue per customer increase",
        "    - Marketing campaign efficiency",
        "    - Customer satisfaction scores",
    ])
    _emit([
        "  Technical Metrics:",
        "    - Model prediction accuracy",
        "    - Prediction latency (< 100ms target)",
        "    - System uptime (> 99.9% target)",
        "    - Data quality scores",
    ])

    return {
        'churn_ready': churn_ready,
        'clv_ready': clv_ready,
        'deployment_approach': deployment_approach,
        'business_value': business_value
    }

# Generate deployment recommendations
deployment_rec = generate_deployment_recommendations(churn_metrics, clv_metrics, stability_results, business_impact)

In [None]:
# Create comprehensive evaluation summary
def create_evaluation_summary(churn_metrics, clv_metrics, stability_results, business_impact, deployment_rec):
    """Build, display and return a two-row summary table (churn, CLV).

    Args:
        churn_metrics: dict with ``auc`` and ``precision``.
        clv_metrics: dict with ``r2`` and ``rmse``.
        stability_results: dict whose ``stability`` entry holds
            ``churn_stable`` and ``clv_stable``.
        business_impact: dict with ``churn_prevention['net_benefit']`` and
            ``resource_allocation['savings']``.
        deployment_rec: dict with ``churn_ready`` and ``clv_ready``.

    Returns:
        DataFrame with the columns ``Model``, ``Primary_Metric``,
        ``Secondary_Metric``, ``Stability``, ``Business_Impact`` and
        ``Deployment_Ready``; all metric cells are preformatted strings.
    """

    def _stability_label(flag):
        return 'Stable' if flag else 'Unstable'

    def _ready_mark(flag):
        return '✅' if flag else '⚠️'

    # Each column lists the churn model first, then the CLV model.
    columns = {}
    columns['Model'] = ['Churn Prediction', 'CLV Prediction']
    columns['Primary_Metric'] = [
        f"{churn_metrics['auc']:.3f} (AUC)",
        f"{clv_metrics['r2']:.3f} (R²)",
    ]
    columns['Secondary_Metric'] = [
        f"{churn_metrics['precision']:.3f} (Precision)",
        f"{clv_metrics['rmse']:.2f} (RMSE)",
    ]
    columns['Stability'] = [
        _stability_label(stability_results['stability']['churn_stable']),
        _stability_label(stability_results['stability']['clv_stable']),
    ]
    columns['Business_Impact'] = [
        f"${business_impact['churn_prevention']['net_benefit']:,.0f}",
        f"${business_impact['resource_allocation']['savings']:,.0f}",
    ]
    columns['Deployment_Ready'] = [
        _ready_mark(deployment_rec['churn_ready']),
        _ready_mark(deployment_rec['clv_ready']),
    ]

    summary_df = pd.DataFrame(columns)

    print("\n📋 COMPREHENSIVE EVALUATION SUMMARY")
    print("=" * 70)
    display(summary_df)

    # Overall recommendation: both ready, neither ready, or exactly one ready.
    print(f"\n🎯 OVERALL RECOMMENDATION:")
    if deployment_rec['churn_ready'] and deployment_rec['clv_ready']:
        print(f"  ✅ PROCEED WITH FULL DEPLOYMENT")
        print(f"  Both models meet production standards and show strong business impact.")
    elif not (deployment_rec['churn_ready'] or deployment_rec['clv_ready']):
        print(f"  ⚠️ ADDITIONAL DEVELOPMENT REQUIRED")
        print(f"  Models need improvement before production deployment.")
    else:
        print(f"  ⚡ PROCEED WITH PHASED DEPLOYMENT")
        if deployment_rec['churn_ready']:
            ready_model = "Churn"
        else:
            ready_model = "CLV"
        print(f"  Deploy {ready_model} model first, improve the other before deployment.")

    return summary_df

# Create evaluation summary
evaluation_summary = create_evaluation_summary(churn_metrics, clv_metrics, stability_results, business_impact, deployment_rec)

# Save evaluation results
# Best-effort export: any failure (for example a missing output directory) is
# reported and the notebook continues with the in-memory results.
try:
    # Save comprehensive evaluation summary
    evaluation_summary.to_csv('../reports/analysis/05_model_evaluation_summary.csv', index=False)
    print("💾 Evaluation summary saved to ../reports/analysis/05_model_evaluation_summary.csv")
    
    # Save business impact analysis
    # Flattens the nested business_impact dict into a two-column
    # Metric/Value table; the two lists below must stay in the same order.
    business_impact_df = pd.DataFrame({
        'Metric': [
            'Churn Prevention - High Risk Customers',
            'Churn Prevention - Campaign Cost',
            'Churn Prevention - Revenue Saved',
            'Churn Prevention - Net Benefit',
            'Churn Prevention - ROI (%)',
            'Resource Allocation - Current Cost',
            'Resource Allocation - Optimal Cost',
            'Resource Allocation - Efficiency (%)',
            'Resource Allocation - Savings',
            'Total Annual Impact'
        ],
        'Value': [
            business_impact['churn_prevention']['high_risk_customers'],
            business_impact['churn_prevention']['campaign_cost'],
            business_impact['churn_prevention']['revenue_saved'],
            business_impact['churn_prevention']['net_benefit'],
            business_impact['churn_prevention']['roi'],
            business_impact['resource_allocation']['current_cost'],
            business_impact['resource_allocation']['optimal_cost'],
            business_impact['resource_allocation']['efficiency'],
            business_impact['resource_allocation']['savings'],
            business_impact['total_impact']
        ]
    })
    
    business_impact_df.to_csv('../reports/analysis/05_business_impact_assessment.csv', index=False)
    print("💾 Business impact assessment saved to ../reports/analysis/05_business_impact_assessment.csv")
    
    print("\n✅ Model evaluation completed successfully!")
    
except Exception as e:
    print(f"⚠️ Could not save evaluation results: {e}")
    print("📊 Evaluation completed - results available in notebook")

# Final comprehensive summary
# Recaps the headline numbers computed by the earlier cells.
# NOTE(review): every line carries a ✅ prefix regardless of outcome,
# including the stability line when it reports "need attention".
# NOTE(review): "annual value" / "within first year" are wording only - no
# time period enters the visible impact calculation; confirm the horizon.
print("\n" + "=" * 80)
print("🎊 MODEL EVALUATION COMPLETE - FINAL SUMMARY")
print("=" * 80)
print(f"✅ Churn Model Performance: AUC = {churn_metrics['auc']:.3f}, Precision = {churn_metrics['precision']:.3f}")
print(f"✅ CLV Model Performance: R² = {clv_metrics['r2']:.3f}, RMSE = ${clv_metrics['rmse']:.2f}")
print(f"✅ Model Stability: Both models {'passed' if stability_results['stability']['churn_stable'] and stability_results['stability']['clv_stable'] else 'need attention'}")
print(f"✅ Business Impact: ${business_impact['total_impact']:,.2f} annual value")
print(f"✅ Deployment Status: {deployment_rec['deployment_approach'].replace('_', ' ').title()}")
print(f"\n🚀 Next Steps: Implement deployment plan and monitoring framework")
print(f"📈 Expected ROI: {business_impact['churn_prevention']['roi']:.1f}% within first year")