# Policy Learning and Uplift Evaluation

This notebook:
1. Evaluates targeting policy performance (Qini, AUUC)
2. Compares learned policy vs random baseline
3. Analyzes uplift by customer segments
4. Generates targeting recommendations

In [None]:
import sys
sys.path.insert(0, '../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from coupon_causal import data, features, cate, policy, viz, utils

%matplotlib inline

## 1. Load Data and CATE Estimates

In [None]:
# Load results from the previous notebook; if anything goes wrong, rebuild
# synthetic data and CATE estimates from scratch so the notebook can still run.
try:
    results = utils.load_artifact('../models/estimation_results.joblib')
    df = pd.read_parquet('../data/processed/coupon_data.parquet')

    cate_scores = results['cate_scores']
    # The scores are passed alongside T and Y (both taken from df) to the policy
    # helpers below, so a saved artifact whose length differs from the data is
    # treated as stale and triggers the fallback instead of being used.
    if len(cate_scores) != len(df):
        raise ValueError(
            f"cate_scores has {len(cate_scores):,} entries but the data has {len(df):,} rows"
        )
    T = df['treatment'].values
    Y = df['outcome'].values

    print(f"Loaded {len(df):,} records")
    print(f"Mean CATE: ${cate_scores.mean():.2f}")
except Exception as exc:
    # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate; the fallback stays best-effort for real errors.
    print("Results not found. Please run the pipeline first or execute previous notebooks.")
    # Surface the actual cause so a genuine bug is not mistaken for a missing file.
    print(f"  Reason: {type(exc).__name__}: {exc}")
    # Fallback: generate fresh data
    config = utils.load_config('../config/default.yaml')
    df, _ = data.generate_synthetic_coupon_data(random_state=42)

    # Quick CATE estimation
    from coupon_causal import propensity
    X, T, Y, fe = features.prepare_features(df, config['features'], fit=True)
    _, _, ensemble_prop = propensity.fit_propensity_models(X, T, config)
    cate_scores, _ = cate.fit_x_learner(X, Y, T, ensemble_prop, config)
    print("Generated fresh data and CATE estimates")

## 2. Qini Curve and AUUC

In [None]:
# Qini curve inputs, computed over 20 bins
qini_curve = policy.compute_qini_curve(cate_scores, T, Y, n_bins=20)
fractions, qini_values, random_baseline = qini_curve

# Normalized AUUC for the same scores
auuc = policy.compute_auuc(cate_scores, T, Y, normalize=True)
print(
    f"Area Under Uplift Curve (AUUC): {auuc:.3f}",
    f"(Positive AUUC indicates learned policy beats random targeting)",
    sep="\n",
)

In [None]:
# Draw the Qini curve from the values computed in the previous cell
qini_plot_args = (fractions, qini_values, random_baseline)
viz.plot_qini_curve(*qini_plot_args)
plt.show()

## 3. Policy Comparison

In [None]:
# Score the candidate policies, then list one line per policy
policy_comparison = policy.compare_policies(cate_scores, T, Y)

print("\nPolicy Comparison (AUUC):")
comparison_rows = policy_comparison.items()
for policy_name, auuc_value in comparison_rows:
    print(f"  {policy_name}: {auuc_value:.2f}")

## 4. Budget-Constrained Policy Evaluation

In [None]:
# Budgets to evaluate, expressed as fractions (plotted later as % of customers)
budget_grid = [0.1, 0.2, 0.3, 0.4, 0.5]

policy_results = policy.evaluate_policy_uplift(
    cate_scores,
    T,
    Y,
    budget_fractions=budget_grid,
    cost_per_treatment=1.0,
)

print("\nPolicy Performance by Budget:")
results_table = policy_results.to_string(index=False)
print(results_table)

In [None]:
# Net benefit against the share of customers targeted
fig, ax = plt.subplots(figsize=(10, 6))
budget_pct = policy_results['budget_fraction'] * 100
ax.plot(
    budget_pct,
    policy_results['net_benefit'],
    marker='o', linewidth=2, markersize=8, color='steelblue',
)
ax.axhline(0, color='red', linestyle='--', linewidth=1)  # zero reference line
ax.set_xlabel('Budget (% of customers targeted)', fontsize=12)
ax.set_ylabel('Net Benefit ($)', fontsize=12)
ax.set_title('Net Benefit by Targeting Budget', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# ROI against the share of customers targeted
fig, ax = plt.subplots(figsize=(10, 6))
budget_pct = policy_results['budget_fraction'] * 100
ax.plot(
    budget_pct,
    policy_results['roi'],
    marker='s', linewidth=2, markersize=8, color='coral',
)
ax.axhline(0, color='red', linestyle='--', linewidth=1)  # zero reference line
ax.set_xlabel('Budget (% of customers targeted)', fontsize=12)
ax.set_ylabel('Return on Investment (ROI)', fontsize=12)
ax.set_title('ROI by Targeting Budget', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)
plt.show()

## 5. Segment Analysis

In [None]:
# Segment-level uplift needs a segment label in the data; bail out early without one
segment_col = 'customer_segment'
if segment_col not in df.columns:
    print("No segment column available")
else:
    segment_stats = policy.segment_uplift_analysis(
        cate_scores, df, segment_col, top_k=10
    )

    # Plot the per-segment statistics
    viz.plot_segment_uplift(segment_stats)
    plt.show()

## 6. Optimal Policy and Targeting Recommendations

In [None]:
# Pick the CATE threshold that respects a fixed targeting budget
TARGET_BUDGET = 0.2  # Target 20% of customers

# Budget fraction converted to a customer count (truncated toward zero)
max_treated = int(TARGET_BUDGET * len(df))
threshold, treatment_indicator = policy.optimal_policy_threshold(
    cate_scores, cost_per_treatment=1.0, treatment_capacity=max_treated
)

print(f"\nOptimal Policy (Budget={TARGET_BUDGET:.0%}):")
print(f"  Threshold: ${threshold:.2f}")
print(f"  Customers to target: {treatment_indicator.sum():,}")
print(f"  Avg predicted uplift (targeted): ${cate_scores[treatment_indicator == 1].mean():.2f}")

In [None]:
# Build the recommendation table from the scores and the chosen threshold.
# NOTE(review): presumably the helper also writes the table to output_path — confirm in policy module.
recommendations_path = '../reports/tables/targeting_recommendations.csv'
recommendations = policy.create_targeting_recommendations(
    cate_scores, df, threshold, output_path=recommendations_path
)

print("\nTop 10 customers to target:")
top_recommendations = recommendations.head(10)
print(top_recommendations.to_string(index=False))

## 7. Uplift Distribution by Targeting Decision

In [None]:
# Overlay the CATE histograms of the two targeting groups and mark the threshold
fig, ax = plt.subplots(figsize=(10, 6))

# (indicator value, legend label, bar color); drawn in this order
hist_groups = [
    (0, 'Not Targeted', 'gray'),
    (1, 'Targeted', 'steelblue'),
]
for flag, group_label, group_color in hist_groups:
    ax.hist(
        cate_scores[treatment_indicator == flag],
        bins=30, alpha=0.6, label=group_label, color=group_color,
    )

ax.axvline(threshold, color='red', linestyle='--', linewidth=2,
           label=f'Threshold: ${threshold:.2f}')
ax.set_xlabel('Predicted Treatment Effect ($)', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)
ax.set_title('CATE Distribution: Targeted vs Not Targeted', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

## Summary & Recommendations

### Key Findings:
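1. **Uplift Model Performance**: A positive AUUC (Section 2) indicates the learned policy outperforms random targeting; confirm against the computed value before reporting this as a finding
2. **Optimal Budget**: ROI analysis suggests optimal budget around [X]% *(TODO: fill in from the Section 4 budget results)*
3. **High-Value Segments**: [Segment names] show highest predicted uplift *(TODO: fill in from the Section 5 segment analysis)*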

### Deployment Recommendations:
1. Target customers with predicted CATE > threshold
2. Prioritize high-uplift segments
3. Monitor actual uplift and recalibrate models periodically
4. Consider A/B testing the learned policy vs. current policy

### Next Steps:
- Export targeting list to CRM/marketing platform
- Set up monitoring dashboard
- Plan follow-up measurement study