# Model Training & Evaluation Notebook

This notebook covers:
1. Pricing Optimization Model (training, metrics, feature importance)
2. Churn Prediction Model (training, metrics, confusion matrix)
3. Business Impact Analysis (revenue impact of the top pricing recommendations)

In [None]:
import sys
sys.path.insert(0, '../src/data')
sys.path.insert(0, '../src/models')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import learning_curve

from generator import PayPalDataGenerator, DataConfig
from pricing_optimizer import PricingOptimizer
from churn_predictor import ChurnPredictor

In [None]:
# Build the dataset that both models below are trained on.
# The seed is pinned (presumably so repeated runs generate the same data;
# confirm against PayPalDataGenerator).
config = DataConfig(
    num_merchants=5000,
    num_users=25000,
    num_transactions=200000,
    seed=42,
)
generator = PayPalDataGenerator(config)
data = generator.generate_all()

# Report the size of every entry returned by the generator.
print('Training data generated:')
for table_name, table in data.items():
    print(f'  {table_name}: {len(table):,} records')

## 1. Pricing Optimization Model

In [None]:
# Fit the pricing optimizer on the merchant and transaction tables.
# `fit` returns a metrics object whose fields are reported below.
pricing_model = PricingOptimizer()
pricing_metrics = pricing_model.fit(data['merchants'], data['transactions'])

# Print each metric to four decimal places.
print('Pricing Model Performance:')
print(f'  MAE: {pricing_metrics.mae:.4f}')
print(f'  RMSE: {pricing_metrics.rmse:.4f}')
# The label uses U+00B2 SUPERSCRIPT TWO; a mis-decoded UTF-8 sequence here
# shows up in the output as "RÂ²".
print(f'  R²: {pricing_metrics.r2:.4f}')

In [None]:
# Tabulate the feature-importance mapping reported by the pricing model.
scores = pricing_metrics.feature_importance
feature_names = list(scores.keys())
feature_scores = list(scores.values())

# Ascending order puts the largest value in the last row, which barh draws
# at the top of the chart.
importance = pd.DataFrame(
    {'feature': feature_names, 'importance': feature_scores}
).sort_values('importance', ascending=True)

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(importance['feature'], importance['importance'])
ax.set_title('Pricing Model - Feature Importance')
ax.set_xlabel('Importance')
fig.tight_layout()
plt.show()

## 2. Churn Prediction Model

In [None]:
# Fit the churn predictor on the same merchant and transaction tables used
# for the pricing model; `fit` returns the metrics object reported below.
churn_model = ChurnPredictor()
churn_metrics = churn_model.fit(data['merchants'], data['transactions'])

# (display label, attribute on the metrics object), in the order printed.
churn_report_fields = [
    ('Accuracy', 'accuracy'),
    ('Precision', 'precision'),
    ('Recall', 'recall'),
    ('F1', 'f1'),
    ('AUC-ROC', 'auc_roc'),
]

print('Churn Model Performance:')
for label, attr in churn_report_fields:
    # Every metric is shown to four decimal places.
    print(f'  {label}: {getattr(churn_metrics, attr):.4f}')

In [None]:
# Draw the churn model's confusion matrix as an annotated heatmap.
# The same class labels are used on both axes. The axis titles assume rows are
# actual classes and columns are predicted classes (TODO confirm against
# ChurnPredictor).
class_labels = ['Not Churned', 'Churned']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    churn_metrics.confusion_matrix,
    annot=True,
    fmt='d',  # annotate each cell as an integer
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title('Churn Prediction - Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

## 3. Business Impact Analysis

In [None]:
# Number of recommendations to request. The printed summary and the chart
# title reuse it so they always agree with the call below.
TOP_N = 50

# Get pricing recommendations
recommendations = pricing_model.get_pricing_recommendations(
    data['merchants'], data['transactions'], top_n=TOP_N
)

# Sum the per-merchant revenue impact across all returned recommendations.
total_impact = recommendations['revenue_impact'].sum()
print(f'Total Revenue Impact from Top {TOP_N} Recommendations: ${total_impact:,.2f}')

# Ranks are 1-based so the first bar sits at 1 rather than 0 on the
# "Merchant Rank" axis.
# NOTE(review): "rank" assumes the recommendations come back ordered by
# impact; confirm against get_pricing_recommendations.
ranks = range(1, len(recommendations) + 1)

fig, ax = plt.subplots(figsize=(12, 5))
ax.bar(ranks, recommendations['revenue_impact'])
ax.set_title(f'Revenue Impact by Merchant (Top {TOP_N} Recommendations)')
ax.set_xlabel('Merchant Rank')
ax.set_ylabel('Revenue Impact ($)')
plt.show()