# Notebook 5: Model Diagnostics and Visualization
## Understanding Model Performance and Marketing Insights

**Learning Objectives:**
- Generate comprehensive diagnostic plots
- Visualize marketing saturation curves
- Analyze price elasticity
- Interpret feature contributions

---

## Diagnostic Visualizations

Good diagnostics help us understand not just IF the model works, but HOW it works.

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import r2_score, mean_absolute_error
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including ones that could flag numerical problems in the diagnostics below.
warnings.filterwarnings('ignore')

# Global plot styling; applies to every figure created after this point.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require a fairly
# recent matplotlib release - confirm against the pinned version.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

print("Libraries loaded for visualization")

## Error Analysis

Understanding where and why the model makes mistakes:

In [None]:
def plot_error_analysis(y_true, y_pred, title="Model Performance"):
    '''Create a 2x3 grid of error-analysis plots for a regression model.

    Panels (row by row): actual vs predicted scatter, residuals vs
    predicted, residual histogram, absolute percentage error histogram,
    normal Q-Q plot of the residuals, and a text summary of R^2, MAPE
    and MAE. The figure is shown with ``plt.show()``; nothing is returned.

    Args:
        y_true: Observed target values; any 1-D array-like (list, NumPy
            array, pandas Series, ...).
        y_pred: Predicted values, same number of elements as ``y_true``.
        title: Figure-level title.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    '''
    from scipy import stats

    # Work on flat float arrays: this accepts plain lists, compares pandas
    # Series positionally instead of aligning them by index, and stops an
    # (n, 1) prediction array from broadcasting residuals to (n, n).
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    if y_true.size == 0:
        raise ValueError("y_true and y_pred must not be empty")
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length "
            f"(got {y_true.size} and {y_pred.size})"
        )

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # Actual vs Predicted, with the y = x reference line
    lo, hi = y_true.min(), y_true.max()
    axes[0, 0].scatter(y_true, y_pred, alpha=0.5, s=10)
    axes[0, 0].plot([lo, hi], [lo, hi], 'r--', lw=2)
    axes[0, 0].set_xlabel('Actual GMV')
    axes[0, 0].set_ylabel('Predicted GMV')
    r2 = r2_score(y_true, y_pred)
    axes[0, 0].set_title(f'Actual vs Predicted (R^2 = {r2:.3f})')
    axes[0, 0].grid(True, alpha=0.3)

    # Residuals (positive = model under-predicted)
    residuals = y_true - y_pred
    axes[0, 1].scatter(y_pred, residuals, alpha=0.5, s=10)
    axes[0, 1].axhline(y=0, color='r', linestyle='--')
    axes[0, 1].set_xlabel('Predicted GMV')
    axes[0, 1].set_ylabel('Residuals')
    axes[0, 1].set_title('Residual Plot')
    axes[0, 1].grid(True, alpha=0.3)

    # Residual Distribution
    axes[0, 2].hist(residuals, bins=50, edgecolor='black', alpha=0.7)
    axes[0, 2].set_xlabel('Residuals')
    axes[0, 2].set_ylabel('Frequency')
    axes[0, 2].set_title('Residual Distribution')
    axes[0, 2].grid(True, alpha=0.3)

    # Percentage Error Distribution.
    # Rows with a zero actual are excluded because the ratio is undefined.
    nonzero = y_true != 0
    percentage_errors = np.abs(residuals[nonzero] / y_true[nonzero]) * 100
    has_pct_errors = percentage_errors.size > 0
    # With no non-zero actuals MAPE is undefined; report NaN explicitly
    # instead of taking the mean of an empty array.
    mape = percentage_errors.mean() if has_pct_errors else float('nan')

    if has_pct_errors:
        axes[1, 0].hist(percentage_errors, bins=50, edgecolor='black',
                        alpha=0.7, color='orange')
        axes[1, 0].axvline(x=mape, color='r', linestyle='--',
                           label=f'MAPE = {mape:.1f}%')
        axes[1, 0].legend()
    else:
        axes[1, 0].text(0.5, 0.5,
                        'MAPE undefined\n(all actual values are 0)',
                        ha='center', va='center',
                        transform=axes[1, 0].transAxes)
    axes[1, 0].set_xlabel('Percentage Error (%)')
    axes[1, 0].set_ylabel('Frequency')
    axes[1, 0].set_title('Percentage Error Distribution')
    axes[1, 0].grid(True, alpha=0.3)

    # Q-Q Plot of residuals against a normal distribution
    stats.probplot(residuals, dist="norm", plot=axes[1, 1])
    axes[1, 1].set_title('Q-Q Plot (Normality Check)')
    axes[1, 1].grid(True, alpha=0.3)

    # Metrics Summary (text-only panel)
    axes[1, 2].axis('off')
    mae = mean_absolute_error(y_true, y_pred)

    metrics_text = f'''
    Performance Metrics
    ==================

    R^2:    {r2:.4f}
    MAPE:  {mape:.2f}%
    MAE:   ${mae:,.0f}

    Interpretation:
    Model explains {r2*100:.1f}% of variance
    Average error: {mape:.1f}%
    '''

    axes[1, 2].text(0.1, 0.5, metrics_text, fontsize=11,
                    family='monospace', verticalalignment='center')

    plt.suptitle(title, fontsize=16)
    plt.tight_layout()
    plt.show()

# Example with synthetic data
# Seeding NumPy's global RNG makes this demo reproducible; the seed also
# determines the values drawn by any later np.random.* call in the session.
np.random.seed(42)
# 1000 log-normally distributed "actual" values (underlying normal: mean 10, sigma 1).
y_true = np.random.lognormal(10, 1, 1000)
# Predictions are the actuals scaled by (1 + Gaussian noise with std 0.15).
y_pred = y_true * (1 + np.random.normal(0, 0.15, 1000))

plot_error_analysis(y_true, y_pred)

## Marketing Saturation Curves

Visualizing diminishing returns in marketing spend:

In [None]:
def plot_saturation_curves():
    '''Plot illustrative response and marginal-ROI curves for eight channels.

    One subplot is drawn per channel in a 2x4 grid. Each subplot shows:

    * the response curve ``alpha * spend**beta * exp(-gamma * spend)``
      on the left axis,
    * its derivative with respect to spend (labelled "ROI", i.e. the
      marginal return per extra unit of spend) on the right axis,
    * a horizontal reference line at ROI = 1, and
    * a vertical "Saturation" marker at the first spend level where the
      marginal ROI drops below 1, if that happens within the spend range.

    The curve parameters are placeholders drawn from NumPy's global RNG
    (a trained model would supply them), so the figure depends on the
    RNG state at call time. The figure is shown with ``plt.show()``;
    nothing is returned.
    '''
    channels = ['TV', 'Digital', 'SEM', 'Sponsorship',
                'Content', 'Affiliates', 'Radio', 'Online']

    fig, axes = plt.subplots(2, 4, figsize=(16, 8))

    # The spend grid is identical for every channel, so build it once.
    spend = np.linspace(0, 100, 100)

    for ax, channel in zip(axes.flatten(), channels):
        # Saturation parameters (would come from trained model)
        alpha = np.random.uniform(50, 150)
        beta = np.random.uniform(0.5, 1.5)
        gamma = np.random.uniform(0.01, 0.05)

        # Response curve
        response = alpha * (spend ** beta) * np.exp(-gamma * spend)

        # Marginal ROI: d(response)/d(spend) evaluated on the spend grid.
        roi = np.gradient(response, spend)

        # Plot response on the left axis
        ax2 = ax.twinx()
        handles = ax.plot(spend, response, 'b-', linewidth=2,
                          label='Response')
        ax.set_xlabel('Investment ($k)')
        ax.set_ylabel('Incremental GMV', color='b')
        ax.tick_params(axis='y', labelcolor='b')

        # Plot ROI on the right axis; the first sample (spend = 0) is left
        # out of the drawn line.
        handles += ax2.plot(spend[1:], roi[1:], 'r--', linewidth=1,
                            alpha=0.7, label='ROI')
        ax2.set_ylabel('ROI', color='r')
        ax2.tick_params(axis='y', labelcolor='r')
        ax2.axhline(y=1, color='gray', linestyle=':', alpha=0.5)

        # Mark saturation point: first spend where marginal ROI < 1
        below_break_even = np.flatnonzero(roi < 1.0)
        if below_break_even.size > 0:
            handles.append(
                ax.axvline(x=spend[below_break_even[0]], color='green',
                           linestyle=':', alpha=0.5, label='Saturation')
            )

        # The lines live on two different axes, so build one combined
        # legend; it is attached to the twin axis, which is drawn on top.
        ax2.legend(handles, [h.get_label() for h in handles],
                   loc='upper right', fontsize=7)

        ax.set_title(channel, fontsize=10)
        ax.grid(True, alpha=0.3)

    plt.suptitle('Marketing Channel Saturation Curves', fontsize=16)
    plt.tight_layout()
    plt.show()

# Render the demo figure. Channel parameters are drawn from NumPy's global
# RNG, so the curves depend on the RNG state at this point in the session.
plot_saturation_curves()

## Price Elasticity Analysis

Understanding how price changes affect demand:

In [None]:
def plot_price_elasticity():
    '''Plot constant-elasticity demand curves for three product categories.

    Draws one panel each for Premium, Standard and Budget products using
    ``sales_index = 100 * price_index ** elasticity`` over a price index
    of 0.5 to 2.0, with dashed reference lines at the baseline
    (price index 1.0, sales index 100). Each panel is annotated with the
    sales decrease that the plotted curve gives for a 10% price increase.
    The figure is shown with ``plt.show()``; nothing is returned.
    '''
    categories = ['Premium', 'Standard', 'Budget']
    elasticities = [-2.5, -1.5, -0.8]  # Different price sensitivities

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    # Price range is shared by all panels, so build it once.
    price_index = np.linspace(0.5, 2.0, 100)

    # Price point used for the worked example in the annotation (+10%).
    bumped_price = 1.1

    for ax, category, elasticity in zip(axes, categories, elasticities):
        # Sales response
        sales_response = 100 * (price_index ** elasticity)

        # Plot
        ax.plot(price_index, sales_response, 'b-', linewidth=2)
        ax.set_xlabel('Price Index (1.0 = baseline)')
        ax.set_ylabel('Sales Index')
        ax.set_title(f'{category} Products\nElasticity = {elasticity}')
        ax.grid(True, alpha=0.3)
        ax.axvline(x=1.0, color='r', linestyle='--', alpha=0.5)
        ax.axhline(y=100, color='r', linestyle='--', alpha=0.5)

        # Add annotation. The decrease is read off the plotted curve so the
        # text agrees with the point the arrow targets; the linear shortcut
        # |elasticity| * 10 overstates it (e.g. 25% vs 21.2% at -2.5).
        sales_at_bump = 100 * (bumped_price ** elasticity)
        sales_decrease = 100 - sales_at_bump
        ax.annotate(f'10% price increase ->\n{sales_decrease:.1f}% sales decrease',
                    xy=(bumped_price, sales_at_bump),
                    xytext=(1.3, 50),
                    arrowprops=dict(arrowstyle='->', color='red', alpha=0.5))

    plt.suptitle('Price Elasticity by Product Category', fontsize=16)
    plt.tight_layout()
    plt.show()

# Render the three-panel elasticity figure (Premium, Standard, Budget).
plot_price_elasticity()

## Key Takeaways

### Diagnostic Insights:
1. **Error patterns** reveal model strengths and weaknesses
2. **Saturation curves** show where additional marketing spend stops paying for itself (marginal ROI falls below 1)
3. **Price elasticity** guides pricing strategies
4. **Feature importance** identifies key business drivers

### Business Applications:
- Use saturation points to optimize marketing budgets
- Apply elasticity for pricing decisions
- Monitor prediction accuracy by segment
- Identify high-impact marketing channels

### Next Steps:
In Notebook 6, we'll apply these insights for business decision-making.