# Budget Optimization and Scenario Planning

This notebook covers:
- Budget allocation optimization
- ROI/ROAS analysis
- Scenario planning
- Incremental impact measurement

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import minimize, differential_evolution
import sys
sys.path.append('../utils')
from mmm_helpers import *

# Generate the synthetic dataset (104 periods) that the rest of the notebook uses
df = create_synthetic_mmm_data(n_periods=104)

# Spend columns treated as the media channels in every later analysis
channels = [
    'tv_spend',
    'digital_spend',
    'radio_spend',
    'print_spend',
]

# Quick look at the first rows and the overall size of the data
print("Sample data loaded:")
print(df.head())
print(f"\nDataset shape: {df.shape}")

## 1. Current Performance Analysis

In [None]:
# Calculate current ROAS for each channel
def calculate_channel_roas(data, channels, target='conversions', conversion_value=50):
    """
    Calculate ROAS for each channel (simplified attribution).

    Total conversions are split across channels in proportion to each
    channel's correlation with ``target``; this is a demonstration-grade
    heuristic, not a causal attribution.

    Parameters:
    -----------
    data : pandas.DataFrame
        Frame containing one column per channel plus the ``target`` column
    channels : list
        Names of the spend columns to evaluate
    target : str
        Name of the outcome column
    conversion_value : float
        Revenue assigned to one unit of ``target``

    Returns:
    --------
    pandas.DataFrame
        One row per channel with the columns ``spend``,
        ``attributed_conversions``, ``revenue`` and ``roas``.
        A channel whose correlation is undefined (for example constant
        spend) receives zero attributed conversions, and ``roas`` is 0
        when the channel has no spend.
    """
    total_conversions = data[target].sum()

    # Loop-invariant normaliser, computed once instead of once per channel.
    # ``sum`` skips NaN correlations, so undefined channels do not poison it.
    if len(channels):
        correlation_total = data[channels].corrwith(data[target]).sum()
    else:
        correlation_total = 0.0

    roas_data = {}

    for channel in channels:
        # Simple correlation-based attribution (for demonstration)
        correlation = data[channel].corr(data[target])

        if pd.isna(correlation) or correlation_total == 0:
            # No usable signal: attribute nothing rather than emit NaN/inf
            attributed_conversions = 0.0
        else:
            attributed_conversions = total_conversions * (correlation / correlation_total)

        total_spend = data[channel].sum()
        revenue = attributed_conversions * conversion_value
        roas = revenue / total_spend if total_spend > 0 else 0

        roas_data[channel] = {
            'spend': total_spend,
            'attributed_conversions': attributed_conversions,
            'revenue': revenue,
            'roas': roas
        }

    return pd.DataFrame(roas_data).T

current_performance = calculate_channel_roas(df, channels)
print("Current Channel Performance:")
print(current_performance.round(2))

# Visualize current performance: one row with three panels
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
spend_ax, roas_ax, revenue_ax = axes
channel_labels = current_performance.index

# Panel 1: share of total spend per channel
spend_ax.pie(current_performance['spend'], labels=channel_labels, autopct='%1.1f%%')
spend_ax.set_title('Current Spend Distribution')

# Panels 2 and 3: bar charts that differ only in column, title and y-label
bar_panels = (
    (roas_ax, 'roas', 'ROAS by Channel', 'ROAS'),
    (revenue_ax, 'revenue', 'Revenue Contribution by Channel', 'Revenue ($)'),
)
for panel, column, title, y_label in bar_panels:
    panel.bar(channel_labels, current_performance[column])
    panel.set_title(title)
    panel.set_ylabel(y_label)
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 2. Response Curve Modeling

In [None]:
class ResponseCurveOptimizer:
    """
    Build empirical spend/response curves per channel from binned data.

    Attributes set by the constructor:
    - data: DataFrame holding the channel spend columns and the target column
    - channels: list of spend column names to model
    - target: name of the response column
    - response_curves: dict mapping channel -> {'spend': array, 'response': array},
      filled in by ``fit_response_curves``
    """

    def __init__(self, data, channels, target='conversions'):
        self.data = data
        self.channels = channels
        self.target = target
        self.response_curves = {}

    def fit_response_curves(self):
        """
        Fit response curves for each channel using binned analysis.

        Each channel's spend is cut into at most 10 equal-width bins; the
        curve is the mean spend and mean target value of every non-empty
        bin. Channels that yield 3 or fewer points get no curve.
        """
        for channel in self.channels:
            # Create spend bins
            spend_values = self.data[channel]
            n_bins = min(10, len(spend_values.unique()))

            # A curve needs more than 3 points and there can never be more
            # points than bins, so skip early (this also avoids asking
            # pd.cut for zero bins on empty data).
            if n_bins <= 3:
                continue

            spend_bins = pd.cut(spend_values, bins=n_bins, include_lowest=True)

            # Calculate average response for each bin. ``observed=False`` is
            # the behaviour the code always relied on (empty bins appear as
            # NaN rows and are dropped below); passing it explicitly avoids
            # depending on the library default.
            binned_data = self.data.groupby(spend_bins, observed=False).agg({
                channel: 'mean',
                self.target: 'mean'
            }).dropna()

            if len(binned_data) > 3:  # Need enough points for curve fitting
                self.response_curves[channel] = {
                    'spend': binned_data[channel].values,
                    'response': binned_data[self.target].values
                }

    def predict_response(self, channel, spend_level):
        """
        Predict response for a given spend level using interpolation.

        Returns 0 when no curve was fitted for ``channel``. ``np.interp``
        holds the first/last response value for spend levels outside the
        fitted range, so no extrapolation takes place.
        """
        if channel not in self.response_curves:
            return 0

        curve = self.response_curves[channel]

        # Simple linear interpolation
        return np.interp(spend_level, curve['spend'], curve['response'])

    def plot_response_curves(self):
        """
        Plot response curves for all channels.

        Draws one subplot per fitted channel in a grid of at most two
        columns. Does nothing when no curve has been fitted.
        """
        n_channels = len(self.response_curves)
        if n_channels == 0:
            # Nothing to draw; also prevents a division by zero below
            return

        n_cols = min(2, n_channels)
        n_rows = (n_channels + n_cols - 1) // n_cols

        # squeeze=False always returns a 2-D array of axes, so a single
        # subplot needs no special casing before flattening.
        _, axes = plt.subplots(n_rows, n_cols, figsize=(12, 4 * n_rows), squeeze=False)
        axes = axes.flatten()

        for ax, (channel, curve) in zip(axes, self.response_curves.items()):
            ax.plot(curve['spend'], curve['response'], 'o-')
            ax.set_xlabel(f'{channel} Spend')
            ax.set_ylabel('Average Response')
            ax.set_title(f'{channel.replace("_", " ").title()} Response Curve')

        # Hide extra subplots
        for unused_ax in axes[n_channels:]:
            unused_ax.set_visible(False)

        plt.tight_layout()
        plt.show()

# Build the optimizer on the sample data, estimate the curves, then plot them
optimizer = ResponseCurveOptimizer(data=df, channels=channels)
optimizer.fit_response_curves()
optimizer.plot_response_curves()

## 3. Budget Optimization

In [None]:
def optimize_budget_allocation(total_budget, channels, response_curves,
                               min_spend_pct=0.05, max_spend_pct=0.6):
    """
    Optimize budget allocation across channels to maximize total response.

    Each channel with a fitted curve is modelled by the saturating form
    ``max_response * spend / (spend + max_spend / 2)``, where ``max_spend``
    and ``max_response`` are the largest values on its curve (so half of
    ``max_response`` is reached at half of ``max_spend``). Channels without
    a curve contribute zero response.

    Parameters:
    -----------
    total_budget : float
        Total marketing budget to allocate
    channels : list
        List of channel names
    response_curves : dict
        Response curve data for each channel
        (``{'spend': array, 'response': array}`` per channel)
    min_spend_pct : float
        Minimum spend percentage per channel
    max_spend_pct : float
        Maximum spend percentage per channel

    Returns:
    --------
    dict
        On success: ``allocations`` (channel -> share of budget), ``spends``
        (channel -> amount), ``total_response`` and ``success=True``.
        On failure: ``success=False`` and a ``message``. Failure covers an
        empty channel list and bounds that cannot sum to 100%, in addition
        to solver failures.
    """
    n_channels = len(channels)
    if n_channels == 0:
        return {'success': False, 'message': 'No channels to allocate budget across'}

    # The shares must sum to 1, which is only possible when
    # n * min <= 1 <= n * max (small tolerance for float rounding).
    tolerance = 1e-9
    if (min_spend_pct > max_spend_pct
            or n_channels * min_spend_pct > 1 + tolerance
            or n_channels * max_spend_pct < 1 - tolerance):
        return {
            'success': False,
            'message': (
                f'Infeasible bounds: {n_channels} channel(s) limited to '
                f'[{min_spend_pct}, {max_spend_pct}] cannot sum to 1'
            )
        }

    # Curve summaries do not change between objective evaluations, so
    # extract them once. Channels without a curve keep max_response 0 and a
    # dummy half-saturation of 1, which makes their response exactly 0.
    max_responses = np.zeros(n_channels)
    half_saturations = np.ones(n_channels)
    for i, channel in enumerate(channels):
        if channel in response_curves:
            curve = response_curves[channel]
            max_responses[i] = np.max(curve['response'])
            half_saturations[i] = np.max(curve['spend']) / 2

    def objective(allocations):
        """
        Objective function to maximize (negative because we minimize).
        """
        spend = np.asarray(allocations) * total_budget
        denominator = spend + half_saturations
        # Zero spend on a curve whose max spend is zero would be 0/0;
        # substituting 1 in the denominator yields a response of 0 there.
        safe_denominator = np.where(denominator != 0, denominator, 1.0)

        # Diminishing returns approximation
        return -np.sum(max_responses * (spend / safe_denominator))

    # Constraints: allocations must sum to 1
    constraints = [{'type': 'eq', 'fun': lambda x: np.sum(x) - 1}]

    # Bounds: each allocation between min and max percentage
    bounds = [(min_spend_pct, max_spend_pct) for _ in channels]

    # Initial guess: equal allocation (inside the bounds whenever they are feasible)
    x0 = np.full(n_channels, 1 / n_channels)

    # Optimize
    result = minimize(objective, x0, method='SLSQP', bounds=bounds, constraints=constraints)

    if not result.success:
        return {'success': False, 'message': result.message}

    optimal_allocations = result.x
    optimal_spends = optimal_allocations * total_budget

    return {
        'allocations': dict(zip(channels, optimal_allocations)),
        'spends': dict(zip(channels, optimal_spends)),
        'total_response': -result.fun,
        'success': True
    }

# Total historical spend across every channel is the budget to re-allocate
current_budget = df[channels].sum().sum()
print(f"Current total budget: ${current_budget:,.0f}")

# Search for the best split of that same budget
optimization_result = optimize_budget_allocation(
    current_budget, channels, optimizer.response_curves
)

if not optimization_result['success']:
    print(f"Optimization failed: {optimization_result['message']}")
else:
    print("\nOptimal Budget Allocation:")

    # Per-channel historical spend, computed once and reused for both columns
    current_spend = [df[ch].sum() for ch in channels]
    optimal_spends = optimization_result['spends']
    optimal_shares = optimization_result['allocations']

    optimal_df = pd.DataFrame({
        'Channel': channels,
        'Current_Spend': current_spend,
        'Current_Pct': [spend/current_budget*100 for spend in current_spend],
        'Optimal_Spend': [optimal_spends[ch] for ch in channels],
        'Optimal_Pct': [optimal_shares[ch]*100 for ch in channels]
    })

    # Differences between the recommended and the current plan
    optimal_df['Spend_Change'] = optimal_df['Optimal_Spend'] - optimal_df['Current_Spend']
    optimal_df['Pct_Change'] = optimal_df['Optimal_Pct'] - optimal_df['Current_Pct']

    print(optimal_df.round(2))

## 4. Scenario Planning

In [None]:
def run_budget_scenarios(base_budget, scenarios, channels, response_curves):
    """
    Run multiple budget scenarios for comparison.

    Every scenario scales ``base_budget`` by its multiplier and re-optimizes
    the allocation with ``optimize_budget_allocation``.

    Parameters:
    -----------
    base_budget : float
        Base budget amount
    scenarios : dict
        Dictionary of scenario names and budget multipliers
    channels : list
        List of channel names
    response_curves : dict
        Response curve data

    Returns:
    --------
    dict
        Scenario name -> dict with ``budget``, ``allocations``, ``spends``
        and ``expected_response``. Scenarios whose optimization fails are
        left out of the result and reported through a ``RuntimeWarning``.
    """
    import warnings

    scenario_results = {}

    for scenario_name, budget_multiplier in scenarios.items():
        scenario_budget = base_budget * budget_multiplier

        result = optimize_budget_allocation(
            scenario_budget, channels, response_curves
        )

        if not result['success']:
            # Still best-effort (the scenario is skipped), but no longer
            # silent: a missing scenario is otherwise hard to notice.
            warnings.warn(
                f"Scenario '{scenario_name}' skipped: optimization failed "
                f"({result.get('message', 'no message')})",
                RuntimeWarning,
                stacklevel=2,
            )
            continue

        scenario_results[scenario_name] = {
            'budget': scenario_budget,
            'allocations': result['allocations'],
            'spends': result['spends'],
            'expected_response': result['total_response']
        }

    return scenario_results

# Scenario name -> multiplier applied to the current budget
scenarios = {
    'Budget Cut -20%': 0.8,
    'Current Budget': 1.0,
    'Budget Increase +20%': 1.2,
    'Budget Increase +50%': 1.5,
}

scenario_results = run_budget_scenarios(
    current_budget, scenarios, channels, optimizer.response_curves
)

# One table row per scenario: headline numbers first, then each channel's
# share of the budget expressed in percent
scenario_comparison = [
    {
        'Scenario': scenario_name,
        'Total_Budget': result['budget'],
        'Expected_Response': result['expected_response'],
        **{
            f'{channel}_pct': result['allocations'][channel] * 100
            for channel in channels
        },
    }
    for scenario_name, result in scenario_results.items()
]

scenario_df = pd.DataFrame(scenario_comparison)
print("Scenario Comparison:")
print(scenario_df.round(2))

## 5. Incremental Impact Analysis

In [None]:
# Visualize scenario results in a 2x2 grid of panels
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Budget vs Response
scenario_names = list(scenario_results.keys())
budgets = [result['budget'] for result in scenario_results.values()]
responses = [result['expected_response'] for result in scenario_results.values()]

axes[0, 0].plot(budgets, responses, 'o-')
for name, budget, response in zip(scenario_names, budgets, responses):
    axes[0, 0].annotate(name, (budget, response),
                        xytext=(5, 5), textcoords='offset points')
axes[0, 0].set_xlabel('Total Budget ($)')
axes[0, 0].set_ylabel('Expected Response')
axes[0, 0].set_title('Budget vs Expected Response')

# Channel allocation changes across scenarios
channel_data = {
    channel: [result['allocations'][channel] * 100
              for result in scenario_results.values()]
    for channel in channels
}

x = np.arange(len(scenario_names))
# Each scenario's group of bars spans 0.8 x-units regardless of how many
# channels there are (this is 0.2 per bar for four channels), so groups
# never run into their neighbours.
width = 0.8 / max(len(channel_data), 1)

for i, (channel, allocations) in enumerate(channel_data.items()):
    axes[0, 1].bar(x + i*width, allocations, width, label=channel.replace('_', ' ').title())

axes[0, 1].set_xlabel('Scenario')
axes[0, 1].set_ylabel('Allocation (%)')
axes[0, 1].set_title('Channel Allocation by Scenario')
# Put each tick under the middle of its group of bars
axes[0, 1].set_xticks(x + width * (len(channel_data) - 1) / 2)
axes[0, 1].set_xticklabels(scenario_names, rotation=45)
axes[0, 1].legend()

# Efficiency analysis (Response per dollar)
efficiency = [response / budget for response, budget in zip(responses, budgets)]
axes[1, 0].bar(scenario_names, efficiency)
axes[1, 0].set_ylabel('Response per Dollar')
axes[1, 0].set_title('Marketing Efficiency by Scenario')
axes[1, 0].tick_params(axis='x', rotation=45)

# Incremental analysis: every scenario is compared against 'Current Budget'
base_idx = scenario_names.index('Current Budget')
base_response = responses[base_idx]
base_budget = budgets[base_idx]

incremental_response = [resp - base_response for resp in responses]
incremental_budget = [budget - base_budget for budget in budgets]

# Calculate incremental efficiency; a budget change of (almost) zero would
# divide by a very small number, so it is reported as 0 instead
incremental_efficiency = [
    delta_response / delta_budget if abs(delta_budget) > 0.01 else 0
    for delta_response, delta_budget in zip(incremental_response, incremental_budget)
]

# Filter out the base scenario for cleaner visualization
non_base_scenarios = [name for name in scenario_names if name != 'Current Budget']
non_base_efficiency = [eff for name, eff in zip(scenario_names, incremental_efficiency)
                       if name != 'Current Budget']

if non_base_scenarios:  # Only plot if we have non-base scenarios
    axes[1, 1].bar(non_base_scenarios, non_base_efficiency)
    axes[1, 1].set_ylabel('Incremental Response per Incremental Dollar')
    axes[1, 1].set_title('Incremental Marketing Efficiency')
    axes[1, 1].tick_params(axis='x', rotation=45)
    axes[1, 1].axhline(y=0, color='r', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()

# Summarise the scenario analysis: the scenario with the highest response per
# dollar, then the non-base scenario with the largest incremental efficiency
print("\nKey Insights:")
best_efficiency_idx = np.argmax(efficiency)
print(f"Most efficient scenario: {scenario_names[best_efficiency_idx]}")
print(f"Efficiency: {efficiency[best_efficiency_idx]:.4f} response per dollar")

if non_base_scenarios:
    # Rank by magnitude so the sign of the efficiency does not affect the pick
    best_incremental_idx = np.argmax(np.abs(non_base_efficiency))
    best_incremental_scenario = non_base_scenarios[best_incremental_idx]
    print(f"Best incremental scenario: {best_incremental_scenario}")
    print(f"Incremental efficiency: {non_base_efficiency[best_incremental_idx]:.4f}")

## Exercise: Custom Optimization

Try creating your own optimization scenarios:

In [None]:
# TODO: Create custom optimization scenarios
# Ideas to try:
# 1. Optimize for a different objective (e.g., maximize ROAS instead of total response)
# 2. Add constraints (e.g., minimum spend requirements for certain channels)
# 3. Include seasonality factors in the optimization
# 4. Test different response curve assumptions

def custom_optimization(total_budget, channels, objective='response'):
    """
    Exercise placeholder for a custom optimization routine.

    Intentionally unimplemented: it currently returns None.

    Parameters:
    -----------
    total_budget : float
        Total budget to optimize
    channels : list
        List of channel names
    objective : str
        Optimization objective ('response', 'roas', etc.)
    """
    # Your implementation here
    return None

print("Implement your custom optimization above!")

## Next Steps

In the final module (`05-interpretation/`), we'll cover:
- Model validation and diagnostics
- Results interpretation and storytelling
- Business recommendations
- Reporting and visualization best practices

You now understand the core concepts of budget optimization in marketing mix modeling (MMM)!