# Promotion Effectiveness A/B Testing

This notebook analyzes promotion effectiveness using:
- **A/B Testing Framework**: Statistical comparison of promotion strategies
- **Hypothesis Testing**: Two-proportion z-tests, t-tests, and effect size analysis (Cohen's h)
- **Conversion Rate Analysis**: Impact on customer behavior metrics
- **ROI Analysis**: Financial impact assessment

## Business Objective
Determine which promotion strategies drive the highest customer engagement and revenue while maintaining profitability.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Statistical testing libraries
# Note: Install these in your local environment:
# pip install scipy statsmodels
from scipy import stats
from scipy.stats import chi2_contingency, ttest_ind
import statsmodels.stats.api as sms
from statsmodels.stats.proportion import proportions_ztest

# Set random seed for reproducibility: this seeds NumPy's global generator,
# which the np.random.* draws in the data-generation cell below rely on
np.random.seed(42)

In [None]:
# Generate A/B test data for different promotion types
def generate_ab_test_data(n_users=2000, campaigns=None):
    """
    Generate synthetic A/B test data for promotion campaigns

    Draws come from NumPy's global random generator, so seed it beforehand
    (np.random.seed) to get reproducible data.

    Parameters
    ----------
    n_users : int, default 2000
        Number of simulated users in each campaign arm.
    campaigns : dict, optional
        Mapping of campaign name -> dict with the keys 'conversion_rate'
        (probability that a user converts), 'avg_order' (mean order amount
        of a converting user) and 'cost_per_user'. When omitted, the
        built-in four promotions plus a 'control' arm are used.

    Returns
    -------
    pandas.DataFrame
        One row per simulated user with the columns user_id, campaign,
        converted, order_amount, time_on_site, pages_viewed and
        cost_per_user. user_id runs from 1 upwards across all campaigns.
    """
    if campaigns is None:
        # Campaign parameters
        campaigns = {
            'discount_20': {'conversion_rate': 0.15, 'avg_order': 85, 'cost_per_user': 5},
            'discount_10': {'conversion_rate': 0.12, 'avg_order': 95, 'cost_per_user': 3},
            'free_shipping': {'conversion_rate': 0.14, 'avg_order': 90, 'cost_per_user': 8},
            'bogo': {'conversion_rate': 0.18, 'avg_order': 120, 'cost_per_user': 12},
            'control': {'conversion_rate': 0.10, 'avg_order': 100, 'cost_per_user': 0},
        }

    columns = ['user_id', 'campaign', 'converted', 'order_amount',
               'time_on_site', 'pages_viewed', 'cost_per_user']

    data = []
    user_id = 1

    for campaign, params in campaigns.items():
        for _ in range(n_users):
            # Simulate user behavior. The order of the random draws below is
            # kept fixed so that a given seed always yields the same dataset.
            converted = np.random.random() < params['conversion_rate']

            if converted:
                # Add some variance to order amounts (std dev 20), but never
                # let an order fall below the 10 minimum
                order_amount = max(np.random.normal(params['avg_order'], 20), 10)
            else:
                order_amount = 0

            # Simulate user engagement metrics: converters stay longer
            # (minutes) and view more pages on average
            time_on_site = np.random.exponential(5 if converted else 3)
            pages_viewed = np.random.poisson(8 if converted else 4)

            data.append({
                'user_id': user_id,
                'campaign': campaign,
                'converted': converted,
                'order_amount': round(order_amount, 2),
                'time_on_site': round(time_on_site, 1),
                'pages_viewed': pages_viewed,
                'cost_per_user': params['cost_per_user'],
            })

            user_id += 1

    # Passing the columns explicitly keeps the schema even when no rows were
    # generated (n_users == 0 or an empty campaign mapping)
    return pd.DataFrame(data, columns=columns)

# Build the synthetic experiment dataset used by the analysis cells below
ab_test_data = generate_ab_test_data()

# Report how many users were simulated and which campaign arms exist
campaign_column = ab_test_data['campaign']
total_rows = len(ab_test_data)
print(f"Generated A/B test data: {total_rows} users across {campaign_column.nunique()} campaigns")
print(f"\nCampaigns: {', '.join(campaign_column.unique())}")

# Preview the first rows (rendered as the cell output)
ab_test_data.head()

## A/B Test Analysis: Conversion Rates

In [None]:
# Calculate key metrics by campaign
# Named aggregation ties every output column to its source column and
# statistic, so no positional renaming of a MultiIndex is needed afterwards.
campaign_summary = ab_test_data.groupby('campaign').agg(
    Total_Users=('user_id', 'count'),
    Total_Conversions=('converted', 'sum'),
    Conversion_Rate=('converted', 'mean'),
    Total_Revenue=('order_amount', 'sum'),
    Avg_Time_on_Site=('time_on_site', 'mean'),
    Avg_Pages_Viewed=('pages_viewed', 'mean'),
    Cost_Per_User=('cost_per_user', 'first'),
).round(4)

# Average order value is revenue per *converting* user. Averaging
# order_amount over every user (non-converters contribute 0) would merely
# duplicate Revenue_Per_User. Arms without any conversion get NaN.
converting_users = campaign_summary['Total_Conversions'].where(
    campaign_summary['Total_Conversions'] != 0)
campaign_summary.insert(
    4, 'Avg_Order_Value',
    (campaign_summary['Total_Revenue'] / converting_users).round(4))

# Calculate additional metrics
campaign_summary['Revenue_Per_User'] = campaign_summary['Total_Revenue'] / campaign_summary['Total_Users']
campaign_summary['Total_Cost'] = campaign_summary['Total_Users'] * campaign_summary['Cost_Per_User']

# ROI is undefined when nothing was spent (the control arm costs 0 per user).
# Dividing by zero would yield +/-inf, which then wins every idxmax()/sort;
# NaN is skipped by those operations instead.
nonzero_cost = campaign_summary['Total_Cost'].where(campaign_summary['Total_Cost'] != 0)
campaign_summary['ROI'] = (campaign_summary['Total_Revenue'] - campaign_summary['Total_Cost']) / nonzero_cost
campaign_summary['Profit'] = campaign_summary['Total_Revenue'] - campaign_summary['Total_Cost']

print("Campaign Performance Summary:")
print(campaign_summary)

In [None]:
# Statistical significance testing
def test_conversion_significance(data, control_campaign='control', test_campaign='discount_20',
                                 alpha=0.05):
    """
    Test statistical significance of conversion rate differences

    Compares one test campaign with the control campaign on two metrics:
    conversion rate (two-proportion z-test plus Cohen's h effect size) and
    revenue per user (Welch's t-test on order_amount, non-converters
    included as 0).

    No multiple-comparison correction is applied here; callers that test
    several campaigns against the same control should account for that.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'campaign', 'converted' and 'order_amount'.
    control_campaign : str
        Value of 'campaign' identifying the control group.
    test_campaign : str
        Value of 'campaign' identifying the group under test.
    alpha : float, default 0.05
        Significance level used for the two 'significant_*' flags.

    Returns
    -------
    dict
        Keys: control_conversion_rate, test_conversion_rate,
        conversion_lift (relative to control; NaN if the control rate is 0),
        z_statistic, p_value_conversion, cohens_h, significant_conversion,
        t_statistic, p_value_revenue, significant_revenue.

    Raises
    ------
    ValueError
        If either campaign has no rows in `data`.
    """
    control_data = data[data['campaign'] == control_campaign]
    test_data = data[data['campaign'] == test_campaign]

    control_total = len(control_data)
    test_total = len(test_data)
    if control_total == 0 or test_total == 0:
        # Without this guard the rates below would silently become NaN/inf
        raise ValueError(
            f"Both campaigns need at least one user: "
            f"{control_campaign!r} has {control_total}, {test_campaign!r} has {test_total}"
        )

    # Conversion rate test
    control_conversions = control_data['converted'].sum()
    test_conversions = test_data['converted'].sum()

    # Z-test for proportions (test group first, so a positive z means the
    # test campaign converts better than control)
    count = np.array([test_conversions, control_conversions])
    nobs = np.array([test_total, control_total])

    z_stat, p_value = proportions_ztest(count, nobs)

    # Effect size (Cohen's h)
    p1 = test_conversions / test_total
    p2 = control_conversions / control_total
    cohens_h = 2 * (np.arcsin(np.sqrt(p1)) - np.arcsin(np.sqrt(p2)))

    # Relative lift is undefined when the control never converts
    conversion_lift = (p1 - p2) / p2 if p2 > 0 else np.nan

    # Revenue per user test (Welch's t-test): the zero-inflated order amounts
    # of two arms with different conversion rates cannot be assumed to share
    # a variance, so the pooled-variance default is not used
    t_stat, t_p_value = ttest_ind(test_data['order_amount'], control_data['order_amount'],
                                  equal_var=False)

    results = {
        'control_conversion_rate': p2,
        'test_conversion_rate': p1,
        'conversion_lift': conversion_lift,
        'z_statistic': z_stat,
        'p_value_conversion': p_value,
        'cohens_h': cohens_h,
        'significant_conversion': p_value < alpha,
        't_statistic': t_stat,
        'p_value_revenue': t_p_value,
        'significant_revenue': t_p_value < alpha
    }

    return results

# Compare each promotion arm with the control group and report the outcome
test_campaigns = ['discount_20', 'discount_10', 'free_shipping', 'bogo']
significance_results = {}

for campaign in test_campaigns:
    results = test_conversion_significance(ab_test_data, 'control', campaign)
    significance_results[campaign] = results

    # Pull out the headline numbers once, then emit the report in one go
    test_rate = results['test_conversion_rate']
    control_rate = results['control_conversion_rate']
    verdict = 'YES' if results['significant_conversion'] else 'NO'

    report_lines = [
        f"\n{campaign.upper()} vs CONTROL:",
        f"Conversion Rate: {test_rate:.3f} vs {control_rate:.3f}",
        f"Lift: {results['conversion_lift']:.2%}",
        f"Statistical Significance: {verdict} (p={results['p_value_conversion']:.4f})",
        f"Effect Size (Cohen's h): {results['cohens_h']:.3f}",
    ]
    print("\n".join(report_lines))

## Visualization and ROI Analysis

In [None]:
# Create comprehensive visualization
def _plot_ranked_bars(ax, values, title, ylabel, label_format, label_offset):
    """Draw `values` as a descending bar chart on `ax` and label every bar.

    `values` is a Series indexed by campaign, `label_format` turns a bar
    height into its label text, and `label_offset` lifts the label above the
    bar top (in data units).
    """
    ranked = values.sort_values(ascending=False)
    positions = range(len(ranked))
    bars = ax.bar(positions, ranked.values)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xticks(positions)
    ax.set_xticklabels(ranked.index, rotation=45)

    # Add value labels on bars
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height + label_offset,
                label_format(height), ha='center', va='bottom')
    return bars


fig, axes = plt.subplots(2, 3, figsize=(20, 12))

# 1. Conversion rates by campaign
_plot_ranked_bars(axes[0, 0], campaign_summary['Conversion_Rate'],
                  'Conversion Rate by Campaign', 'Conversion Rate',
                  '{:.2%}'.format, 0.001)

# 2. Revenue per user
_plot_ranked_bars(axes[0, 1], campaign_summary['Revenue_Per_User'],
                  'Revenue Per User by Campaign', 'Revenue Per User ($)',
                  '${:.0f}'.format, 1)

# 3. ROI comparison
# Arms with zero spend have no meaningful ROI (inf or NaN). Plotting them
# would put a bar and its label at a non-finite position, so they are left out.
roi_values = campaign_summary['ROI']
finite_roi = roi_values[np.isfinite(roi_values)]
_plot_ranked_bars(axes[0, 2], finite_roi,
                  'ROI by Campaign', 'ROI',
                  '{:.1f}x'.format, 0.1)
axes[0, 2].axhline(y=0, color='red', linestyle='--', alpha=0.7)  # break-even line

# 4. Order value distribution
for campaign in ab_test_data['campaign'].unique():
    campaign_orders = ab_test_data[ab_test_data['campaign'] == campaign]['order_amount']
    campaign_orders = campaign_orders[campaign_orders > 0]  # Only converted users
    if len(campaign_orders) > 0:
        axes[1, 0].hist(campaign_orders, alpha=0.6, label=campaign, bins=20)

axes[1, 0].set_title('Order Value Distribution by Campaign')
axes[1, 0].set_xlabel('Order Amount ($)')
axes[1, 0].set_ylabel('Frequency')
axes[1, 0].legend()

# 5. Engagement metrics
engagement_metrics = ab_test_data.groupby('campaign')[['time_on_site', 'pages_viewed']].mean()
x = np.arange(len(engagement_metrics))
width = 0.35

# The two metrics have different units, so pages viewed gets its own y-axis
time_bars = axes[1, 1].bar(x - width/2, engagement_metrics['time_on_site'], width, label='Time on Site')
ax2 = axes[1, 1].twinx()
page_bars = ax2.bar(x + width/2, engagement_metrics['pages_viewed'], width, label='Pages Viewed', alpha=0.7)

axes[1, 1].set_xlabel('Campaign')
axes[1, 1].set_ylabel('Time on Site (min)')
ax2.set_ylabel('Pages Viewed')
axes[1, 1].set_title('Engagement Metrics by Campaign')
axes[1, 1].set_xticks(x)
axes[1, 1].set_xticklabels(engagement_metrics.index, rotation=45)
axes[1, 1].legend(loc='upper left')
ax2.legend(loc='upper right')

# 6. Statistical significance heatmap
campaigns_list = list(significance_results.keys())
sig_matrix = np.zeros((len(campaigns_list), 2))

for i, campaign in enumerate(campaigns_list):
    sig_matrix[i, 0] = 1 if significance_results[campaign]['significant_conversion'] else 0
    sig_matrix[i, 1] = 1 if significance_results[campaign]['significant_revenue'] else 0

# Pin the colour scale to [0, 1]. With autoscaling, a matrix whose cells are
# all equal (e.g. every test significant) has no range to normalise over, so
# 1 would not reliably map to green and 0 to red.
im = axes[1, 2].imshow(sig_matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
axes[1, 2].set_title('Statistical Significance\n(vs Control)')
axes[1, 2].set_yticks(range(len(campaigns_list)))
axes[1, 2].set_yticklabels(campaigns_list)
axes[1, 2].set_xticks([0, 1])
axes[1, 2].set_xticklabels(['Conversion', 'Revenue'])

# Add text annotations
for i in range(len(campaigns_list)):
    for j in range(2):
        is_significant = sig_matrix[i, j] == 1
        axes[1, 2].text(j, i, 'SIG' if is_significant else 'NS', ha='center', va='center',
                        color='white' if is_significant else 'black', fontweight='bold')

plt.tight_layout()
plt.show()

## Advanced Statistical Analysis

In [None]:
# Power analysis and sample size calculation
def calculate_required_sample_size(p1, p2, alpha=0.05, power=0.8):
    """
    Calculate required sample size for detecting difference between two proportions

    The difference is expressed as Cohen's h and handed to statsmodels'
    zt_ind_solve_power, which solves for the number of observations in the
    first group (with its default settings the second group is the same size
    and the test is two-sided — see the statsmodels documentation).

    Parameters
    ----------
    p1, p2 : float
        The two proportions to tell apart, each in [0, 1]. Their order does
        not matter.
    alpha : float, default 0.05
        Significance level of the test.
    power : float, default 0.8
        Desired probability of detecting the difference.

    Returns
    -------
    float
        Required sample size as returned by the solver (not rounded).

    Raises
    ------
    ValueError
        If the proportions are equal or outside [0, 1]; no finite sample
        size can detect a zero (or undefined) effect.
    """
    # Cohen's h; only its magnitude matters for the sample size, so callers
    # may pass the proportions in either order
    effect_size = abs(2 * (np.arcsin(np.sqrt(p1)) - np.arcsin(np.sqrt(p2))))

    # `not (x > 0)` also catches NaN, which arises for proportions outside [0, 1]
    if not effect_size > 0:
        raise ValueError(
            f"p1 and p2 must be two different proportions in [0, 1], got {p1!r} and {p2!r}"
        )

    from statsmodels.stats.power import zt_ind_solve_power

    required_n = zt_ind_solve_power(effect_size=effect_size, alpha=alpha, power=power)

    return required_n

# Calculate confidence intervals for conversion rates
def calculate_conversion_ci(data, campaign, confidence=0.95):
    """
    Calculate confidence interval for conversion rate

    Uses the normal-approximation interval p +/- z * sqrt(p * (1 - p) / n),
    clipped to [0, 1] because the approximation can overshoot the valid
    range of a proportion when p is close to 0 or 1.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'campaign' and 'converted'.
    campaign : str
        Value of 'campaign' selecting the rows to analyse.
    confidence : float, default 0.95
        Two-sided confidence level.

    Returns
    -------
    tuple of float
        (lower bound, upper bound) of the conversion rate.

    Raises
    ------
    ValueError
        If `data` has no rows for `campaign`.
    """
    campaign_data = data[data['campaign'] == campaign]
    n = len(campaign_data)
    if n == 0:
        # Avoid a silent 0/0 that would produce a (nan, nan) interval
        raise ValueError(f"No rows found for campaign {campaign!r}")

    conversions = campaign_data['converted'].sum()
    p = conversions / n

    # Two-sided critical value, e.g. about 1.96 for 95% confidence
    z_score = stats.norm.ppf((1 + confidence) / 2)
    margin_error = z_score * np.sqrt((p * (1 - p)) / n)

    return (max(0.0, p - margin_error), min(1.0, p + margin_error))

# Report each campaign's observed conversion rate next to its 95% interval
header = "95% Confidence Intervals for Conversion Rates:"
print(header)
print("=" * 50)

observed_rates = campaign_summary['Conversion_Rate']
for campaign in ab_test_data['campaign'].unique():
    bounds = calculate_conversion_ci(ab_test_data, campaign)
    print(f"{campaign}: {observed_rates.loc[campaign]:.3f} [{bounds[0]:.3f}, {bounds[1]:.3f}]")

# Multi-armed bandit simulation
def simulate_bandit_allocation(campaign_data, n_periods=10):
    """
    Print a recommended traffic split across campaigns, best performer first

    Campaigns are ranked by their mean order_amount over all users. The top
    three receive 40%, 25% and 20% of traffic and every remaining campaign
    shares the leftover 15% equally. With three or fewer campaigns the tier
    shares are rescaled so the split still adds up to 100%.

    NOTE: despite the "Epsilon-Greedy" heading this is a fixed, tiered
    heuristic computed once from the observed data; nothing is simulated
    over time.

    Parameters
    ----------
    campaign_data : pandas.DataFrame
        Must contain the columns 'campaign' and 'order_amount'.
    n_periods : int, default 10
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    None
        The recommendation is written to stdout.
    """
    # Mean reward per campaign; sort=False keeps first-appearance order so
    # that ties are ranked in the order campaigns occur in the data
    campaign_rewards = campaign_data.groupby('campaign', sort=False)['order_amount'].mean()

    # Sort campaigns by performance
    sorted_campaigns = sorted(campaign_rewards.items(), key=lambda x: x[1], reverse=True)

    print("\nRecommended Traffic Allocation (Epsilon-Greedy):")
    if not sorted_campaigns:
        return

    total_traffic = 100
    tier_shares = [40, 25, 20]  # best, second best, third

    allocations = tier_shares[:len(sorted_campaigns)]
    n_remaining = len(sorted_campaigns) - len(allocations)
    if n_remaining > 0:
        # Everyone below the top three splits what is left, so the total
        # stays at exactly total_traffic
        leftover = total_traffic - sum(allocations)
        allocations = allocations + [leftover / n_remaining] * n_remaining
    else:
        # Fewer campaigns than tiers: scale the used tiers up to 100%
        scale = total_traffic / sum(allocations)
        allocations = [share * scale for share in allocations]

    for (campaign, reward), allocation in zip(sorted_campaigns, allocations):
        # :g drops a trailing ".0", so whole percentages print as e.g. "40%"
        print(f"{campaign}: {round(allocation, 1):g}% of traffic (avg reward: ${reward:.2f})")

# Print the recommended traffic split for the campaigns in the experiment data
simulate_bandit_allocation(ab_test_data)

## Business Recommendations

In [None]:
# Generate final recommendations
print("PROMOTION EFFECTIVENESS ANALYSIS - EXECUTIVE SUMMARY")
print("=" * 70)

# Find best performing campaign
# The ROI winner is chosen among the promotion arms with a finite ROI only.
# The control arm spends nothing, so its ROI is undefined (inf/NaN); picking
# it would also fail below, because significance_results has no 'control' key.
promo_summary = campaign_summary.drop(index='control', errors='ignore')
finite_roi = promo_summary['ROI'][np.isfinite(promo_summary['ROI'])]

best_roi_campaign = finite_roi.idxmax()
best_conversion_campaign = campaign_summary['Conversion_Rate'].idxmax()
best_revenue_campaign = campaign_summary['Revenue_Per_User'].idxmax()

print(f"\n🏆 WINNER - Best ROI: {best_roi_campaign.upper()}")
print(f"   ROI: {campaign_summary.loc[best_roi_campaign, 'ROI']:.1f}x")
print(f"   Profit: ${campaign_summary.loc[best_roi_campaign, 'Profit']:,.2f}")

print(f"\n📈 Best Conversion: {best_conversion_campaign.upper()}")
print(f"   Conversion Rate: {campaign_summary.loc[best_conversion_campaign, 'Conversion_Rate']:.2%}")

print(f"\n💰 Highest Revenue/User: {best_revenue_campaign.upper()}")
print(f"   Revenue per User: ${campaign_summary.loc[best_revenue_campaign, 'Revenue_Per_User']:.2f}")

print("\n📊 KEY INSIGHTS:")
# Compare profit per user rather than summed profit: adding up several
# promotion arms and dividing by a single control arm would overstate the
# lift by roughly the number of arms.
promo_profit_per_user = promo_summary['Profit'].sum() / promo_summary['Total_Users'].sum()
control_profit_per_user = (campaign_summary.loc['control', 'Profit']
                           / campaign_summary.loc['control', 'Total_Users'])
if control_profit_per_user != 0:
    promotion_lift = ((promo_profit_per_user - control_profit_per_user)
                      / abs(control_profit_per_user)) * 100
else:
    promotion_lift = float('nan')  # relative lift is undefined against a zero baseline

best_roi_stats = significance_results[best_roi_campaign]
n_significant = sum(bool(r['significant_conversion']) for r in significance_results.values())

print(f"• Promotion profit per user lift: {promotion_lift:.1f}% vs control")
print(f"• Best campaign shows {best_roi_stats['conversion_lift']:.1%} conversion lift")
print(f"• {n_significant} out of {len(significance_results)} campaigns show statistically significant improvements")

print("\n🎯 RECOMMENDATIONS:")
# Only claim statistical significance when the test actually found it
if best_roi_stats['significant_conversion']:
    print(f"1. Scale {best_roi_campaign} campaign - highest ROI with statistical significance")
else:
    print(f"1. Keep testing {best_roi_campaign} campaign - highest ROI, but its conversion lift is not yet statistically significant")
print(f"2. A/B test {best_conversion_campaign} vs {best_roi_campaign} for optimal balance")
print("3. Discontinue underperforming campaigns with negative ROI")
print("4. Implement dynamic allocation: 40% best campaign, 30% second best, 30% testing")

# Export results for dashboard
campaign_summary.to_csv('../dashboards/ab_test_results.csv')
ab_test_data.to_csv('../dashboards/ab_test_raw_data.csv', index=False)

print("\nData exported for dashboard visualization.")