In [38]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# Function to generate mock data

def generate_ab_test_simpsons_paradox():
    """
    Generate A/B test data: Control vs Test
    
    TRUE Simpson's Paradox:
    - Control wins in EVERY user segment
    - But Test wins OVERALL due to traffic distribution
    """
    np.random.seed(42)
    
    # Define user segments with conversion rates
    segments = {
        'High_Value_Users': {
            'volume': 3000,
            'control_rate': 0.50,    # Control: 50% conversion
            'test_rate': 0.48,       # Test: 48% conversion (Control wins by 2pp)
        },
        'Medium_Value_Users': {
            'volume': 7000, 
            'control_rate': 0.20,    # Control: 20% conversion
            'test_rate': 0.18,       # Test: 18% conversion (Control wins by 2pp)
        },
        'Low_Value_Users': {
            'volume': 30000,         # HUGE segment
            'control_rate': 0.05,    # Control: 5% conversion  
            'test_rate': 0.04,       # Test: 4% conversion (Control wins by 1pp)
        }
    }
    
    # CRITICAL: Uneven traffic allocation creates the paradox
    # Test gets more high-converting users, Control gets more low-converting users
    traffic_split = {
        'High_Value_Users':   {'control': 0.3, 'test': 0.7},  # Test gets 70% of high-value
        'Medium_Value_Users': {'control': 0.4, 'test': 0.6},  # Test gets 60% of medium-value  
        'Low_Value_Users':    {'control': 0.6, 'test': 0.4}   # Control gets 60% of low-value
    }
    
    data = []
    user_id = 1
    
    for segment_name, segment_config in segments.items():
        split = traffic_split[segment_name]
        total_volume = segment_config['volume']
        
        # Calculate users per group
        control_users = int(total_volume * split['control'])
        test_users = int(total_volume * split['test'])
        
        # Generate Control group data
        for _ in range(control_users):
            converted = np.random.binomial(1, segment_config['control_rate'])
            revenue = np.random.normal(100, 25) if converted else 0
            
            data.append({
                'user_id': user_id,
                'group': 'Control',
                'user_segment': segment_name,
                'converted': converted,
                'revenue': max(0, revenue),
                'days_since_signup': np.random.randint(1, 365)
            })
            user_id += 1
            
        # Generate Test group data
        for _ in range(test_users):
            converted = np.random.binomial(1, segment_config['test_rate'])
            revenue = np.random.normal(100, 25) if converted else 0
            
            data.append({
                'user_id': user_id,
                'group': 'Test',
                'user_segment': segment_name, 
                'converted': converted,
                'revenue': max(0, revenue),
                'days_since_signup': np.random.randint(1, 365)
            })
            user_id += 1
    
    return pd.DataFrame(data)


# Function to analyze and detect simpson's paradox

def analyze_test_vs_control(df):
    """
    Analyze Test vs Control and reveal Simpson's Paradox
    """
    print("A/B TEST ANALYSIS: Test vs Control")
    print("="*60)
    
    # Overall Results
    print("\n OVERALL RESULTS:")
    print("-" * 30)
    
    overall_results = df.groupby('group').agg({
        'user_id': 'count',
        'converted': ['sum', 'mean'],
        'revenue': 'mean'
    }).round(4)
    
    control_users = overall_results.loc['Control', ('user_id', 'count')]
    control_conversions = overall_results.loc['Control', ('converted', 'sum')]
    control_rate = overall_results.loc['Control', ('converted', 'mean')]
    control_revenue = overall_results.loc['Control', ('revenue', 'mean')]
    
    test_users = overall_results.loc['Test', ('user_id', 'count')]
    test_conversions = overall_results.loc['Test', ('converted', 'sum')]
    test_rate = overall_results.loc['Test', ('converted', 'mean')]
    test_revenue = overall_results.loc['Test', ('revenue', 'mean')]
    
    print(f"Control: {control_users:,} users, {control_conversions:,} conversions")
    print(f"Conversion Rate: {control_rate:.4f} ({control_rate*100:.2f}%)")
    print(f"Avg Revenue: ${control_revenue:.2f}")
    
    print(f"\nTest:{test_users:,} users, {test_conversions:,} conversions") 
    print(f"Conversion Rate: {test_rate:.4f} ({test_rate*100:.2f}%)")
    print(f"Avg Revenue: ${test_revenue:.2f}")
    
    # Overall winner
    overall_winner = "Test" if test_rate > control_rate else "Control"
    lift = ((test_rate / control_rate - 1) * 100) if control_rate > 0 else 0
    
    print(f"\n OVERALL WINNER: {overall_winner}")
    print(f"Lift: {lift:+.2f}%")
    
    # By Segment Analysis
    print(f"\n{'='*60}")
    print(f"\n RESULTS BY USER SEGMENT:")
    print("-" * 30)
    
    segment_winners = []
    
    for segment in ['High_Value_Users', 'Medium_Value_Users', 'Low_Value_Users']:
        segment_data = df[df['user_segment'] == segment]
        segment_results = segment_data.groupby('group').agg({
            'user_id': 'count',
            'converted': ['sum', 'mean']
        }).round(4)
        
        print(f"\n{segment.replace('_', ' ')}:")
        
        control_seg_users = segment_results.loc['Control', ('user_id', 'count')]
        control_seg_rate = segment_results.loc['Control', ('converted', 'mean')]
        control_seg_conv = segment_results.loc['Control', ('converted', 'sum')]
        
        test_seg_users = segment_results.loc['Test', ('user_id', 'count')] 
        test_seg_rate = segment_results.loc['Test', ('converted', 'mean')]
        test_seg_conv = segment_results.loc['Test', ('converted', 'sum')]
        
        segment_winner = "Test" if test_seg_rate > control_seg_rate else "Control"
        segment_lift = ((test_seg_rate / control_seg_rate - 1) * 100) if control_seg_rate > 0 else 0
        segment_winners.append(segment_winner)
        
        print(f"  Control: {control_seg_users:,} users → {control_seg_conv:,} conv → {control_seg_rate:.4f} ({control_seg_rate*100:.2f}%)")
        print(f"  Test:    {test_seg_users:,} users → {test_seg_conv:,} conv → {test_seg_rate:.4f} ({test_seg_rate*100:.2f}%)")
        print(f"  Winner:  {segment_winner} (Lift: {segment_lift:+.2f}%)")
    
    # Simpson's Paradox Check
    print(f"\n{'='*60}")
    print(f"\n SIMPSON'S PARADOX ANALYSIS:")
    print("-" * 30)
    
    all_segments_same_winner = len(set(segment_winners)) == 1
    segment_consensus = segment_winners[0] if all_segments_same_winner else "Mixed"
    
    if all_segments_same_winner and segment_consensus != overall_winner:
        print("SIMPSON'S PARADOX DETECTED!")
        print(f"Overall Winner: {overall_winner}")
        print(f"Every Segment Won By: {segment_consensus}")
        
        print(f"EXPLANATION:")
        print(f"{overall_winner} gets more traffic from high-converting segments")
        print(f"{segment_consensus} performs better in each individual segment")
        print(f"But traffic distribution reverses the overall result!")
        
        print(f"\n TRAFFIC DISTRIBUTION BY SEGMENT:")
        for segment in ['High_Value_Users', 'Medium_Value_Users', 'Low_Value_Users']:
            seg_data = df[df['user_segment'] == segment]
            control_pct = len(seg_data[seg_data['group'] == 'Control']) / len(seg_data) * 100
            test_pct = len(seg_data[seg_data['group'] == 'Test']) / len(seg_data) * 100
            total_users = len(seg_data)
            
            print(f"   {segment.replace('_', ' '):18}: Control {control_pct:2.0f}% | Test {test_pct:2.0f}% | Total: {total_users:,}")
            
    else:
        print("No Simpson's Paradox detected")
        print(f"Overall: {overall_winner} | Segments: {segment_consensus}")
    
    print(f"\nSUMMARY:")
    print(f"Total Users: {len(df):,}")
    print(f"Total Conversions: {df['converted'].sum():,}")
    print(f"Overall Conversion Rate: {df['converted'].mean():.4f} ({df['converted'].mean()*100:.2f}%)")
    
    return df

# Generate the data and run analysis
print("Generating A/B Test Data with Simpson's Paradox...")
df = generate_ab_test_simpsons_paradox()
print(" Data generated! Running analysis...\n")
df.to_csv("data/AB_test_mock_data.csv")

# Run the analysis
analyzed_df = analyze_test_vs_control(df)

print(f"\n Dataset ready! Use 'df' to explore {len(df):,} users across Test and Control groups.")


Generating A/B Test Data with Simpson's Paradox...
 Data generated! Running analysis...

A/B TEST ANALYSIS: Test vs Control

 OVERALL RESULTS:
------------------------------
Control: 21,700 users, 1,907 conversions
Conversion Rate: 0.0879 (8.79%)
Avg Revenue: $8.88

Test:18,300 users, 2,292 conversions
Conversion Rate: 0.1252 (12.52%)
Avg Revenue: $12.52

 OVERALL WINNER: Test
Lift: +42.43%


 RESULTS BY USER SEGMENT:
------------------------------

High Value Users:
  Control: 900 users → 459 conv → 0.5100 (51.00%)
  Test:    2,100 users → 1,063 conv → 0.5062 (50.62%)
  Winner:  Control (Lift: -0.75%)

Medium Value Users:
  Control: 2,800 users → 547 conv → 0.1954 (19.54%)
  Test:    4,200 users → 750 conv → 0.1786 (17.86%)
  Winner:  Control (Lift: -8.60%)

Low Value Users:
  Control: 18,000 users → 901 conv → 0.0501 (5.01%)
  Test:    12,000 users → 479 conv → 0.0399 (3.99%)
  Winner:  Control (Lift: -20.36%)


 SIMPSON'S PARADOX ANALYSIS:
------------------------------
SIMPSON'S PA