# 🧪 A/B Testing Simulation

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

%matplotlib inline

## 1. Generate Synthetic Data

In [None]:
# Seed NumPy's global generator so a fresh Restart & Run All reproduces
# exactly the same simulated users.
np.random.seed(42)

# Experiment parameters (A = control, B = test)
# Sample size per group
n_A = 5000
n_B = 5000
# True conversion probability per group
conv_rate_A = 0.10
conv_rate_B = 0.12
# Mean revenue of a converting user per group
avg_revenue_A = 50
avg_revenue_B = 55

In [None]:
# Conversion flags: one Bernoulli draw (binomial with a single trial) per user,
# giving a 0/1 array of length n_A / n_B. Group A is drawn first, then group B.
converted_A = np.random.binomial(n=1, p=conv_rate_A, size=n_A)
converted_B = np.random.binomial(n=1, p=conv_rate_B, size=n_B)

In [None]:
# Revenue per user: draw a Normal(avg_revenue, sd=10) amount for every user,
# then multiply by the 0/1 conversion flag so non-converters contribute zero.
# Draws are taken for A first and B second, and are not truncated at zero.
spend_A = np.random.normal(loc=avg_revenue_A, scale=10, size=n_A)
revenue_A = converted_A * spend_A

spend_B = np.random.normal(loc=avg_revenue_B, scale=10, size=n_B)
revenue_B = converted_B * spend_B

In [None]:
# Stack both groups into a single frame, one row per user: the first n_A rows
# are group A, the remaining n_B rows are group B. user_id runs 1..n_A+n_B.
n_total = n_A + n_B
group_labels = ['A'] * n_A + ['B'] * n_B

data = pd.DataFrame(
    {
        'user_id': np.arange(1, n_total + 1),
        'group': group_labels,
        'converted': np.concatenate((converted_A, converted_B)),
        'revenue': np.concatenate((revenue_A, revenue_B)),
    }
)
data.head()

## 2. Calculate Metrics

In [None]:
# Per-group summary table, indexed by group label:
#   users           - number of rows in the group
#   conversions     - sum of the 0/1 converted flag
#   conversion_rate - mean of the converted flag
#   arpu            - mean revenue over ALL users in the group (zeros included)
by_group = data.groupby('group')
metrics = by_group.agg(
    users=pd.NamedAgg(column='converted', aggfunc='count'),
    conversions=pd.NamedAgg(column='converted', aggfunc='sum'),
    conversion_rate=pd.NamedAgg(column='converted', aggfunc='mean'),
    arpu=pd.NamedAgg(column='revenue', aggfunc='mean'),
)
metrics

## 3. Statistical Test (Z-test for proportions)

In [None]:
# Pull the scalar counts needed by the z-test out of the summary table.
# Converters per group
a_conv, b_conv = metrics.at['A', 'conversions'], metrics.at['B', 'conversions']
# Users per group
n_A_users, n_B_users = metrics.at['A', 'users'], metrics.at['B', 'users']

In [None]:
# Observed conversion rate in each group.
p_A, p_B = a_conv / n_A_users, b_conv / n_B_users

# Pooled conversion rate: all conversions over all users, i.e. the single
# rate both groups would share if there were no difference between them.
pooled_conv = a_conv + b_conv
pooled_n = n_A_users + n_B_users
p_pool = pooled_conv / pooled_n

# Standard error of the difference in rates, using the pooled rate.
pooled_var = p_pool * (1 - p_pool)
se = np.sqrt(pooled_var * (1/n_A_users + 1/n_B_users))

In [None]:
# Z statistic: difference in conversion rates (B minus A) measured in units of
# the pooled standard error.
z_score = (p_B - p_A) / se

# One-sided (upper-tail) p-value, i.e. the alternative is "B converts better
# than A". norm.sf is the survival function (1 - cdf) evaluated directly, so
# the tail probability keeps its precision instead of rounding to exactly 0.0
# when z is large, which `1 - stats.norm.cdf(z_score)` would do.
p_value = stats.norm.sf(z_score)

print(f'Z-score: {z_score:.2f}')
print(f'P-value: {p_value:.4f}')

## 4. Visualize Conversion Rates

In [None]:
# Bar chart of the observed conversion rate per group, drawn on an explicit
# Axes object rather than through the implicit pyplot state.
fig, ax = plt.subplots()
ax.bar(metrics.index, metrics['conversion_rate'], color=['skyblue', 'lightgreen'])
ax.set_title('Conversion Rate: A vs B')
ax.set_ylabel('Conversion Rate')
plt.show()

## 5. Visualize ARPU

In [None]:
# Bar chart of average revenue per user (mean revenue over all users in the
# group, converters and non-converters alike), drawn on an explicit Axes.
fig, ax = plt.subplots()
ax.bar(metrics.index, metrics['arpu'], color=['orange', 'green'])
ax.set_title('ARPU: A vs B')
ax.set_ylabel('Average Revenue per User')
plt.show()

### ✅ Interpretation:
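- If the p-value is < 0.05, Test (B) has a statistically significantly higher **conversion rate** than Control (A) at the 5% level. The p-value above is one-sided: it only tests whether B converts better than A.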
- Check both **conversion rate** and **ARPU** for a full impact analysis. Note that the ARPU difference is only plotted in this notebook, not tested for significance.