In [1]:
import pandas as pd
import numpy as np

np.random.seed(42)
n = 5000

# Simulate users
users = pd.DataFrame({
    'user_id': np.arange(1, n+1),
    'variant': np.random.choice(['A', 'B'], size=n, p=[0.5, 0.5]),
    'step_1_view': 1,
})

# Dropout rates at each step
users['step_2_add_to_cart'] = users['step_1_view'] & (np.random.rand(n) < 0.6)
users['step_3_checkout'] = users['step_2_add_to_cart'] & (np.random.rand(n) < 0.5)
users['step_4_purchase'] = users['step_3_checkout'] & (np.random.rand(n) < 0.7)

users.head()


Unnamed: 0,user_id,variant,step_1_view,step_2_add_to_cart,step_3_checkout,step_4_purchase
0,1,A,1,True,True,True
1,2,B,1,True,True,False
2,3,B,1,False,False,False
3,4,B,1,True,False,False
4,5,A,1,False,False,False


# **✅ 3. Perform Funnel Analysis
🔍 Analyze Drop-Offs

Count number of users at each step

Calculate conversion rates between steps

Visualize as a funnel chart**

In [2]:
funnel = {
    'View': users['step_1_view'].sum(),
    'Add to Cart': users['step_2_add_to_cart'].sum(),
    'Checkout': users['step_3_checkout'].sum(),
    'Purchase': users['step_4_purchase'].sum()
}

funnel_df = pd.DataFrame.from_dict(funnel, orient='index', columns=['Users'])
funnel_df['Conversion Rate'] = funnel_df['Users'] / funnel_df['Users'].iloc[0]
print(funnel_df)


             Users  Conversion Rate
View          5000           1.0000
Add to Cart   3109           0.6218
Checkout      1556           0.3112
Purchase      1077           0.2154


#✅ 4. Run A/B Testing
🎯 Define Hypothesis

H0 (Null Hypothesis): Variant A and B have the same purchase conversion rate
H1 (Alternative Hypothesis): One variant has a higher conversion rate

📐 Calculate Conversion Rate per Group***

In [3]:
ab = users.groupby('variant')['step_4_purchase'].agg(['sum', 'count'])
ab['conversion_rate'] = ab['sum'] / ab['count']
print(ab)


         sum  count  conversion_rate
variant                             
A        544   2500           0.2176
B        533   2500           0.2132


Perform Statistical Test

Use a Chi-Square Test or Z-Test for proportions.**

In [4]:
from statsmodels.stats.proportion import proportions_ztest

count = ab['sum'].values
nobs = ab['count'].values

z_stat, p_val = proportions_ztest(count, nobs)
print(f"Z-Stat: {z_stat:.2f}, p-value: {p_val:.4f}")


Z-Stat: 0.38, p-value: 0.7051


# 📌 Report:

"We conducted an A/B test to evaluate the effect of two different variants (A and B) on user conversion at the purchase step.
Using a Z-test for proportions, we found a Z-statistic of 0.38 and a p-value of 0.7051.
Since p > 0.05, we conclude that the observed difference is not statistically significant.
Recommendation: No action is needed — both variants perform similarly."