In [None]:
import numpy as np
import pandas as pd
from scipy.stats import norm

In [None]:
# ----------------------- Simulating Click Data for A/B Testing -----------------------

# Both arms of the experiment are simulated with the same number of users.
N_exp = N_con = 10_000  # users in the experimental group / control group

In [None]:
# Generating Click Data (Binomial Distribution)
# Each user is one Bernoulli trial (binomial with n=1): 1 = clicked, 0 = did not.
# A seeded generator makes the simulation reproducible: without it every
# "Restart & Run All" draws different clicks and all downstream numbers change.
SEED = 42
rng = np.random.default_rng(SEED)
click_exp = pd.Series(rng.binomial(1, 0.4, size=N_exp))  # 40% click rate in experimental group
click_con = pd.Series(rng.binomial(1, 0.2, size=N_con))  # 20% click rate in control group


In [None]:
# Group labels: one "exp" / "con" tag per simulated user, built as a
# constant-filled array of the group's size.
exp_id = pd.Series(np.full(N_exp, "exp"))
con_id = pd.Series(np.full(N_con, "con"))

In [None]:
# Build one two-column frame per group by placing the click outcomes and the
# group labels side by side (columns are still unnamed at this point).
df_exp, df_con = (
    pd.concat([clicks, labels], axis="columns")
    for clicks, labels in ((click_exp, exp_id), (click_con, con_id))
)

In [None]:
# Give both frames the same schema: the 0/1 click outcome and the group label.
df_exp.columns = df_con.columns = ["click", "group"]

In [None]:
# Stack experimental rows on top of control rows; ignore_index renumbers the
# result 0..n-1 instead of keeping each frame's own row labels.
df_ab_test = pd.concat([df_exp, df_con], ignore_index=True)

In [None]:
# ----------------------- A/B Test Analysis -----------------------

# Total clicks per group: aggregate once, then read each group's total by label.
clicks_by_group = df_ab_test.groupby("group")["click"].sum()
X_con = clicks_by_group.loc["con"]
X_exp = clicks_by_group.loc["exp"]

print("Number of Clicks in Control: ", X_con)
print("Number of Clicks in Experimental: ", X_exp)

Number of Clicks in Control:  1987
Number of Clicks in Experimental:  4009


In [None]:
# Click-through rate (CTR) per group = clicks observed / users in that group.
p_con_hat, p_exp_hat = X_con / N_con, X_exp / N_exp

print("Click Probability in Control Group:", p_con_hat)
print("Click Probability in Experimental Group:", p_exp_hat)


Click Probability in Control Group: 0.1987
Click Probability in Experimental Group: 0.4009


In [None]:
# Pooled click probability: all clicks divided by all users, i.e. the single
# rate both groups would share if there were no difference between them.
p_pooled_hat = (X_exp + X_con) / (N_exp + N_con)
print("p^_pooled is: ", p_pooled_hat)


p^_pooled is:  0.2998


In [None]:
# Variance of the difference in sample proportions when both groups share the
# pooled rate: p(1 - p) scaled by the sum of the inverse group sizes.
pooled_variance = (
    p_pooled_hat
    * (1 - p_pooled_hat)
    * (1 / N_con + 1 / N_exp)
)
print("Pooled Variance is: ", pooled_variance)

Pooled Variance is:  4.1983992e-05


In [None]:
# Compute Standard Error (SE)
# SE is the square root of the pooled variance, so it is the standard error of
# the difference in click rates under the pooled (equal-rates) assumption.
SE = np.sqrt(pooled_variance)
print("Standard Error is: ", SE)

Standard Error is:  0.006479505536690281


In [None]:
# Compute Test Statistic (Z-score)
# Direction matters when reading the sign: the numerator is control minus
# experimental, so a negative Z means the experimental click rate is higher.
# The confidence-interval cell uses the opposite direction (exp - con); the
# two-sided p-value is unaffected because it is computed from abs(Test_stat).
Test_stat = (p_con_hat - p_exp_hat) / SE
print("Test Statistic for 2-sample Z-test is:", Test_stat)

Test Statistic for 2-sample Z-test is: -31.206084917288823


In [None]:
# Define significance level
# alpha is the tolerated false-positive rate of the test; it is used to derive
# the two-sided critical value as the (1 - alpha / 2) normal quantile.
alpha = 0.05
print("Alpha (significance level) is:", alpha)

Alpha (significance level) is: 0.05


In [None]:
# Compute Critical Value from Standard Normal Distribution
# Two-sided test: alpha is split across both tails, so the cut-off is the
# (1 - alpha / 2) quantile, obtained from the inverse CDF `norm.ppf`.
Z_crit = norm.ppf(1 - alpha / 2)
print("Z-critical value from Standard Normal Distribution:", Z_crit)

Z-critical value from Standard Normal Distribution: 1.959963984540054


In [None]:
# Compute p-value
# Two-sided p-value: twice the upper-tail probability (`norm.sf`, the survival
# function) of the absolute test statistic.
# Display caveat: rounding to 3 decimals prints 0.0 for any p-value below
# 0.0005, so a printed 0.0 means "very small", not exactly zero.
p_value = 2 * norm.sf(abs(Test_stat))
print("P-value of the 2-sample Z-test:", round(p_value, 3))

P-value of the 2-sample Z-test: 0.0


In [None]:
# Compute Confidence Interval for the difference in proportions (exp - con).
# The pooled standard error `SE` assumes both groups share one click rate,
# which is the right assumption for the hypothesis test but not for an
# interval estimate. The interval therefore uses the unpooled (Wald) standard
# error, built from each group's own observed rate and size.
diff_hat = p_exp_hat - p_con_hat
SE_unpooled = np.sqrt(
    p_exp_hat * (1 - p_exp_hat) / N_exp + p_con_hat * (1 - p_con_hat) / N_con
)
margin_of_error = Z_crit * SE_unpooled
CI = [round(diff_hat - margin_of_error, 3), round(diff_hat + margin_of_error, 3)]
print("Confidence Interval of the 2-sample Z-test is: ", CI)


Confidence Interval of the 2-sample Z-test is:  [0.19, 0.215]


In [None]:
# Define Minimum Detectable Effect (if applicable)
# delta is the smallest lift in click rate treated as practically meaningful.
# NOTE(review): the simulation draws clicks at 0.4 (exp) vs 0.2 (con), i.e. a
# true lift of 0.2, which is below this 0.31 threshold — confirm the intended
# value before using it to judge practical significance.
delta = 0.31  # Example threshold for practical significance
print("Minimum Detectable Effect (Delta) is: ", delta)

Minimum Detectable Effect (Delta) is:  0.31
