In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm

# Seed NumPy's legacy global RNG so the np.random.* draws in this notebook are reproducible.
np.random.seed(42)

In [2]:
# --- Simulation parameters -------------------------------------------------
N = 200_000            # total number of simulated impressions
true_ctr_A = 0.055     # click probability used to simulate variant A
true_ctr_B = 0.060     # click probability used to simulate variant B

# Assign every impression to one of the two variants with equal probability.
variants = np.random.choice(["A", "B"], size=N, p=[0.5, 0.5])

# Draw one Bernoulli click per impression, variant by variant.
# A is drawn before B so the sequence of RNG calls is fixed.
clicks = np.zeros(N, dtype=int)
for label, rate in (("A", true_ctr_A), ("B", true_ctr_B)):
    mask = variants == label
    clicks[mask] = np.random.binomial(1, rate, size=mask.sum())

# One row per impression: the assigned variant and whether it was clicked.
df = pd.DataFrame({"variant": variants, "clicked": clicks})

df.head()

Unnamed: 0,variant,clicked
0,A,0
1,B,0
2,B,0
3,B,1
4,A,0


In [3]:
# Per-variant summary: impressions ("count"), clicks ("sum") and the observed CTR.
results = (
    df.groupby("variant")["clicked"]
    .agg(["count", "sum"])
    .assign(ctr=lambda t: t["sum"] / t["count"])
)
results

Unnamed: 0_level_0,count,sum,ctr
variant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,99768,5579,0.05592
B,100232,5986,0.059721


In [5]:
# Variant A: impressions, clicks and observed click-through rate.
n_A = results.at["A", "count"]
c_A = results.at["A", "sum"]
ctr_A = c_A / n_A

# Variant B: impressions, clicks and observed click-through rate.
n_B = results.at["B", "count"]
c_B = results.at["B", "sum"]
ctr_B = c_B / n_B

print(ctr_A, ctr_B)

0.05591973378237511 0.05972144624471227


In [8]:
# Two-proportion z-test of H0: CTR_B == CTR_A against the one-sided H1: CTR_B > CTR_A.
p_pool = (c_A + c_B) / (n_A + n_B)                          # pooled click probability under H0
se = np.sqrt(p_pool * (1 - p_pool) * (1/n_A + 1/n_B))       # standard error of the CTR difference under H0
z = (ctr_B - ctr_A) / se                                    # z-score
# Upper-tail (one-sided) p-value. norm.sf is the survival function (1 - cdf),
# evaluated without the cancellation that makes `1 - norm.cdf(z)` collapse to 0 for large z.
# NOTE(review): sample_size_ztest plans with a two-sided alpha (alpha/2) -- confirm that a
# one-sided test is intended here.
p_value = norm.sf(z)

{"z_score": z, "p_value": p_value, "ctr_A": ctr_A, "ctr_B": ctr_B}

{'z_score': np.float64(3.641996222168472),
 'p_value': np.float64(0.0001352660221265456),
 'ctr_A': np.float64(0.05591973378237511),
 'ctr_B': np.float64(0.05972144624471227)}

In [9]:
def sample_size_ztest(baseline_ctr, target_ctr, alpha=0.05, power=0.8):
    """Sample size per variant for a two-sided two-proportion z-test.

    Uses the normal-approximation formula with unpooled variances::

        n = (p1*(1 - p1) + p2*(1 - p2)) * (z_{1-alpha/2} + z_{power})**2 / (p2 - p1)**2

    Parameters
    ----------
    baseline_ctr : float
        Click-through rate of the baseline variant, in [0, 1].
    target_ctr : float
        Click-through rate to be detected, in [0, 1]. Must differ from
        ``baseline_ctr``.
    alpha : float, default 0.05
        Two-sided significance level, strictly between 0 and 1.
    power : float, default 0.8
        Desired power (1 - beta), strictly between 0 and 1.

    Returns
    -------
    int
        Number of observations required in each variant, rounded up.

    Raises
    ------
    ValueError
        If a rate is outside [0, 1], if ``alpha`` or ``power`` is outside
        (0, 1), or if the two rates are equal (no effect to detect).
    """
    p1 = baseline_ctr
    p2 = target_ctr

    # The `not (lo <= x <= hi)` form also rejects NaN.
    if not (0 <= p1 <= 1) or not (0 <= p2 <= 1):
        raise ValueError("baseline_ctr and target_ctr must be within [0, 1]")
    if not (0 < alpha < 1):
        raise ValueError("alpha must be strictly between 0 and 1")
    if not (0 < power < 1):
        raise ValueError("power must be strictly between 0 and 1")

    delta = abs(p2 - p1)
    if delta == 0:
        # Without this guard the division yields inf and int() raises an
        # unhelpful OverflowError.
        raise ValueError("baseline_ctr and target_ctr must differ")

    z_alpha = norm.ppf(1 - alpha/2)   # two-sided critical value
    z_beta = norm.ppf(power)          # quantile matching the requested power

    # Sum of the two Bernoulli variances (unpooled).
    variance_sum = p1*(1-p1) + p2*(1-p2)

    n = (variance_sum * (z_alpha + z_beta)**2) / delta**2
    return int(np.ceil(n))

# Sample size returned by sample_size_ztest for the simulated CTRs,
# using its default alpha=0.05 and power=0.8.
required = sample_size_ztest(baseline_ctr=true_ctr_A, target_ctr=true_ctr_B)
required

34025