In [1]:
import numpy as np
import statsmodels.api as sm
from scipy import stats

# Python 教學：計算 A/B Test 所需樣本數

[部落格教學文章：A/B Test 樣本數究竟該怎麼算？](https://haosquare.com/calculate-ab-testing-sample-size/)

[好豪筆記](https://haosquare.com/)

Z 分數查表方法

用於樣本數公式簡化：

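令 $\sigma^2 = p_1(1-p_1)$、$\delta = |p_2-p_1|$，並假設 $p_1 \approx p_2$（兩組變異數近似相等），代入 $Z_{\alpha/2}=1.96$、$Z_{\beta}=0.84$：

$$
\begin{aligned}
n &= \frac{\left(Z_{\alpha/2}\sqrt{2p_1(1-p_1)}+Z_{\beta}\sqrt{p_1(1-p_1)+p_2(1-p_2)}\right)^2}{|p_2-p_1|^2} \\
&\approx \frac{\left(1.96 \times \sqrt{2} \times \sigma + 0.84 \times \sqrt{2} \times \sigma\right)^2}{\delta^2} \\
&= \frac{\left(2.8 \times 1.414 \times \sigma\right)^2}{\delta^2} \\
&\approx \frac{3.96^2 \times \sigma^2}{\delta^2} \approx 16 \times \frac{\sigma^2}{\delta^2}
\end{aligned}
$$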


In [2]:
alpha = 0.05  # significance level; halved below for the Z_{alpha/2} term
beta = 0.2    # type II error rate

# norm.ppf returns lower-tail quantiles, so both printed values are negative;
# their magnitudes are the Z scores plugged into the sample-size formula.
for tail_prob in (alpha / 2, beta):
    print(stats.norm.ppf(tail_prob))

-1.9599639845400545
-0.8416212335729142


# 樣本數計算範例問題

已知 APP 的某按鈕點擊率為 20%

A/B Test：新按鈕是否能讓點擊率增加 5 個百分點（絕對差距，即由 20% 提升到 25%）

請問要符合 alpha = 0.05、檢定力 = 80% 的話，每組所需樣本數為何？

---

試圖還原 [Evan Miller 樣本數計算機](https://www.evanmiller.org/ab-testing/sample-size.html) 首頁的預設值算出的結果，如下圖：

![](https://imgur.com/6hgNNl1.png)

方法 1：直接計算公式

$n = 16 * \frac{\sigma^2}{\delta^2}$

In [3]:
# Rule of thumb: n = 16 * sigma^2 / delta^2.
p_baseline = 0.2                                    # current click rate
delta = 0.05                                        # difference to detect
sigma = np.sqrt(p_baseline * (1 - p_baseline))      # sqrt of p * (1 - p)
n = 16 * sigma**2 / delta**2
n

1024.0

方法 2：[samplesize_proportions_2indep_onetail()](https://www.statsmodels.org/stable/generated/statsmodels.stats.proportion.samplesize_proportions_2indep_onetail.html)

In [4]:
# Example problem settings: 20% baseline rate, 5-point difference,
# 80% power, alpha = 0.05.
design = dict(diff=0.05, prop2=0.2, power=0.8, alpha=0.05)
sm.stats.samplesize_proportions_2indep_onetail(**design)

1093.7390457661652

方法 3：使用效果量（Effect Size）與 [tt_ind_solve_power()](https://www.statsmodels.org/stable/generated/statsmodels.stats.power.tt_ind_solve_power.html)

注意：[proportion_effectsize()](https://www.statsmodels.org/stable/generated/statsmodels.stats.proportion.proportion_effectsize.html) 計算的是 [Cohen's h](https://en.wikipedia.org/wiki/Cohen%27s_h)

In [5]:
# Effect size between the 20% and 25% rates (Cohen's h, per the note above this cell).
es = sm.stats.proportion_effectsize(0.2, 0.25)

# Leave the sample size unspecified so the solver returns it for
# 80% power at alpha = 0.05.
n = sm.stats.tt_ind_solve_power(
    effect_size=es,
    alpha=0.05,
    power=0.8,
)
print(n)

1092.857352102658


  return np.clip(_boost._nct_sf(x, df, nc), 0, 1)
  return np.clip(_boost._nct_cdf(x, df, nc), 0, 1)


# 驗證樣本數公式正確性

$n = 16 * \frac{\sigma^2}{\delta^2}$

In [6]:
def simulate(mu, sigma, diff, n=None,
             n_offsets=(-300, -200, -100, 0, 100, 200, 300),
             n_trials=2000, alpha=0.05):
    """Estimate by simulation how often a t-test detects a true mean difference.

    For each per-group sample size ``n + offset``, draws ``n_trials`` pairs of
    normal samples (means ``mu`` and ``mu + diff``, common standard deviation
    ``sigma``), runs ``stats.ttest_ind`` on each pair, and prints the
    percentage of trials whose p-value is at most ``alpha``.

    Parameters
    ----------
    mu : float
        Mean of the first (control) group.
    sigma : float
        Standard deviation used for both groups.
    diff : float
        True difference added to ``mu`` for the second group.
    n : int, optional
        Baseline per-group sample size. When omitted it is computed as
        ``int(16 * sigma**2 / diff**2)``, so the function does not depend on
        a notebook-global ``n`` being set beforehand.
    n_offsets : iterable of int, optional
        Offsets added to ``n`` to obtain the sample sizes that are simulated.
    n_trials : int, optional
        Number of simulated experiments per sample size.
    alpha : float, optional
        Significance threshold applied to each p-value.

    Returns
    -------
    None
        Results are printed, not returned.

    Raises
    ------
    ValueError
        If ``n_trials`` is less than 1, or ``n`` is omitted while ``diff`` is 0.
    """
    if n_trials < 1:
        raise ValueError("n_trials must be at least 1")
    if n is None:
        if diff == 0:
            raise ValueError("diff must be non-zero to derive the default n")
        n = int(16 * sigma**2 / diff**2)
    else:
        # np.random.normal requires an integer size; truncate like the default.
        n = int(n)

    print(f"required sample size = {n}\n")
    for n_chg in n_offsets:
        size = n + n_chg
        if size < 2:
            # A negative size makes np.random.normal raise, and fewer than two
            # observations per group gives no usable t-test; skip instead.
            print(f"n = {size}, skipped (sample size must be at least 2)\n")
            continue
        sig_cnt = 0
        for _ in range(n_trials):
            # Draw order (control first, then treatment) is kept so that a
            # seeded run reproduces the same random stream as before.
            test = stats.ttest_ind(
                np.random.normal(mu, sigma, size),
                np.random.normal(mu + diff, sigma, size),
            )
            if test.pvalue <= alpha:
                sig_cnt += 1
        print(f"n = {size}, proportion of statistical significance {round(sig_cnt/n_trials*100, 1)}%\n")

In [7]:
#@title 連續型資料
# Continuous data: check the rule-of-thumb sample size by simulation.
np.random.seed(9527)  # fixed seed so the simulated percentages are repeatable

mu = 165    # mean
sigma = 10  # standard deviation
diff = 2    # minimum detectable effect (MDE)

# Rule-of-thumb sample size; kept as a module-level name because
# simulate() reads it.
n = int(16 * sigma**2 / diff**2)

simulate(mu, sigma, diff)

required sample size = 400

n = 100, proportion of statistical significance 26.7%

n = 200, proportion of statistical significance 53.2%

n = 300, proportion of statistical significance 68.6%

n = 400, proportion of statistical significance 81.0%

n = 500, proportion of statistical significance 88.8%

n = 600, proportion of statistical significance 93.7%

n = 700, proportion of statistical significance 96.5%



In [8]:
#@title 比例資料
# Proportion data: same check, with sigma taken as sqrt(mu * (1 - mu)).
np.random.seed(9527)  # fixed seed so the simulated percentages are repeatable

mu = 0.2                         # baseline rate
sigma = np.sqrt(mu * (1 - mu))
diff = 0.05                      # difference to detect

# Rule-of-thumb sample size; kept as a module-level name because
# simulate() reads it.
n = int(16 * sigma**2 / diff**2)

simulate(mu, sigma, diff)

required sample size = 1024

n = 724, proportion of statistical significance 66.3%

n = 824, proportion of statistical significance 72.7%

n = 924, proportion of statistical significance 76.2%

n = 1024, proportion of statistical significance 80.8%

n = 1124, proportion of statistical significance 84.2%

n = 1224, proportion of statistical significance 87.2%

n = 1324, proportion of statistical significance 90.2%

