In [4]:
import numpy as np
from tqdm.notebook import tqdm

from ab_stat_tests import calc_prop_test_sample_size, calc_prop_test_pvalue
from sequential_test import Sequential_test

In [5]:
from bayesian_testing.experiments import BinaryDataTest

In [8]:
COVERSION_RATE = 0.15
MDE = 0.05
MDE_REAL = 0.05
POWER = 0.9
ALPHA = 0.1
cr1 = COVERSION_RATE
cr2 = cr1 * (1 + MDE)
cr2_real = cr1 * (1 + MDE_REAL)

sample_size = int(calc_prop_test_sample_size(cr=cr1, mde=MDE, power=POWER, alpha=ALPHA, alternative='greater'))
data_a = np.random.choice([0,1], sample_size, p=[1 - cr1, cr1])
data_b = np.random.choice([0,1], sample_size, p=[1 - cr2_real, cr2_real])


test = BinaryDataTest()
test.add_variant_data('A', data_a)
test.add_variant_data('B', data_b)
test.probabs_of_being_best()['B']

0.99505

In [19]:
COVERSION_RATE = 0.35
MDE = 0.1
MDE_REAL = 0.1
ALPHA = 0.1
POWER = 0.9
N_SIMULATIONS = 10000 # 10000 recommended
SIMULATION_STEP = 1000 # how often do we check the result of the test

In [20]:
cr1 = COVERSION_RATE
cr2 = cr1 * (1 + MDE)
cr2_real = cr1 * (1 + MDE_REAL)
data_a = np.random.choice([0,1], 100000, p=[1 - cr1, cr1])
data_b = np.random.choice([0,1], 100000, p=[1 - cr2_real, cr2_real])
sample_size = int(calc_prop_test_sample_size(cr=cr1, mde=MDE, power=POWER, alpha=ALPHA, alternative='greater'))

seq_result_aa, seq_result_ab = [], []
sample_size_ab, sample_size_aa = [], []
times_classic_aa, times_classic_ab = 0, 0

for it in tqdm(range(N_SIMULATIONS)):
    a1 = np.random.choice(data_a, size=sample_size, replace=False)
    a2 = np.random.choice(data_a, size=sample_size, replace=False)
    b = np.random.choice(data_b, size=sample_size, replace=False)

    for i in range(500, sample_size, SIMULATION_STEP):
        a1_temp = a1[:i]
        a2_temp = a2[:i]
        test = BinaryDataTest()
        test.add_variant_data('A', a1_temp)
        test.add_variant_data('B', a2_temp)
        test_aa = test.probabs_of_being_best()['B']
        if test_aa > 1 - ALPHA:
            seq_result_aa.append(1)
            sample_size_aa.append(i)
            break
        elif test_aa < ALPHA:
            seq_result_aa.append(0)
            sample_size_aa.append(i)
            break
    if len(seq_result_aa) < it + 1:
        times_classic_aa += 1
        if calc_prop_test_pvalue(sum(a2), sample_size, sum(a1), sample_size) < ALPHA:
            seq_result_aa.append(1)
        else:
            seq_result_aa.append(0)
            sample_size_aa.append(sample_size)
            
    for i in range(500, sample_size, SIMULATION_STEP):
        a1_temp = a1[:i]
        b_temp = b[:i]
        test = BinaryDataTest()
        test.add_variant_data('A', a1_temp)
        test.add_variant_data('B', b_temp)
        test_ab = test.probabs_of_being_best()['B']
        if test_ab > 1 - ALPHA:
            seq_result_ab.append(1)
            sample_size_ab.append(i)
            break
        elif test_ab < ALPHA:
            seq_result_ab.append(0)
            sample_size_ab.append(i)
            break
    if len(seq_result_ab) < it + 1:
        times_classic_ab += 1
        if calc_prop_test_pvalue(sum(b), sample_size, sum(a1), sample_size) < ALPHA:
            seq_result_ab.append(1)
            sample_size_ab.append(sample_size)
        else:
            seq_result_ab.append(0)

HBox(children=(FloatProgress(value=0.0, max=10000.0), HTML(value='')))




In [21]:
print('Type I Error', np.sum(seq_result_aa) / N_SIMULATIONS)
print('Type II Error', (len(seq_result_ab) - np.sum(seq_result_ab)) / N_SIMULATIONS)

Type I Error 0.2012
Type II Error 0.1285


In [22]:
print('Stopped earlier if H0 True', 1 - times_classic_aa / N_SIMULATIONS)
print('Stopped earlier if H1 True', 1 - times_classic_ab / N_SIMULATIONS)

Stopped earlier if H0 True 0.3226
Stopped earlier if H1 True 0.7413000000000001


In [23]:
print('Saved time if H0 True', round((sample_size - np.mean(sample_size_aa)) / sample_size, 2))
print('Saved time if H1 True', round((sample_size - np.mean(sample_size_ab)) / sample_size, 2))

Saved time if H0 True 0.21
Saved time if H1 True 0.51


| Parameters  | Stopped earlier | Saved time | Sample size per var |
| --- | --- | --- | --- |
| CR 5% |
| MDE=5%, MDE_REAL=5% | 99% / 32% | 46% / 40% | 100k |
| MDE=5%, MDE_REAL=10% | 20% / 85% | 7% / 31% | 25k |
| MDE=5%, MDE_REAL=15% | 3% / 12% | 1% / 2% | 11k |
| MDE=5%, MDE_REAL=10% | 99% / 99% | 50% / 82% | 
| CR 15% |
| MDE=5%, MDE_REAL=5% | 85% / 97% | 47% / 59% | 30k |
| MDE=10%, MDE_REAL=10% | 23% / 68% | 8% / 25% | 7k |
| MDE=15%, MDE_REAL=15% | 3% / 35% | 1% / 7% | 3k |
| MDE=5%, MDE_REAL=10% | 85% / 99% | 47% / 75% | 
| MDE=5%, MDE_REAL=-5% | 85% / 99% | 48% / 80% | 
| CR 35% |
| MDE=5%, MDE_REAL=5% | 86% / 81% | 50% / 46% | 10k |
| MDE=10%, MDE_REAL=10% | 21% / 66% | 7% / 24% | 3k |
| MDE=15%, MDE_REAL=15% | 2% / 30% | 1% / 7% | 1k |
| MDE=5%, MDE_REAL=10% | 85% / 79% | 50% / 49% | 

In [262]:
calc_prob_between(beta_V, beta_C)

TypeError: 'JointGrid' object is not callable

In [261]:
beta_C.args[0]

178