In [1]:
from scipy.stats import norm
import numpy as np
import pandas as pd
import random
random.seed(10)

In [2]:
df = pd.read_csv('AB_test_data.csv')

In [3]:
df.describe()

Unnamed: 0,purchase_TF,Variant,date,id
count,130000,130000,130000,130000
unique,2,2,396,130000
top,False,A,2019-12-22,0x71d511
freq,110415,125000,384,1


## 1. Conduct an A/B test to determine whether Alternative B improved conversion rates (site users book the property) over alternative A. 

**Hypothesis:** 
$$
H_0:\bar{x_{a}} = \bar{x_{b}}
$$
$$
H_a:\bar{x_{a}} < \bar{x_{b}}
$$

In [4]:
df['tf']=df['purchase_TF'].apply(lambda v:1 if v==True else 0)

In [5]:
df_a = df[df.Variant == 'A']
df_b = df[df.Variant == 'B']

In [6]:
df_a_date = df_a.loc[(df_a['date'] >= "2020-08-01" )& (df_a['date'] <= "2020-08-30"),]

In [7]:
df_b_date = df_b.loc[(df_b['date'] >= "2020-08-01" )& (df_b['date'] <= "2020-08-30"),]

In [8]:
mu_a = df_a_date.tf.mean()
print('mu_a:',mu_a)
mu_b = df_b_date.tf.mean()
print('mu_b:',mu_b)

mu_a: 0.1548
mu_b: 0.1766


In [9]:
avg_p = (df_a_date[df_a_date.tf == 1].tf.sum() + df_b_date[df_b_date.tf == 1].tf.sum()) / (df_a_date.shape[0] + df_b_date.shape[0])

In [10]:
z = (mu_b - mu_a) / np.sqrt( avg_p * (1- avg_p) * (( 1/df_a_date.shape[0] + 1/df_b_date.shape[0])))

In [11]:
z

2.9315960100463303

In [12]:
norm.ppf(0.95)

1.6448536269514722

$$
z\_score= 2.9315960100463303>1.645
$$

Reject null hypothesis. Alternative B improved conversion rates over alternative A.

## 2. Calculate the optimal sample size for a 95% confidence rate and test with 80% power. Conduct the test 10 times using samples of the optimal size. Report results.

In [13]:
df_date = df.loc[(df['date'] >= "2020-08-01" )& (df['date'] <= "2020-08-30"),]

In [14]:
df_date.shape

(10000, 5)

In [15]:
z1 = norm.ppf(0.975)
z2 = norm.ppf(0.8)
p_bar = df_date.tf.mean()

In [16]:
#Assume mu_a, mu_b and p_bar are retrieved from Historical Data
delta = mu_b-mu_a
n = (z1*np.sqrt(2*p_bar*(1-p_bar))+z2*np.sqrt(mu_a*(1-mu_a)+mu_b*(1-mu_b)))**2/(delta**2)
n

4565.1733985134615

In [17]:
ns = int(n//1+1)
ns

4566

In [18]:
sampleA = []
sampleB = []
for i in range(10):
    a = random.sample(range(5000),ns)
    b = random.sample(range(5000),ns)
    gpA = df_a_date.iloc[a]
    gpB = df_b_date.iloc[b]
    sampleA.append(gpA)
    sampleB.append(gpB)
    mu_a1 = gpA.tf.mean()
    mu_b1 = gpB.tf.mean()
    avg_p = (mu_a1+mu_b1)/2
    z_score = (mu_b - mu_a) / np.sqrt( avg_p * (1- avg_p) * (( 1/ns + 1/ns)))
    print('z_score_%s:'%i,z_score)
    if z_score > norm.ppf(0.95):
        print('Reject null hypothesis.')
    else:
        print('Cannot reject null hypothesis.')

z_score_0: 2.798643678788233
Reject null hypothesis.
z_score_1: 2.803090626870924
Reject null hypothesis.
z_score_2: 2.8038347004747664
Reject null hypothesis.
z_score_3: 2.8053253590423712
Reject null hypothesis.
z_score_4: 2.796431389608503
Reject null hypothesis.
z_score_5: 2.825013534016372
Reject null hypothesis.
z_score_6: 2.803090626870924
Reject null hypothesis.
z_score_7: 2.8016049841835104
Reject null hypothesis.
z_score_8: 2.7993827610296536
Reject null hypothesis.
z_score_9: 2.7971679944218897
Reject null hypothesis.


## 3. Conduct a sequential test for the 10 samples. For any of the samples, were you able to stop the test prior to using the full sample? What was the average number of iterations required to stop the test?

In [19]:
alpha = 0.05
beta = 0.2
A = (1-beta)/alpha
B = beta/(1-alpha)
print('A:',A)
print('B:',B)

A: 16.0
B: 0.2105263157894737


**Hypothesis:** 
$$
H_0:\bar{x_{b}} = 0.1548
$$
$$
H_a:\bar{x_{b}} = 0.1766
$$

In [20]:
for j in range(10):
    print('Sequential test',j)
    gpB = sampleB[j].reset_index(drop=True)
    _lambda= 1
    i= 0
    while i<=ns:
        x_B = gpB['tf'][i]
        y_0 = 0.1548*x_B+(1-x_B)*(1-0.1548)
        y_1 = 0.1766*x_B+(1-x_B)*(1- 0.1766)
        _lambda = _lambda * y_1/y_0
        if _lambda<=B:
            print('Not Reject H_0')
            print('lambda:',_lambda)
            print('i:',i)
            break
        if _lambda>=A:
            print('Reject H_0')
            print('lambda:',_lambda)
            print('i:',i)
            break
        i +=1   

Sequential test 0
Reject H_0
lambda: 16.216658440043613
i: 1276
Sequential test 1
Reject H_0
lambda: 17.12576236332308
i: 2869
Sequential test 2
Reject H_0
lambda: 16.686042345233542
i: 1432
Sequential test 3
Reject H_0
lambda: 16.92330714458281
i: 646
Sequential test 4
Reject H_0
lambda: 16.089218585198747
i: 1089
Sequential test 5
Reject H_0
lambda: 18.012008127498557
i: 843
Sequential test 6
Reject H_0
lambda: 17.815416960871918
i: 783
Sequential test 7
Reject H_0
lambda: 16.508337636418087
i: 798
Sequential test 8
Reject H_0
lambda: 16.821699729988264
i: 1764
Sequential test 9
Reject H_0
lambda: 17.66807925169945
i: 306


In [21]:
(2102+1196+1014+1281+252+362+2065+428+1244+3046)/10

1299.0

Sequential test 0
Reject H_0
lambda: 16.216658440043613
i: 1276
Sequential test 1
Reject H_0
lambda: 17.12576236332308
i: 2869
Sequential test 2
Reject H_0
lambda: 16.686042345233542
i: 1432
Sequential test 3
Reject H_0
lambda: 16.92330714458281
i: 646