## General assumptions:

### Version A is treated as the population, while version B is treated as the sample

In [24]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.stats.api as sms
from math import ceil

In [25]:
df = pd.read_csv('/Users/reesezhuang/Desktop/AB_test_data.csv')
df.head(5)

Unnamed: 0,purchase_TF,Variant,date,id
0,False,A,2019-11-08,0x25b44a
1,False,B,2020-08-27,0x46271e
2,False,A,2020-06-11,0x80b8f1
3,False,B,2020-08-22,0x8d736d
4,False,A,2020-08-05,0x96c9c8


In [26]:
purchase_statu = {
    False: 0,
    True: 1,
}

df.purchase_TF = df.purchase_TF.map(purchase_statu)
df.head()

Unnamed: 0,purchase_TF,Variant,date,id
0,0,A,2019-11-08,0x25b44a
1,0,B,2020-08-27,0x46271e
2,0,A,2020-06-11,0x80b8f1
3,0,B,2020-08-22,0x8d736d
4,0,A,2020-08-05,0x96c9c8


In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 130000 entries, 0 to 129999
Data columns (total 4 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   purchase_TF  130000 non-null  int64 
 1   Variant      130000 non-null  object
 2   date         130000 non-null  object
 3   id           130000 non-null  object
dtypes: int64(1), object(3)
memory usage: 4.0+ MB


In [28]:
df.id.nunique()

130000

In [29]:
df.groupby("Variant")["purchase_TF"].value_counts()

Variant  purchase_TF
A        0              106298
         1               18702
B        0                4117
         1                 883
Name: purchase_TF, dtype: int64

# Q1

In [30]:
ab_summary = df.pivot_table(values='purchase_TF', index='Variant', aggfunc=np.sum)
# add additional columns to the pivot table
ab_summary['total'] = df.pivot_table(values='purchase_TF', index='Variant', aggfunc=lambda x: len(x))
ab_summary['rate'] = df.pivot_table(values='purchase_TF', index='Variant')

In [31]:
ab_summary

Unnamed: 0_level_0,purchase_TF,total,rate
Variant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,18702,125000,0.149616
B,883,5000,0.1766


In [32]:
conversion_rates = df.groupby('Variant')['purchase_TF']

std_p = lambda x: np.std(x, ddof=0)              # Std. deviation of the proportion
se_p = lambda x: stats.sem(x, ddof=0)            # Std. error of the proportion (std / sqrt(n))

conversion_rates = conversion_rates.agg([np.mean, std_p, se_p])
conversion_rates.columns = ['conversion_rate', 'std_deviation', 'std_error']


conversion_rates.style.format('{:.3f}')

Unnamed: 0_level_0,conversion_rate,std_deviation,std_error
Variant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,0.15,0.357,0.001
B,0.177,0.381,0.005


## Null Hypothesis: P_hat = P
## Alternative Hypothesis: P_hat > P

In [33]:
import math
def z_score(p_hat:float, p:float, n = int)->float:
    z = (p_hat - p) / math.sqrt(p*(1-p)/n)
    return z

In [34]:
z_score(0.177,0.150,5000)

5.346796732074041

## Because 5.34 > 1.64, we reject the null hypothesis and Alternative B was effective and increased conversion rate.

# Q2

In [35]:
from scipy.stats import norm
def sample_size_p(a: float, b: float, p_bar: float, p0: float, p1: float) -> int:
    n = pow((norm.ppf(1 - a / 2) * math.sqrt(2*p_bar*(1-p_bar)) + norm.ppf(1 - b) * 
             math.sqrt(p0*(1-p0)) + p1*(1-p1)), 2) / pow(abs(p1-p0),2)
    return math.ceil(n)

In [36]:
sample_size_p(0.5, 0.2, 0.16, 0.15, 0.17)

1566

In [37]:
z_score_list = []
for i in range(1,11):
    control_sample = df[df['Variant'] == 'A']
    treatment_sample = df[df['Variant'] == 'B'].sample(n=sample_size_p(0.5, 0.2, 0.16, 0.15, 0.17), random_state=i)
    ab_test = pd.concat([control_sample, treatment_sample], axis=0)
    ab_test.reset_index(drop=True, inplace=True)
    control_results = ab_test[ab_test['Variant'] == 'A']['purchase_TF']
    treatment_results = ab_test[ab_test['Variant'] == 'B']['purchase_TF']
    n_con = control_results.count()
    n_treat = treatment_results.count()
    success_A = control_results.sum() / n_con 
    success_B = treatment_results.sum() / n_treat 
    z = z_score(success_B,success_A,1566)
    z_score_list.append(z)
print(z_score_list)

[3.0251652676419405, 1.6791153614563907, 3.237699463355447, 3.6627678547824627, 3.0251652676419405, 1.1832022381248728, 5.150507224777017, 2.529252144310423, 2.8126310719284313, 2.0333390209789046]


In [38]:
attempt = 0
for k in z_score_list:
    attempt += 1
    if k > 1.64:
        print("In the %d attempt, reject null hypothesis. Alternative B was effective and increased conversion rate." % attempt)
    else:
        print("In the %d attempt, fail to reject null hypothesis. Alternative B was not effective and did not increased conversion rate." % attempt)

In the 1 attempt, reject null hypothesis. Alternative B was effective and increased conversion rate.
In the 2 attempt, reject null hypothesis. Alternative B was effective and increased conversion rate.
In the 3 attempt, reject null hypothesis. Alternative B was effective and increased conversion rate.
In the 4 attempt, reject null hypothesis. Alternative B was effective and increased conversion rate.
In the 5 attempt, reject null hypothesis. Alternative B was effective and increased conversion rate.
In the 6 attempt, fail to reject null hypothesis. Alternative B was not effective and did not increased conversion rate.
In the 7 attempt, reject null hypothesis. Alternative B was effective and increased conversion rate.
In the 8 attempt, reject null hypothesis. Alternative B was effective and increased conversion rate.
In the 9 attempt, reject null hypothesis. Alternative B was effective and increased conversion rate.
In the 10 attempt, reject null hypothesis. Alternative B was effective 

# Q3

### for conservative boundaries, A = 1/alpha, B = beta
alpha = 0.05, beta = 0.2 as stated in Q2

## calculating boundaries

In [39]:
boundary1 = np.log(1/0.05)  #A
boundary2 = np.log(0.2)     #B

In [40]:
print(boundary1)

2.995732273553991


In [41]:
print(boundary2)

-1.6094379124341003


## iterations for 10 samples

In [42]:
iteration = []
for m in range (1,11):
    treatment_sample = df[df['Variant'] == 'B'].sample(n=sample_size_p(0.5, 0.2, 0.16, 0.15, 0.17), random_state=m)
    current = 0
    count = 0
    control_sample = df[df['Variant'] == 'A']
    ab_test = pd.concat([control_sample, treatment_sample], axis=0)
    ab_test.reset_index(drop=True, inplace=True)
    control_results = ab_test[ab_test['Variant'] == 'A']['purchase_TF']
    treatment_results = ab_test[ab_test['Variant'] == 'B']['purchase_TF']
    n_con = control_results.count()
    n_treat = treatment_results.count()
    success_A = control_results.sum() / n_con 
    success_B = treatment_results.sum() / n_treat
    new = treatment_sample.reset_index()
    X1 = np.log(success_B/success_A)
    X0 = np.log((1-success_B)/(1-success_A))
    for i in range(0,1566):
        if boundary2 < current < boundary1:
            if new['purchase_TF'][i] == 1:
                current = current + X1
            if new['purchase_TF'][i] == 0:
                current = current + X0
            count = i
        else:
            break  
    iteration.append(count)
    if current >= boundary1:
        print(m, "->", current, "Accept alternative hypothesis")
    elif current <= boundary2:
        print(m, "->", current, "Accept null hypothesis")
    else:
        print(m, "->", current, "continue test")
print(iteration)

1 -> 3.0067250725812054 Accept alternative hypothesis
2 -> 1.3724820536552997 continue test
3 -> 3.153216269437385 Accept alternative hypothesis
4 -> 2.9959757512565184 Accept alternative hypothesis
5 -> 3.1179004817015086 Accept alternative hypothesis
6 -> 0.6867641487585536 continue test
7 -> -1.6216000517650493 Accept null hypothesis
8 -> -1.618349790062373 Accept null hypothesis
9 -> 3.024690965980013 Accept alternative hypothesis
10 -> 2.0017957242900506 continue test
[545, 1565, 995, 714, 560, 1565, 243, 270, 615, 1565]


In [43]:
averageiteration = sum(iteration)/len(iteration)

In [44]:
averageiteration

863.7