In [107]:
# import libraries
import pandas as pd
from math import sqrt
from tqdm import tqdm

In [4]:
# read in the data
df = pd.read_csv("AB_test_data.csv")

In [5]:
# inspect data
df.head()

Unnamed: 0,Variant,purchase_TF,date,id
0,A,False,2019-12-26,0x6f9421
1,A,False,2019-08-16,0x59d442
2,A,True,2019-03-18,0x6db8f8
3,A,False,2019-02-13,0x68245d
4,A,False,2019-09-28,0x28566e


In [6]:
df['purchase_TF'].value_counts()

False    46416
True      8584
Name: purchase_TF, dtype: int64

So it looks like 8584 people converted, and 46416 did not.

In [7]:
df['Variant'].value_counts()

A    50000
B     5000
Name: Variant, dtype: int64

So in the experiment, 5000 users received the treatment.

Hypothesis setup:
    - Null Hypothesis: Variant B and Variant A had the same conversion rates
    - Alternative hypothesis: Variant B had a higher conversion rate than Variant A

#### Conduct A/B Test

In [8]:
p_treatment = df[df["Variant"]=="B"]['purchase_TF'].sum()/len(df[df["Variant"]=="B"]['purchase_TF'])

In [11]:
p_varA = df[df["Variant"]=="A"]['purchase_TF'].sum()/len(df[df["Variant"]=="A"]['purchase_TF'])

In [13]:
n = len(df[df["Variant"]=="B"]['purchase_TF'])

In [38]:
z = (p_treatment-p_varA)/(((p_varA*(1-p_varA))/n)**0.5)

In [40]:
p_varA

0.15206

In [39]:
z

8.692151285198767

With 95% confidence level, Z0.05 = 1.64. Reject null if z > 1.64. Since z is 8.7, **we reject the null hypothesis and conclude that at 95% confidence level, variant B generates more conversion than variant A.**

#### Optimal Sample Size

In [104]:
# Calculate optumal sample size
t_alpha = 1.96
t_beta = 0.842
p0 = p_varA
p1 = p_treatment
delta = 0.2
p_bar = (p0+p1)/2

# plug into the formula
n_star = ((t_alpha*((2*p_bar*(1-p_bar))**.5)+(t_beta*((p0*(1-p0)+p1*(1-p1))**.5)))**2)*(delta**-2)
print("The optimal sample size for each segment is {}".format(n_star))

The optimal sample size for each segment is 56.396096281610525


In [80]:
# seperate treatment and control groups
A = df[df["Variant"]=="A"]
B = df[df["Variant"]=="B"]

In [83]:
# get sample from each group
sample_A = A.sample(n=57,random_state=42)
sample_B = B.sample(n=57,random_state=42)

In [158]:
# test 

log = []
for i in range(10000):
    n = 900
    sample_A = A.sample(n=57)
    sample_B = B.sample(n=n)

    convA = sample_A['purchase_TF'].sum()/57
    convB = sample_B['purchase_TF'].sum()/n

    z_sample = (convB-p_varA)/(((p_varA*(1-p_varA))/n)**0.5)
    if z_sample >= 1.64:
#         print("We reject the null hypothesis. Challenger wins!")
        log.append(1)
    else:
#         print("Old version wins!")
        log.append(0)
        
        
print("The challenger wins {}% of the time.".format(sum(log)/len(log)*100))

The challenger wins 98.24000000000001% of the time.


In [166]:
# test 

log = []
for i in range(10000):
    n = 57*10
    sample_A = A.sample(n=n)
    sample_B = B.sample(n=n)

    convA = sample_A['purchase_TF'].sum()/n
    convB = sample_B['purchase_TF'].sum()/n
    p_bar_test = (convA+convB)/2

    z_sample = (convB-convA)/(((p_bar_test*(1-p_bar_test))*(2/n)**0.5))
    
    if z_sample >= 1.64:
#         print("We reject the null hypothesis. Challenger wins!")
        log.append(1)
    else:
#         print("Old version wins!")
        log.append(0)
        
        
print("The challenger wins {}% of the time.".format(sum(log)/len(log)*100))

The challenger wins 91.75% of the time.
