In [None]:
# This is based on the following page:
# https://medium.com/@robbiegeoghegan/implementing-a-b-tests-in-python-514e9eb5b3a1

In [1]:
import math

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as st
import statsmodels.stats.api as sms

In [2]:
# Load the A/B test records from the CSV file in the working directory.
dataset = pd.read_csv(filepath_or_buffer='ab_data.csv')

In [3]:
# Preview the first rows to check the column names and value formats.
dataset.head()

Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,851104,2017-01-21 22:11:48.556739,control,old_page,0
1,804228,2017-01-12 08:01:45.159739,control,old_page,0
2,661590,2017-01-11 16:55:06.154213,treatment,new_page,0
3,853541,2017-01-08 18:28:03.143765,treatment,new_page,0
4,864975,2017-01-21 01:52:26.210827,control,old_page,1


In [4]:
# Show column dtypes and non-null counts to spot missing values.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 294478 entries, 0 to 294477
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   user_id       294478 non-null  int64 
 1   timestamp     294478 non-null  object
 2   group         294478 non-null  object
 3   landing_page  294478 non-null  object
 4   converted     294478 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 11.2+ MB


In [5]:
# Summary statistics for the numeric columns (user_id, converted).
dataset.describe()

Unnamed: 0,user_id,converted
count,294478.0,294478.0
mean,787974.124733,0.119659
std,91210.823776,0.324563
min,630000.0,0.0
25%,709032.25,0.0
50%,787933.5,0.0
75%,866911.75,0.0
max,945999.0,1.0


In [6]:
# Number of rows per experiment group before any cleaning.
dataset['group'].value_counts()

treatment    147276
control      147202
Name: group, dtype: int64

In [7]:
# Identify rows where the assigned group and the page shown disagree:
# the control group should only see the old page and the treatment group
# should only see the new page. These rows are collected for removal.
in_control = dataset['group'] == 'control'
in_treatment = dataset['group'] == 'treatment'

# mask_1: control users shown the new page; mask_2: treatment users shown the old page
mask_1 = in_control & (dataset['landing_page'] == 'new_page')
mask_2 = in_treatment & (dataset['landing_page'] == 'old_page')

# Index labels of every mismatched row (union of both conditions)
drop_elements = dataset.loc[mask_1].index.union(dataset.loc[mask_2].index)

In [8]:
# Remove the mismatched group/landing_page rows identified above by index label.
dataset = dataset.drop(index=drop_elements)

In [9]:
# Rows per group after removing the mismatched group/landing_page rows.
dataset['group'].value_counts()

treatment    145311
control      145274
Name: group, dtype: int64

In [10]:
# Keep only the first record for each user_id so that every user is counted once.
dataset = dataset.drop_duplicates(subset='user_id', keep='first')

In [11]:
# Rows per group after de-duplicating on user_id.
dataset['group'].value_counts()

treatment    145310
control      145274
Name: group, dtype: int64

In [15]:
# Calculate the conversions and sample sizes in the control and treatment
# groups, then report the traffic split and the conversion rate of each group.
def _as_percent(part, whole):
    """Return part/whole expressed as a percentage rounded to two decimals."""
    return round(part / whole * 100, 2)


# Control group: number of conversions and number of users
mask = dataset['group'].eq('control')
control_conversions = dataset.loc[mask, 'converted'].sum()
control_total = dataset.loc[mask, 'converted'].count()

# Treatment group: number of conversions and number of users
mask = dataset['group'].eq('treatment')
treatment_conversions = dataset.loc[mask, 'converted'].sum()
treatment_total = dataset.loc[mask, 'converted'].count()

# Share of all users that landed in each group
total = dataset['converted'].count()
control_split = _as_percent(control_total, total)
treatment_split = _as_percent(treatment_total, total)

# Conversion rate within each group
control_converted = _as_percent(control_conversions, control_total)
treatment_converted = _as_percent(treatment_conversions, treatment_total)

print(f'Split is {control_split}% control {treatment_split}% treatment\n ')
print(f'percentage of conversions is {control_converted}% in control and {treatment_converted}% in treatment group\n')

Split is 49.99% control 50.01% treatment
 
percentage of conversions is 12.04% in control and 11.88% in treatment group



#### Set test parameters

In [22]:
# Power analysis: check what sample size per group is required to detect the
# minimum effect we care about, given the observed baseline conversion rate.
baseline_rate = control_conversions/control_total  # observed conversion rate of the control group
practical_significance = 0.01 # user defined: smallest absolute change in conversion rate worth acting on
# NOTE: despite its name, confidence_level holds the significance level (alpha);
# 0.05 corresponds to a 95% confidence interval.
confidence_level = 0.05 # user defined
sensitivity = 0.8 # user defined: desired statistical power of the test

# Standardised effect size between the baseline rate and the baseline rate plus
# the practical-significance threshold.
effect_size = sms.proportion_effectsize(baseline_rate, baseline_rate+practical_significance)
# Fixed: alpha previously referenced the undefined name `confidence_interval`,
# which raises NameError on a fresh kernel; the value lives in `confidence_level`.
sample_size = round(sms.NormalIndPower().solve_power(effect_size = effect_size, power = sensitivity,
                                              alpha = confidence_level, ratio = 1))
print (f' The required sample size is {sample_size} per group\n')

 The required sample size is 17209 per group



#### A/B test

In [23]:
# Pooled conversion probability: all conversions divided by all users,
# combining the control and treatment groups.
all_conversions = control_conversions + treatment_conversions
all_users = control_total + treatment_total
pooled_prob = all_conversions / all_users

In [24]:
# Calculate the pooled standard error of the difference in conversion rates
# and the two-sided margin of error.
# (`math` is imported in the import cell at the top of the notebook.)
se_pooled = math.sqrt(pooled_prob * (1 - pooled_prob) * (1/control_total + 1/treatment_total))
# Two-sided critical value; confidence_level holds alpha (0.05 -> ~1.96)
z_score = st.norm.ppf(1 - confidence_level/2)
margin_of_error = se_pooled * z_score

# calculate d_hat, the estimated difference between the probability of
# conversion in the treatment group and in the control group
d_hat = (treatment_conversions/treatment_total) - (control_conversions/control_total)

# Confidence interval for the difference in conversion rates
lower_bound = d_hat - margin_of_error
upper_bound = d_hat + margin_of_error

# Test if we can reject the null hypothesis (no difference between groups):
# it is rejected only when the confidence interval excludes zero, in either
# direction. The original check compared the lower bound against the
# practical-significance threshold, which is a separate question and would
# report "do not reject" for significant negative or small positive effects.
if lower_bound > 0 or upper_bound < 0:
    print("Reject the null hypothesis")
    # Practical significance: is the whole interval above the minimum lift we care about?
    if lower_bound > practical_significance:
        print("The improvement also exceeds the practical significance threshold")
else:
    print("Do not reject the null hypothesis")

print ( f'The lower bound of the confidence interval is {round(lower_bound*100,2)}%\n')
print(f'The upper bound of the confidence interval is {round(upper_bound*100,2)}%\n')

Do not reject the null hypothesis
The lower bound of the confidence interval is -0.39%

The upper bound of the confidence interval is 0.08%



In [25]:
# Display the practical-significance threshold that was set in the test-parameters cell.
practical_significance

0.01