
# A/B Test Example

## I. Cleaning data

In [18]:
import math
import os

import pandas as pd
import scipy.stats as st
import statsmodels.stats.api as sms

In [46]:
# Load the raw experiment export.
# The file location can be overridden through the AB_DATA_PATH environment variable,
# so the notebook is runnable on machines other than the original author's;
# when the variable is not set, the original local path is used unchanged.
data_path = os.environ.get(
    "AB_DATA_PATH",
    "C:/Users/jpolancoroque/Desktop/IESEG/Experimentos/Databases/ab_data.csv",
)
raw_data = pd.read_csv(data_path)

# Work on a copy so that raw_data keeps the untouched file contents.
df = raw_data.copy()

In [47]:
# Report the frame's dimensions, then preview its first rows.
n_rows, n_cols = df.shape
print("Number of rows: ", n_rows, " Number of columns: ", n_cols)
df.head()

Number of rows:  294478  Number of columns:  5


Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,851104,2017-01-21 22:11:48.556739,control,old_page,0
1,804228,2017-01-12 08:01:45.159739,control,old_page,0
2,661590,2017-01-11 16:55:06.154213,treatment,new_page,0
3,853541,2017-01-08 18:28:03.143765,treatment,new_page,0
4,864975,2017-01-21 01:52:26.210827,control,old_page,1


In [48]:
# Number of rows assigned to each experiment group.
df.loc[:, "group"].value_counts()

treatment    147276
control      147202
Name: group, dtype: int64

In [49]:
# Some control users saw the new_page and some treatment users saw the old_page.
# Those rows are removed, one group at a time.

# Control rows that were served the new page.
mask1 = df["landing_page"].eq("new_page") & df["group"].eq("control")
index_to_drop1 = df.loc[mask1].index
df = df.drop(index=index_to_drop1)

# Treatment rows that were served the old page
# (evaluated on the frame that already has the control mismatches removed).
mask2 = df["landing_page"].eq("old_page") & df["group"].eq("treatment")
index_to_drop2 = df.loc[mask2].index
df = df.drop(index=index_to_drop2)

In [50]:
# Frame dimensions and per-group row counts at this point of the cleaning.
print(df.shape)
df.loc[:, "group"].value_counts()

(290585, 5)


treatment    145311
control      145274
Name: group, dtype: int64

In [51]:
# Number of rows flagged by each mismatch mask:
# control rows on new_page first, then treatment rows on old_page.
n_control_mismatch, n_treatment_mismatch = mask1.sum(), mask2.sum()
print(n_control_mismatch, n_treatment_mismatch)

1928 1965


In [56]:
# Compare the number of user_id entries with the number of distinct ids;
# a gap between the two means some users appear more than once.
user_ids = df["user_id"]
print(user_ids.count())
print(user_ids.nunique())

290585
290584


In [58]:
# Keep only the first row recorded for each user_id.
df = df.drop_duplicates(subset="user_id", keep="first")

In [59]:
# Per-group user totals and conversion counts, followed by a printed summary of
# the traffic split and each group's conversion rate.
converted_flags = df["converted"]
total_users_all = converted_flags.count()

# Control group (old page)
mask = df["group"].eq("control")
conversions_control = converted_flags[mask].sum()
total_users_control = converted_flags[mask].count()

# Treatment group (new page)
mask = df["group"].eq("treatment")
conversions_treatment = converted_flags[mask].sum()
total_users_treatment = converted_flags[mask].count()

# Share of all users that ended up in each group
share_control = round(total_users_control / total_users_all * 100, 2)
share_treatment = round((total_users_treatment / total_users_all) * 100, 2)
print("Split of control users who saw old page vs treatment users who saw new page: ",
      share_control, "% ",
      share_treatment, "%")

# Conversions in the control group
pct_control = round((conversions_control / total_users_control) * 100, 2)
print("Number of control users who converted on old page: ", conversions_control)
print("Percentage of control users who converted: ", pct_control, "%")

# Conversions in the treatment group
pct_treatment = round((conversions_treatment / total_users_treatment) * 100, 2)
print("Number of treatment users who converted on new page: ", conversions_treatment)
print("Percentage of treatment users who converted: ", pct_treatment, "%")

Split of control users who saw old page vs treatment users who saw new page:  49.99 %  50.01 %
Number of control users who converted on old page:  17489
Percentage of control users who converted:  12.04 %
Number of treatment users who converted on new page:  17264
Percentage of treatment users who converted:  11.88 %


## II. Set Test Parameters

In [71]:
# Inputs for the required-sample-size check.

# User defined: absolute change in conversion rate that is added to the baseline
# when computing the effect size to detect.
practical_significance = 0.01

# User defined: despite its name this holds the significance level (alpha);
# 0.05 corresponds to a 95% confidence interval.
confidence_level = 0.05

# User defined: statistical power, i.e. the chance of detecting the effect.
sensitivity = 0.8

# Benchmark: conversion rate observed in the control group.
baseline_rate = conversions_control / total_users_control

Required sample size:  17209  per group


In [72]:
# Effect size between the baseline rate and the baseline plus the practical-significance margin.
target_rate = baseline_rate + practical_significance
effect_size = sms.proportion_effectsize(baseline_rate, target_rate)

# Solve the power equation for the sample size, with equally sized groups (ratio=1).
power_analysis = sms.NormalIndPower()
sample_size = power_analysis.solve_power(
    effect_size=effect_size,
    alpha=confidence_level,
    power=sensitivity,
    ratio=1,
)

0.1203863045004612

In [75]:
# Report the solved sample size, rounded to the nearest whole user.
required_per_group = round(sample_size)
print("Required sample size: ", required_per_group, " per group")

Required sample size:  17209  per group


## III. A/B Test

In [78]:
# Pooled conversion probability: conversions from both groups over users from both groups.
conversions_both = conversions_control + conversions_treatment
users_both = total_users_control + total_users_treatment
prob_pooled = conversions_both / users_both

In [82]:
# Pooled standard error of the difference between the two conversion rates.
pooled_variance = prob_pooled * (1 - prob_pooled) * (1 / total_users_control + 1 / total_users_treatment)
se_pooled = math.sqrt(pooled_variance)

# Two-sided standard-normal critical value for the chosen alpha.
z_score = st.norm.ppf(1 - confidence_level / 2)

# Half-width of the confidence interval.
margin_of_error = z_score * se_pooled

In [87]:
# d_hat: observed conversion rate of the treatment group minus that of the control group.
p_treatment = conversions_treatment / total_users_treatment
p_control = conversions_control / total_users_control
d_hat = p_treatment - p_control

In [89]:
# Confidence interval for the difference in conversion rates, centred on d_hat.
upper_bound = d_hat + margin_of_error
lower_bound = d_hat - margin_of_error

In [91]:
# Decide on the null hypothesis of "no difference between the groups" (d = 0).
# The two-sided test rejects exactly when the confidence interval excludes 0,
# i.e. when the whole interval lies above 0 or the whole interval lies below 0.
# The previous condition (practical_significance < lower_bound) only fired for a
# positive effect larger than the practical-significance margin, so a significant
# negative effect, or a significant but small positive one, was reported as
# "Do not reject". Whether a significant effect is also large enough to act on
# (compare the bounds with practical_significance) is a separate question.
interval_excludes_zero = lower_bound > 0 or upper_bound < 0

if interval_excludes_zero:
    print("Reject null hypothesis")
else:
    print("Do not reject the null hypothesis")

Do not reject the null hypothesis


In [95]:
# Report both interval bounds as percentages, rounded to two decimals.
lower_pct = round(lower_bound * 100, 2)
upper_pct = round(upper_bound * 100, 2)
print("The lower bound of the confidence interval is ", lower_pct, "%")
print("The upper bound of the confidence interval is ", upper_pct, "%")

The lower bound of the confidence interval is  -0.39 %
The upper bound of the confidence interval is  0.08 %


In [97]:
# Observed difference between the groups, expressed in percent.
100 * d_hat

-0.15782389853555567