In [5]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats

In [2]:
# Load the A/B test export into a DataFrame. The path is relative, so the CSV
# must sit in the kernel's current working directory.
ab_test = pd.read_csv('ab_data.csv')

In [3]:
# Preview the first rows to sanity-check the columns that were loaded.
ab_test.head()

Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,851104,2017-01-21 22:11:48.556739,control,old_page,0
1,804228,2017-01-12 08:01:45.159739,control,old_page,0
2,661590,2017-01-11 16:55:06.154213,treatment,new_page,0
3,853541,2017-01-08 18:28:03.143765,treatment,new_page,0
4,864975,2017-01-21 01:52:26.210827,control,old_page,1


In [6]:
def std_p(x):
    """Return the population standard deviation (ddof=0) of the outcomes in `x`."""
    return np.std(x, ddof=0)


def se_p(x):
    """Return the standard error of the proportion for `x`: std(ddof=0) / sqrt(n)."""
    return stats.sem(x, ddof=0)


# Per-group summary of the `converted` column.
# Named aggregation labels each statistic at the point where it is computed,
# so the labels cannot drift out of order. The 'mean' string is used instead
# of np.mean because pandas >= 2.1 emits a FutureWarning when a NumPy
# reduction is passed to groupby.agg.
conversion_rates = ab_test.groupby('group')['converted'].agg(
    conversion_rate='mean',
    std_deviation=std_p,
    std_error=se_p,
)

# Display with three decimals; the underlying DataFrame keeps full precision.
conversion_rates.style.format('{:.3f}')

Unnamed: 0_level_0,conversion_rate,std_deviation,std_error
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
control,0.12,0.325,0.001
treatment,0.119,0.324,0.001


In [7]:
from statsmodels.stats.proportion import proportions_ztest, proportion_confint

In [8]:
# Conversion outcomes per experiment arm, selected in one step with a boolean
# row mask and the column label.
control_results = ab_test.loc[ab_test['group'] == 'control', 'converted']
treatment_results = ab_test.loc[ab_test['group'] == 'treatment', 'converted']

In [18]:
# Sum of `converted` per arm, ordered [control, treatment].
n_successes = [arm.sum() for arm in (control_results, treatment_results)]

In [28]:
# Non-null observation counts per arm, ordered [control, treatment] to match
# n_successes.
n_con, n_treat = control_results.count(), treatment_results.count()
nobs = [n_con, n_treat]

In [34]:
# z-test for the difference between the two conversion proportions
# (statsmodels' default alternative is two-sided).
# `n_successes` and `nobs` are both ordered [control, treatment]; the counts
# must be passed as `n_successes`, the only name under which they are defined.
z_stat, pval = proportions_ztest(n_successes, nobs=nobs)

# proportion_confint returns (lower_bounds, upper_bounds), each holding one
# value per group, hence the nested unpacking. alpha=0.05 gives 95% intervals.
(lower_con, lower_treat), (upper_con, upper_treat) = proportion_confint(n_successes, nobs=nobs, alpha=0.05)

In [35]:
# Emit the test summary in one call; sep='\n' puts each entry on its own line.
print(
    f'z statistic: {z_stat:.2f}',
    f'p-value: {pval:.3f}',
    f'ci 95% for control group: [{lower_con:.3f}, {upper_con:.3f}]',
    f'ci 95% for treatment group: [{lower_treat:.3f}, {upper_treat:.3f}]',
    sep='\n',
)

z statistic: 1.24
p-value: 0.216
ci 95% for control group: [0.119, 0.122]
ci 95% for treatment group: [0.117, 0.121]


## Conclusion
When testing a hypothesis, it is important to define a baseline and a goal first. For instance, with a baseline conversion rate of 0.120 (12%) and a goal of 0.130 (13%), we find no evidence of a difference between the groups. This is because the 95% confidence intervals of both groups contain the baseline, and neither contains our goal. Furthermore, the p-value (0.216) is well above the 0.05 significance level, so it does not support a significant difference between the groups either. This conclusion was already predictable, since the summary statistics of the two groups (conversion rate, standard deviation and standard error) are very similar as well.