In [78]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.stats.api as sms
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns 
from math import ceil 

%matplotlib inline

The product manager said that the current conversion rate is about 13% on average throughout the year, and that the team would be happy with an increase of 2%, meaning that the new design will be considered a success if it raises the conversion rate to 15%

In [79]:
effect_size = sms.proportion_effectsize(0.13, 0.15)     # calculating the effect size based on expected rates

# Power is a digit that corresponds 80% chance to detect change statistically significant in 
# the test with the calculated sample size.
# Aplha is a significance level, e.g. 0.05, is the probability of a type I error, 
# that is wrong rejections if the Null Hypothesis is true.
required_n = sms.NormalIndPower().solve_power(
                                effect_size,
                                power=0.8,
                                alpha=0.05,
                                ratio=1)                # calculating the sample size needed 


required_n = ceil(required_n)                      

print(required_n)

4720


In [80]:
import pandas as pd

df = pd.read_csv("archive/ab_data.csv")
df.head()

Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,851104,2017-01-21 22:11:48.556739,control,old_page,0
1,804228,2017-01-12 08:01:45.159739,control,old_page,0
2,661590,2017-01-11 16:55:06.154213,treatment,new_page,0
3,853541,2017-01-08 18:28:03.143765,treatment,new_page,0
4,864975,2017-01-21 01:52:26.210827,control,old_page,1


In [81]:
pd.crosstab(df['group'], df['landing_page'])

landing_page,new_page,old_page
group,Unnamed: 1_level_1,Unnamed: 2_level_1
control,1928,145274
treatment,145311,1965


In [82]:
session_counts = df['user_id'].value_counts(ascending=False)
session_counts.count()


290584

In [83]:
multi_users = session_counts[session_counts > 1].count()

print(f"There are {multi_users} in the dataset")

There are 3894 in the dataset


In [84]:
users_to_drop = session_counts[session_counts > 1].index


df = df[-df['user_id'].isin(users_to_drop)]

print(f"The updated dataset now has {df.shape[0]} rows")

The updated dataset now has 286690 rows


In [85]:
pd.crosstab(df['group'], df['landing_page'])

landing_page,new_page,old_page
group,Unnamed: 1_level_1,Unnamed: 2_level_1
control,0,143293
treatment,143397,0


In [86]:
control_sample = df[df['group'] == 'control'].sample(n=required_n)
treatment_sample = df[df['group'] == 'treatment'].sample(n=required_n)

In [87]:
# control_sample.head()
treatment_sample.head()

Unnamed: 0,user_id,timestamp,group,landing_page,converted
211901,649138,2017-01-14 05:44:17.035241,treatment,new_page,1
70778,647419,2017-01-21 06:41:10.858337,treatment,new_page,0
281939,859486,2017-01-22 20:46:05.646396,treatment,new_page,0
254763,911531,2017-01-20 18:54:27.469426,treatment,new_page,0
165446,764877,2017-01-21 06:01:00.378135,treatment,new_page,0


In [88]:
ab_test = pd.concat([control_sample, treatment_sample], axis=0)
ab_test.reset_index(drop=True, inplace=True)
ab_test.head()


Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,685854,2017-01-13 03:08:34.416749,control,old_page,0
1,791657,2017-01-22 04:34:25.427592,control,old_page,0
2,904665,2017-01-13 14:31:37.110456,control,old_page,0
3,904249,2017-01-15 07:39:32.829808,control,old_page,0
4,740007,2017-01-15 15:15:56.621879,control,old_page,0


In [89]:
conversion_rates = ab_test.groupby('group')['converted']

std_p = lambda x: np.std(x, ddof=0)            # Std. deviation of the proportion
se_p = lambda x: stats.sem(x, ddof=0)          # Std. error of the proportion (std / sqrt(n))

conversion_rates = conversion_rates.agg([np.mean, std_p, se_p])
conversion_rates.columns = ['conversion_rate', 'std_deviation', 'std_error']
conversion_rates

  conversion_rates = conversion_rates.agg([np.mean, std_p, se_p])


Unnamed: 0_level_0,conversion_rate,std_deviation,std_error
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
control,0.123729,0.329272,0.004793
treatment,0.122034,0.327325,0.004764


In [94]:
from statsmodels.stats.proportion import proportions_ztest, proportion_confint

In [95]:
control_results = ab_test[ab_test['group'] == 'control']['converted']
treatment_results = ab_test[ab_test['group'] == 'treatment']['converted']

In [97]:
n_con = control_results.count()
n_treat = treatment_results.count()
successes = [control_results.sum(), treatment_results.sum()]
nobs = [n_con, n_treat]


z_stat, pval = proportions_ztest(successes, nobs=nobs)

(lower_con, lower_treat), (upper_con, upper_treat) = proportion_confint(successes, nobs=nobs, alpha=0.05)

print(f'z statistic: {z_stat:.2f}')
print(f'p-value: {pval:.3f}')
print(f'ci 95% for control group: [{lower_con:.3f}, {upper_con:.3f}]')
print(f'ci 95% for treatment group: [{lower_treat:.3f}, {upper_treat:.3f}]')

z statistic: 0.25
p-value: 0.802
ci 95% for control group: [0.114, 0.133]
ci 95% for treatment group: [0.113, 0.131]


Since our 
p-value=0.802 which is way above \alpha=0.05, we cannot reject the null hypothesis, which means that our new design did not perform significantly different (let alone better) than our old one.

Additionally, if we look at the confidence interval for the treatment group ([0.113, 0.131], i.e. 11.3-13.1%) we notice that:

It includes our baseline value of 13% conversion rate
It doesn't include our target value of 15% (the 2% uplift we were aiming for)

What this means is that it is more likely that the true conversion rate of the new design is similar to our baseline, rather than the 15% target we had hoped for. This is further proof that our new design is not likely to be an improvement on our old design, and that unfortunately we are back to the drawing board!