# A/B/n Testing

# 0.0 Imports

In [4]:
import numpy as np

import pandas as pd
from itertools import combinations
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import HTML

from statsmodels.stats.gof import chisquare_effectsize
from statsmodels.stats.power import GofChisquarePower , TTestIndPower
from statsmodels.sandbox.stats.multicomp import multipletests
from scipy import stats

##  0.1 Helper Functions

In [2]:
def my_settings():
    %matplotlib inline
    
    # plotly settings
    plt.style.use( 'ggplot' )
    plt.rcParams['figure.figsize'] = [25, 12]
    plt.rcParams['font.size'] = 8
    
    # sets global SEED
    SEED = 42
 
    # notebook settings
    display(HTML('<style>.container{width:100% !important;}</style>'))
    np.set_printoptions(suppress=True)
    pd.set_option('display.float_format', '{:.3f}'.format)
    
    # seaborn settings
    sns.set(rc={'figure.figsize':(25,12)})
    sns.set_theme(style = 'darkgrid', font_scale = 1)
my_settings()

NameError: name 'pd' is not defined

# 1.0 Loading Data

In [5]:
d = {'variant' : ['interact' , 'connect' , 'learn' , 'help' , 'services'],
     'visits': [10283 , 2742 , 2747, 3180 , 2064],
     'clicks_all' : [3714 , 1587 , 1652 , 1717 ,1348],
     'clicks_link' : [42 , 53 , 21 , 38 ,45]}

data = pd.DataFrame(d)

In [6]:
data ['conversion'] = data['clicks_link'] / data['clicks_all']

In [7]:
data

Unnamed: 0,variant,visits,clicks_all,clicks_link,conversion
0,interact,10283,3714,42,0.011309
1,connect,2742,1587,53,0.033396
2,learn,2747,1652,21,0.012712
3,help,3180,1717,38,0.022132
4,services,2064,1348,45,0.033383


# 2.0 Design de Experimentos

## 2.1 Formulação de Hipóteses

CTR ( Click-Through Rate)
- H0 : Não há nenhuma diferença entre o CTR das variantes da página
- H1 : Há uma diferença entre o CTR das variantes da página

**Teste Bi-Caudal**

## 2.2 Parâmetros do Experimento

In [8]:

k = len(data['clicks_all'])

actual_dist = data['clicks_link'] / data['clicks_link'].sum()


expected_dist = [1 / k] * k

In [9]:
effect_size = chisquare_effectsize( expected_dist , actual_dist)
alpha = 0.05
power = 0.80
# ncols = 2
# nrows = 5

# sample_size = GofChisquarePower().solve_power(
#     effect_size = effect_size, 
#     alpha = alpha, 
#     power = power,
#     n_bins = (ncols -1) * (nrows - 1)) # graus de liberdade

sample_size = TTestIndPower().solve_power(
    effect_size = effect_size, 
    alpha = alpha, 
    power = power,
)

In [10]:
sample_size = np.ceil(sample_size).astype(int)

print('Minimum Sample Size per Variant : {}'.format(sample_size))
print('Total Sample Size : {}'.format(k * sample_size))

Minimum Sample Size per Variant : 222
Total Sample Size : 1110


# 3.0 Aplicação de Teste Chi Squared

In [11]:

data['no_clicks_link'] = data ['clicks_all'] - data ['clicks_link']
df = data[['variant' , 'clicks_link', 'no_clicks_link']].head()
df = df.set_index('variant')
df

Unnamed: 0_level_0,clicks_link,no_clicks_link
variant,Unnamed: 1_level_1,Unnamed: 2_level_1
interact,42,3672
connect,53,1534
learn,21,1631
help,38,1679
services,45,1303


In [12]:
chi2 , p_value ,dof , ex = stats.chi2_contingency(df)


print( ' Chi Squared : {} - p_value : {} '.format(chi2 , p_value))

 Chi Squared : 46.33660181942126 - p_value : 2.0959498129984563e-09 


# 4.0 Post-hoc Testing

In [13]:
all_comb = list(combinations(df.index , 2))
p_values = []


for comb in all_comb:
    new_df = df[(df.index == comb[0]) | (df.index ==comb[1])]
    chi2 , p_value , dof , ex = stats.chi2_contingency(new_df)
    p_values.append(p_value)
    
# Correction of Bonrrefoni
reject_list , corrected_p_values= multipletests(p_values, method = 'bonferroni')[:2]

In [14]:
for comb, p_val , corr_p_val , reject in zip(all_comb, p_values , corrected_p_values,reject_list):
    print('\n{}: p_values: {}; corrected_p_values: {}; reject_list: {} '.format(comb,p_val,corr_p_val,reject))


('interact', 'connect'): p_values: 5.3676772349808135e-08; corrected_p_values: 5.367677234980813e-07; reject_list: True 

('interact', 'learn'): p_values: 0.7616980743361713; corrected_p_values: 1.0; reject_list: False 

('interact', 'help'): p_values: 0.0031030587017400212; corrected_p_values: 0.03103058701740021; reject_list: True 

('interact', 'services'): p_values: 1.798089447385411e-07; corrected_p_values: 1.7980894473854111e-06; reject_list: True 

('connect', 'learn'): p_values: 0.00013292868361715983; corrected_p_values: 0.0013292868361715984; reject_list: True 

('connect', 'help'): p_values: 0.06144184057612575; corrected_p_values: 0.6144184057612575; reject_list: False 

('connect', 'services'): p_values: 1.0; corrected_p_values: 1.0; reject_list: False 

('learn', 'help'): p_values: 0.0508958228881819; corrected_p_values: 0.5089582288818191; reject_list: False 

('learn', 'services'): p_values: 0.00020374035733741825; corrected_p_values: 0.0020374035733741825; reject_list