# AB Testing

In [1]:
import numpy as np
import pandas as pd 

from sklearn.model_selection import train_test_split

# library for chi2 testing 
from scipy.stats import chi2 
from scipy.stats import chi2_contingency

## Data

In [16]:
# Simulate one integer age per user, drawn uniformly from [20, 90).
# The seed is fixed so the sample is reproducible across runs.
tot_n = 1200
np.random.seed(666)

df = pd.DataFrame({"age": np.random.randint(20, 90, tot_n)})
df.head()

Unnamed: 0,age
0,22
1,65
2,50
3,82
4,50


## Experiment Design

In [19]:
np.random.seed(666)

# Split the users into a control and a treatment group and compare how well
# ages are balanced between them under two assignment strategies.
control_size = 200
treatment_size = 1000
# Both strategies below assume the two groups together cover every row of df.
assert control_size + treatment_size == len(df), "group sizes must add up to len(df)"

# Randomized: draw the control rows uniformly without replacement,
# everything else is treatment.
tot_index = list(range(len(df)))
control_index = list(np.random.choice(tot_index, size=control_size, replace=False))
# sorted() makes the treatment row order explicit instead of relying on
# set iteration order.
treatment_index = sorted(set(tot_index) - set(control_index))

control = df.iloc[control_index].copy()
treatment = df.iloc[treatment_index].copy()

print('Randomized:')
print(f'{control.shape = }')
print(f'{treatment.shape = }')
print(f'{control.age.mean() = }')
print(f'{treatment.age.mean()  = }\n')

# Stratified: keep the age distribution the same in both groups.
# NOTE: this overwrites the randomized `control` / `treatment` above; the
# stratified groups are the ones left in scope for the rest of the notebook.
# - test_size is passed as an absolute row count so the control group has
#   exactly control_size rows (a float ratio is subject to rounding).
# - .copy() mirrors the randomized split, so adding columns to these frames
#   later cannot trigger pandas' SettingWithCopyWarning.
treatment, control = (
    part.copy()
    for part in train_test_split(
        df,
        test_size=control_size,
        random_state=666,
        shuffle=True,
        stratify=df.age,
    )
)

print('Stratified:')
print(f'{control.shape = }')
print(f'{treatment.shape = }')
print(f'{control.age.mean() = }')
print(f'{treatment.age.mean()  = }')


Randomized:
control.shape = (200, 1)
treatment.shape = (1000, 1)
control.age.mean() = 55.47
treatment.age.mean()  = 54.508

Stratified:
control.shape = (200, 1)
treatment.shape = (1000, 1)
control.age.mean() = 54.5
treatment.age.mean()  = 54.702


In [26]:
def simulate_results(n_rows, click_rate):
    """Build a list of n_rows simulated outcomes.

    The first round(n_rows * click_rate) entries are "click" and all remaining
    entries are "no click". Deriving the second count as the remainder
    guarantees the list has exactly n_rows entries; truncating two independent
    float products with int() can come up short and make the column
    assignment fail with a length mismatch.
    """
    n_click = round(n_rows * click_rate)
    return ["click"] * n_click + ["no click"] * (n_rows - n_click)


# Simulated click-through: 80% for treatment, 70% for control.
treatment["group"] = "treatment"
treatment["result"] = simulate_results(len(treatment), 0.8)
control["group"] = "control"
control["result"] = simulate_results(len(control), 0.7)

# Stack both groups into one frame; age is not needed for the test itself.
data = (
    pd.concat([treatment, control], axis=0)
    .reset_index(drop=True)
    .drop(columns="age")
)
# Constant helper column so that rows can be counted when pivoting.
data["result_value"] = 1
data


Unnamed: 0,group,result,result_value
0,treatment,click,1
1,treatment,click,1
2,treatment,click,1
3,treatment,click,1
4,treatment,click,1
...,...,...,...
1195,control,no click,1
1196,control,no click,1
1197,control,no click,1
1198,control,no click,1


## Chi Squared Test

In [28]:
# Observed contingency table: number of rows per (action, group) pair.
data_pivot = data.pivot_table(
    index="result",
    columns="group",
    values="result_value",
    aggfunc="count",
    fill_value=0,  # a pair with no rows is a count of 0, not NaN
)

index = ["click", "no click"]
# Map group labels to variant names explicitly instead of relabelling the raw
# values by position, which silently depends on the pivot's sort order.
variant_names = {"control": "variant A", "treatment": "variant B"}
results_obs = (
    data_pivot
    .rename(columns=variant_names)
    .reindex(index=index, columns=list(variant_names.values()), fill_value=0)
    .rename_axis(index="action", columns=None)
)
results_obs

Unnamed: 0_level_0,variant A,variant B
action,Unnamed: 1_level_1,Unnamed: 2_level_1
click,140,800
no click,60,200


### Manual Calculation

In [29]:
# Column totals: number of observations in each variant.
variant_total = results_obs.sum(axis="index")
variant_total

variant A     200
variant B    1000
dtype: int64

In [30]:
# Row totals: number of observations for each action across both variants.
click_total = results_obs.sum(axis="columns")
click_total

action
click       940
no click    260
dtype: int64

In [31]:
# Expected counts under independence: each variant's total is distributed
# over the actions according to the overall share of each action.
print("Expected Results")
action_share = click_total / click_total.sum()
results_exp = pd.DataFrame(
    {
        variant: action_share * variant_total[variant]
        for variant in ("variant A", "variant B")
    }
)
results_exp

Expected Results


Unnamed: 0_level_0,variant A,variant B
action,Unnamed: 1_level_1,Unnamed: 2_level_1
click,156.666667,783.333333
no click,43.333333,216.666667


In [32]:
# Per-cell contribution to the statistic: (observed - expected)^2 / expected
print("Square Difference")
residuals = results_obs - results_exp
squared_difference = residuals.pow(2).div(results_exp)
squared_difference

Square Difference


Unnamed: 0_level_0,variant A,variant B
action,Unnamed: 1_level_1,Unnamed: 2_level_1
click,1.77305,0.35461
no click,6.410256,1.282051


In [33]:
# The statistic is the sum of all cell contributions
# (summed per variant first, then across variants).
contribution_per_variant = squared_difference.sum()
chi_2_stat = contribution_per_variant.sum()
print(f"Chi2 Statistic Value: {chi_2_stat}")

Chi2 Statistic Value: 9.819967266775775


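Now we calculate the p-value from this chi-squared statistic: the probability of observing a value at least this large if the null hypothesis (the click rate does not depend on the variant) were true.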

In [34]:
# Degrees of Freedom 
dof = (results_obs.shape[0] - 1) * (results_obs.shape[1] - 1)
print(f"Degrees of Freedom: {dof}")

Degrees of Freedom: 1


In [35]:
# The p-value is the upper-tail probability of the chi-squared distribution,
# given by the survival function sf(x) = 1 - cdf(x).
p_value = chi2.sf(chi_2_stat, df=dof)
print(f"The p value is: {p_value}")

The p value is: 0.001726274144256204


In [36]:
# Compare the manually computed p-value against the significance level.
# NOTE(review): "hyphotesis" is misspelled in the printed messages; the text is
# kept unchanged so it matches the recorded output.
p_value_threshold = 0.05
verdict = (
    "The null hyphotesis is rejected"
    if p_value < p_value_threshold
    else "Not enough evidence to reject the null hyphotesis"
)
print(verdict)

The null hyphotesis is rejected


### Using Scipy

In [37]:
# Same test via scipy. correction=False turns off Yates' continuity correction
# (which scipy applies by default when dof == 1) so the result is comparable
# with the manual calculation.
test_results = chi2_contingency(observed=results_obs, correction=False)
test_results

Chi2ContingencyResult(statistic=9.819967266775771, pvalue=0.0017262741442562101, dof=1, expected_freq=array([[156.66666667, 783.33333333],
       [ 43.33333333, 216.66666667]]))

In [38]:
# Compare scipy's p-value against the significance level.
# NOTE(review): "hyphotesis" is misspelled in the printed messages; the text is
# kept unchanged so it matches the recorded output.
p_value_threshold = 0.05
verdict = (
    "The null hyphotesis is rejected"
    if test_results.pvalue < p_value_threshold
    else "Not enough evidence to reject the null hyphotesis"
)
print(verdict)


The null hyphotesis is rejected
