In [1]:
# data analysis and wrangling
import pandas as pd
import numpy as np
import random as rnd
from sklearn.preprocessing import StandardScaler
import scipy.stats as st

# visualization
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# tested lib
from ab_test.user import User
from ab_test.experiment import *
from ab_test.hasher_implems import *

# tools
from tools.chi_squared import ChiSquaredTest as chi
from tools.data import DataHelper as data

import time

# Chi Squared Test

We consider experiments with 2 variations of 50% each.

## Definitions

### Hypothesis

* Experiment A has _r = 2_ levels (variation 1 and variation 2)
* Experiment B has _c = 2_ levels (variation 1 and variation 2)

The null hypothesis states that knowing the variation in experiment A does not help you predict the variation in experiment B.

$H_0$: Variable A and Variable B are independent.

$H_a$: Variable A and Variable B are not independent.

### Degrees of Freedom

`DF = (r - 1) * (c - 1) = 1`

### Expected Frequencies

The expected frequency counts are computed separately for each level of one categorical variable at each level of the other categorical variable. Compute $r \times c$ expected frequencies, according to the following formula.

$$E_{r,c} = \frac{n_r \cdot n_c}{n}$$

where $E_{r,c}$ is the expected frequency count for level $r$ of Variable A and level $c$ of Variable B, $n_r$ is the total number of sample observations at level $r$ of Variable A, $n_c$ is the total number of sample observations at level $c$ of Variable B, and $n$ is the total sample size.

For example, for a sample of 100 users, in a perfect case, we would have:

$$E_{r,c} = \frac{50 \cdot 50}{100} = 25$$

### Test Statistic

The test statistic is a chi-square random variable ($\chi^2$) defined by the following equation.

$$\chi^2 = \sum_{r,c} \frac{(O_{r,c} - E_{r,c})^2}{E_{r,c}}$$

where $O_{r,c}$ is the observed frequency count at level $r$ of Variable A and level $c$ of Variable B, and $E_{r,c}$ is the expected frequency count at level $r$ of Variable A and level $c$ of Variable B.


### P-value

The P-value is the probability, assuming the null hypothesis is true, of observing a test statistic at least as extreme as the one computed from the sample.

### Conclude

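* If the observed chi-square is below the critical chi-square, we fail to reject the null hypothesis: there is no evidence that the variables are related.
* If the observed chi-square is above the critical chi-square, we reject the null hypothesis: the variables are not independent (and hence may be related).

For DF = 1 and a significance level of 5% (α = 0.05), the critical chi-square is 3.841.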

### Test definitions

In [2]:
# Number of users in each generated sample (the grand total of one contingency table).
population = 1_000
# Number of independent samples drawn in the "bigger scale" sections below.
nb_exp = 1_000

## Built-In Hash Method

### Data Generation

In [3]:
# Built-In Hash
# Draw a single sample of `population` users with BuiltInHasher and keep the
# resulting ExpA x ExpB contingency table in `matrix_bi`.
# NOTE(review): how DataHelper.generate creates users and assigns variations
# is not visible from this notebook - confirm in tools/data.

matrix_bi = data.generate(BuiltInHasher(), population)

# Print the 2x2 table together with its row and column sums.
data.display(matrix_bi)

/         ExpA:Var1 ExpA:Var2 Sum       
ExpB:Var1        241       265       506
ExpB:Var2        244       250       494
Sum              485       515      1000


### Test Statistic

In [4]:
# Chi-squared statistic of the single built-in-hash table generated above;
# compare it with the critical value 3.841 (DF = 1, alpha = 0.05).
chi.statistic_test(matrix_bi)

0.3114947601598864

### Bigger scale test

In [5]:
# Repeat the built-in-hash experiment nb_exp times and collect one chi-squared
# statistic per generated contingency table.
#
# * time.perf_counter() is used instead of time.time(): it is monotonic and
#   meant for measuring elapsed durations.
# * A comprehension keeps the per-run matrix local, so the `matrix_bi` shown
#   and tested in the earlier cells is no longer silently overwritten.
# * The run index is forwarded as the third positional argument of
#   data.generate, exactly as before.
#   NOTE(review): presumably a per-run seed/offset - confirm in tools/data.
begin = time.perf_counter()
stat_tests_bi = [
    chi.statistic_test(data.generate(BuiltInHasher(), population, i))
    for i in range(nb_exp)
]
print(f"{time.perf_counter() - begin}s")


4.806703805923462s


In [6]:
# Summary of the nb_exp chi-squared statistics (built-in hash), one value per line.
print(
    f"median: {np.median(stat_tests_bi)}",
    f"mean: {np.mean(stat_tests_bi)}",
    f"std deviation: {np.std(stat_tests_bi)}",
    sep="\n",
)

median: 0.49848839162172415
mean: 1.0976286131174529
std deviation: 1.570428571973351


### Conclusion

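We fail to reject the null hypothesis (the assignments of experiments A and B are independent) if the median above is lower than the critical value 3.84. In the run shown above the median is ≈ 0.50, well below that threshold. (For reference, a chi-square variable with 1 degree of freedom has a median of about 0.455 and a mean of 1.)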

## MD5

### Data Generation

In [7]:
# Draw a single sample of `population` users with Md5Hasher and keep the
# resulting ExpA x ExpB contingency table in `matrix_md5`.
# NOTE(review): how DataHelper.generate creates users and assigns variations
# is not visible from this notebook - confirm in tools/data.
matrix_md5 = data.generate(Md5Hasher(), population)
# Print the 2x2 table together with its row and column sums.
data.display(matrix_md5)

/         ExpA:Var1 ExpA:Var2 Sum       
ExpB:Var1        265       266       531
ExpB:Var2        239       230       469
Sum              504       496      1000


### Chi Squared Test

In [8]:
# Chi-squared statistic of the single MD5 table generated above;
# compare it with the critical value 3.841 (DF = 1, alpha = 0.05).
chi.statistic_test(matrix_md5)

0.11059820658239633

### Bigger scale test

In [9]:
# Repeat the MD5 experiment nb_exp times and collect one chi-squared statistic
# per generated contingency table.
#
# * time.perf_counter() is used instead of time.time(): it is monotonic and
#   meant for measuring elapsed durations.
# * A comprehension keeps the per-run matrix local, so the `matrix_md5` shown
#   and tested in the earlier cells is no longer silently overwritten.
# * The loop index is unused, hence `_`.
#   NOTE(review): the built-in-hash loop passes the run index as a third
#   argument to data.generate while this one does not - confirm the
#   difference is intentional.
begin = time.perf_counter()
stat_tests_md5 = [
    chi.statistic_test(data.generate(Md5Hasher(), population))
    for _ in range(nb_exp)
]
print(f"{time.perf_counter() - begin}s")


11.028624057769775s


In [10]:
# Summary of the nb_exp chi-squared statistics (MD5), one value per line.
print(
    f"median: {np.median(stat_tests_md5)}",
    f"mean: {np.mean(stat_tests_md5)}",
    f"std deviation: {np.std(stat_tests_md5)}",
    sep="\n",
)

median: 0.4023301212544441
mean: 0.9106694227575067
std deviation: 1.280602754906453


### Conclusion

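We fail to reject the null hypothesis (the assignments of experiments A and B are independent) if the median above is lower than the critical value 3.84. In the run shown above the median is ≈ 0.40, well below that threshold. (For reference, a chi-square variable with 1 degree of freedom has a median of about 0.455 and a mean of 1.)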

## Sha256

### Data Generation

In [11]:
# Draw a single sample of `population` users with Sha256Hasher and keep the
# resulting ExpA x ExpB contingency table in `matrix_sha`.
# NOTE(review): how DataHelper.generate creates users and assigns variations
# is not visible from this notebook - confirm in tools/data.
matrix_sha = data.generate(Sha256Hasher(), population)

# Print the 2x2 table together with its row and column sums.
data.display(matrix_sha)

/         ExpA:Var1 ExpA:Var2 Sum       
ExpB:Var1        256       244       500
ExpB:Var2        258       242       500
Sum              514       486      1000


### Chi Squared Test

In [12]:
# Chi-squared statistic of the single SHA-256 table generated above;
# compare it with the critical value 3.841 (DF = 1, alpha = 0.05).
chi.statistic_test(matrix_sha)

0.016012553842212295

### Bigger scale test

In [13]:
# Repeat the SHA-256 experiment nb_exp times and collect one chi-squared
# statistic per generated contingency table.
#
# * time.perf_counter() is used instead of time.time(): it is monotonic and
#   meant for measuring elapsed durations.
# * A comprehension keeps the per-run matrix local, so the `matrix_sha` shown
#   and tested in the earlier cells is no longer silently overwritten.
# * The loop index is unused, hence `_`.
#   NOTE(review): the built-in-hash loop passes the run index as a third
#   argument to data.generate while this one does not - confirm the
#   difference is intentional.
begin = time.perf_counter()
stat_tests_sha = [
    chi.statistic_test(data.generate(Sha256Hasher(), population))
    for _ in range(nb_exp)
]
print(f"{time.perf_counter() - begin}s")


11.556113004684448s


In [14]:
# Summary of the nb_exp chi-squared statistics (SHA-256), one value per line.
print(
    f"median: {np.median(stat_tests_sha)}",
    f"mean: {np.mean(stat_tests_sha)}",
    f"std deviation: {np.std(stat_tests_sha)}",
    sep="\n",
)

median: 0.4762430969300927
mean: 0.9235882968620195
std deviation: 1.2267275988020638


### Conclusion

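We fail to reject the null hypothesis (the assignments of experiments A and B are independent) if the median above is lower than the critical value 3.84. In the run shown above the median is ≈ 0.48, well below that threshold. (For reference, a chi-square variable with 1 degree of freedom has a median of about 0.455 and a mean of 1.)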