In [93]:
from scipy.stats import chi2, chi2_contingency
import numpy as np
import pandas as pd

In [65]:
class DataGenerator:
    def __init__(self, p1, p2):
        self.p1 = p1
        self.p2 = p2
    
    def next(self):
        click1 = 1 if np.random.random() > self.p1 else 0
        click2 = 1 if np.random.random() > self.p2 else 0
        return click1, click2
    
    # build contingency table
    def buildContingency(self, N1, N2=None, equal_size=True):
        T = np.zeros((2,2)).astype(np.float32)
        
        if equal_size:
            for i in range(N1):
                c1, c2 = self.next()
                T[0,c1] += 1
                T[1,c2] += 1
            return T
        
        elif not equal_size:
            for i in range(N1):
                c1 = 1 if np.random.random() > self.p1 else 0
                T[0,c1] += 1
            for i in range(N2):
                c2 = 1 if np.random.random() > self.p2 else 0
                T[1,c2] += 1
            return T

In [66]:
dg = DataGenerator(0.2, 0.5)
t1 = dg.buildContingency(N1=30)
print(t1)

[[ 4. 26.]
 [16. 14.]]


In [67]:
t2 = dg.buildContingency(30, 50, equal_size=False)
print(t2)

[[ 1. 29.]
 [27. 23.]]


In [86]:
chi2, p, dof, ex = chi2_contingency(t2, correction=False)
print(chi2, p)
print(ex)

21.157509157509157 4.230404132173198e-06
[[10.5 19.5]
 [17.5 32.5]]


In [96]:
def MyChi2TestContingency(table):
    expected_odd = [np.sum(table[:,i]) for i in range(table.shape[1])]
    expected_odd = [expected_odd[i]/np.sum(expected_odd) for i in range(table.shape[1])]
    
    # table with expected value
    expected_t = np.zeros((table.shape[0], table.shape[1]))
    for row in range(expected_t.shape[0]):
        expected_t[row] = np.sum(table[row])*np.array(expected_odd)
    
    # chi sqaure test statitics
    def calChi2(expected, observed):
        return (observed-expected)**2/expected
        
    chi2Func = np.vectorize(calChi2)
    chi2_t = chi2Func(expected_t, table)
    chi2_v = np.sum(chi2_t)
    
    # p-value
    p = 1- chi2.cdf(chi2_v, df=1)

    return chi2_v, p, expected_t

In [98]:
chi2_v, p, expected_t = MyChi2TestContingency(t2)
print(chi2_v, p)
print(expected_t)

21.157509157509157 4.2304041322216435e-06
[[10.5 19.5]
 [17.5 32.5]]
