[Reference](https://towardsdatascience.com/gentle-introduction-to-chi-square-test-for-independence-7182a7414a95)

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import chi2
from scipy.stats import chi2_contingency

In [2]:
# Observed counts as a 2x4 contingency table: one row per gender, one column
# per t-shirt colour. The column labels match those used for the
# expected-frequency table further down.
tshirts = pd.DataFrame(
    [
        [48, 22, 33, 47],
        [35, 36, 42, 27],
    ],
    index=["Male", "Female"],
    columns=["Black", "White", "Red", "Blue"],
)
tshirts

Unnamed: 0,Balck,White,Red,Blue
Male,48,22,33,47
Female,35,36,42,27


In [3]:
# Show the column labels of the observed-count table.
tshirts.columns

Index(['Balck', 'White', 'Red', 'Blue'], dtype='object')

In [4]:
# Chi-square test of independence on the observed table. The result holds, in
# order: the test statistic, the p-value, the degrees of freedom, and the
# array of expected frequencies.
chi2_contingency(tshirts)

(11.56978992417547,
 0.00901202511379703,
 3,
 array([[42.93103448, 30.        , 38.79310345, 38.27586207],
        [40.06896552, 28.        , 36.20689655, 35.72413793]]))

In [5]:
# Element 3 of the result is the array of expected frequencies.
# NOTE(review): despite its name, `df` is that array, not a DataFrame and not
# the degrees of freedom.
df=chi2_contingency(tshirts)[3]

In [7]:
# Present the expected frequencies as a labelled table, rounded to two
# decimal places.
pd.DataFrame(
    df,
    index=["Male", "Female"],
    columns=["Black", "White", "Red", "Blue"],
).round(2)

Unnamed: 0,Black,White,Red,Blue
Male,42.93,30.0,38.79,38.28
Female,40.07,28.0,36.21,35.72


In [8]:
# Element 0 of the result is the chi-square test statistic.
chisquare=chi2_contingency(tshirts)[0]
chisquare

11.56978992417547

\begin{equation}
\chi^2=\sum\frac{(O-E)^2}{E}
\end{equation}

where $O$ is the observed (actual) value and $E$ is the expected value.

In [9]:
# Element 1 of the result is the p-value of the test.
pvalue=chi2_contingency(tshirts)[1]
pvalue

0.00901202511379703

In [10]:
# Element 2 of the result is the degrees of freedom.
dof=chi2_contingency(tshirts)[2]
dof

3

In [12]:
# Load the horizontal layout (one row per gender) from the hosted CSV; the
# 'gender' column becomes the row index.
csvfile = (
    'https://raw.githubusercontent.com/shinokada/'
    'python-for-ib-diploma-mathematics/master/Data/tshirts-horizontal.csv'
)
tshirtshor = pd.read_csv(csvfile, index_col='gender')
tshirtshor

Unnamed: 0_level_0,Black,White,Red,Blue
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,48,12,33,57
Female,35,46,42,27


In [13]:
# Run the test of independence on the CSV-loaded table.
chi2_contingency(tshirtshor)

(33.76146477535758, 2.2247293911334693e-07, 3, array([[41.5, 29. , 37.5, 42. ],
        [41.5, 29. , 37.5, 42. ]]))

In [14]:
# Load the vertical layout (one row per colour) from the hosted CSV; the
# 'Color' column becomes the row index.
csvfile2 = (
    'https://raw.githubusercontent.com/shinokada/'
    'python-for-ib-diploma-mathematics/master/Data/tshirts-vertical.csv'
)
tshirtsver = pd.read_csv(csvfile2, index_col='Color')
tshirtsver

Unnamed: 0_level_0,Male,Female
Color,Unnamed: 1_level_1,Unnamed: 2_level_1
Black,48,35
White,12,46
Red,33,42
Blue,57,27


In [15]:
# Run the same test on the vertical (colour-by-gender) layout.
chi2_contingency(tshirtsver)

(33.76146477535758, 2.2247293911334693e-07, 3, array([[41.5, 41.5],
        [29. , 29. ],
        [37.5, 37.5],
        [42. , 42. ]]))

In [16]:
# Transpose so that genders become the rows and colours become the columns.
tshirtsver.T

Color,Black,White,Red,Blue
Male,48,12,33,57
Female,35,46,42,27


In [17]:
# Test the transposed table. The recorded outputs show the same statistic,
# p-value and degrees of freedom for either orientation.
chi2_contingency(tshirtsver.T)

(33.76146477535758, 2.2247293911334693e-07, 3, array([[41.5, 29. , 37.5, 42. ],
        [41.5, 29. , 37.5, 42. ]]))

In [18]:
# Critical value of the chi-square distribution at the 1% significance level.
# `chi2` is imported in the notebook's top import cell.
significance = 0.01
p = 1 - significance  # cumulative probability to the left of the critical value
dof = chi2_contingency(tshirtshor)[2]  # element 2 is the degrees of freedom
critical_value = chi2.ppf(p, dof)  # ppf is the inverse of the CDF
critical_value

11.344866730144373

In [19]:
# Sanity check: the CDF evaluated at the critical value gives back the
# cumulative probability the critical value was computed from.
p = chi2.cdf(critical_value, dof)
p

0.99

In [20]:
# Observed counts as a 3x3 contingency table: one row per science subject,
# one column per maths course. Each row label is paired with its counts.
subjects = pd.DataFrame.from_dict(
    {
        'Biology': [25, 46, 15],
        'Chemistry': [15, 44, 15],
        'Physics': [10, 10, 20],
    },
    orient='index',
    columns=['Math SL AA', 'Math SL AI', 'Math HL'],
)
subjects

Unnamed: 0,Math SL AA,Math SL AI,Math HL
Biology,25,46,15
Chemistry,15,44,15
Physics,10,10,20


In [22]:
# Decision by the critical-value method: reject H0 when the test statistic
# exceeds the chi-square critical value at the chosen significance level.
chi, pval, dof, exp = chi2_contingency(subjects)
print('p-value is: ', pval)
significance = 0.05
p = 1 - significance  # cumulative probability to the left of the critical value
critical_value = chi2.ppf(p, dof)
print('chi=%.6f, critical value=%.6f\n' % (chi, critical_value))
if chi > critical_value:
    print("""At %.2f level of significance, we reject the null hypotheses and accept H1. 
They are not independent.""" % (significance))
else:
    # A non-significant result is not evidence of independence; it only means
    # the data do not contradict H0 at this significance level.
    print("""At %.2f level of significance, we fail to reject the null hypothesis. 
There is insufficient evidence that they are not independent.""" % (significance))

p-value is:  0.0004176680832291999
chi=20.392835, critical value=9.487729

At 0.05 level of significance, we reject the null hypotheses and accept H1. 
They are not independent.


In [23]:
# Decision by the p-value method: reject H0 when the p-value is below the
# chosen significance level.
chi, pval, dof, exp = chi2_contingency(subjects)
significance = 0.05
print('p-value=%.6f, significance=%.2f\n' % (pval, significance))
if pval < significance:
    print("""At %.2f level of significance, we reject the null hypotheses and accept H1. 
They are not independent.""" % (significance))
else:
    # A non-significant result is not evidence of independence; it only means
    # the data do not contradict H0 at this significance level.
    print("""At %.2f level of significance, we fail to reject the null hypothesis. 
There is insufficient evidence that they are not independent.""" % (significance))

p-value=0.000418, significance=0.05

At 0.05 level of significance, we reject the null hypotheses and accept H1. 
They are not independent.
