In [59]:
import numpy as np
import pandas as pd
import scipy

In [60]:
np.random.seed(10)
# Sample data randomly at fixed probabilities
type_bottle = np.random.choice(a= ["paper","cans","glass","others","plastic"],
                              p = [0.05, 0.15 ,0.25, 0.05, 0.5],
                              size=12000)
 
# Sample data randomly at fixed probabilities
month = np.random.choice(a= ["January","February","March", "April", "May", "June", "July",
                            "August", "September", "October", "November", "December"],
                              p = [1/12, 2/12, 0.25/12, 0.5/12, 0.25/12, 2/12, 1/12, 0.5/12, 1/12, 0.5/12, 1/12, 2/12],
                              size=12000)
 
bottles = pd.DataFrame({"types":type_bottle, 
                       "months":month})
 
bottles_tab = pd.crosstab(bottles.types, bottles.months, margins = True)
 
bottles_tab.columns = ["January","February","March", "April", "May", "June", "July",
                            "August", "September", "October", "November", "December","row_totals"]
 
bottles_tab.index = ["paper","cans","glass","others","plastic","col_totals"]
 
observed = bottles_tab.iloc[0:5,0:12]   # Get table without totals for later use

bottles_tab

Unnamed: 0,January,February,March,April,May,June,July,August,September,October,November,December,row_totals
paper,85,66,300,304,159,174,296,41,44,147,71,155,1842
cans,107,136,490,487,261,274,499,46,70,228,114,202,2914
glass,19,23,105,98,52,49,93,9,19,58,25,52,602
others,19,23,137,104,60,54,124,16,16,52,33,57,695
plastic,241,268,1024,997,495,467,924,134,141,514,240,502,5947
col_totals,471,516,2056,1990,1027,1018,1936,246,290,999,483,968,12000


In [61]:
res = scipy.stats.chi2_contingency(observed=observed, correction=True)

In [62]:
chi2 = res.statistic
pvalue = res.pvalue
dof = res.dof
expected = pd.DataFrame(res.expected_freq, index = ["paper","cans","glass","others","plastic"], columns = ["January","February","March", "April", "May", "June", "July",
                            "August", "September", "October", "November", "December"])
expected

Unnamed: 0,January,February,March,April,May,June,July,August,September,October,November,December
paper,72.2985,79.206,315.596,305.465,157.6445,156.263,297.176,37.761,44.515,153.3465,74.1405,148.588
cans,114.3745,125.302,499.265333,483.238333,249.389833,247.204333,470.125333,59.737,70.421667,242.5905,117.2885,235.062667
glass,23.6285,25.886,103.142667,99.831667,51.521167,51.069667,97.122667,12.341,14.548333,50.1165,24.2305,48.561333
others,27.27875,29.885,119.076667,115.254167,59.480417,58.959167,112.126667,14.2475,16.795833,57.85875,27.97375,56.063333
plastic,233.41975,255.721,1018.919333,986.210833,508.964083,504.503833,959.449333,121.9135,143.719167,495.08775,239.36675,479.724667


In [63]:

print("\n" + 100*"-" + "\n")
print("chi2: %.2f \npvalue: %.2f" % (chi2, pvalue))

if pvalue < 0.5:
    print("Null Hypothesis rejected: The test results show a significant relationship between the variables.")
else:
    print("Null Hypothesis accepted: The the results don't show a significant relationship between the variables.")
print("\n" + 100*"-" + "\n")


----------------------------------------------------------------------------------------------------

chi2: 48.93 
pvalue: 0.28
Null Hypothesis rejected: The test results show a significant relationship between the variables.

----------------------------------------------------------------------------------------------------

