In [43]:
import pandas as pd
import numpy as np
import scipy.stats as stats
from scipy.stats import chi2

In [44]:
# Load the survey CSV files and declare the column groups used in the analysis.
# NOTE(review): DATA_DIR is an absolute local path; point it at your own copy
# of the data before running this notebook on another machine.
DATA_DIR = "F:\\github\\doon_pet_survey"

cat = pd.read_csv(DATA_DIR + "\\cat_combine.csv")
cat_dep_var = ['cat_hunt_yn', 'cat_hunt_freq'] # Add cat_hunt
# Every entry needs its own trailing comma: two adjacent string literals are
# silently concatenated by Python into a single (non-existent) column name.
cat_indep_var = [
    'cat_sex', 'cat_neutered', 'cat_describe',
    'cat_age', 'cat_time',
    'cat_time_out', 'cat_stay',
    'cat_feed', 'cat_feed_freq',
] # Add cat_feed

dog = pd.read_csv(DATA_DIR + "\\dog_combine.csv")
dog_dep_var = ['dog_hunt_yn','dog_hunt_freq'] # Add dog_hunt
dog_indep_var = ['dog_neutered', 'dog_sex', 'dog_age'] # Add dog_feed


In [72]:

# function that runs chisq for two pandas series

def run_chi2(df, s1, s2, alpha=0.05):
    """Run a chi-square test of independence on two columns and print the result.

    A contingency table of ``df[s1]`` against ``df[s2]`` is built with
    ``pd.crosstab``. The Pearson chi-square statistic (computed here without
    any continuity correction) is compared with the critical value at
    significance level ``alpha``, and the matching p-value is reported too.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains both columns.
    s1, s2 : str
        Names of the two columns to cross-tabulate.
    alpha : float, default 0.05
        Significance level used for the critical value and the p-value check.

    Returns
    -------
    None
        The outcome is only printed: one verdict based on the critical value
        and one based on the p-value.
    """
    c_tab = pd.crosstab(df[s1], df[s2])  # Contingency table
    obs = c_tab.values  # Observed counts
    # chi2_contingency is used only for the expected frequencies (item 3 of
    # its result); the statistic itself is computed below from obs and exp.
    exp = stats.chi2_contingency(c_tab)[3]

    norow, nocol = c_tab.shape
    dof = (norow - 1) * (nocol - 1)  # Degrees of freedom

    # Pearson statistic: sum over all cells of (observed - expected)^2 / expected
    chi_sq = ((obs - exp) ** 2 / exp).sum()
    crit = chi2.ppf(q=1 - alpha, df=dof)  # Critical value for alpha and dof

    # Survival function = 1 - cdf, evaluated directly so that very small
    # p-values are not rounded to exactly 0.0 by the subtraction.
    pval = chi2.sf(chi_sq, dof)

    # Print results
    print('This chi2 test is run for columns : ',
          s1, ' and ', s2)

    # NOTE(review): "accept h0" strictly means "fail to reject h0"; the wording
    # of the printed messages is kept unchanged.
    if chi_sq >= crit:
        print('reject h0, there is a relationship', '\n',
              'chi2 value = ', chi_sq, '\n',
              'critical value = ', crit, '\n')
    else:
        print('accept h0, there is no relationship', '\n',
              'chi2 value = ', chi_sq, '\n',
              'critical value = ', crit, '\n')
    if pval <= alpha:
        print('reject h0, there is a relationship', '\n',
              'p value = ', pval)
    else:
        print('accept h0, there is no relationship', '\n',
              'p value = ', pval)
    
    

In [73]:
# Chi-square test of independence between the cat_time_out and cat_hunt_yn
# columns of the cat survey, at a significance level of 0.05.
run_chi2(
    df=cat,
    s1='cat_time_out',
    s2='cat_hunt_yn',
    alpha=0.05,
)

This chi2 test is run for columns :  cat_time_out  and  cat_hunt_yn
accept h0, there is no relationship 
 chi2 value =  15.60209045935345 
 critical value =  21.02606981748307 

accept h0, there is no relationship 
 p value =  0.2101480847149243


In [47]:
# Step-by-step version of the test: contingency table, observed counts,
# scipy's chi-square result, and the expected frequencies taken from it.
tab = pd.crosstab(index=cat['cat_time_out'], columns=cat['cat_hunt_yn'])
obs = tab.to_numpy()
val = stats.chi2_contingency(tab)
exp = val[3]  # fourth item of the result holds the expected frequencies


In [48]:
# Show the table, the observed counts, the scipy result and the expected
# frequencies, each separated by " \n " exactly as a space-joined '\n' would be.
print(tab, obs, val, exp, sep=' \n ')

cat_hunt_yn         Have not observed  No  Yes
cat_time_out                                  
1-3 hours                           2   6   30
3-5 hours                           1   4   11
5-7 hours                           2   4   16
7- 10 hours                         0   0    3
>10 hours                           2   6   15
Completely indoors                  0  14   13
I don't know                        0   0    1 
 [[ 2  6 30]
 [ 1  4 11]
 [ 2  4 16]
 [ 0  0  3]
 [ 2  6 15]
 [ 0 14 13]
 [ 0  0  1]] 
 Chi2ContingencyResult(statistic=15.60209045935345, pvalue=0.21014808471492433, dof=12, expected_freq=array([[ 2.04615385,  9.93846154, 26.01538462],
       [ 0.86153846,  4.18461538, 10.95384615],
       [ 1.18461538,  5.75384615, 15.06153846],
       [ 0.16153846,  0.78461538,  2.05384615],
       [ 1.23846154,  6.01538462, 15.74615385],
       [ 1.45384615,  7.06153846, 18.48461538],
       [ 0.05384615,  0.26153846,  0.68461538]])) 
 [[ 2.04615385  9.93846154 26.01538462]
 [ 0.861

In [62]:
# Degrees of freedom of the contingency table: (rows - 1) * (columns - 1).
norow, nocol = tab.shape
dof = (norow - 1) * (nocol - 1)
alpha = 0.05  # significance level
dof

12

In [63]:
# Per-column contributions to the Pearson statistic, accumulated row by row,
# followed by the overall statistic (the sum of those contributions).
chi_sq = sum(
    (obs_row - exp_row) ** 2 / exp_row
    for obs_row, exp_row in zip(obs, exp)
)

chi_sq_st = chi_sq.sum()
print(chi_sq, chi_sq_st, sep=' \n \n ')


[2.72203854 9.96721511 2.9128368 ] 
 
 15.60209045935345


In [64]:
# Critical value: the (1 - alpha) quantile of the chi-square distribution
# with dof degrees of freedom.
crit = chi2.ppf(1 - alpha, dof)
crit

21.02606981748307

In [65]:
# p-value: upper-tail probability of the statistic under chi-square(dof).
# chi2.sf evaluates 1 - cdf directly, so very small p-values are not rounded
# to exactly 0.0 by the subtraction.
pval = chi2.sf(chi_sq_st, dof)
pval

0.2101480847149243

In [66]:
# Verdict from comparing the statistic with the critical value.
print(
    'reject h0, there is a relationship'
    if chi_sq_st >= crit
    else 'accept h0, there is no relationship'
)

accept h0, there is no relationship
