In [2]:
import pandas as pd
import numpy as np
import scipy.stats as stats
from scipy.stats import chi2
import os

In [3]:
# Load csv file
cat = pd.read_csv("F:\\github\\doon_pet_survey\\cat_combine.csv")
cat_dep_var = ['cat_hunt_yn', 'cat_hunt_freq'] # Add cat_hunt
cat_indep_var = ['cat_sex','cat_neutered','cat_describe',
             'cat_age','cat_time',
             'cat_time_out','cat_stay',
             'cat_feed_freq'] # Add cat_feed
dog = pd.read_csv("F:\\github\\doon_pet_survey\\dog_combine.csv")
dog_dep_var = ['dog_hunt_yn','dog_hunt_freq'] # Add dog_hunt
dog_indep_var = ['dog_neutered', 'dog_sex', 'dog_age']

In [1]:

# function that runs chisq for two pandas series

def run_chi2(df, s1, s2, alpha):
    
    df1 = df.dropna()
    
    c_tab = pd.crosstab(df1[s1], df1[s2]) # Create contingency table
    
    obs = c_tab.values # Observed values
    vals = stats.chi2_contingency(c_tab) # Run Chi2 test of independence of variables
    exp = vals[3] # Expected values
    
    norow = len(c_tab) # No of rows in c_tab
    nocol = len(c_tab.columns) # No of cols in c_tab
    dof = (norow-1) * (nocol-1) # Degree of freedom
    
    chi_sq = sum([(o-e)**2/e for o,e in zip(obs,exp)]).sum() # Find chi2 value
    crit = chi2.ppf(q = 1-alpha, df=dof) # Find critical value using alpha and degree of freedom
    
    pval = 1-chi2.cdf(x=chi_sq, df =dof)
    
    if chi_sq >= crit:
          chi_sq_result = 'reject h0, there is a relationship'
    else:
        chi_sq_result = 'accept h0, there is no  relationship'
    if pval <= alpha:
        pval_result = 'reject h0, there is a relationship'
    else:
        pval_result = 'accept h0, there is no relationship'
    
    return f'{chi_sq_result} Chi square value = {chi_sq} and Critical value = {crit} {pval_result} as P value = {pval}\n'
    

In [4]:
res = run_chi2(df= cat, s1='cat_time_out', s2='cat_hunt_yn', alpha = 0.05)
res

'accept h0, there is no  relationship Chi square value = 0.0 and Critical value = nan accept h0, there is no relationship as P value = nan\n'

In [None]:
for dv in cat_dep_var:
    for iv in cat_indep_var:
        res = run_chi2(df= cat, s1=dv, s2=iv, alpha = 0.05)
        print(res)

In [None]:
with open('chisq_test_cat.txt','w') as f:
    for dv in cat_dep_var:
        for iv in cat_indep_var:
            res = run_chi2(df= cat, s1=dv, s2=iv, alpha = 0.05)
            f.write(f'Result for {dv} and {iv}: \n{res}\n\n')


In [5]:
filename = 'chisq_test2'
if not os.path.isfile(filename):
    # If the file does not exist, create it
    with open(filename, 'w') as file:
        file.write('')  # Create an empty file
    print(f"'{filename}' has been created.")
else:
    print(f"'{filename}' already exists.")

'chisq_test2' already exists.


In [9]:
df=cat
s1= 'cat_age'
s2 = 'cat_sex'

df1 = df.dropna()
    
c_tab = pd.crosstab(df[s1], df[s2]) # Create contingency table
c_tab


cat_sex,Female,Male
cat_age,Unnamed: 1_level_1,Unnamed: 2_level_1
Adult (3-6 years),26,14
Junior (7 months - 2 years),24,31
Kitten (0-6 months),16,15
Mature (7-10 years),3,0
Senior (11-14 years),2,0
