# Chi-Squared Test

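* evaluates whether observed categorical counts differ significantly from what is expected: either from a hypothesized distribution (goodness-of-fit) or from what independence of two categorical variables would predict (test for independence)
* tests the null hypothesis that the observed counts follow the expected distribution; for two variables, that the variables are independent
* assumptions: the data are categorical counts, the sample is random, the observations are independent of one another, and the expected count in each category or cell is not too small (a common rule of thumb is at least 5)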

In [1]:
# Chi-square Goodness-of-Fit Test
# Compares one observed frequency distribution against an expected one and
# reports how unlikely the discrepancy would be if the expected distribution held.
from scipy.stats import chisquare

# Counts actually seen for each of the six outcomes (60 observations in total).
observed_frequencies = [9, 11, 16, 10, 9, 5]

# Counts expected per outcome, e.g. a fair six-sided die rolled 60 times:
# every face is expected 10 times.
expected_frequencies = [10] * len(observed_frequencies)

# Run the test; the result unpacks into the test statistic and its p-value.
chi2_statistic, p_value = chisquare(observed_frequencies, f_exp=expected_frequencies)

# Report both numbers, one per line.
print(
    f"Chi-square Statistic: {chi2_statistic}",
    f"P-value: {p_value}",
    sep="\n",
)

Chi-square Statistic: 6.4
P-value: 0.2692187989871035


In [2]:
# Chi-square Test for Independence
# Checks whether two categorical variables are related or independent of each other.
# The input is a contingency table: a cross-tabulation of counts with one
# variable on the rows and the other on the columns.
from scipy.stats import chi2_contingency
import pandas as pd

# Example counts (gender vs. product preference).
# Each key is a product column; its values are ordered [Male, Female].
data = {
    'Product A': [30, 20],
    'Product B': [15, 25],
    'Product C': [5, 10],
}
contingency_table = pd.DataFrame(data, index=['Male', 'Female'])

# Show the observed table before testing it.
print("Contingency Table:", contingency_table, sep="\n")

# Run the test. The result unpacks into four values: the test statistic,
# the p-value, the degrees of freedom, and the table of expected counts.
chi2_statistic, p_value, degrees_of_freedom, expected_frequencies = chi2_contingency(
    contingency_table
)

# Report all four results; the leading "\n" leaves a blank line after the table.
print(
    f"\nChi-square Statistic: {chi2_statistic}",
    f"P-value: {p_value}",
    f"Degrees of Freedom: {degrees_of_freedom}",
    f"Expected Frequencies:\n{expected_frequencies}",
    sep="\n",
)

Contingency Table:
        Product A  Product B  Product C
Male           30         15          5
Female         20         25         10

Chi-square Statistic: 5.942045454545454
P-value: 0.05125086785900626
Degrees of Freedom: 2
Expected Frequencies:
[[23.80952381 19.04761905  7.14285714]
 [26.19047619 20.95238095  7.85714286]]


# Documentation

[chisquare](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chisquare.html)

[chi2_contingency](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chi2_contingency.html)