In [None]:
## CHI-SQUARE TEST

In [1]:
import numpy as np
import scipy.stats as stats

# Step 1: Observed counts, laid out as a 5-row x 2-column contingency table.
observed = np.array(
    [
        [50, 70],
        [80, 100],
        [60, 90],
        [30, 50],
        [20, 50],
    ]
)



In [2]:
# Step 2: Marginal totals of the contingency table.
grand_total = np.sum(observed)          # sum over every cell
row_totals = np.sum(observed, axis=1)   # one total per row
col_totals = np.sum(observed, axis=0)   # one total per column

In [3]:
# Step 3: Expected frequency of each cell:
#   expected[i, j] = row_totals[i] * col_totals[j] / grand_total
# Broadcasting a column vector against a row vector yields the full
# rows x columns table of products before dividing by the grand total.
expected = row_totals[:, np.newaxis] * col_totals[np.newaxis, :] / grand_total

In [4]:
# Step 4: Chi-Square statistic = sum over all cells of
#   (observed - expected)^2 / expected
chi_square_statistic = np.sum(np.square(observed - expected) / expected)

In [5]:
# Step 5: Degrees of freedom, (rows - 1) * (columns - 1).
# np.size(observed, axis) gives the length of the table along that axis.
df = (np.size(observed, 0) - 1) * (np.size(observed, 1) - 1)

In [6]:
# Step 6: Critical value for significance level alpha = 0.05.
# It is the (1 - alpha) quantile of the chi-square distribution with `df`
# degrees of freedom, taken here from a frozen distribution object.
alpha = 0.05
critical_value = stats.chi2(df).ppf(1 - alpha)

In [7]:
# Step 7: Decision rule - reject the null hypothesis only when the statistic
# is strictly greater than the critical value; otherwise fail to reject it.
decision = (
    "Reject the null hypothesis (there is an association)."
    if chi_square_statistic > critical_value
    else "Fail to reject the null hypothesis (no association)."
)

In [8]:
# Step 8: Report the results.
# Each table is printed directly under its heading; sep="\n" puts every
# argument on its own line, so the emitted text matches one print per item.
print("Observed Frequencies:", observed, sep="\n")
print("\nExpected Frequencies:", expected, sep="\n")

# Scalar summary: statistic, critical value, degrees of freedom, decision.
print(
    f"\nChi-Square Statistic: {chi_square_statistic}",
    f"Critical Value at alpha = {alpha}: {critical_value}",
    f"\nDegrees of Freedom: {df}",
    f"\nDecision: {decision}",
    sep="\n",
)

Observed Frequencies:
[[ 50  70]
 [ 80 100]
 [ 60  90]
 [ 30  50]
 [ 20  50]]

Expected Frequencies:
[[ 48.  72.]
 [ 72. 108.]
 [ 60.  90.]
 [ 32.  48.]
 [ 28.  42.]]

Chi-Square Statistic: 5.638227513227513
Critical Value at alpha = 0.05: 9.487729036781154

Degrees of Freedom: 4

Decision: Fail to reject the null hypothesis (no association).
