In [1]:
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency, chisquare, fisher_exact

# Create a dataset with 10 rows
data = {
    'Gender': ['Male', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female'],
    'Preference': ['A', 'B', 'A', 'A', 'B', 'B', 'A', 'B', 'A', 'A']
}
df = pd.DataFrame(data)
print(df)

   Gender Preference
0    Male          A
1  Female          B
2    Male          A
3  Female          A
4    Male          B
5  Female          B
6    Male          A
7  Female          B
8    Male          A
9  Female          A


In [2]:
# Chi-Square Goodness-of-Fit
# H0: Preferences are equally distributed (50% A, 50% B)
observed_counts = df['Preference'].value_counts().sort_index()
expected_counts = [5, 5] # Since n=10, expect 5 of each
print(f"Observed: {observed_counts.values}")
print(f"Expected: {expected_counts}")

chi2_stat, p_val = chisquare(f_obs=observed_counts, f_exp=expected_counts)
print(f"Chi-Square Goodness-of-Fit: Stat={chi2_stat:.4f}, P-value={p_val:.4f}")

Observed: [6 4]
Expected: [5, 5]
Chi-Square Goodness-of-Fit: Stat=0.4000, P-value=0.5271


In [3]:
# Chi-Square Test of Independence
# H0: Gender and Preference are independent
contingency_table = pd.crosstab(df['Gender'], df['Preference'])
print("\nContingency Table:")
print(contingency_table)

chi2, p, dof, expected = chi2_contingency(contingency_table)
print(f"Chi-Square Test of Independence: Stat={chi2:.4f}, P-value={p:.4f}")


Contingency Table:
Preference  A  B
Gender          
Female      2  3
Male        4  1
Chi-Square Test of Independence: Stat=0.4167, P-value=0.5186
