### Chi-Square Test for Independence (Example 1)

In [18]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency

# Observed 2x2 contingency table of counts.
# NOTE(review): the messages below suggest the two factors are gender and
# product preference; which factor is on rows vs. columns is not stated -- confirm.
data = np.array([[50, 30], [70, 50]])

# Chi-square test of independence. The call returns the test statistic, the
# p-value, the degrees of freedom and the expected counts under H0.
# For a 2x2 table (dof == 1) SciPy applies Yates' continuity correction by
# default (correction=True), so the statistic is smaller than the uncorrected
# Pearson value.
chi2, p_value, dof, expected = chi2_contingency(data)

print(f"Chi-squere statistics : {chi2:.3f}")
print(f"P_value : {p_value:.3f}")
print("Degree of freedom : ",dof)
print("Expected frequency : ",expected)


# Interpretation at the 5% significance level.
# Failing to reject H0 only means the data show no evidence of an association
# at this level; it does not prove that the two factors are independent.
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: Gender and product preference are dependent.")
else:
    print("Fail to reject the null hypothesis: Gender and product preference are independent.")

Chi-squere statistics : 0.195
P_value : 0.659
Degree of freedom :  1
Expected frequency :  [[48. 32.]
 [72. 48.]]
Fail to reject the null hypothesis: Gender and product preference are independent.


### Chi-Square Goodness of Fit Test (Example 2)

In [21]:
from scipy.stats import chisquare

# Observed counts, one entry per die face.
observed = [20, 15, 25, 20, 18, 22]

# Expected counts under H0 (a fair die): the total number of rolls spread
# evenly over the faces. Derived from `observed` rather than hard-coded so the
# two lists always have the same length and the same total, which `chisquare`
# requires.
expected = [sum(observed) / len(observed)] * len(observed)

# Goodness-of-fit test: compares observed counts with the expected counts.
chi2, p = chisquare(f_obs=observed, f_exp=expected)

# Print results
print("Chi-Square Statistic:", chi2)
print("P-value:", p)

# Interpretation at the 5% significance level.
# Failing to reject H0 means the rolls are consistent with a fair die; it is
# not proof that the die is fair.
alpha = 0.05
if p < alpha:
    print("Reject the null hypothesis: The dice is not fair.")
else:
    print("Fail to reject the null hypothesis: The dice is fair.")


Chi-Square Statistic: 2.9000000000000004
P-value: 0.7153995143435801
Fail to reject the null hypothesis: The dice is fair.


### Chi-Square Test for Independence (Example 3)

In [26]:
# Observed 2x2 counts: smoking status (rows) against disease outcome (columns).
#   row 0 -> [90, 110] : smokers
#   row 1 -> [60, 140] : non-smokers
data = np.array([
    [90, 110],
    [60, 140],
])

# Test of independence on the table above.
chi2, p, dof, expected = chi2_contingency(data)

# Report every quantity returned by the test, one per line.
for label, value in (
    ("Chi-Square Statistic:", chi2),
    ("P-value:", p),
    ("Degrees of Freedom:", dof),
    ("Expected Frequencies:\n", expected),
):
    print(label, value)

# Decision at the 5% significance level.
alpha = 0.05
print(
    "Reject the null hypothesis: Smoking status and disease outcome are dependent."
    if p < alpha
    else "Fail to reject the null hypothesis: Smoking status and disease outcome are independent."
)


Chi-Square Statistic: 8.970666666666666
P-value: 0.002743484745753856
Degrees of Freedom: 1
Expected Frequencies:
 [[ 75. 125.]
 [ 75. 125.]]
Reject the null hypothesis: Smoking status and disease outcome are dependent.


In [29]:
import pandas as pd
from sklearn.feature_selection import chi2
from sklearn.preprocessing import LabelEncoder
from scipy.stats import chi2_contingency

# Small hand-entered sample with Titanic-style columns (10 rows); nothing is
# loaded from disk here.
data = {
    'Pclass': [1, 2, 3, 1, 3, 2, 3, 1, 3, 3],
    'Sex': ['male', 'female', 'female', 'female', 'male', 'male', 'female', 'male', 'female', 'male'],
    'Survived': [1, 1, 0, 1, 0, 0, 1, 0, 1, 0],
}
df = pd.DataFrame(data)

# Turn the 'Sex' strings into integer codes, stored in a new column.
encoder = LabelEncoder()
df['Sex_Encoded'] = encoder.fit_transform(df['Sex'])

# Target vector and feature matrix.
y = df['Survived']
X = df[['Pclass', 'Sex_Encoded']]

# Score each feature against the target with sklearn's chi2 and tabulate the
# per-feature statistic and p-value.
chi_scores, p_values = chi2(X, y)
chi_results = pd.DataFrame(
    {
        'Feature': X.columns,
        'Chi-Square Score': chi_scores,
        'P-Value': p_values,
    }
)
print("Chi-Square Test Results:")
print(chi_results)

# Independent check for 'Sex': cross-tabulate it against 'Survived' and run
# SciPy's contingency-table test on the resulting counts.
# NOTE(review): with only 10 rows the expected counts are small, so the
# chi-square approximation may be unreliable -- confirm this is acceptable.
contingency_table = pd.crosstab(index=df['Sex'], columns=df['Survived'])
chi2_stat, p, dof, expected = chi2_contingency(contingency_table)

print("\nChi-Square Test for 'Sex':")
print(f"Chi-Square Statistic: {chi2_stat:.3f}")
print(f"P-Value: {p:.3f}")
print(f"Degrees of Freedom: {dof}")
print("Expected Frequencies:")
print(expected)

# Decision at the 5% significance level.
alpha = 0.05
print(
    "Reject the null hypothesis: 'Sex' is significantly associated with survival."
    if p < alpha
    else "Fail to reject the null hypothesis: 'Sex' is not significantly associated with survival."
)


Chi-Square Test Results:
       Feature  Chi-Square Score   P-Value
0       Pclass          0.181818  0.669815
1  Sex_Encoded          1.800000  0.179712

Chi-Square Test for 'Sex':
Chi-Square Statistic: 1.600
P-Value: 0.206
Degrees of Freedom: 1
Expected Frequencies:
[[2.5 2.5]
 [2.5 2.5]]
Fail to reject the null hypothesis: 'Sex' is not significantly associated with survival.
