# Chi-Square Test of Independence
### Association between Device Type and Customer Satisfaction

In [3]:
import numpy as np
import pandas as pd
from scipy.stats import chi2

In [4]:
# Hypotheses for the chi-square test of independence (tested at alpha = 0.05 below):
# H0 (null):        device type and customer satisfaction are independent
# H1 (alternative): device type and customer satisfaction are not independent

In [5]:
# Observed response counts (contingency table).
# Rows: satisfaction level, from most to least satisfied.
# Columns: device type, in the order given by `col_labels` in the next cell.
observed = np.array(
    [
        [50, 70],    # Very Satisfied
        [80, 100],   # Satisfied
        [60, 90],    # Neutral
        [30, 50],    # Unsatisfied
        [20, 50],    # Very Unsatisfied
    ]
)

In [6]:
# Axis labels for the contingency table: satisfaction levels index the rows,
# device types name the columns.
row_labels = [
    "Very Satisfied",
    "Satisfied",
    "Neutral",
    "Unsatisfied",
    "Very Unsatisfied",
]
col_labels = ["Smart Thermostat", "Smart Light"]

# Wrap the raw counts in a labelled frame so the table renders readably.
observed_df = pd.DataFrame(data=observed, index=row_labels, columns=col_labels)
display(observed_df)


Unnamed: 0,Smart Thermostat,Smart Light
Very Satisfied,50,70
Satisfied,80,100
Neutral,60,90
Unsatisfied,30,50
Very Unsatisfied,20,50


In [7]:
# Marginal totals of the observed table; these feed the expected counts.
row_totals = np.sum(observed, axis=1)  # one total per satisfaction level (row)
col_totals = np.sum(observed, axis=0)  # one total per device type (column)
N = np.sum(observed)                   # grand total over every cell

In [8]:
# Expected count of each cell under independence:
#   E[i, j] = row_totals[i] * col_totals[j] / N
# The column vector of row totals broadcasts against the column totals,
# giving the full (rows x columns) table in one step.
expected = row_totals[:, np.newaxis] * col_totals / N

expected_df = pd.DataFrame(data=expected, index=row_labels, columns=col_labels)
display(expected_df.round(decimals=2))

Unnamed: 0,Smart Thermostat,Smart Light
Very Satisfied,48.0,72.0
Satisfied,72.0,108.0
Neutral,60.0,90.0
Unsatisfied,32.0,48.0
Very Unsatisfied,28.0,42.0


## Chi-Square Statistic

In [9]:
# Per-cell contribution to the statistic, (O - E)^2 / E; the test statistic
# is the sum of these contributions over the whole table.
chi_square_contrib = np.square(observed - expected) / expected
chi_square_stat = chi_square_contrib.sum()

# Show the cell-level breakdown so the largest contributors are visible.
chi_square_contrib_df = pd.DataFrame(
    data=chi_square_contrib,
    index=row_labels,
    columns=col_labels,
)
display(chi_square_contrib_df.round(decimals=4))

print("Chi-Square Statistic:", round(chi_square_stat, 4))

Unnamed: 0,Smart Thermostat,Smart Light
Very Satisfied,0.0833,0.0556
Satisfied,0.8889,0.5926
Neutral,0.0,0.0
Unsatisfied,0.125,0.0833
Very Unsatisfied,2.2857,1.5238


Chi-Square Statistic: 5.6382


In [10]:
# Degrees of freedom for an r x c contingency table: (r - 1) * (c - 1).
# NOTE: `df` here is the integer degrees of freedom, not a pandas DataFrame.
r, c = observed.shape  # rows and columns
df = (r - 1) * (c - 1)
print("Degrees of Freedom:", df)

Degrees of Freedom: 4


In [11]:
# Significance level of the test.
alpha = 0.05

# Critical value: the (1 - alpha) quantile of the chi-square distribution
# with `df` degrees of freedom.
critical_value = chi2.ppf(q=1 - alpha, df=df)
print(f"Critical Value (alpha={alpha}, df={df}):", round(critical_value, 4))

Critical Value (alpha=0.05, df=4): 9.4877


# Final Decision

In [12]:
# Critical-value decision rule: reject H0 only when the statistic is strictly
# greater than the critical value. Pick the message pair first, print once.
if chi_square_stat > critical_value:
    verdict = (
        "Decision: Reject H0",
        "Conclusion: There IS a significant association between device type and satisfaction.",
    )
else:
    verdict = (
        "Decision: Fail to Reject H0",
        "Conclusion: No significant evidence of an association between device type and satisfaction.",
    )
print(*verdict, sep="\n")

Decision: Fail to Reject H0
Conclusion: No significant evidence of an association between device type and satisfaction.


# Conclusion

The test statistic (χ² ≈ 5.64, df = 4) is below the critical value of 9.49, so we fail to reject H0. At the 5% significance level, there is not sufficient evidence to conclude that customer satisfaction level is associated with device type (Smart Thermostat vs. Smart Light). In other words, the observed differences are consistent with random variation under independence.