In [1]:
import pandas as pd
import numpy as np
from scipy.stats import chi2

In [2]:
# H0 (null hypothesis): device type and satisfaction level are independent.
H0 = (
    "There is no significant association between the type of smart home "
    "device purchased and the customer satisfaction level."
)
# H1 (alternative hypothesis): device type and satisfaction level are associated.
H1 = (
    "There is a significant association between the type of smart home "
    "device purchased and the customer satisfaction level."
)

In [3]:
# Load the contingency table (satisfaction level x device type) from disk
# and display it as the cell output.
csv_path = "ChiSquareData.csv"
data = pd.read_csv(csv_path)
data

Unnamed: 0,Satisfaction,Smart Thermostat,Smart Light,Total
0,Very Satisfied,50,70,120
1,Satisfied,80,100,180
2,Neutral,60,90,150
3,Unsatisfied,30,50,80
4,Very Unsatisfied,20,50,70
5,Total,240,360,600


In [4]:
# Keep only the observed counts: drop the last row (the "Total" row), the
# first column (the Satisfaction labels) and the last column (the "Total"
# column), then convert what is left to a NumPy array.
count_rows = slice(None, -1)
count_cols = slice(1, -1)
observed_frequencies = data.iloc[count_rows, count_cols].to_numpy()
observed_frequencies

array([[ 50,  70],
       [ 80, 100],
       [ 60,  90],
       [ 30,  50],
       [ 20,  50]])

In [5]:
# Marginal totals: one per satisfaction level (rows) and one per device (columns)
row_totals = observed_frequencies.sum(axis=1)
col_totals = observed_frequencies.sum(axis=0)

# Grand total of all observed counts
grand_total = observed_frequencies.sum()
print(grand_total)
# Expected count under independence: E[i, j] = row_total[i] * col_total[j] / grand_total.
# Broadcasting a column vector against a row vector builds the full table of products.
expected_frequencies = row_totals[:, np.newaxis] * col_totals / grand_total
print(expected_frequencies)

600
[[ 48.  72.]
 [ 72. 108.]
 [ 60.  90.]
 [ 32.  48.]
 [ 28.  42.]]


In [6]:
# Pearson chi-square statistic: sum over every cell of (observed - expected)^2 / expected
squared_deviation = np.square(observed_frequencies - expected_frequencies)
chi2_statistic = (squared_deviation / expected_frequencies).sum()
print(chi2_statistic)

5.638227513227513


In [7]:
# Significance level used for the test (5%)
alpha = 0.05
# Degrees of freedom = (rows - 1) * (columns - 1) of the contingency table.
# The shape is taken from observed_frequencies, which holds only the counts.
# The raw `data` frame also carries the Satisfaction label column and the
# Total row/column, so deriving the shape from it needs hand-tuned offsets
# that silently give a wrong answer if the CSV layout changes.
n_rows, n_cols = observed_frequencies.shape
dof = (n_rows - 1) * (n_cols - 1)
print(dof)

4


In [8]:
# Critical value = chi-square percent point function (inverse CDF) evaluated
# at 1 - alpha; for alpha = 0.05 and 4 degrees of freedom it is about 9.49.
critical_value = chi2.ppf(1 - alpha, df=dof)
print(critical_value)

9.487729036781154


In [9]:
# Decision rule: reject H0 only when the computed chi-square statistic is
# strictly greater than the critical value; otherwise H0 stands.
reject_null = chi2_statistic > critical_value
print(H1 if reject_null else H0)

There is no significant association between the type of smart home device purchased and the customer satisfaction level.
