# Chi-square_Test

## Import Dependencies

In [1]:
import pandas as pd 
import math 
from scipy.stats import chi2_contingency

## Read the Data

In [2]:
dict = {'satisfaction': ['Very Satisfied','Satisfied','Neutral','Unsatisfied','Very Unsatisfied'],
       'smart_thermostat':[50,80,60,30,20], 'smart_light':[70,100,90,50,50],
       'total':[120,180,150,80,70]}
data = pd.DataFrame(data=dict)
data.head()

Unnamed: 0,satisfaction,smart_thermostat,smart_light,total
0,Very Satisfied,50,70,120
1,Satisfied,80,100,180
2,Neutral,60,90,150
3,Unsatisfied,30,50,80
4,Very Unsatisfied,20,50,70


## State the Hypotheses:

Null Hypothesis (H₀): There is no association between the type of device purchased and customer satisfaction level (they are independent).

Alternative Hypothesis (H₁): There is an association between the type of device purchased and customer satisfaction level (they are not independent).

In [3]:
Alternative_Hypothesis = 'There is an association between the type of device purchased and customer satisfaction level (they are not independent).'

Null_Hypothesis: 'There is no association between the type of device purchased and customer satisfaction level (they are independent).'

## Calculate the Expected Frequencies:

In [4]:
smart_thermostate = []

smart_light = []

grand_total = data.total.sum()

for i in range(0,2): 
    for j in range(0,len(data)): 
        if i == 0:
            expected_frequencies = (data.smart_thermostat.sum() * data['total'][j])/grand_total
            smart_thermostate.append(expected_frequencies)
        else : 
            expected_frequencies = (data.smart_light.sum() * data['total'][j])/grand_total
            smart_light.append(expected_frequencies)



In [5]:
expected_frequencies_data = pd.DataFrame(data={'satisfaction':list(data['satisfaction']),'smart_thermostate':smart_thermostate,'smart_light':smart_light})
expected_frequencies_data.head()

Unnamed: 0,satisfaction,smart_thermostate,smart_light
0,Very Satisfied,48.0,72.0
1,Satisfied,72.0,108.0
2,Neutral,60.0,90.0
3,Unsatisfied,32.0,48.0
4,Very Unsatisfied,28.0,42.0


## Calculate the Chi-square Statistic:

In [6]:
Chi_square = []

for i in range(0,2):
    for j in range(0,len(data)): 
        if i == 0: 
            Chi_square.append(math.pow(data['smart_thermostat'][j]-expected_frequencies_data['smart_thermostate'][j],2)/expected_frequencies_data['smart_thermostate'][j])
        else : 
            Chi_square.append(math.pow(data['smart_light'][j]-expected_frequencies_data['smart_light'][j],2)/expected_frequencies_data['smart_light'][j])

sum(Chi_square)

5.638227513227513

## Calculate the Degrees of Freedom:

In [7]:
degrees_of_freedom = (len(data)-1) * (len(data.drop(columns=['satisfaction','total']).columns)-1)
degrees_of_freedom

4

## Find the Critical Value:

In [8]:
alpha = 0.05
p_value = 0.227846

### The P-Value is 0.227846 i got this from online p-value calculator

In [9]:
print(f'{p_value} > {alpha}')

0.227846 < 0.05


In [14]:
print('Since the p-value is greater than the significance level (α), we failed to reject the null hypothesis. \nThis indicates that there is a no significant association between the type of smart home device purchased and the customer satisfaction level.')

Since the p-value is greater than the significance level (α), we failed to reject the null hypothesis. 
This indicates that there is a no significant association between the type of smart home device purchased and the customer satisfaction level.


## Chi-square using stats module

In [11]:
chi2, p, dof, expected = chi2_contingency(data.drop(columns=['satisfaction','total']))

In [12]:
print(f"Chi-square Statistic: {chi2}")
print(f"P-value: {p}")
print(f"Degrees of Freedom: {dof}")
print(f"Expected Frequencies:\n{expected}")

Chi-square Statistic: 5.638227513227513
P-value: 0.22784371130697179
Degrees of Freedom: 4
Expected Frequencies:
[[ 48.  72.]
 [ 72. 108.]
 [ 60.  90.]
 [ 32.  48.]
 [ 28.  42.]]
