Chi-Square Testing

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency, chi2

In [2]:
data = {
    'Very Satisfied':[50,70],
    'Satisfied':[80,100],
    'Neutral':[60,90],
    'Unsatisfied':[30,50],
    'Very Unsatisfied':[20,50]
}

In [3]:
df = pd.DataFrame(data, index=['Smart Thermostat','Smart Light'])
print('Contingency Table:\n',df,'\n')

Contingency Table:
                   Very Satisfied  Satisfied  Neutral  Unsatisfied  \
Smart Thermostat              50         80       60           30   
Smart Light                   70        100       90           50   

                  Very Unsatisfied  
Smart Thermostat                20  
Smart Light                     50   



In [5]:
print('Step1: State the Hypotheses')
print('H0: There is no association between device type and customer satisfaction.')
print('H1: There is a significant association between device type and customer satisfaction.')

Step1: State the Hypotheses
H0: There is no association between device type and customer satisfaction.
H1: There is a significant association between device type and customer satisfaction.


In [6]:
chi2_stat, p_val, dof, expected = chi2_contingency(df)

In [9]:
print('Step2: Chi-Square Test Results')
print(f'Chi-Square Statistic = {chi2_stat:.4f}')
print(f'Degrees of Freedom = {dof}')
print(f'P-Value = {p_val:.4f}')
print('\nExpected Frequencies:\n', pd.DataFrame(expected, index=df.index, columns=df.columns))

Step2: Chi-Square Test Results
Chi-Square Statistic = 5.6382
Degrees of Freedom = 4
P-Value = 0.2278

Expected Frequencies:
                   Very Satisfied  Satisfied  Neutral  Unsatisfied  \
Smart Thermostat            48.0       72.0     60.0         32.0   
Smart Light                 72.0      108.0     90.0         48.0   

                  Very Unsatisfied  
Smart Thermostat              28.0  
Smart Light                   42.0  


In [10]:
alpha = 0.5
critical_value = chi2.ppf(1 - alpha, dof)
print('Step3: Critical Value')
print(f'Critical Value at alpha={alpha} and dof={dof} = {critical_value:.4f}')

Step3: Critical Value
Critical Value at alpha=0.5 and dof=4 = 3.3567


In [11]:
print('Step4: Decision')
if chi2_stat > critical_value:
    print('Reject the Null Hypothesis (H0).')
    print('Conclusion: There is a significant association between device type and customer satisfaction.')
else:
    print('Fail to Reject the Null Hypothesis (H0).')
    print('Conclusion: There is no significant association between device type and customer satisfaction.') 

Step4: Decision
Reject the Null Hypothesis (H0).
Conclusion: There is a significant association between device type and customer satisfaction.


Hypothesis Testing

In [12]:
import math
from scipy.stats import norm

In [13]:
sample_mean = 3050  #x
n = 25              #sample size
X_mean = 600        #mean number of units produced
sigma_X = 25        #std dev of units
fixed_cost = 1000
variable_cost = 5 

In [14]:
print('Step1: Hypothese')
print('H0: The weekly operating cost follows the theoretical model (W = 1000 + 5X).')
print('H1: The weekly operating cost is higher than the theoretical model.')

Step1: Hypothese
H0: The weekly operating cost follows the theoretical model (W = 1000 + 5X).
H1: The weekly operating cost is higher than the theoretical model.


In [16]:
theoretical_mean = fixed_cost + variable_cost * X_mean
print('Step2: Theoretical mean weeekly cost')
print(f'Theoretical Mean(μ) = {theoretical_mean}')

Step2: Theoretical mean weeekly cost
Theoretical Mean(μ) = 4000


In [17]:
sigma = variable_cost * sigma_X
print('step3: Standard deviation of weekly cost')
print(f'σ = {sigma}') 

step3: Standard deviation of weekly cost
σ = 125


In [18]:
test_stat = (sample_mean - theoretical_mean) / (sigma / math.sqrt(n))
print('Step4: Test Statistic')
print(f'Z = {test_stat:.4f}')

Step4: Test Statistic
Z = -38.0000


In [19]:
alpha = 0.5
critical_value = norm.ppf(1 - alpha)
print('Step5: Critical Value')
print(f'Critical Value (Z_critical at α=0.05) = {critical_value:.4f}')

Step5: Critical Value
Critical Value (Z_critical at α=0.05) = 0.0000


In [20]:
print('Step6: Decision')
if test_stat > critical_value:
    print('Reject H0: There is evidence that weekly operating costs are higher than the model suggests.')
else:
    print('Fail to Reject H0: There is no sufficient evidence that costs are higher than the model suggests.')

Step6: Decision
Fail to Reject H0: There is no sufficient evidence that costs are higher than the model suggests.
