## CHI-SQUARE TEST

In [2]:
import scipy.stats as stats

In [3]:
import pandas as pd
import seaborn as sns
import numpy as np


In [4]:
# Survey counts per satisfaction level for each device type. The final row and
# the final column carry the marginal totals.
column_names = ["Satisfaction Level", "Smart Thermostat", "Smart Light", "Total"]
column_values = [
    ["Very Satisfied", "Satisfied", "Neutral", "Unsatisfied", "Very Unsatisfied", "Total"],
    [50, 80, 60, 30, 20, 240],
    [70, 100, 90, 50, 50, 360],
    [120, 180, 150, 80, 70, 600],
]
data = dict(zip(column_names, column_values))
df = pd.DataFrame(data)

In [5]:
# Show all six rows of the table (five satisfaction levels plus the totals row)
df.head(n=6)

Unnamed: 0,Satisfaction Level,Smart Thermostat,Smart Light,Total
0,Very Satisfied,50,70,120
1,Satisfied,80,100,180
2,Neutral,60,90,150
3,Unsatisfied,30,50,80
4,Very Unsatisfied,20,50,70
5,Total,240,360,600


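## State the Hypotheses

- **Null hypothesis (H0):** Device type and customer satisfaction level are independent (there is no association between them).
- **Alternative hypothesis (H1):** Device type and customer satisfaction level are associated.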

In [10]:
# Observed counts without the totals: one row per satisfaction level,
# first column Smart Thermostat, second column Smart Light.
thermostat_observed = [50, 80, 60, 30, 20]
light_observed = [70, 100, 90, 50, 50]
observed_values = np.column_stack((thermostat_observed, light_observed))

In [11]:
# Echo the observed contingency table
print("Observed values : " + str(observed_values))

Observed values : [[ 50  70]
 [ 80 100]
 [ 60  90]
 [ 30  50]
 [ 20  50]]


In [19]:
# Perform Chi-Square Test for independence on the observed contingency table.
# The import is placed in this cell so the call works on a fresh kernel
# (Restart & Run All) without depending on a later cell having been run.
from scipy.stats import chi2_contingency

# chi2_contingency returns the test statistic, its p-value, the degrees of
# freedom and the table of expected frequencies under independence.
chi2_stat, p_value, dof, expected = chi2_contingency(observed_values)

In [20]:
from scipy.stats import chi2_contingency , chi2

In [21]:
# Report the outcome of the Chi-Square test of independence
report_lines = [
    "\nChi-Square Test Results:",
    f"Chi-Square Statistic: {chi2_stat}",
    f"P-Value: {p_value}",
    f"Degrees of Freedom: {dof}",
    "Expected Frequencies:",
]
print("\n".join(report_lines))
print(expected)


Chi-Square Test Results:
Chi-Square Statistic: 5.638227513227513
P-Value: 0.22784371130697179
Degrees of Freedom: 4
Expected Frequencies:
[[ 48.  72.]
 [ 72. 108.]
 [ 60.  90.]
 [ 32.  48.]
 [ 28.  42.]]


In [22]:
# Manual cross-check of the Chi-Square statistic.
# Contingency table data (rows: satisfaction levels, columns: device types)
observed = np.array([[50, 70],
                     [80, 100],
                     [60, 90],
                     [30, 50],
                     [20, 50]])

# Expected frequency of each cell under independence:
# (row total * column total) / grand total
row_totals = observed.sum(axis=1)
col_totals = observed.sum(axis=0)
total = observed.sum()

expected = np.outer(row_totals, col_totals) / total

# Pearson statistic: sum over all cells of (observed - expected)^2 / expected.
# The result is stored as `chi2_manual` so that the `chi2` distribution
# imported from scipy.stats is not overwritten by a plain number.
chi2_manual = np.sum((observed - expected)**2 / expected)

print("Chi-Square Statistic (Manual):", chi2_manual)

Chi-Square Statistic (Manual): 5.638227513227513


In [23]:
from scipy.stats import chi2

# Significance level of the test
alpha = 0.05

# Degrees of freedom for a test of independence: (rows - 1) * (columns - 1).
# The contingency table has 5 satisfaction levels and 2 device types.
# A dedicated name is used so the `df` DataFrame holding the survey data
# is not overwritten with an integer.
n_rows, n_cols = 5, 2
degrees_of_freedom = (n_rows - 1) * (n_cols - 1)

# Upper-tail critical value of the chi-square distribution at level alpha
critical_value = chi2.ppf(1 - alpha, degrees_of_freedom)

print("Critical Value:", critical_value)

Critical Value: 9.487729036781154


In [24]:
# Reuse the statistic returned by chi2_contingency instead of a number pasted
# from cell output, so the decision stays correct if the data changes.
chi2_statistic = chi2_stat

# `critical_value` is the chi-square critical value computed in the previous
# cell; it is read directly rather than re-typed here.

# Make a decision: reject H0 only when the statistic exceeds the critical value
if chi2_statistic > critical_value:
    print("Reject the null hypothesis. There is a significant association between device type and satisfaction level.")
else:
    print("Fail to reject the null hypothesis. There is no significant association between device type and satisfaction level.")

Fail to reject the null hypothesis. There is no significant association between device type and satisfaction level.


## Conclusion

In [25]:
# Based on the Chi-Square test, the calculated chi-square statistic is 5.64, which is less than the critical value of 9.49 (4 degrees of freedom) at the 5% significance level.

# Equivalently, the p-value (0.228) is greater than 0.05.

# Therefore, we fail to reject the null hypothesis.

# This means that there is no statistically significant association between device type and satisfaction level.


## HYPOTHESIS TESTING

### State the hypothesis statement

In [33]:
from scipy.stats import norm

# Given data
# Mean weekly cost observed from the sample of 25 restaurants
sample_mean = 3050

# Theoretical mean weekly cost based on the model (1000 + 5 * units, with 600 units)
theoretical_mean = 1000 + 5 * 600

# Sample Size in given example
sample_size = 25

# Mean number of units produced in a week
population_mean = 600

# Standard deviation of the weekly cost.
# The number of units produced in a week has a standard deviation of 25 and the
# model multiplies units by 5, so the cost's standard deviation is 5 * 25 = 125.
# The test compares costs, so the spread must be on the cost scale too.
population_std = 5 * 25

In [37]:
# Standard error of the sample mean: sigma / sqrt(n)
root_n = sample_size ** 0.5
standard_error = population_std / root_n

# z-score: gap between the sample mean and the theoretical mean, in standard errors
mean_gap = sample_mean - theoretical_mean
z_score = mean_gap / standard_error

# One-tailed (upper-tail) p-value: probability of a z-score at least this large
cumulative_probability = norm.cdf(z_score)
p_value = 1 - cumulative_probability

In [38]:
# Set the significance level (the p-value is compared against it when making the decision)
alpha = 0.05

In [39]:
# State the hypotheses of the one-tailed test
hypothesis_statements = (
    "Null Hypothesis (H0): The mean weekly operating cost is equal to the theoretical model's prediction.",
    "Alternative Hypothesis (H1): The mean weekly operating cost is greater than the theoretical model's prediction.",
)
for statement in hypothesis_statements:
    print(statement)

Null Hypothesis (H0): The mean weekly operating cost is equal to the theoretical model's prediction.
Alternative Hypothesis (H1): The mean weekly operating cost is greater than the theoretical model's prediction.


In [50]:
# Decision rule: reject H0 when the p-value falls below the significance level
reject_null = p_value < alpha
print(
    "Reject the null hypothesis. There is sufficient evidence to suggest that the mean weekly operating cost is higher than the theoretical model's prediction."
    if reject_null
    else "Fail to reject the null hypothesis. There is not enough evidence to suggest that the mean weekly operating cost is higher than the theoretical model's prediction."
)

Fail to reject the null hypothesis. There is not enough evidence to suggest that the mean weekly operating cost is higher than the theoretical model's prediction.


### Calculate the Test Statistic

In [40]:
import scipy.stats as stats

# Inputs for the test-statistic calculation
# Sample size
sample_size = 25
# Sample mean weekly cost
sample_mean = 3050
# Theoretical mean weekly cost based on the model
theoretical_mean = 1000 + 5 * 600
# Standard deviation of the population
population_std = 5 * 25

In [41]:
# Standard error of the mean: population standard deviation over sqrt(sample size)
standard_error = population_std / sample_size ** 0.5

# Test statistic: number of standard errors separating the sample mean from
# the theoretical mean
deviation = sample_mean - theoretical_mean
t_statistic = deviation / standard_error

print("Test Statistic (t):", t_statistic)

Test Statistic (t): -38.0


In [42]:
# Degrees of freedom (sample size minus one), passed to stats.t.ppf in the next cell
df = sample_size - 1

In [51]:
# One-tailed critical value from the t distribution with `df` degrees of freedom
alpha = 0.05  # Significance level
upper_quantile = 1 - alpha
critical_value = stats.t.ppf(upper_quantile, df)

print("Critical Value (t_critical):", critical_value)

Critical Value (t_critical): 1.7108820799094275


### Determine the Critical Value

In [44]:
# One-tailed critical value from the standard normal distribution
alpha = 0.05  # Significance level
upper_quantile = 1 - alpha
critical_value = norm.ppf(upper_quantile)
print("Critical Value (Z_critical):", critical_value)

Critical Value (Z_critical): 1.6448536269514722


### Make Decision

In [48]:
# Reuse the test statistic computed earlier instead of a number copied from
# cell output, so the decision follows the data if the inputs change.
test_statistic = t_statistic
# Significance level (alpha)
alpha = 0.05
# One-tailed critical value from the standard normal distribution, computed at
# full precision rather than typed in as the rounded 1.645.
critical_value = norm.ppf(1 - alpha)



In [49]:
# Decision rule for the upper-tailed test: reject H0 only when the test
# statistic exceeds the critical value
decision_messages = {
    True: "Reject the null hypothesis. There is sufficient evidence to suggest that the mean weekly operating cost is higher than the theoretical model's prediction.",
    False: "Fail to reject the null hypothesis. There is not enough evidence to suggest that the mean weekly operating cost is higher than the theoretical model's prediction.",
}
print(decision_messages[bool(test_statistic > critical_value)])

Fail to reject the null hypothesis. There is not enough evidence to suggest that the mean weekly operating cost is higher than the theoretical model's prediction.


## Conclusion

In [47]:
# We fail to reject the null hypothesis.

# The test statistic (-38.0) is far below the one-tailed critical value (1.645), so there is not enough evidence to say that the mean weekly operating cost is higher than what the theoretical model predicts.