### CHI-SQUARE TEST

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency, chi2

In [2]:
# Survey counts, one record per satisfaction level:
# (satisfaction level, Smart Thermostat count, Smart Light count).
survey_rows = [
    ("Very Satisfied", 50, 70),
    ("Satisfied", 80, 100),
    ("Neutral", 60, 90),
    ("Unsatisfied", 30, 50),
    ("Very Unsatisfied", 20, 50),
]

# Transpose the records into the column-oriented dict used to build the table.
levels, thermostat_counts, light_counts = (list(column) for column in zip(*survey_rows))
data = {
    "Satisfaction": levels,
    "Smart Thermostat": thermostat_counts,
    "Smart Light": light_counts,
}


In [3]:
# Build the observed-count table and key its rows by satisfaction level.
df = pd.DataFrame(data).set_index("Satisfaction")

In [4]:
# Preview the first three satisfaction levels of the table.
df.iloc[:3]

Unnamed: 0_level_0,Smart Thermostat,Smart Light
Satisfaction,Unnamed: 1_level_1,Unnamed: 2_level_1
Very Satisfied,50,70
Satisfied,80,100
Neutral,60,90


In [5]:
# Adding row and column totals
# A "Total" column or row left over from an earlier run of these cells is
# excluded before summing, so re-running this cell does not fold old totals
# into the new ones. On a fresh table the result is the same as a plain sum.
df["Total"] = df.drop(columns="Total", errors="ignore").sum(axis=1)
col_totals = df.drop(index="Total", errors="ignore").sum(axis=0)

In [6]:
# Append the column totals as a final "Total" row, then show the whole table.
df.loc["Total"] = col_totals
print("Contingency Table:", df, sep="\n")

Contingency Table:
                  Smart Thermostat  Smart Light  Total
Satisfaction                                          
Very Satisfied                  50           70    120
Satisfied                       80          100    180
Neutral                         60           90    150
Unsatisfied                     30           50     80
Very Unsatisfied                20           50     70
Total                          240          360    600


In [7]:
# 1. State the Hypotheses
# Null and alternative hypotheses for the chi-square test of independence.
null_hypothesis = "H0: There is no association between the type of smart home device and customer satisfaction."
alternative_hypothesis = "H1: There is an association between the type of smart home device and customer satisfaction."
print("\nHypotheses:", null_hypothesis, alternative_hypothesis, sep="\n")


Hypotheses:
H0: There is no association between the type of smart home device and customer satisfaction.
H1: There is an association between the type of smart home device and customer satisfaction.


In [8]:
# 2. Compute the Chi-Square Statistic
# Build the observed-count matrix from the `data` dict defined earlier instead
# of retyping the numbers, so the test always runs on the same counts as the
# displayed contingency table. Rows follow data["Satisfaction"]; column 0 is
# "Smart Thermostat" and column 1 is "Smart Light". The "Total" row and column
# added to `df` are not part of this matrix.
observed = np.column_stack([data["Smart Thermostat"], data["Smart Light"]])

In [9]:
# Run the chi-square test of independence on the observed counts and unpack
# the result into: test statistic, p-value, degrees of freedom, expected counts.
test_result = chi2_contingency(observed)
chi2_stat, p, dof, expected = test_result

In [10]:
# Report the statistic and p-value to four decimals, plus the degrees of freedom.
report_lines = [
    "\nChi-Square Test Results:",
    f"Chi-Square Statistic: {chi2_stat:.4f}",
    f"p-value: {p:.4f}",
    f"Degrees of Freedom: {dof}",
]
print("\n".join(report_lines))


Chi-Square Test Results:
Chi-Square Statistic: 5.6382
p-value: 0.2278
Degrees of Freedom: 4


In [11]:
# Displaying the expected frequencies
# Wrap the expected-count array in a labelled frame so it reads like the
# observed table (rows = satisfaction levels, columns = device types).
satisfaction_labels = ["Very Satisfied", "Satisfied", "Neutral", "Unsatisfied", "Very Unsatisfied"]
device_labels = ["Smart Thermostat", "Smart Light"]
expected_df = pd.DataFrame(expected, columns=device_labels, index=satisfaction_labels)
print("\nExpected Frequencies:", expected_df, sep="\n")


Expected Frequencies:
                  Smart Thermostat  Smart Light
Very Satisfied                48.0         72.0
Satisfied                     72.0        108.0
Neutral                       60.0         90.0
Unsatisfied                   32.0         48.0
Very Unsatisfied              28.0         42.0


In [12]:
# 3. Determine the Critical Value
# Cutoff of the chi-square distribution with `dof` degrees of freedom at the
# (1 - alpha) quantile.
alpha = 0.05
confidence_level = 1 - alpha
critical_value = chi2.ppf(confidence_level, dof)

print(
    "\nCritical Value:",
    f"Critical Value (alpha={alpha}): {critical_value:.4f}",
    sep="\n",
)


Critical Value:
Critical Value (alpha=0.05): 9.4877


In [13]:
# 4. Make a Decision
# Reject H0 only when the statistic exceeds the critical value.
# NOTE: `critical_value` is reassigned by the hypothesis-testing section further
# down, so re-run the chi-square critical-value cell before re-running this one.
outcomes = {
    True: (
        "\nDecision: Reject the null hypothesis (H0).",
        "Conclusion: There is a significant association between the type of smart home device and customer satisfaction.",
    ),
    False: (
        "\nDecision: Fail to reject the null hypothesis (H0).",
        "Conclusion: There is no significant association between the type of smart home device and customer satisfaction.",
    ),
}
decision_message, conclusion_message = outcomes[bool(chi2_stat > critical_value)]
print(decision_message)
print(conclusion_message)


Decision: Fail to reject the null hypothesis (H0).
Conclusion: There is no significant association between the type of smart home device and customer satisfaction.


### HYPOTHESIS TESTING

In [14]:
import numpy as np
from scipy.stats import norm

In [15]:
# 1. State the Hypotheses
# Null and alternative hypotheses for the weekly operating cost test.
null_hypothesis = "H0: The weekly operating cost follows the theoretical model W = $1,000 + $5X."
alternative_hypothesis = "H1: The weekly operating cost is higher than the theoretical model W = $1,000 + $5X."
print("Hypotheses:", null_hypothesis, alternative_hypothesis, sep="\n")

Hypotheses:
H0: The weekly operating cost follows the theoretical model W = $1,000 + $5X.
H1: The weekly operating cost is higher than the theoretical model W = $1,000 + $5X.


In [16]:
# Given Data
n = 25  # Sample size
sample_mean = 3050  # Rs.

# Theoretical weekly cost from the model W = 1000 + 5X, evaluated at X = 600.
fixed_cost, cost_per_unit, units = 1000, 5, 600
theoretical_mean = fixed_cost + cost_per_unit * units

# Standard deviation of weekly costs, 5 * 25.
# NOTE(review): presumably the model slope (5) times a standard deviation of 25
# for X; the 25 here is separate from the sample size `n`. Confirm against the
# problem statement.
sigma = cost_per_unit * 25

In [17]:
# 2. Calculate the Test Statistic
# Standardise the gap between the sample mean and the theoretical mean by the
# standard error of the mean, sigma / sqrt(n).
standard_error = sigma / np.sqrt(n)
mean_difference = sample_mean - theoretical_mean
t_stat = mean_difference / standard_error

In [19]:
# Echo the inputs and the resulting test statistic.
# The statistic is built from the given standard deviation `sigma` and is
# compared with a standard-normal critical value in the next step, so it is
# reported as Z rather than t. The variable keeps the name `t_stat` because it
# is assigned in an earlier cell.
print("\nTest Statistic Calculation:")
print(f"Sample Mean: {sample_mean}")
print(f"Theoretical Mean: {theoretical_mean}")
print(f"Standard Deviation : {sigma}")
print(f"Sample size : {n}")
print(f"Test Statistic (Z): {t_stat:.4f}")


Test Statistic Calculation:
Sample Mean: 3050
Theoretical Mean: 4000
Standard Deviation : 125
Sample size : 25
Test Statistic (t): -38.0000


In [20]:
# 3. Determine the Critical Value
# NOTE: this reassigns `alpha` and `critical_value`, which the chi-square
# section earlier in the notebook also uses.
alpha = 0.05
upper_tail_quantile = 1 - alpha
critical_value = norm.ppf(upper_tail_quantile)  # Z-value for a one-tailed test

summary_lines = [
    "\nCritical Value:",
    f"Alpha (\u03b1): {alpha}",
    f"Critical Value (Z): {critical_value:.4f}",
]
print("\n".join(summary_lines))



Critical Value:
Alpha (α): 0.05
Critical Value (Z): 1.6449
