## HypothesisTesting

In [1]:
import math
from scipy.stats import norm

In [3]:
# Given data
sample_mean = 3050
x_mean = 600
x_std = 25
cost_per_unit = 5
n = 25
alpha = 0.05

In [5]:
# Theoretical mean from cost model
mu_0 = 1000 + cost_per_unit * x_mean
mu_0

4000

In [6]:
# Population standard deviation of cost
sigma = cost_per_unit * x_std
sigma

125

In [7]:
# Test statistic (z-test, sigma known)
z = (sample_mean - mu_0) / (sigma / math.sqrt(n))
z

-38.0

In [9]:
# Critical value for right-tailed test
z_crit = norm.ppf(1 - alpha)
z

-38.0

In [10]:
# Decision
if z > z_crit:
    decision = "Reject H0"
else:
    decision = "Fail to reject H0"

In [11]:
# Conclusion
if decision == "Reject H0":
    conclusion = "There is strong evidence that the mean weekly operating cost is higher than the model predicts."
else:
    conclusion = "There is no evidence that the mean weekly operating cost is higher than the model predicts."

### Final Output

In [14]:
print(f"Theoretical mean (mu_0): {mu_0}")
print(f"Sample mean: {sample_mean}")
print(f"Sigma: {sigma}")
print(f"Test statistic (z): {z:.2f}")
print(f"Critical value (z_crit): {z_crit:.2f}")
print(f"Decision: {decision}")
print(f"Conclusion: {conclusion}")

Theoretical mean (mu_0): 4000
Sample mean: 3050
Sigma: 125
Test statistic (z): -38.00
Critical value (z_crit): 1.64
Decision: Fail to reject H0
Conclusion: There is no evidence that the mean weekly operating cost is higher than the model predicts.


## CHI-SQUARE TEST

## Hypotheses

In [1]:
import numpy as np
import scipy.stats as stats

In [3]:
observed = np.array([[50, 70],[80, 100],[60, 90],[30, 50],[20, 50]])
observed 

array([[ 50,  70],
       [ 80, 100],
       [ 60,  90],
       [ 30,  50],
       [ 20,  50]])

In [5]:
# Step 1: Row totals, column totals, and grand total
row_totals = observed.sum(axis=1)
col_totals = observed.sum(axis=0)
grand_total = observed.sum()
print(row_totals)
print(col_totals)
print(grand_total)

[120 180 150  80  70]
[240 360]
600


In [6]:
# Step 2: Expected frequencies
expected_manual = np.outer(row_totals, col_totals) / grand_total
expected_manual

array([[ 48.,  72.],
       [ 72., 108.],
       [ 60.,  90.],
       [ 32.,  48.],
       [ 28.,  42.]])

In [7]:
# Step 3: Chi-Square statistic
chi_square_manual = ((observed - expected_manual)**2 / expected_manual).sum()
chi_square_manual

5.638227513227513

In [9]:
# Step 4: Degrees of freedom
df_manual = (observed.shape[0] - 1) * (observed.shape[1] - 1)
df_manual

4

In [10]:
# Step 5: Critical value at alpha = 0.05
# Critical Value (α = 0.05)
#From the Chi-square table for df = 4
critical_value_manual = stats.chi2.ppf(0.95, df_manual)
critical_value_manual

9.487729036781154

### Final conclusion 

In [11]:
print("Chi-Square Statistic:", chi_square_manual)
print("Degrees of Freedom:", df_manual)
print("Critical Value (0.05):", critical_value_manual)
print("Expected Frequencies:\n", expected_manual)

Chi-Square Statistic: 5.638227513227513
Degrees of Freedom: 4
Critical Value (0.05): 9.487729036781154
Expected Frequencies:
 [[ 48.  72.]
 [ 72. 108.]
 [ 60.  90.]
 [ 32.  48.]
 [ 28.  42.]]
