### Chi-square test of independence

In [11]:
import numpy as np
from scipy.stats import chi2

# Chi-square test of independence on a contingency table of observed counts.
# Each row / column is one level of the two categorical variables being compared.
observed = np.array([[50, 70],
                     [80, 100],
                     [60, 90],
                     [30, 50],
                     [20, 50]])

# Marginal totals: per row (axis=1), per column (axis=0), and over the whole table
row_totals = observed.sum(axis=1)
col_totals = observed.sum(axis=0)
grand_total = observed.sum()

# Expected count of every cell under independence:
# (row total * column total) / grand total, built for all cells at once via an outer product
expected = np.outer(row_totals, col_totals) / grand_total

# Chi-square statistic: sum over all cells of (observed - expected)^2 / expected
chi_sqt = ((observed - expected) ** 2 / expected).sum()

# Significance level (alpha)
alpha = 0.05

# Degrees of freedom: (number of rows - 1) * (number of columns - 1).
# Derived from the table's shape rather than hard-coded, so it stays correct
# if `observed` is edited (for the 5x2 table above this is 4).
n_rows, n_cols = observed.shape
df = (n_rows - 1) * (n_cols - 1)

# Critical value: the (1 - alpha) quantile of the chi-square distribution with `df` degrees of freedom
critical_value = chi2.ppf(1 - alpha, df)

# Decision rule: reject the null hypothesis only when the statistic exceeds the critical value
if chi_sqt > critical_value:
    decision = "Reject the null hypothesis"
else:
    decision = "Fail to reject the null hypothesis"

# Display the chi-square statistic, critical value, and decision as the cell output
chi_sqt, critical_value, decision


(5.638227513227513, 9.487729036781154, 'Fail to reject the null hypothesis')

### Hypothesis testing: one-sample z-test

In [19]:
import numpy as np
from scipy.stats import norm

# One-sample, upper-tailed z-test of a sample mean cost against a hypothesized mean.

# --- Inputs ---
theoretical_mean = 4000  # hypothesized mean cost (the null-hypothesis value)
sample_mean = 3050       # mean cost observed in the sample
sample_size = 25         # sample size n used for the standard error
cost_per_unit = 5        # cost of producing each unit
std_dev_units = 25       # standard deviation of the units produced

# Scaling the unit-level spread by the per-unit cost gives the spread of the cost
std_dev_cost = cost_per_unit * std_dev_units

# Standard error of the sample mean: sigma / sqrt(n)
standard_error = std_dev_cost / np.sqrt(sample_size)

# z statistic: distance of the sample mean from the hypothesized mean, in standard errors
test_statistic = (sample_mean - theoretical_mean) / standard_error

# Significance level and the matching upper-tail cutoff of the standard normal
alpha = 0.05
critical_value = norm.ppf(1 - alpha)

# Upper-tailed decision rule: reject only when the statistic lies above the cutoff.
# NOTE(review): this rule can only reject when the sample mean is above the
# hypothesized mean; confirm the alternative hypothesis really is "mean > theoretical_mean".
if test_statistic > critical_value:
    decision = "Reject the null hypothesis"
else:
    decision = "Fail to reject the null hypothesis"

# Report the results
print(f"Test Statistic: {test_statistic:.3f}")
print(f"Critical Value: {critical_value:.3f}")
print(f"Decision: {decision}")


Test Statistic: -38.000
Critical Value: 1.645
Decision: Fail to reject the null hypothesis
