In [1]:
# STAT 381: Hypothesis Testing
# Author: Cory Suzuki

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

In [4]:
# Note that for any two-sided hypothesis tests, we multiply the p-value by 2
# to account for both tails of the distribution. A one-sided test would not require
# this adjustment.

# 1 Sample Z test for proportion

# H0: p = 0.45
# Ha: p ≠ 0.45

p0 = 0.45
x = 32
n = 80
alpha_lvl = 0.01
p_hat = x / n
z_stat = (p_hat - p0) / np.sqrt((p0 * (1 - p0)) / n)
p_value = 2 * (1 - stats.norm.cdf(abs(z_stat)))
print(f"Z-statistic: {z_stat:.4f}")
print(f"P-value: {p_value:.4f}")
if p_value < alpha_lvl:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")

# 2 Sample Z test for proportion

# H0: p1 - p2 = 0
# Ha: p1 - p2 ≠ 0

alpha_lvl = 0.01
n1 = 6124
n2 = 5512
x1 = 40 
x2 = 37
p1_hat = x1 / n1
p2_hat = x2 / n2
z_stat = (p1_hat - p2_hat) / np.sqrt((p1_hat * (1 - p1_hat) / n1) + (p2_hat * (1 - p2_hat) / n2))
p_value = 2 * (1 - stats.norm.cdf(abs(z_stat)))
print(f"Z-statistic: {z_stat:.4f}")
print(f"P-value: {p_value:.4f}")

if p_value < alpha_lvl:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")

# 1 sample z test for mean (known sigma)

# H0: mu = 130
# Ha: mu < 130

mu0 = 130
x_bar = 12.6
sigma = 2.1
alpha = 0.05
n = 40
z_stat = (x_bar - mu0) / (sigma /  np.sqrt(n))
p_value = stats.norm.cdf(z_stat)
print(f"Z-statistic: {z_stat:.4f}")
print(f"P-value: {p_value:.4f}")
if p_value < alpha:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")

# 1 sample t test for mean (unknown sigma)

# H0: mu = 800
# Ha: mu > 800

mu0 = 800
x_bar = 825
sample_var = 2350
n = 40
alpha = 0.05
df = n - 1
t_stat = (x_bar - mu0) / (np.sqrt(sample_var) / np.sqrt(n))
p_value = 1 - stats.t.cdf(t_stat, df)
print(f"T-statistic: {t_stat:.4f}")
print(f"P-value: {p_value:.4f}")
if p_value < alpha:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")

# 2 sample t test for mean (unknown sigma)

# In the case when sigma is known, we would use a 2 sample z test.

# H0: mu1 - mu2 = 0
# Ha: mu1 - mu2 ≠ 0

alpha = 0.05
n1 = 15
n2 = 12
x1_bar = 78.9
x2_bar = 69.2
s1_sq = 24.86
s2_sq = 19.24
t_stat = (x1_bar - x2_bar) / np.sqrt((s1_sq / n1) + (s2_sq / n2))
df = (((s1_sq / n1) + (s2_sq / n2))**2) / (((s1_sq / n1)**2) / (n1 - 1) + ((s2_sq / n2)**2) / (n2 - 1))
p_value = 2 * (1 - stats.t.cdf(abs(t_stat), df))
print(f"T-statistic: {t_stat:.4f}")
print(f"P-value: {p_value:.4f}")
if p_value < alpha:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")

# 1 sample chi-square test for variance

# H0: sigma^2 = 1600
# Ha: sigma^2 > 1600

alpha = 0.05
s2_sq = 2350
sigma2 = 1600
n = 40
chi_sq_stat = ((n - 1) * (s2_sq)) / sigma2
p_value = 1 - stats.chi2.cdf(chi_sq_stat, n - 1)
print(f"Chi-square statistic: {chi_sq_stat:.4f}")
print(f"P-value: {p_value:.4f}")
if p_value < alpha:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")

# F test for equality of variances

# H0: sigma1^2 - sigma2^2 = 0
# Ha: sigma1^2 - sigma2^2 ≠ 0

alpha = 0.05
n1 = 10
n2 = 10
s1_sq = 3.92
s2_sq = 3.98
F_stat = (max(s1_sq, s2_sq))**2 / (min(s1_sq, s2_sq))**2
df1 = n1 - 1
df2 = n2 - 1
p_value = 2 * stats.f.cdf(F_stat, df1, df2)
print(f"F statistic: {F_stat:.4f}")
print(f"P-value: {p_value:.4f}")

if p_value < alpha:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")



Z-statistic: -0.8989
P-value: 0.3687
Fail to reject the null hypothesis.
Z-statistic: -0.1201
P-value: 0.9044
Fail to reject the null hypothesis.
Z-statistic: -353.5728
P-value: 0.0000
Reject the null hypothesis.
T-statistic: 3.2616
P-value: 0.0012
Reject the null hypothesis.
T-statistic: 5.3718
P-value: 0.0000
Reject the null hypothesis.
Chi-square statistic: 57.2812
P-value: 0.0296
Reject the null hypothesis.
F statistic: 1.0308
P-value: 1.0354
Fail to reject the null hypothesis.
