In [9]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
import scipy.stats as stats
from scipy.stats import t
import math

In [18]:
# Hypotheses test for two populations
# pooled z-test for two populations
# right-tailed test

# Data
n1, x1 = 21300, 69  # No vasectomy
n2, x2 = 22000, 113  # Vasectomy

alpha = 0.01

# Sample proportions
p1_hat = x1 / n1
p2_hat = x2 / n2

# Difference in proportions
diff = p2_hat - p1_hat

# Pooled proportion for standard error
p_hat = (x1 + x2) / (n1 + n2)
p_hat = round(p1_hat, 4)

# Standard error for the difference in proportions
se = math.sqrt(p_hat * (1 - p_hat) * (1/n1 + 1/n2))

# Critical value
z_critical = norm.ppf(1 - alpha) # right-tailed test
z_critical = round(z_critical,2)

# Confidence interval
left = diff - z_critical * se
right = diff + z_critical * se
confidence_interval = (round(left, 4), round(right, 4))

# Hypothesis test
z = diff / se
if z > z_critical:
    conclusion = "Reject H0: There is sufficient evidence to suggest that men with a vasectomy are at greater risk of prostate cancer."
else:
    conclusion = "Fail to reject H0: There is insufficient evidence to suggest that men with a vasectomy are at greater risk of prostate cancer."

# Output
print("Example 1: Vasectomy Study")
print(f"Proportion difference (p2_hat - p1_hat): {round(diff, 4)}")
print(f'Pooled sample proportion: {p_hat}')
print(f"Standard error: {round(se, 4)}")
print(f"Test statistic (z): {round(z, 4)}")
print(f'Critical Value: {z_critical}')
print(f"Confidence Interval: {confidence_interval}")
print(f"Decision: {conclusion}\n")

Example 1: Vasectomy Study
Proportion difference (p2_hat - p1_hat): 0.0019
Pooled sample proportion: 0.0032
Standard error: 0.0005
Test statistic (z): 3.494
Critical Value: 2.33
Confidence Interval: (0.0006, 0.0032)
Decision: Reject H0: There is sufficient evidence to suggest that men with a vasectomy are at greater risk of prostate cancer.



In [17]:
# Hypotheses test for two populations
# pooled z-test for two populations
# left-tailed test

# Data
n1, x1 = 2701, 35  # Folic acid group
n2, x2 = 2052, 47  # Trace elements group

alpha = 0.02

# Sample proportions
p1_hat = x1 / n1
p2_hat = x2 / n2

# Difference in proportions
diff = p1_hat - p2_hat

# Pooled proportion for standard error
p_hat = (x1 + x2) / (n1 + n2)
p_hat = round(p1_hat, 4)

# Standard error for the difference in proportions
se = math.sqrt(p_hat * (1 - p_hat) * (1/n1 + 1/n2))

# Critical value
z_critical = norm.ppf(alpha) # left-tailed test
z_critical = round(z_critical,2)

# Confidence interval
left = diff - z_critical * se
right = diff + z_critical * se
confidence_interval = (round(left, 4), round(right, 4))

# Hypothesis test
z = diff / se
if z < -z_critical:
    conclusion = "Reject H0: There is sufficient evidence to suggest that women who take folic acid are at lesser risk of major birth defects."
else:
    conclusion = "Fail to reject H0: There is insufficient evidence to suggest that women who take folic acid are at lesser risk of major birth defects."

# Output
print("Example 2: Folic Acid Study")
print(f"Proportion difference (p1_hat - p2_hat): {round(diff, 4)}")
print(f'Pooled sample proportion: {p_hat}')
print(f"Standard error: {round(se, 4)}")
print(f"Confidence Interval: {confidence_interval}")
print(f"Test statistic (z): {round(z, 4)}")
print(f'Critical Value: {z_critical}')
print(f"Decision: {conclusion}")


Example 2: Folic Acid Study
Proportion difference (p1_hat - p2_hat): -0.0099
Pooled sample proportion: 0.013
Standard error: 0.0033
Confidence Interval: (-0.0031, -0.0167)
Test statistic (z): -2.9985
Critical Value: -2.05
Decision: Reject H0: There is sufficient evidence to suggest that women who take folic acid are at lesser risk of major birth defects.


In [16]:
# Hypotheses test for two populations
# pooled z-test for two populations
# two-tailed test

# Data
n1, x1 = 4989, 529  # White elderly
n2, x2 = 906, 103   # African-American elderly
alpha = 0.05

# Sample proportions
p1_hat = x1 / n1
p2_hat = x2 / n2

# Difference in proportions
diff = p1_hat - p2_hat

# Pooled proportion for hypothesis test
p_hat = (x1 + x2) / (n1 + n2)
p_hat = round(p1_hat, 4)

# Standard error for hypothesis test
se_pooled = math.sqrt(p_hat * (1 - p_hat) * (1/n1 + 1/n2))

# Test statistic
z = diff / se_pooled

# Critical value for two-tailed test at % significance
# two-tailed test
z_critical = norm.ppf(1 - alpha/2) # positive score
# z_critical = norm.ppf(alpha/2) # negative score (you need to adjust the if else statement if you use this)

# Decision
if abs(z) > z_critical:
    conclusion = "Reject H0: There is sufficient evidence to suggest a difference in stroke incidence between white and African-American elderly."
else:
    conclusion = "Fail to reject H0: There is insufficient evidence to suggest a difference in stroke incidence between white and African-American elderly."

# Confidence interval
se_diff = math.sqrt((p1_hat * (1 - p1_hat) / n1) + (p2_hat * (1 - p2_hat) / n2))
ci_left = diff - z_critical * se_diff
ci_right = diff + z_critical * se_diff
confidence_interval = (round(ci_left, 4), round(ci_right, 4))

# Output
print("Stroke Incidence Study")
print(f'Pooled sample proportion: {p_hat}')
print(f"Proportion difference (p1_hat - p2_hat): {round(diff, 4)}")
print(f"Pooled Standard Error: {round(se_pooled, 4)}")
print(f"Test statistic (z): {round(z, 4)}")
print(f"Critical value (z_critical): {round(z_critical, 4)}")
print(f"Decision: {conclusion}")
print(f"Confidence Interval: {confidence_interval}")


Stroke Incidence Study
Pooled sample proportion: 0.106
Proportion difference (p1_hat - p2_hat): -0.0077
Pooled Standard Error: 0.0111
Test statistic (z): -0.6884
Critical value (z_critical): 1.96
Decision: Fail to reject H0: There is insufficient evidence to suggest a difference in stroke incidence between white and African-American elderly.
95% Confidence Interval: (-0.03, 0.0147)
