In [1]:
import pandas as pd
import numpy as np
from scipy.stats import t, norm, ttest_1samp

# 1. Load dataset
# Rows with a missing Height are dropped so every statistic below is computed
# on the same sample.
df = pd.read_csv("heights_weights.csv")
heights = df["Height"].dropna()

# 2. Basic stats
n = len(heights)
if n < 2:
    # The sample standard deviation (ddof=1) and the t-distribution with
    # n - 1 degrees of freedom are undefined for fewer than two observations.
    raise ValueError(f"Need at least 2 non-missing heights, got {n}")
df_ = n - 1                      # degrees of freedom for the t-distribution
mean_h = np.mean(heights)
std_h = np.std(heights, ddof=1)  # sample standard deviation (divides by n - 1)
SE = std_h / np.sqrt(n)          # standard error of the mean

# 3. 95% CI (t-dist)
alpha = 0.05
t_crit = t.ppf(1 - alpha / 2, df_)  # two-sided critical value
margin = t_crit * SE
ci_low = mean_h - margin
ci_high = mean_h + margin
print("====== 95% CI (t-based) ======")
print(f"Mean height: {mean_h:.2f}")
print(f"95% CI: [{ci_low:.2f}, {ci_high:.2f}]")

# 4. Two-tailed T-test: H0: mu=165
mu0 = 165  # hypothesised population mean, shared by the t-test and z-test below
res_t = ttest_1samp(heights, popmean=mu0)
print(f"====== Two-Tailed T-Test (H0: mu={mu0}) ======")
# The f-string must stay on one line: a single-quoted f-string cannot be
# broken inside a replacement field.
print(f"t-statistic = {res_t.statistic:.3f}, p-value = {res_t.pvalue:.4f}")

# 5. Two-tailed Z-test for comparison
# The statistic uses the same sample-based SE as the t-test, so only the
# reference distribution (standard normal instead of Student's t) differs.
z_stat = (mean_h - mu0) / SE
# norm.sf is the survival function (1 - cdf) evaluated without the
# cancellation error that `1 - norm.cdf(x)` suffers for large |x|.
p_val_z = 2 * norm.sf(abs(z_stat))
print(f"====== Two-Tailed Z-Test (H0: mu={mu0}) ======")
print(f"z-statistic = {z_stat:.3f}, p-value = {p_val_z:.4f}")


Mean height: 170.04
95% CI: [168.57, 171.51]
t-statistic = 6.753, p-value = 0.0000
z-statistic = 6.753, p-value = 0.0000
