In [1]:
import numpy as np
from scipy.stats import norm, t
import statsmodels.stats.proportion as smp
from scipy.stats import bootstrap

# Fix the legacy global NumPy seed so the simulated sample is reproducible.
np.random.seed(1)

# 1) z-interval for the mean (population sigma treated as known)
n = 50          # sample size
mu_true = 1.0   # mean used to simulate the data
sigma = 1.2     # standard deviation, assumed known for the z-interval
alpha = 0.05    # two-sided significance level -> 95% interval

x = np.random.normal(loc=mu_true, scale=sigma, size=n)
xbar = np.mean(x)

# Two-sided standard-normal critical value and the resulting margin of error.
z = norm.ppf(1 - alpha / 2)
margin_z = z * sigma / np.sqrt(n)
ci_z = (xbar - margin_z, xbar + margin_z)
print("z-CI:", ci_z)

# Test duality: H0: mu = 0 is rejected exactly when 0 lies outside the interval.
print("Reject H0 mu0=0?", not (ci_z[0] <= 0 <= ci_z[1]))

# 2) t-interval for the mean (sigma unknown, estimated from the sample)
s = np.std(x, ddof=1)                   # sample standard deviation (n - 1 denominator)
tq = t.ppf(1 - alpha / 2, df=n - 1)     # two-sided Student-t critical value, n - 1 dof
margin_t = tq * s / np.sqrt(n)
ci_t = (xbar - margin_t, xbar + margin_t)
print("t-CI:", ci_t)

# 3) proportion: confidence intervals and a test of H0 by CI inversion.
# Note: the reject/accept decision below uses the Wilson (score) interval;
# the Clopper-Pearson interval (method='beta') is only printed for comparison.
k, N = 7, 40   # successes, trials
binom_kwargs = dict(count=k, nobs=N, alpha=alpha)
wilson = smp.proportion_confint(method='wilson', **binom_kwargs)
cp = smp.proportion_confint(method='beta', **binom_kwargs)  # Clopper-Pearson
print("Wilson CI:", wilson)
print("Clopper-Pearson CI:", cp)

# Reject H0: p = p0 exactly when p0 falls outside the Wilson interval.
p0 = 0.1
print("Reject H0 via Wilson?", p0 < wilson[0] or p0 > wilson[1])

# 4) bootstrap percentile CI for the mean
def mean_stat(data, axis):
    """Return the mean of `data` along `axis`.

    Written with an `axis` parameter so it can be passed to
    `scipy.stats.bootstrap` as a vectorized statistic.
    """
    return np.mean(data, axis=axis)


# Use the axis-aware helper with vectorized=True so the statistic is evaluated
# on arrays of resamples via `axis` instead of one 1-D resample at a time.
# The confidence level is tied to `alpha` (1 - 0.05 == 0.95) so it stays
# consistent with the other intervals in this cell.
res = bootstrap(
    (x,),
    mean_stat,
    vectorized=True,
    confidence_level=1 - alpha,
    n_resamples=5000,
    method='percentile',
    random_state=2,   # dedicated seed for the resampling, independent of np.random.seed
)
print("Bootstrap percentile CI for mean:", res.confidence_interval)

# 5) difference of means: Welch (unequal-variance) CI and test via CI inversion
a = np.random.normal(loc=0.5, scale=1.0, size=30)
b = np.random.normal(loc=0.0, scale=1.3, size=40)
diff = np.mean(a) - np.mean(b)

# Per-sample variance of the mean (s^2 / n), computed once and reused below.
var_a_n = a.var(ddof=1) / len(a)
var_b_n = b.var(ddof=1) / len(b)
se = np.sqrt(var_a_n + var_b_n)

# Welch-Satterthwaite approximation of the degrees of freedom.
num = (var_a_n + var_b_n) ** 2
den = var_a_n ** 2 / (len(a) - 1) + var_b_n ** 2 / (len(b) - 1)
df_w = num / den

tq_w = t.ppf(1 - alpha / 2, df=df_w)
margin_w = tq_w * se
ci_welch = (diff - margin_w, diff + margin_w)
print("Welch CI for mean diff:", ci_welch)

# H0: diff = 0 is rejected exactly when 0 lies outside the interval.
print("Reject H0: diff=0?", not (ci_welch[0] <= 0 <= ci_welch[1]))

z-CI: (np.float64(0.636765264546897), np.float64(1.3019991002347422))
Reject H0 mu0=0? True
t-CI: (np.float64(0.6353600201247799), np.float64(1.3034043446568595))
Wilson CI: (0.0874541374603592, 0.3194999033178772)
Clopper-Pearson CI: (0.0733827293520852, 0.32779013336158586)
Reject H0 via Wilson? False
Bootstrap percentile CI for mean: ConfidenceInterval(low=np.float64(0.6383632517070221), high=np.float64(1.2922142136695463))
Welch CI for mean diff: (np.float64(0.21429377762830693), np.float64(1.1125198167043397))
Reject H0: diff=0? True


Упражнение 1.1.

In [None]:
# Fix the legacy global NumPy seed so the simulated sample is reproducible.
np.random.seed(1)

# 1) z-interval for the mean (population sigma treated as known)
n = 20          # sample size
mu_true = 0     # mean used to simulate the data (H0: mu = 0 is true here)
sigma = 1.2     # standard deviation, assumed known for the z-interval
alpha = 0.05    # two-sided significance level -> 95% interval

x = np.random.normal(loc=mu_true, scale=sigma, size=n)
xbar = np.mean(x)

# Two-sided standard-normal critical value and the resulting margin of error.
z = norm.ppf(1 - alpha / 2)
margin_z = z * sigma / np.sqrt(n)
ci_z = (xbar - margin_z, xbar + margin_z)
print("z-CI:", ci_z)

# Test duality: H0: mu = 0 is rejected exactly when 0 lies outside the interval.
print("Reject H0 mu0=0?", not (ci_z[0] <= 0 <= ci_z[1]))

# 2) t-interval for the mean (sigma unknown, estimated from the sample)
s = np.std(x, ddof=1)                   # sample standard deviation (n - 1 denominator)
tq = t.ppf(1 - alpha / 2, df=n - 1)     # two-sided Student-t critical value, n - 1 dof
margin_t = tq * s / np.sqrt(n)
ci_t = (xbar - margin_t, xbar + margin_t)
print("t-CI:", ci_t)

# 3) proportion: confidence intervals and a test of H0 by CI inversion.
# Note: the reject/accept decision below uses the Wilson (score) interval;
# the Clopper-Pearson interval (method='beta') is only printed for comparison.
k, N = 7, 40   # successes, trials
binom_kwargs = dict(count=k, nobs=N, alpha=alpha)
wilson = smp.proportion_confint(method='wilson', **binom_kwargs)
cp = smp.proportion_confint(method='beta', **binom_kwargs)  # Clopper-Pearson
print("Wilson CI:", wilson)
print("Clopper-Pearson CI:", cp)

# Reject H0: p = p0 exactly when p0 falls outside the Wilson interval.
p0 = 0.1
print("Reject H0 via Wilson?", p0 < wilson[0] or p0 > wilson[1])

# 4) bootstrap percentile CI for the mean
def mean_stat(data, axis):
    """Return the mean of `data` along `axis`.

    Written with an `axis` parameter so it can be passed to
    `scipy.stats.bootstrap` as a vectorized statistic.
    """
    return np.mean(data, axis=axis)


# Use the axis-aware helper with vectorized=True so the statistic is evaluated
# on arrays of resamples via `axis` instead of one 1-D resample at a time.
# The confidence level is tied to `alpha` (1 - 0.05 == 0.95) so it stays
# consistent with the other intervals in this cell.
res = bootstrap(
    (x,),
    mean_stat,
    vectorized=True,
    confidence_level=1 - alpha,
    n_resamples=5000,
    method='percentile',
    random_state=2,   # dedicated seed for the resampling, independent of np.random.seed
)
print("Bootstrap percentile CI for mean:", res.confidence_interval)

# 5) difference of means: Welch (unequal-variance) CI and test via CI inversion
a = np.random.normal(loc=0.5, scale=1.0, size=30)
b = np.random.normal(loc=0.0, scale=1.3, size=40)
diff = np.mean(a) - np.mean(b)

# Per-sample variance of the mean (s^2 / n), computed once and reused below.
var_a_n = a.var(ddof=1) / len(a)
var_b_n = b.var(ddof=1) / len(b)
se = np.sqrt(var_a_n + var_b_n)

# Welch-Satterthwaite approximation of the degrees of freedom.
num = (var_a_n + var_b_n) ** 2
den = var_a_n ** 2 / (len(a) - 1) + var_b_n ** 2 / (len(b) - 1)
df_w = num / den

tq_w = t.ppf(1 - alpha / 2, df=df_w)
margin_w = tq_w * se
ci_welch = (diff - margin_w, diff + margin_w)
print("Welch CI for mean diff:", ci_welch)

# H0: diff = 0 is rejected exactly when 0 lies outside the interval.
print("Reject H0: diff=0?", not (ci_welch[0] <= 0 <= ci_welch[1]))