# In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

# Load the "penguins" example dataset through seaborn.
penguins = sns.load_dataset("penguins")

# Drop the rows where 'body_mass_g' is missing, then keep only that column.
data = penguins.dropna(subset=["body_mass_g"])["body_mass_g"]

# Fix the seed of NumPy's global random state so the run is reproducible.
np.random.seed(42)

# Draw a random sample of 50 observations without replacement.
sample = np.random.choice(data, 50, replace=False)

# --- Bootstrap ---
# Number of bootstrap replicates.
n_boot = 1000

# Each replicate resamples the observed sample with replacement (same size as
# the sample itself) and records the mean of that resample.
boot_means = np.array([
    np.random.choice(sample, size=len(sample), replace=True).mean()
    for _ in range(n_boot)
])

# 95% bootstrap CI (percentile method): the 2.5th and 97.5th percentiles of
# the replicate means.
ci_lower, ci_upper = np.percentile(boot_means, [2.5, 97.5])

# --- Normality assumption ---
# Sample size, mean, and standard deviation (ddof=1 divides by n - 1).
n = len(sample)
sample_mean = sample.mean()
sample_std = sample.std(ddof=1)

# Standard error of the mean.
se = sample_std / np.sqrt(n)

# 95% interval from Student's t distribution with n - 1 degrees of freedom,
# located at the sample mean and scaled by the standard error.
ci_norm = stats.t.interval(0.95, df=n - 1, loc=sample_mean, scale=se)

# --- Results ---
# Print the sample mean and both 95% intervals, one per line, rounded to two
# decimals.
print(
    f"Utvalgsgjennomsnitt: {sample_mean:.2f}",
    f"Bootstrap 95% CI: ({ci_lower:.2f}, {ci_upper:.2f})",
    f"Normalantakelse 95% CI: ({ci_norm[0]:.2f}, {ci_norm[1]:.2f})",
    sep="\n",
)

# --- Plot ---
plt.figure(figsize=(8, 5))

# Histogram of the bootstrap means (30 bins) with a KDE curve.
sns.histplot(boot_means, bins=30, kde=True, color="skyblue")

# Vertical reference lines: bootstrap CI bounds (red, dashed), t-interval
# bounds (green, dotted), and the sample mean (black, solid). Drawn in this
# order so the legend entries appear in the same sequence.
for vline_x, vline_color, vline_style, vline_label in (
    (ci_lower, 'red', '--', "Bootstrap CI nedre"),
    (ci_upper, 'red', '--', "Bootstrap CI øvre"),
    (ci_norm[0], 'green', ':', "Normal CI nedre"),
    (ci_norm[1], 'green', ':', "Normal CI øvre"),
    (sample_mean, 'black', '-', "Utvalgsgjennomsnitt"),
):
    plt.axvline(
        vline_x,
        color=vline_color,
        linestyle=vline_style,
        label=vline_label,
    )

plt.legend()
plt.title("Bootstrap-fordeling av gjennomsnittet")
plt.show()
