# Bootstrap

#### Monte Carlo test

In [1]:
import numpy as np

# Observed sample (10 measurements) analysed in the Monte Carlo section.
arr = np.array([
    6.20, 4.34, 8.14, 6.24, 3.72,
    3.54, 4.35, 2.67, 7.16, 6.00,
])

# Simulation settings: sample size, number of replicates, significance level.
n, B, alpha = len(arr), 10_000, 0.05

# Seed the legacy global NumPy RNG so the simulated draws are reproducible.
np.random.seed(42)

In [2]:
# Point estimates from the observed sample; ddof=1 gives the unbiased
# (n - 1 denominator) sample standard deviation.
mean = np.mean(arr)
std = np.std(arr, ddof=1)

print("Mean:", mean)
print("Std dev:", std)

Mean: 5.236
Std dev: 1.7639236567002177


In [3]:
# Parametric Monte Carlo: draw B synthetic samples of size n from a normal
# distribution with the estimated mean/std, then average each row.
samples = np.random.normal(mean, std, (B, n))
means = np.mean(samples, axis=1)

In [5]:
# Sort a copy of the simulated means so the interval bounds can be read off
# by position.
means_sorted = means.copy()
means_sorted.sort()

# 0-based positions of the alpha/2 and (1 - alpha/2) empirical quantiles.
lower_idx = int(np.floor((alpha/2) * B))
upper_idx = int(np.floor((1 - alpha/2) * B)) - 1

# Percentile confidence interval for the mean.
ci_lower, ci_upper = means_sorted[lower_idx], means_sorted[upper_idx]

print(f"[{ci_lower:.4f}, {ci_upper:.4f}]")
print("If 0 inside:", ci_lower <= 0 <= ci_upper)

[4.1271, 6.3834]
If 0 inside: False


#### Test Bootstrap

In [7]:
# Same observed sample as above, redefined so the bootstrap section can be
# read on its own.
arr = np.array(
    [6.20, 4.34, 8.14, 6.24, 3.72, 3.54, 4.35, 2.67, 7.16, 6.00]
)

n = arr.shape[0]   # sample size
B = 10_000         # number of bootstrap replicates
alpha = 0.05       # significance level (95% interval)

# Re-seed the legacy global RNG so this section is reproducible.
np.random.seed(42)

In [8]:
# Mean of the observed sample; the bootstrap distribution is compared to it below.
sample_mean = np.mean(arr)
print("Sample mean:", sample_mean)

Sample mean: 5.236


In [9]:
# Non-parametric bootstrap: B resamples of size n drawn from the data with
# replacement (np.random.choice replaces by default), one mean per resample.
bootstrap_samples = np.random.choice(arr, (B, n))
bootstrap_means = np.mean(bootstrap_samples, axis=1)

In [10]:
# Ascending bootstrap means; the percentile interval is read off by position.
sorted_means = np.sort(bootstrap_means)

# 0-based positions of the alpha/2 and (1 - alpha/2) empirical quantiles.
lower_idx = int(np.floor((alpha / 2) * B))
upper_idx = int(np.floor((1 - alpha / 2) * B)) - 1

# Percentile bootstrap confidence interval for the mean.
ci_lower, ci_upper = sorted_means[lower_idx], sorted_means[upper_idx]

print("95% bootstrap CI for the mean:")
print(f"[{ci_lower:.4f}, {ci_upper:.4f}]")
print("Does 0 lie in the interval?:", ci_lower <= 0 <= ci_upper)

95% bootstrap CI for the mean:
[4.2090, 6.2540]
Does 0 lie in the interval?: False


In [11]:
# How many bootstrap means land strictly above the observed sample mean,
# and what fraction of all B replicates that is.
count_greater = (bootstrap_means > sample_mean).sum()
ratio = count_greater / B

print("Number of bootstrap means greater than the sample mean:", count_greater)
print("Total number of bootstrap means:", B)
print("Ratio:", ratio)

Number of bootstrap means greater than the sample mean: 5000
Total number of bootstrap means: 10000
Ratio: 0.5


#### Permutation test (1)

In [28]:
# Two independent groups of measurements (11 "high" and 17 "low" values).
high = np.array([
    378, 346, 245, 285, 365, 245, 208, 360, 296, 224, 292,
])
low = np.array([
    218, 264, 211, 180, 256, 240, 261, 205, 145, 195, 187,
    210, 378, 204, 232, 237, 310,
])

# Test statistic: difference of the group means (high minus low).
obs_diff = np.mean(high) - np.mean(low)
print("Observed mean difference (high - low):", obs_diff)

Observed mean difference (high - low): 63.556149732620355


In [29]:
# Two-sample permutation test for the difference in means (high - low).
B = 10_000
np.random.seed(42)

# Pool both groups: each shuffle below reassigns the pooled values to two
# groups of the original sizes.
combined = np.concatenate([high, low])
n_high = len(high)

# Mean difference for each random relabelling of the pooled data.
diffs = np.zeros(B)
for i in range(B):
    # Shuffle in place, then treat the first n_high values as the "high" group.
    np.random.shuffle(combined)
    new_high = combined[:n_high]
    new_low = combined[n_high:]
    diffs[i] = new_high.mean() - new_low.mean()

# Two-sided p-value: share of permuted differences at least as extreme
# (in absolute value) as the observed one.
p_value = np.mean(np.abs(diffs) >= np.abs(obs_diff))
print("p-value (two-sided):", p_value)

# Central 95% of the permuted differences; an observed difference outside it
# agrees with rejecting H0 at the 5% level.
ci_lower, ci_upper = np.percentile(diffs, [2.5, 97.5])
print("95% central region of null distribution:", (ci_lower, ci_upper))
print("Is observed diff inside this region?:", ci_lower <= obs_diff <= ci_upper)


p-value (two-sided): 0.0082
95% central region of null distribution: (np.float64(-46.95026737967913), np.float64(48.882352941176464))
Is observed diff inside this region?: False


#### Permutation test (2)

In [31]:
# Paired measurements: position i in both arrays forms one pair.
before = np.array([
    278, 230, 208, 224, 446, 365, 360, 292, 245, 245, 296,
])
after = np.array([
    277, 211, 205, 235, 424, 345, 341, 287, 219, 200, 254,
])

# Per-pair change and its mean, which is the test statistic for the paired test.
diffs = after - before
obs_mean_diff = np.mean(diffs)
print("Observed mean difference (after - before):", obs_mean_diff)

Observed mean difference (after - before): -17.363636363636363


In [32]:
# Sign-flip permutation test for paired data: each replicate multiplies every
# paired difference by a random +1/-1 and records the mean.
B = 10000
np.random.seed(42)

perm_diffs = np.zeros(B)
for i in range(B):
    # Randomly flip the sign of each difference (simulates no effect).
    # The sign vector is sized from `diffs` itself rather than the global `n`
    # left over from the bootstrap cells: that `n` is 10 while there are 11
    # pairs, so on a fresh kernel `signs * diffs` would fail to broadcast.
    signs = np.random.choice([-1, 1], size=diffs.size)
    perm_diffs[i] = np.mean(signs * diffs)

In [33]:
# Two-sided p-value: share of sign-flipped means whose magnitude is at least
# that of the observed mean difference.
p_value = (np.abs(perm_diffs) >= np.abs(obs_mean_diff)).mean()
print("Permutation test p-value (two-sided):", p_value)

Permutation test p-value (two-sided): 0.01


In [34]:
# Central 95% of the sign-flip distribution (2.5th and 97.5th percentiles),
# followed by a check of whether the observed statistic falls inside it.
ci_lower, ci_upper = np.percentile(perm_diffs, [2.5, 97.5])
print(
    "95% central region under H0:",
    (ci_lower, ci_upper),
)
print(
    "Is observed diff inside this region?:",
    ci_lower <= obs_mean_diff <= ci_upper,
)


95% central region under H0: (np.float64(-14.090909090909092), np.float64(13.909090909090908))
Is observed diff inside this region?: False
