In [None]:
import matplotlib.pyplot as plt
import numpy as np

from scipy import stats

In [None]:
def repeated_samples(arr, n_samples, sample_size, statistic, rng=None):
    """Draw repeated resamples from ``arr`` and evaluate a statistic on each.

    Each resample consists of ``sample_size`` values drawn from ``arr`` with
    replacement; this is repeated ``n_samples`` times.

    Parameters
    ----------
    arr : array-like
        Values to resample from (expected to be 1-D).
    n_samples : int
        Number of resamples to draw.
    sample_size : int
        Number of values in each resample.
    statistic : callable
        Function applied to each resample (a 1-D array), e.g. ``np.mean``.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, so ``np.random.seed`` still controls the draws.

    Returns
    -------
    numpy.ndarray
        The value of ``statistic`` for each of the ``n_samples`` resamples.
    """
    # Fall back to the legacy global state so existing callers are unaffected.
    sampler = np.random if rng is None else rng

    # Draw everything in one call, then fold into one row per resample.
    draws = sampler.choice(arr, size=n_samples * sample_size, replace=True)
    resamples = draws.reshape(n_samples, sample_size)
    return np.apply_along_axis(statistic, 1, resamples)

### How well calibrated are bootstrap confidence intervals?

We can assess how well calibrated bootstrap confidence intervals are by simulating how often they contain the true population value.

To do this we'll perform the following steps:


1. Define a population distribution 
2. Sample a dataset from the population distribution
3. With this sample dataset, perform the bootstrap and calculate the confidence interval (CI), i.e. the lower and upper quantiles of the bootstrap sampling distribution of the statistic
4. Repeat steps 2 and 3 `num_bs_procedures` times

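After we've done this we can check what fraction of the bootstrap CIs contained the true population value. If the intervals are well calibrated, this fraction (the coverage) should be close to the nominal confidence level.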

In [None]:
# Location and scale of the normal population we simulate from.
pop_mean, pop_scale = 5, 5
population_dist = stats.norm(loc=pop_mean, scale=pop_scale)

# Size of each dataset drawn from the population, and the statistic whose
# bootstrap confidence interval is being assessed.
sample_size = 250
statistic = np.mean

In [None]:
num_bs_samples = 1_000  # each time we bootstrap, how many datasets to sample
num_bs_procedures = 500  # number of times to repeat the bootstrap procedure
ci_interval = 0.95
alpha = 1 - ci_interval

# Seed NumPy's global random state so that re-running this cell (or Restart &
# Run All) reproduces the same coverage figure. Both `population_dist.rvs`
# (called without `random_state`) and the `np.random.choice` call inside
# `repeated_samples` draw from this global state.
random_seed = 42
np.random.seed(random_seed)

ci_bounds = []
for i in range(num_bs_procedures):
    # In each BS iteration we sample a fresh dataset from the population
    samples = population_dist.rvs(size=sample_size)

    # For a given sample from the population, perform the bootstrap and take the
    # alpha/2 and 1 - alpha/2 quantiles of the bootstrap distribution as the CI:
    bs_sampling_distn = repeated_samples(samples, num_bs_samples, sample_size, statistic)
    quantiles = np.quantile(bs_sampling_distn, q=[alpha / 2, 1 - alpha / 2])
    ci_bounds.append(quantiles)

# One row per bootstrap procedure: column 0 is the lower bound, column 1 the upper.
bs_quantiles = np.array(ci_bounds)

# Fraction of intervals that strictly bracket the true population value.
coverage = np.mean((bs_quantiles[:, 0] < pop_mean) & (bs_quantiles[:, 1] > pop_mean))
print(f"Coverage: {coverage*100:.2f}% of BS intervals contained popn value in {ci_interval*100:.2f} % CI")

We can plot the bootstrap CIs vs. the true population value - see how well-calibrated they are!

In [None]:
f, ax = plt.subplots(figsize=(20, 8))

# Order the intervals by their lower bound so the band of CIs reads left to right.
q_sorted = np.argsort(bs_quantiles[:, 0])

# One vertical segment per bootstrap procedure, spanning its lower-to-upper bound.
# The x positions are derived from the plotted array itself so the figure cannot
# fall out of step with a `num_bs_procedures` that was changed without re-running.
ax.vlines(
    x=np.arange(len(bs_quantiles)),
    ymin=bs_quantiles[q_sorted, 0],
    ymax=bs_quantiles[q_sorted, 1],
    label="Bootstrap CI",
)

# Reference line: intervals that do not cross it missed the true value.
ax.axhline(pop_mean, color="red", label="True population value")

ax.set_xlabel("Bootstrap procedure (sorted by lower CI bound)")
ax.set_ylabel("Value of the statistic")
ax.legend()
ax.set_title(f"Coverage: {coverage*100:.2f}% of BS intervals contained population value at {ci_interval*100:.2f}% CI");