# Simple Nonparametric Bootstrap for Regression

## Simulate Data

In [1]:
# --- configuration of the simulated regression ---

n = 100      # sample size (number of simulated observations)

alpha = 3    # true intercept of the data-generating line

beta = 7     # true slope of the data-generating line

s = 2        # true standard deviation of the residual noise

In [2]:
import numpy as np
# simulate data
# Seed NumPy's global generator so the simulated sample, and every later
# draw made through np.random in this notebook, is identical after
# Restart Kernel -> Run All.
np.random.seed(42)

# covariate: n independent standard-normal draws
x = np.random.normal(0,1,n)

# response: true line alpha + beta*x plus Gaussian noise with standard deviation s
y = alpha + beta*x + np.random.normal(0,s,n)

## Fit OLS

In [3]:
import statsmodels.api as sm
# design matrix: x with a constant (intercept) column added
X = sm.add_constant(x)

# ordinary least squares regression of y on the design matrix
ols = sm.OLS(endog=y, exog=X).fit()

# display the fitted coefficients (intercept first, then slope)
ols.params

array([3.07362   , 6.96670523])

## Bootstrap samples

In [4]:
# nonparametric bootstrap using for loop
# Each replication resamples whole cases, i.e. (x, y) pairs, with replacement,
# refits OLS on the resampled data, and records the two coefficients.

## number of bootstrap replications
B = 10000

## vectors to store results
alpha_boot = np.zeros(B)
beta_boot  = np.zeros(B)

# bootstrap samples
for j in range(B):
    # sample cases (rows): n row indices drawn from 0..n-1 with replacement
    idx = np.random.choice(n, size=n, replace=True)

    # fit OLS to bootstrapped cases
    x_boot   = x[idx]
    y_boot   = y[idx]
    # Use a dedicated name for the resampled design matrix so the
    # full-sample design matrix `X` used to fit `ols` is not overwritten.
    X_boot   = sm.add_constant(x_boot)
    ols_boot = sm.OLS(y_boot, X_boot).fit()

    # save results
    alpha_boot[j] = ols_boot.params[0]
    beta_boot[j]  = ols_boot.params[1]

## Percentile CI

In [5]:
# set significance level
# Named `sig_level` rather than `alpha` so the simulation's true intercept
# `alpha` is not overwritten (re-running the simulation cell after this one
# would otherwise generate data with the wrong intercept).
sig_level = 0.05

# lower and upper quantile levels shared by both intervals
tail_probs = [sig_level/2, 1-sig_level/2]

# Percentile CI: empirical quantiles of the bootstrap coefficient draws

## CI for alpha
print("Percentile CI for alpha:",np.quantile(alpha_boot, tail_probs))

## CI for beta
print("Percentile CI for beta:",np.quantile(beta_boot, tail_probs))


Percentile CI for alpha: [2.71145782 3.44704429]
Percentile CI for beta: [6.57381737 7.31994956]


## Normal approximation CI

In [6]:
# Normal approximation CI
# Interval = OLS point estimate +/- z * bootstrap standard error.

## critical threshold
## (approximate 97.5% quantile of the standard normal, i.e. a two-sided 95% interval)
z = 1.96

## compute bootstrap standard errors
## ddof=1 gives the sample standard deviation (divisor B-1) of the bootstrap
## draws; np.std's default ddof=0 would divide by B instead.
sd_boot_a = np.std(alpha_boot, ddof=1)
sd_boot_b = np.std(beta_boot, ddof=1)

## CI for alpha
print("Normal approximation CI for alpha:",ols.params[0]-z*sd_boot_a,ols.params[0]+z*sd_boot_a)

## CI for beta
print("Normal approximation CI for beta:",ols.params[1]-z*sd_boot_b,ols.params[1]+z*sd_boot_b)

Normal approximation CI for alpha: 2.7084410201763345 3.438798970048191
Normal approximation CI for beta: 6.594173324486633 7.339237129865804


## Comparison to parametric CI

In [7]:
# Confidence intervals reported by the fitted OLS model at alpha=0.05
# (95% level), shown for comparison with the bootstrap intervals.
# One row per coefficient, in the same order as ols.params.
ols.conf_int(alpha=0.05)

array([[2.70110281, 3.44613718],
       [6.579273  , 7.35413745]])