# Cross-Validation and the Bootstrap

## 1. The Validation Set Approach

In [13]:
library(ISLR)
# Report the dimensions of the Auto data set.
dim(Auto)

Use the `mean()` function to calculate the MSE on the 196 observations in the validation set.

In [14]:
# Validation-set approach: fit on a random half of Auto and compute the
# mean squared error on the held-out half for polynomial degrees 1 to 3.
set.seed(1)
train <- sample(392, 196)

fit1 <- lm(mpg ~ horsepower, data = Auto, subset = train)
fit2 <- lm(mpg ~ poly(horsepower, 2), data = Auto, subset = train)
fit3 <- lm(mpg ~ poly(horsepower, 3), data = Auto, subset = train)

# The response is referenced as Auto$mpg: Auto is never attach()ed, so a
# bare `mpg` outside a `data =` argument would not be found.
# `[-train]` keeps only the residuals of the held-out observations.
MSE1 <- mean((Auto$mpg - predict(fit1, Auto))[-train]^2)
MSE2 <- mean((Auto$mpg - predict(fit2, Auto))[-train]^2)
MSE3 <- mean((Auto$mpg - predict(fit3, Auto))[-train]^2)
c(MSE1, MSE2, MSE3)

In [15]:
# Repeat the validation-set estimate with a different random split
# to see how much the held-out MSE depends on the split.
set.seed(2)
train <- sample(392, 196)

fit1 <- lm(mpg ~ horsepower, data = Auto, subset = train)
fit2 <- lm(mpg ~ poly(horsepower, 2), data = Auto, subset = train)
fit3 <- lm(mpg ~ poly(horsepower, 3), data = Auto, subset = train)

# The response is referenced as Auto$mpg: Auto is never attach()ed, so a
# bare `mpg` outside a `data =` argument would not be found.
# `[-train]` keeps only the residuals of the held-out observations.
MSE1 <- mean((Auto$mpg - predict(fit1, Auto))[-train]^2)
MSE2 <- mean((Auto$mpg - predict(fit2, Auto))[-train]^2)
MSE3 <- mean((Auto$mpg - predict(fit3, Auto))[-train]^2)
c(MSE1, MSE2, MSE3)

## 2. Leave-One-Out Cross-Validation

In [16]:
library(boot)

In [20]:

# Fit the linear model with glm() so it can be passed to cv.glm();
# with K left unspecified, cv.glm() performs leave-one-out cross-validation.
glm_fit <- glm(formula = mpg ~ horsepower, data = Auto)
cv_err <- cv.glm(data = Auto, glmfit = glm_fit)
cv_err[["delta"]]

In [23]:
# Leave-one-out CV error for polynomial fits of degree 1 through 5.
cv_error <- numeric(5)
for (i in seq_along(cv_error)) {
  glm_fit <- glm(mpg ~ poly(horsepower, i), data = Auto)
  loocv <- cv.glm(data = Auto, glmfit = glm_fit)
  # First component of delta is the raw cross-validation estimate.
  cv_error[i] <- loocv$delta[1]
}
cv_error

## 3. k-Fold Cross-Validation

In [24]:
# 10-fold CV error for polynomial fits of degree 1 through 10.
# The seed fixes the random assignment of observations to folds.
set.seed(17)
cv_error_10 <- numeric(10)
for (i in seq_along(cv_error_10)) {
  glm_fit <- glm(mpg ~ poly(horsepower, i), data = Auto)
  kfold <- cv.glm(data = Auto, glmfit = glm_fit, K = 10)
  # First component of delta is the raw cross-validation estimate.
  cv_error_10[i] <- kfold$delta[1]
}
cv_error_10

## 4. The Bootstrap

### Estimating the Accuracy of a Statistic of Interest

In [34]:
library(ISLR)
# Statistic for boot(): computes
#   (var(Y) - cov(X, Y)) / (var(X) + var(Y) - 2 * cov(X, Y))
# from the rows of `data` selected by `index`.
#
# data:  object with components `X` and `Y`.
# index: positions of the observations to use.
# Returns a single numeric value.
alpha_fn <- function(data, index) {
  xs <- data$X[index]
  ys <- data$Y[index]
  cov_xy <- cov(xs, ys)
  (var(ys) - cov_xy) / (var(xs) + var(ys) - 2 * cov_xy)
}
# Bootstrap alpha_fn over Portfolio with 1000 resamples.
boot(data = Portfolio, statistic = alpha_fn, R = 1000)


ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Portfolio, statistic = alpha_fn, R = 1000)


Bootstrap Statistics :
     original       bias    std. error
t1* 0.5758321 -0.003250718  0.09323856

### Estimating the Accuracy of a Linear Regression Model

In [37]:
# Statistic for boot(): coefficients of the linear regression of mpg on
# horsepower, fitted to the rows of `data` selected by `index`.
#
# data:  data frame with columns `mpg` and `horsepower`.
# index: positions of the observations to fit on.
# Returns the named coefficient vector (intercept, slope).
boot_fn <- function(data, index) {
  fit <- lm(mpg ~ horsepower, data = data, subset = index)
  coef(fit)
}
# Bootstrap the regression coefficients over Auto with 1000 resamples.
boot(data = Auto, statistic = boot_fn, R = 1000)


ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Auto, statistic = boot_fn, R = 1000)


Bootstrap Statistics :
      original        bias    std. error
t1* 39.9358610  0.0527532126 0.862713943
t2* -0.1578447 -0.0005466704 0.007373387

In [38]:
# Formula-based coefficient table (estimate, std. error, t value, p-value)
# for the linear fit. The component is spelled out in full so the lookup
# does not depend on `$` partial matching of `coef` to `coefficients`.
summary(lm(mpg ~ horsepower, data = Auto))$coefficients

Unnamed: 0,Estimate,Std. Error,t value,Pr(>|t|)
(Intercept),39.935861,0.717498656,55.65984,1.2203619999999999e-187
horsepower,-0.1578447,0.006445501,-24.48914,7.031989000000001e-81


In [39]:
# Statistic for boot(): coefficients of the quadratic regression of mpg on
# horsepower and horsepower^2, fitted to the rows of `data` selected by
# `index`.
#
# data:  data frame with columns `mpg` and `horsepower`.
# index: positions of the observations to fit on.
# Returns the named coefficient vector (intercept, linear, quadratic).
boot_fn <- function(data, index) {
  fit <- lm(mpg ~ horsepower + I(horsepower^2), data = data, subset = index)
  coef(fit)
}
# Bootstrap the regression coefficients over Auto with 1000 resamples.
boot(data = Auto, statistic = boot_fn, R = 1000)


ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Auto, statistic = boot_fn, R = 1000)


Bootstrap Statistics :
        original        bias     std. error
t1* 56.900099702  2.162453e-02 2.0457182987
t2* -0.466189630 -4.439542e-04 0.0324717111
t3*  0.001230536  2.084314e-06 0.0001173579

In [41]:
# Formula-based coefficient table (estimate, std. error, t value, p-value)
# for the quadratic fit. The component is spelled out in full so the lookup
# does not depend on `$` partial matching of `coef` to `coefficients`.
summary(lm(mpg ~ horsepower + I(horsepower^2), data = Auto))$coefficients

Unnamed: 0,Estimate,Std. Error,t value,Pr(>|t|)
(Intercept),56.900099702,1.8004268063,31.60367,1.740911e-109
horsepower,-0.46618963,0.0311246171,-14.97816,2.289429e-40
I(horsepower^2),0.001230536,0.0001220759,10.08009,2.19634e-21
