In [2]:
# MASS is attached for the Boston data set; KRLS for krls() and its predict method.
library(MASS)
library(KRLS)
# Global data set that experiment() reads by name.
# NOTE(review): this binding shadows utils::data(); method() treats column 14
# as the response (presumably `medv` in Boston -- confirm).
data <- Boston


#' Compare one KRLS fit against two averaged subsample ensembles.
#'
#' Draws `n` rows from `train_data` and produces test-set predictions from
#' three estimators, all fitted with `lambda = n^(-2/3)`:
#'   1. a single KRLS fit on the `n` drawn rows;
#'   2. the average of `B` KRLS fits, each on `k` rows drawn from those `n`
#'      rows without replacement;
#'   3. the same as (2) but drawing with replacement.
#'
#' A fit or prediction that raises an error is retried with a fresh random
#' draw. Retries are capped at `max_tries` per fit, and impossible argument
#' combinations are rejected up front, so the function can no longer spin
#' forever on a failure that will never go away.
#'
#' @param n Number of training rows to draw (at most `nrow(train_data)`).
#' @param B Number of subsample fits per ensemble (at least 1).
#' @param k Subsample size (between 1 and `n`).
#' @param train_data Data frame to draw the training rows from.
#' @param test_data Data frame with the same columns, used for evaluation.
#' @param max_tries Maximum consecutive failed attempts allowed for one fit.
#' @param response_col Integer position of the response column.
#' @return A list with `prediction` and `predict_error` (squared error), both
#'   `nrow(test_data)` x 3 matrices with one column per estimator, and `time`,
#'   the elapsed fitting time of each estimator (successful fits only).
method <- function(n, B, k, train_data, test_data, max_tries = 100,
                   response_col = 14) {
  # Fail fast on arguments that would make every attempt error out.
  if (n < 1 || n > nrow(train_data)) {
    stop("`n` must be between 1 and nrow(train_data) (", nrow(train_data),
         "); got ", n, call. = FALSE)
  }
  if (B < 1) {
    stop("`B` must be at least 1; got ", B, call. = FALSE)
  }
  if (k < 1 || k > n) {
    stop("`k` must be between 1 and `n` (", n, "); got ", k, call. = FALSE)
  }
  if (max_tries < 1) {
    stop("`max_tries` must be at least 1; got ", max_tries, call. = FALSE)
  }
  if (response_col < 1 || response_col > ncol(train_data)) {
    stop("`response_col` is outside the columns of `train_data`",
         call. = FALSE)
  }

  lambda <- n^(-2 / 3)
  X_test <- test_data[, -response_col, drop = FALSE]
  y_test <- test_data[, response_col]

  # Run `attempt` until it succeeds, giving up after `max_tries` failures and
  # reporting the last underlying error instead of hiding it.
  retry <- function(attempt, what) {
    last_error <- NULL
    for (try_no in seq_len(max_tries)) {
      outcome <- tryCatch(attempt(), error = function(e) e)
      if (!inherits(outcome, "error")) {
        return(outcome)
      }
      last_error <- outcome
    }
    stop(what, " failed ", max_tries, " times in a row; last error: ",
         conditionMessage(last_error), call. = FALSE)
  }

  # Estimator 1: a single fit on a fresh draw of n training rows.
  full <- retry(function() {
    rows <- sample(nrow(train_data), n)
    drawn <- train_data[rows, ]
    X_drawn <- drawn[, -response_col, drop = FALSE]
    y_drawn <- drawn[, response_col]
    started <- proc.time()["elapsed"]
    fit <- krls(y = y_drawn, X = X_drawn, print.level = 0, lambda = lambda)
    finished <- proc.time()["elapsed"]
    fitted <- predict(fit, newdata = X_test, se.fit = FALSE)$fit
    list(
      X = X_drawn,
      y = y_drawn,
      prediction = matrix(fitted, ncol = 1),
      time = finished - started
    )
  }, "full-sample KRLS fit")
  X <- full$X
  y <- full$y

  # Estimators 2 and 3 differ only in whether rows are drawn with replacement.
  ensemble <- function(replace, what) {
    member_predictions <- matrix(nrow = nrow(X_test), ncol = B)
    elapsed <- 0
    for (b in seq_len(B)) {
      member <- retry(function() {
        rows <- sample.int(n, size = k, replace = replace)
        started <- proc.time()["elapsed"]
        fit <- krls(y = y[rows], X = X[rows, , drop = FALSE],
                    print.level = 0, lambda = lambda)
        finished <- proc.time()["elapsed"]
        list(
          prediction = predict(fit, newdata = X_test, se.fit = FALSE)$fit,
          time = finished - started
        )
      }, what)
      member_predictions[, b] <- member$prediction
      elapsed <- elapsed + member$time
    }
    list(
      prediction = matrix(rowMeans(member_predictions), ncol = 1),
      time = elapsed
    )
  }

  without_repl <- ensemble(FALSE, "subsample (without replacement) KRLS fit")
  with_repl <- ensemble(TRUE, "subsample (with replacement) KRLS fit")

  predictions <- list(
    full$prediction, without_repl$prediction, with_repl$prediction
  )
  squared_errors <- lapply(predictions, function(p) (y_test - p)^2)

  list(
    prediction = matrix(unlist(predictions), ncol = 3),
    predict_error = matrix(unlist(squared_errors), ncol = 3),
    time = c(full$time, without_repl$time, with_repl$time)
  )
}


#' Repeat method() on one fixed train/test split and summarise the results.
#'
#' Holds out `num_test` random rows as the test set, then calls
#' `method(n, B, k, train_data, test_data)` `times` times on that split.
#'
#' @param num_test Number of rows held out for testing.
#' @param n,B,k Passed through to `method()`.
#' @param times Number of repetitions.
#' @param dataset Data frame to split. When `NULL` (the default) the global
#'   object named `data` is used, which is what existing callers rely on.
#' @return A list with
#'   `MSE_matrix` (`times` x 3: mean squared test error per repetition),
#'   `variance` (`num_test` x 3: variance of each test point's prediction
#'   across repetitions),
#'   `MSE` (`num_test` x 3: mean squared error of each test point across
#'   repetitions) and
#'   `Time_all` (length 3: summed `time` returned by `method()`).
experiment <- function(num_test, n, B, k, times, dataset = NULL) {
  if (is.null(dataset)) {
    dataset <- data
  }
  # Without a global `data` object the name resolves to utils::data(), so
  # check the type here to get a readable error.
  if (!is.data.frame(dataset) && !is.matrix(dataset)) {
    stop("no data set available: pass `dataset` or define a global `data`",
         call. = FALSE)
  }
  if (num_test < 1 || num_test >= nrow(dataset)) {
    stop("`num_test` must be between 1 and nrow(dataset) - 1; got ", num_test,
         call. = FALSE)
  }
  if (times < 1) {
    stop("`times` must be at least 1; got ", times, call. = FALSE)
  }

  held_out <- sample(nrow(dataset), num_test)
  train_data <- dataset[-held_out, ]
  test_data <- dataset[held_out, ]

  MSE_matrix <- matrix(ncol = 3, nrow = times)
  # Indexed as [repetition, test point, estimator].
  predictions <- array(0, dim = c(times, num_test, 3))
  squared_errors <- array(0, dim = c(times, num_test, 3))
  Time_all <- c(0, 0, 0)

  for (rep_no in seq_len(times)) {
    result <- suppressWarnings(method(n, B, k, train_data, test_data))
    MSE_matrix[rep_no, ] <- colMeans(result[["predict_error"]])
    Time_all <- Time_all + result[["time"]]
    predictions[rep_no, , ] <- result[["prediction"]]
    squared_errors[rep_no, , ] <- result[["predict_error"]]
  }

  # Summarise over repetitions for every (test point, estimator) pair.
  variance <- apply(predictions, c(2, 3), var)
  MSE <- apply(squared_errors, c(2, 3), mean)

  list(
    MSE_matrix = MSE_matrix, variance = variance, MSE = MSE,
    Time_all = Time_all
  )
}

## KRLS Package for Kernel-based Regularized Least Squares.


## See Hainmueller and Hazlett (2014) for details.




In [3]:
# Settings for this cell: ensembles of 100 fits on 50 training rows.
num_test <- 100
times <- 100
B <- 100
n <- 50

# One run per subsample fraction; the seed is reset before every run.
invisible(Map(
  function(fraction, out_file) {
    set.seed(1)
    dput(experiment(num_test, n, B, k = n * fraction, times), file = out_file)
  },
  c(0.5, 0.7, 0.9),
  c("bos_3.2.1.txt", "bos_3.3.1.txt", "bos_9.1.1.txt")
))

In [4]:
# Settings for this cell: ensembles of 100 fits on 100 training rows.
num_test <- 100
times <- 100
B <- 100
n <- 100

# One run per subsample fraction; the seed is reset before every run.
invisible(Map(
  function(fraction, out_file) {
    set.seed(1)
    dput(experiment(num_test, n, B, k = n * fraction, times), file = out_file)
  },
  c(0.5, 0.7, 0.9),
  c("bos_3.2.2.txt", "bos_3.3.2.txt", "bos_9.1.2.txt")
))

In [5]:
# Settings for this cell: ensembles of 100 fits on 150 training rows.
num_test <- 100
times <- 100
B <- 100
n <- 150

# One run per subsample fraction; the seed is reset before every run.
invisible(Map(
  function(fraction, out_file) {
    set.seed(1)
    dput(experiment(num_test, n, B, k = n * fraction, times), file = out_file)
  },
  c(0.5, 0.7, 0.9),
  c("bos_3.2.3.txt", "bos_3.3.3.txt", "bos_9.1.3.txt")
))

In [6]:
# Settings for this cell: ensembles of 100 fits on 200 training rows.
num_test <- 100
times <- 100
B <- 100
n <- 200

# One run per subsample fraction; the seed is reset before every run.
invisible(Map(
  function(fraction, out_file) {
    set.seed(1)
    dput(experiment(num_test, n, B, k = n * fraction, times), file = out_file)
  },
  c(0.5, 0.7, 0.9),
  c("bos_3.2.4.txt", "bos_3.3.4.txt", "bos_9.1.4.txt")
))

In [7]:
# Settings for this cell: ensembles of 300 fits on 50 training rows.
num_test <- 100
times <- 100
B <- 300
n <- 50

# One run per subsample fraction; the seed is reset before every run.
invisible(Map(
  function(fraction, out_file) {
    set.seed(1)
    dput(experiment(num_test, n, B, k = n * fraction, times), file = out_file)
  },
  c(0.5, 0.7, 0.9),
  c("bos_5.2.1.txt", "bos_7.2.1.txt", "bos_9.2.1.txt")
))

In [9]:
# Settings for this cell: ensembles of 300 fits on 100 training rows.
num_test <- 100
times <- 100
B <- 300
n <- 100

# One run per subsample fraction; the seed is reset before every run.
invisible(Map(
  function(fraction, out_file) {
    set.seed(1)
    dput(experiment(num_test, n, B, k = n * fraction, times), file = out_file)
  },
  c(0.5, 0.7, 0.9),
  c("bos_5.2.2.txt", "bos_7.2.2.txt", "bos_9.2.2.txt")
))

In [10]:
# Settings for this cell: ensembles of 300 fits on 150 training rows.
num_test <- 100
times <- 100
B <- 300
n <- 150

# One run per subsample fraction; the seed is reset before every run.
invisible(Map(
  function(fraction, out_file) {
    set.seed(1)
    dput(experiment(num_test, n, B, k = n * fraction, times), file = out_file)
  },
  c(0.5, 0.7, 0.9),
  c("bos_5.2.3.txt", "bos_7.2.3.txt", "bos_9.2.3.txt")
))

In [12]:
# Settings for this cell: ensembles of 300 fits on 200 training rows.
num_test <- 100
times <- 100
B <- 300
n <- 200

# One run per subsample fraction; the seed is reset before every run.
invisible(Map(
  function(fraction, out_file) {
    set.seed(1)
    dput(experiment(num_test, n, B, k = n * fraction, times), file = out_file)
  },
  c(0.5, 0.7, 0.9),
  c("bos_5.2.4.txt", "bos_7.2.4.txt", "bos_9.2.4.txt")
))

In [13]:
# Settings for this cell: ensembles of 500 fits on 50 training rows.
num_test <- 100
times <- 100
B <- 500
n <- 50

# One run per subsample fraction; the seed is reset before every run.
invisible(Map(
  function(fraction, out_file) {
    set.seed(1)
    dput(experiment(num_test, n, B, k = n * fraction, times), file = out_file)
  },
  c(0.5, 0.7, 0.9),
  c("bos_1.2.1.txt", "bos_7.1.1.txt", "bos_9.3.1.txt")
))

In [14]:
# Settings for this cell: ensembles of 500 fits on 100 training rows.
num_test <- 100
times <- 100
B <- 500
n <- 100

# One run per subsample fraction; the seed is reset before every run.
invisible(Map(
  function(fraction, out_file) {
    set.seed(1)
    dput(experiment(num_test, n, B, k = n * fraction, times), file = out_file)
  },
  c(0.5, 0.7, 0.9),
  c("bos_1.2.2.txt", "bos_7.1.2.txt", "bos_9.3.2.txt")
))

In [15]:
# Settings for this cell: ensembles of 500 fits on 150 training rows.
num_test <- 100
times <- 100
B <- 500
n <- 150

# One run per subsample fraction; the seed is reset before every run.
invisible(Map(
  function(fraction, out_file) {
    set.seed(1)
    dput(experiment(num_test, n, B, k = n * fraction, times), file = out_file)
  },
  c(0.5, 0.7, 0.9),
  c("bos_1.2.3.txt", "bos_7.1.3.txt", "bos_9.3.3.txt")
))

In [16]:
# Settings for this cell: ensembles of 500 fits on 200 training rows.
num_test <- 100
times <- 100
B <- 500
n <- 200

# One run per subsample fraction; the seed is reset before every run.
invisible(Map(
  function(fraction, out_file) {
    set.seed(1)
    dput(experiment(num_test, n, B, k = n * fraction, times), file = out_file)
  },
  c(0.5, 0.7, 0.9),
  c("bos_1.2.4.txt", "bos_7.1.4.txt", "bos_9.3.4.txt")
))