In [1]:
library(KRLS)

# Simulate n observations from a two-bump regression surface on the unit square.
#
# Both covariates are drawn independently from Uniform(0, 1). The response is
# the sum of a bump centred at (0.25, 0.25), a half-height bump centred at
# (0.7, 0.7), and Gaussian noise with variance 0.25.
#
# Args:
#   n: number of observations to draw.
#
# Returns:
#   A list with `X`, an n-by-2 matrix with columns "x1" and "x2", and `y`,
#   the length-n response vector.
generate_data <- function(n) {
  # Draw order (x1, x2, noise) determines which random numbers each gets.
  u1 <- runif(n)
  u2 <- runif(n)
  noise <- rnorm(n, mean = 0, sd = 0.5)  # sd 0.5 <=> variance 0.25

  # Gaussian-shaped bump evaluated at the sampled points.
  bump <- function(centre, sharpness) {
    exp(sharpness * (-(u1 - centre)^2 - (u2 - centre)^2))
  }
  signal <- bump(0.25, 10) + 0.5 * bump(0.7, 14)

  list(X = cbind(x1 = u1, x2 = u2), y = signal + noise)
}


# Fit KRLS three ways on one freshly simulated training set and evaluate each
# on a shared test set.
#
# The three estimators, in output-column order, are:
#   1. a single krls() fit on all n training observations;
#   2. the average of B krls() fits, each on k observations sampled WITHOUT
#      replacement from the training set;
#   3. the average of B krls() fits, each on k observations sampled WITH
#      replacement from the training set.
# Every fit, including the subsample fits, uses lambda = n^(-2/3).
#
# Args:
#   n: training-set size; also determines lambda.
#   B: number of subsample fits per ensemble.
#   k: number of observations in each subsample.
#   test_data: list with elements `X` (matrix of test covariates) and `y`
#     (test responses), as returned by generate_data().
#
# Returns:
#   A list with
#     prediction:    nrow(test_data$X)-by-3 matrix of predictions;
#     predict_error: matrix of the same shape with squared errors
#                    (y_test - prediction)^2;
#     time:          length-3 vector of elapsed seconds spent inside krls()
#                    only (prediction time is not included); for the two
#                    ensembles it is the sum over the B fits.
method <- function(n, B, k, test_data) {
  stopifnot(n >= 1, B >= 1, k >= 1)

  data <- generate_data(n)
  # drop = FALSE keeps X a matrix even for a single row, matching the
  # subsample extraction below.
  X <- data$X[seq_len(n), , drop = FALSE]
  y <- data$y[seq_len(n)]
  X_test <- test_data$X
  y_test <- test_data$y
  lambda <- n^(-2 / 3)

  # --- Estimator 1: one fit on the full training set -----------------------
  start_time_1 <- proc.time()["elapsed"]
  krlsout_1 <- krls(y = y, X = X, print.level = 0, lambda = lambda)
  end_time_1 <- proc.time()["elapsed"]
  time_1 <- end_time_1 - start_time_1
  prediction_1 <- matrix(
    predict(krlsout_1, newdata = X_test, se.fit = FALSE)$fit,
    ncol = 1
  )

  # --- Estimators 2 and 3: averaged subsample fits -------------------------
  # Runs B fits on subsamples of size k and averages their test predictions.
  # Only the krls() call itself is timed.
  subsample_ensemble <- function(replace) {
    sub_prediction <- matrix(nrow = nrow(X_test), ncol = B)
    elapsed <- 0
    for (b in seq_len(B)) {
      indices <- sample(seq_len(n), size = k, replace = replace)
      X_subsample <- X[indices, , drop = FALSE]
      y_subsample <- y[indices]
      start_time <- proc.time()["elapsed"]
      krlsout <- krls(
        y = y_subsample, X = X_subsample,
        print.level = 0, lambda = lambda
      )
      end_time <- proc.time()["elapsed"]
      elapsed <- elapsed + end_time - start_time
      sub_prediction[, b] <-
        predict(krlsout, newdata = X_test, se.fit = FALSE)$fit
    }
    list(
      prediction = matrix(rowMeans(sub_prediction), ncol = 1),
      time = elapsed
    )
  }

  # Order matters for reproducibility: all without-replacement draws come
  # before any with-replacement draw.
  without_replacement <- subsample_ensemble(replace = FALSE)
  with_replacement <- subsample_ensemble(replace = TRUE)
  prediction_2 <- without_replacement$prediction
  prediction_3 <- with_replacement$prediction

  predict_error_1 <- (y_test - prediction_1)^2
  predict_error_2 <- (y_test - prediction_2)^2
  predict_error_3 <- (y_test - prediction_3)^2

  list(
    prediction = matrix(
      c(prediction_1, prediction_2, prediction_3),
      ncol = 3
    ),
    predict_error = matrix(
      c(predict_error_1, predict_error_2, predict_error_3),
      ncol = 3
    ),
    time = c(time_1, without_replacement$time, with_replacement$time)
  )
}



# Repeat method() `times` times against one fixed test set and summarise the
# results per replication and per test point.
#
# A single test set of `num_test` observations is generated up front with
# generate_data() and reused for every replication; each call to method()
# draws its own training data. The three columns of every returned matrix
# correspond to the three columns returned by method().
#
# Args:
#   num_test: number of test observations.
#   n, B, k:  forwarded unchanged to method().
#   times:    number of replications.
#
# Returns:
#   A list with
#     MSE_matrix: times-by-3; row i holds the squared error of replication i
#                 averaged over the test points;
#     variance:   num_test-by-3; row j holds the variance, across
#                 replications, of the predictions at test point j;
#     MSE:        num_test-by-3; row j holds the squared error at test point
#                 j averaged across replications;
#     Time_all:   length-3 vector, the `time` element of method() summed
#                 over all replications.
experiment <- function(num_test, n, B, k, times) {
  stopifnot(num_test >= 1, times >= 1)

  test_data <- generate_data(num_test)

  MSE_matrix <- matrix(NA_real_, nrow = times, ncol = 3)
  # Indexed as [replication, test point, estimator].
  predictions <- array(NA_real_, dim = c(times, num_test, 3))
  squared_errors <- array(NA_real_, dim = c(times, num_test, 3))
  Time_all <- c(0, 0, 0)

  for (rep_idx in seq_len(times)) {
    result <- suppressWarnings({
      method(n, B, k, test_data)
    })
    MSE_matrix[rep_idx, ] <- colMeans(result[["predict_error"]])
    Time_all <- Time_all + result[["time"]]
    predictions[rep_idx, , ] <- result[["prediction"]]
    squared_errors[rep_idx, , ] <- result[["predict_error"]]
  }

  # Collapse the replication dimension, leaving num_test-by-3 summaries.
  variance <- apply(predictions, c(2, 3), var)
  MSE <- apply(squared_errors, c(2, 3), mean)

  list(
    MSE_matrix = MSE_matrix,
    variance = variance,
    MSE = MSE,
    Time_all = Time_all
  )
}

## KRLS Package for Kernel-based Regularized Least Squares.


## See Hainmueller and Hazlett (2014) for details.




In [3]:
# Settings passed to experiment() for this cell: B = 25, k = 10.
num_test <- 100
times <- 100

B <- 25
k <- 10

# One run per training-set size n. The seed is reset before every run so each
# configuration starts from the same RNG state; results go to one file each.
invisible(Map(
  function(n_train, out_file) {
    set.seed(1)
    dput(experiment(num_test, n = n_train, B, k, times), file = out_file)
  },
  c(100, 150, 200, 250),
  c("kr_1.4.1.txt", "kr_1.4.2.txt", "kr_1.4.3.txt", "kr_1.4.4.txt")
))

In [4]:
# Settings passed to experiment() for this cell: B = 25, k = 15.
num_test <- 100
times <- 100

B <- 25
k <- 15

# One run per training-set size n. The seed is reset before every run so each
# configuration starts from the same RNG state; results go to one file each.
invisible(Map(
  function(n_train, out_file) {
    set.seed(1)
    dput(experiment(num_test, n = n_train, B, k, times), file = out_file)
  },
  c(100, 150, 200, 250),
  c("kr_3.4.2.txt", "kr_3.4.3.txt", "kr_3.4.4.txt", "kr_3.4.5.txt")
))

In [5]:
# Settings passed to experiment() for this cell: B = 25, k = 20.
num_test <- 100
times <- 100

B <- 25
k <- 20

# One run per training-set size n. The seed is reset before every run so each
# configuration starts from the same RNG state; results go to one file each.
invisible(Map(
  function(n_train, out_file) {
    set.seed(1)
    dput(experiment(num_test, n = n_train, B, k, times), file = out_file)
  },
  c(100, 150, 200, 250),
  c("kr_4.4.2.txt", "kr_4.4.3.txt", "kr_4.4.4.txt", "kr_4.4.5.txt")
))

In [7]:
# Settings passed to experiment() for this cell: B = 50, k = 10.
num_test <- 100
times <- 100

B <- 50
k <- 10

# One run per training-set size n. The seed is reset before every run so each
# configuration starts from the same RNG state; results go to one file each.
invisible(Map(
  function(n_train, out_file) {
    set.seed(1)
    dput(experiment(num_test, n = n_train, B, k, times), file = out_file)
  },
  c(100, 150, 200, 250),
  c("kr_1.0.1.txt", "kr_1.0.2.txt", "kr_1.0.3.txt", "kr_1.0.4.txt")
))

In [8]:
# Settings passed to experiment() for this cell: B = 50, k = 15.
num_test <- 100
times <- 100

B <- 50
k <- 15

# One run per training-set size n. The seed is reset before every run so each
# configuration starts from the same RNG state; results go to one file each.
invisible(Map(
  function(n_train, out_file) {
    set.seed(1)
    dput(experiment(num_test, n = n_train, B, k, times), file = out_file)
  },
  c(100, 150, 200, 250),
  c("kr_3.0.2.txt", "kr_3.0.3.txt", "kr_3.0.4.txt", "kr_3.0.5.txt")
))

In [9]:
# Settings passed to experiment() for this cell: B = 50, k = 20.
num_test <- 100
times <- 100

B <- 50
k <- 20

# One run per training-set size n. The seed is reset before every run so each
# configuration starts from the same RNG state; results go to one file each.
invisible(Map(
  function(n_train, out_file) {
    set.seed(1)
    dput(experiment(num_test, n = n_train, B, k, times), file = out_file)
  },
  c(100, 150, 200, 250),
  c("kr_4.0.2.txt", "kr_4.0.3.txt", "kr_4.0.4.txt", "kr_4.0.5.txt")
))

In [10]:
# Settings passed to experiment() for this cell: B = 100, k = 10.
num_test <- 100
times <- 100

B <- 100
k <- 10

# One run per training-set size n. The seed is reset before every run so each
# configuration starts from the same RNG state; results go to one file each.
invisible(Map(
  function(n_train, out_file) {
    set.seed(1)
    dput(experiment(num_test, n = n_train, B, k, times), file = out_file)
  },
  c(100, 150, 200, 250),
  c("kr_1.1.1.txt", "kr_1.1.2.txt", "kr_1.1.3.txt", "kr_1.1.4.txt")
))

In [11]:
# Settings passed to experiment() for this cell: B = 100, k = 15.
num_test <- 100
times <- 100

B <- 100
k <- 15

# One run per training-set size n. The seed is reset before every run so each
# configuration starts from the same RNG state; results go to one file each.
invisible(Map(
  function(n_train, out_file) {
    set.seed(1)
    dput(experiment(num_test, n = n_train, B, k, times), file = out_file)
  },
  c(100, 150, 200, 250),
  c("kr_3.1.2.txt", "kr_3.1.3.txt", "kr_3.1.4.txt", "kr_3.1.5.txt")
))

In [12]:
# Settings passed to experiment() for this cell: B = 100, k = 20.
num_test <- 100
times <- 100

B <- 100
k <- 20

# One run per training-set size n. The seed is reset before every run so each
# configuration starts from the same RNG state; results go to one file each.
invisible(Map(
  function(n_train, out_file) {
    set.seed(1)
    dput(experiment(num_test, n = n_train, B, k, times), file = out_file)
  },
  c(100, 150, 200, 250),
  c("kr_4.1.2.txt", "kr_4.1.3.txt", "kr_4.1.4.txt", "kr_4.1.5.txt")
))