-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
98f987c
commit af22019
Showing
38 changed files
with
933 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,22 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(IBOSS) | ||
export(IES) | ||
export(LowCon) | ||
export(OSMAC) | ||
export(OSS) | ||
export(Unif) | ||
export(c_IES_compare) | ||
export(myArma_IBOSS) | ||
export(myArma_OSS) | ||
export(myR_IBOSS) | ||
export(myR_OSS) | ||
export(myRcpp_IBOSS) | ||
export(myRcpp_cstyle_IBOSS) | ||
export(r_IES) | ||
export(r_IES_compare) | ||
export(scale01) | ||
export(scale_neg_pos_1) | ||
export(subsampling) | ||
importFrom(Rcpp,sourceCpp) | ||
useDynLib(dbsubsampling, .registration = TRUE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
## --------------------------------------------------------------------------- | ||
## --------------------------------------------------------------------------- | ||
## | ||
## R main function `IES`. `RcppArmadillo`-Version. | ||
## | ||
## --------------------------------------------------------------------------- | ||
## --------------------------------------------------------------------------- | ||
|
||
#' Scale data to \eqn{[-1, 1]} | ||
#' | ||
#' @param X A data.frame or matrix. | ||
#' | ||
#' @return Scaled X. | ||
#' @export | ||
#' | ||
#' @examples | ||
#' X <- matrix(1:20, 5, 4) | ||
#' scale01(X) | ||
scale01 <- function(X) { | ||
apply(X, 2, function(.col) (.col - min(.col)) / (max(.col)-min(.col))) | ||
} | ||
|
||
#' Independence-Encouraging Subsampling for Nonparametric Additive Models (IES, Proposed by Zhang et.al. (2024)) | ||
#' | ||
#' A subsampling method for nonparameter additive model based on Orthogonal Array. | ||
#' | ||
#' @param X A data.frame or matrix consists of explanatory variables. | ||
#' @param n Subsample size. | ||
#' @param q Hyperparamter of how to divide the axes. Default to 16. | ||
#' @param seed Random seed for the sampling. | ||
#' | ||
#' @return Subsample index. | ||
#' @references Yi Zhang, Lin Wang, Xiaoke Zhang & HaiYing Wang (2024) | ||
#' \emph{Independence-Encouraging Subsampling for Nonparametric Additive Models, | ||
#' Journal of Computational and Graphical Statistics}, | ||
#' \url{https://www.tandfonline.com/doi/full/10.1080/10618600.2024.2326136}. | ||
#' | ||
#' @examples | ||
#' data <- data_numeric_regression | ||
#' X <- data[-which(names(data) == "y")] | ||
#' IES(X, n = 100, q = 16, seed = NULL) | ||
#' @export | ||
IES <- function(X, n, q = 16, seed = NULL) { | ||
if (!is.null(seed)) withr::local_seed(seed) | ||
X <- scale01(as.matrix(X)) | ||
attributes(X) <- attributes(X)["dim"] | ||
|
||
index <- armaIES(X, n, q) | ||
return(as.vector(index)) | ||
} | ||
|
||
|
||
#' IES C++-Version for Benchmarking (R-Wrap Code) | ||
#' | ||
#' There is no randomness, all parts that need to be randomly selected are selected first indexed. | ||
#' | ||
#' @param X X A data.frame or matrix consists of explanatory variables. | ||
#' @param n Subsample size. | ||
#' @param q Hyperparamter of how to divide the axes. | ||
#' | ||
#' @export | ||
c_IES_compare <- function(X, n, q) { | ||
X <- scale01(as.matrix(X)) | ||
attributes(X) <- attributes(X)["dim"] | ||
|
||
index <- armaIES_compare(X, n, q) | ||
return(as.vector(index)) | ||
} | ||
|
||
## --------------------------------------------------------------------------- | ||
## --------------------------------------------------------------------------- | ||
## | ||
## IES R-Version. | ||
## | ||
## --------------------------------------------------------------------------- | ||
## --------------------------------------------------------------------------- | ||
|
||
r_Compute_IES_Loss <- function(candi, last_index, X, q) { | ||
x_last <- floor(X[last_index, ]*q) | ||
loss <- apply(X[candi, ], 1, function(.row) (sum(floor(.row*q) == x_last))^2) | ||
return(loss) | ||
} | ||
|
||
#' R Version of IES for Testing | ||
#' | ||
#' @param X A data.frame or matrix consists of explanatory variables. | ||
#' @param n Subsample size. | ||
#' @param q Hyperparamter of how to divide the axes. | ||
#' @param seed Random seed for the sampling. | ||
#' | ||
#' @export | ||
r_IES <- function(X, n, q, seed = NULL) { | ||
if (!is.null(seed)) withr::local_seed(seed) | ||
X <- scale01(as.matrix(X)) | ||
attributes(X) <- attributes(X)["dim"] | ||
N <- nrow(X) | ||
|
||
index <- numeric(n) | ||
candi <- 1:N | ||
loss <- numeric(N) | ||
|
||
# Initial | ||
index[1] <- sample(N, 1) | ||
# index[1] <- 1 | ||
candi <- candi[-index[1]] | ||
loss <- loss[-index[1]] | ||
loss <- r_Compute_IES_Loss(candi, index[1], X, q) | ||
# paste0("---Step: 1---") | ||
# print(loss) | ||
|
||
for (i in 2:n) { | ||
# Election | ||
temp_vec <- which(loss == min(loss)) | ||
temp <- temp_vec[sample(length(temp_vec), 1)] | ||
# temp <- temp_vec[1] | ||
index[i] <- candi[temp] | ||
# print(temp) | ||
|
||
# Update | ||
candi <- candi[-temp] | ||
loss <- loss[-temp] | ||
loss <- loss + r_Compute_IES_Loss(candi, index[i], X, q) | ||
# paste0("---Step: ", i, "---") | ||
# print(loss) | ||
} | ||
|
||
return(index) | ||
} | ||
|
||
#' IES R-Version for Benchmarking | ||
#' | ||
#' There is no randomness, all parts that need to be randomly selected are selected first indexed. | ||
#' | ||
#' @param X X A data.frame or matrix consists of explanatory variables. | ||
#' @param n Subsample size. | ||
#' @param q Hyperparamter of how to divide the axes. | ||
#' | ||
#' @export | ||
r_IES_compare <- function(X, n, q) { | ||
X <- scale01(as.matrix(X)) | ||
attributes(X) <- attributes(X)["dim"] | ||
N <- nrow(X) | ||
|
||
index <- numeric(n) | ||
candi <- 1:N | ||
loss <- numeric(N) | ||
|
||
# Initial | ||
# index[1] <- sample(N, 1) | ||
index[1] <- 1 | ||
candi <- candi[-index[1]] | ||
loss <- loss[-index[1]] | ||
loss <- r_Compute_IES_Loss(candi, index[1], X, q) | ||
# paste0("---Step: 1---") | ||
# print(loss) | ||
for (i in 2:n) { | ||
# Election | ||
temp_vec <- which(loss == min(loss)) | ||
# temp <- temp_vec[sample(length(temp_vec), 1)] | ||
temp <- temp_vec[1] | ||
index[i] <- candi[temp] | ||
# print(temp) | ||
|
||
# Update | ||
candi <- candi[-temp] | ||
loss <- loss[-temp] | ||
loss <- loss + r_Compute_IES_Loss(candi, index[i], X, q) | ||
# paste0("---Step: ", i, "---") | ||
# print(loss) | ||
} | ||
|
||
return(index) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#' Transform data to \eqn{[-1,1]^p} | ||
#' | ||
#' @param X A data.frame or matrix of explanatory variables. | ||
#' | ||
#' @return Scaled `X` to \eqn{[-1,1]^p}. | ||
#' @export | ||
#' | ||
#' @examples | ||
#' X <- matrix(rnorm(100), nrow = 20, ncol = 5) | ||
#' scaled_X <- scale_neg_pos_1(X) | ||
#' apply(scaled_X, 2, range) | ||
scale_neg_pos_1 <- function(X) { | ||
apply(X, 2, function(.col) (( (.col - min(.col)) / (max(.col)-min(.col)) ) *2 ) -1 ) | ||
} | ||
|
||
trans_LHD_adapt_X <- function(LHD_data, X, theta) { | ||
p <- ncol(X) | ||
space <- apply(X, 2, stats::quantile, probs = c(theta / 100, 1 - theta / 100)) | ||
width <- space[2,] - space[1,] | ||
for (j in 1:p) { | ||
LHD_data[,j] <- LHD_data[,j]*width[j] + space[1, j] | ||
} | ||
return(LHD_data) | ||
} | ||
|
||
#' LowCon: A Design-based Subsampling Approach in a Misspecified Linear Model | ||
#' | ||
#' A subsampling method based space-filling design for misspecified linear model proposed by Meng et.al. (2021). | ||
#' | ||
#' @param X A data.frame or matrix of explanatory variables. | ||
#' @param n Subsample size. | ||
#' @param theta Percentage of data shrinkage. Default to 1. | ||
#' @param seed Random seed for the sampling. | ||
#' | ||
#' @return Subsample index. | ||
#' @references Cheng Meng, Rui Xie, Abhyuday Mandal, Xinlian Zhang, Wenxuan Zhong & Ping Ma (2021) | ||
#' \emph{LowCon: A Design-based Subsampling Approach in a Misspecified Linear Model, | ||
#' Journal of Computational and Graphical Statistics, 30:3, 694-708}, | ||
#' \url{https://www.tandfonline.com/doi/full/10.1080/10618600.2020.1844215}. | ||
#' | ||
#' @examples | ||
#' data <- data_numeric_regression | ||
#' X <- data[-which(names(data) == "y")] | ||
#' LowCon(X, n = 100, theta = 1, seed = NULL) | ||
#' @export | ||
LowCon <- function(X, n, theta = 1, seed = NULL){ | ||
if (!is.null(seed)) withr::local_seed(seed) | ||
|
||
X <- scale_neg_pos_1(as.matrix(X)) | ||
attributes(X) <- attributes(X)["dim"] | ||
p <- ncol(X) | ||
|
||
LHD_data <- lhs::randomLHS(n, p) | ||
LHD_data <- trans_LHD_adapt_X(LHD_data, X, theta) | ||
|
||
index <- RANN::nn2(X, LHD_data, k=1, treetype = "kd")$nn.idx | ||
return(as.vector(index)) | ||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.