Skip to content

Commit

Permalink
Add new implemention for IBOSS.
Browse files Browse the repository at this point in the history
  • Loading branch information
JieYinStat committed Feb 17, 2024
1 parent 08365ba commit 98f987c
Show file tree
Hide file tree
Showing 20 changed files with 435 additions and 90 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export(myArma_OSS)
export(myR_IBOSS)
export(myR_OSS)
export(myRcpp_IBOSS)
export(myRcpp_cstyle_IBOSS)
export(subsampling)
importFrom(Rcpp,sourceCpp)
useDynLib(dbsubsampling, .registration = TRUE)
93 changes: 76 additions & 17 deletions R/IBOSS.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
#' Information-Based Optimal Subdata Selection for Big Data Linear Regression (IBOSS, Rcpp-c++-style)
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
##
## R main function `IBOSS`. `Rcpp`-C++-style by `Wang`.
##
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------

#' Information-Based Optimal Subdata Selection for Big Data Linear Regression (IBOSS, `Rcpp`-c++-style by `Wang`)
#'
#' A subsampling method based on D-optiaml criterion inspired by optimal experimental design
#' used for linear regression.
Expand Down Expand Up @@ -30,7 +38,40 @@ IBOSS <- function(n, X) {
return(idx)
}

#' IBOSS with `Rcpp` by the package itself.

## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
##
## R main function `myRcpp_cstyle_IBOSS`. `Rcpp`-C++-style by `the package itself`.
##
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------


#' IBOSS with `Rcpp-C++-style` by `the package itself`.
#'
#' @param n Subsample size.
#' @param X A data.frame or matrix consists of explanatory variables.
#'
#' @return Subsample index.
#' @export
myRcpp_cstyle_IBOSS <- function(n, X){
if (floor(n / 2 / ncol(X)) == 0) stop("Subsample size too small. n/2/ncol(X) must >= 1")
X = as.matrix(X)
rcpp_cstyle_IBOSS(X, n)
}


## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
##
## R main function `myRcpp_IBOSS`. `Rcpp`-r-style by `the package itself`.
##
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------


#' IBOSS with `Rcpp`-r-style by `the package itself`.
#'
#' @param n Subsample size.
#' @param X A data.frame or matrix consists of explanatory variables.
Expand All @@ -43,14 +84,46 @@ myRcpp_IBOSS <- function(n, X){
rcppIBOSS(X, n)
}


## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
##
## R main function `myArma_IBOSS`. `RcppArmadillo`-r-style by `the package itself`.
##
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------


#' IBOSS with `RcppArmadillo`-r-style by `the package itself`.
#'
#' @param n Subsample size.
#' @param X A data.frame or matrix consists of explanatory variables.
#'
#' @return Subsample index.
#' @export
myArma_IBOSS <- function(n, X){
if (floor(n / 2 / ncol(X)) == 0) stop("Subsample size too small. n/2/ncol(X) must >= 1")
X = as.matrix(X)
armarcppIBOSS(X, n)
}


## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
##
## R main function `myR_IBOSS`. `base-R` by `the package itself`.
##
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------

rbottom_r_index <- function(x, r){
return(which(x <= sort(x)[r]))
}
rtop_r_index <- function(x, r){
return(which(x >= sort(x, decreasing = TRUE)[r]))
}

#' IBOSS with `base R` by the package itself.
#' IBOSS with `base R` by `the package itself`.
#'
#' @param n Subsample size.
#' @param X A data.frame or matrix consists of explanatory variables.
Expand All @@ -72,17 +145,3 @@ myR_IBOSS <- function(n, X){
}
return(index)
}

#' IBOSS with `RcppArmadillo` by the package itself.
#'
#' @param n Subsample size.
#' @param X A data.frame or matrix consists of explanatory variables.
#'
#' @return Subsample index.
#' @export
myArma_IBOSS <- function(n, X){
if (floor(n / 2 / ncol(X)) == 0) stop("Subsample size too small. n/2/ncol(X) must >= 1")
X = as.matrix(X)
armarcppIBOSS(X, n)
}

73 changes: 48 additions & 25 deletions R/OSS.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
#' Orthogonal subsampling for big data linear regression (OSS, Rcpp-version)
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
##
## R main function `OSS`. `Rcpp`-version by `the package itself`.
##
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------

#' Orthogonal subsampling for big data linear regression (OSS, `Rcpp`-version by `the package itself`)
#'
#' A subsampling method based on orthogonal array for linear model.
#'
Expand All @@ -25,6 +33,40 @@ OSS <- function(n, X){
return(subindex)
}

## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
##
## R main function `myArma_OSS`. `RcppArmadillo`-version by `Zhu`.
##
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------

#' OSS (`RcppArmadillo`-version by `Zhu`)
#'
#' @param n Subsample size.
#' @param X A matrix.
#'
#' @return Subsample index.
#' @export
# @examples
# data_numeric_regression["y"] <- NULL
# X <- as.matrix(data_numeric_regression)
# myR_OSS(X, 100)
myArma_OSS <- function(n, X){
X <- scale(as.matrix(X))
attributes(X) <- attributes(X)["dim"]
as.vector(armaOSS(X, n))
}


## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
##
## R main function `myR_OSS`. `base-R` by `the package itself`.
##
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------

#' Get L2 norm (r-version)
#'
#' Get L2 norm of a matrix or data frame.
Expand Down Expand Up @@ -75,7 +117,7 @@ rbottom_t_index <- function(loss, t){
}


#' OSS (R-version)
#' OSS with `base R` by `the package itself`.
#'
#' @param n Subsample size.
#' @param X A matrix.
Expand Down Expand Up @@ -119,32 +161,13 @@ myR_OSS <- function(n, X){
loss <- loss[rbottom_t_index(loss,t)]
}

# if (length(candi) == 0) {
# index <- index[1:i]
# break
# }
# if (length(candi) == 0) {
# index <- index[1:i]
# break
# }
# Update loss
loss <- loss + rComputeLoss(candi, index[i], X, norm)
}

return(index)
}

#' OSS (RcppArmadillo-version)
#'
#' @param n Subsample size.
#' @param X A matrix.
#'
#' @return Subsample index.
#' @export
# @examples
# data_numeric_regression["y"] <- NULL
# X <- as.matrix(data_numeric_regression)
# myR_OSS(X, 100)
myArma_OSS <- function(n, X){
X <- scale(as.matrix(X))
attributes(X) <- attributes(X)["dim"]
as.vector(armaOSS(X, n))
}


24 changes: 22 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Get subsample index of other column(except the first column) (IBOSS core code, Rcpp-C++-style)
#' Get subsample index of other column(except the first column) (IBOSS core code, `Rcpp-C++-style by Wang`)
#'
#' @param r Subsample size of the column.
#' @param z A numeric vector. the column.
Expand All @@ -11,7 +11,7 @@ getIdxR_cpp <- function(r, z, rdel) {
.Call(`_dbsubsampling_getIdxR_cpp`, r, z, rdel)
}

#' Get subsample index of the first column (IBOSS core code, Rcpp-C++-style)
#' Get subsample index of the first column (IBOSS core code, Rcpp-C++-style by Wang)
#'
#' @param r Subsample size of the first column.
#' @param z A numeric vector. the first column.
Expand All @@ -20,6 +20,26 @@ getIdx_cpp <- function(r, z) {
.Call(`_dbsubsampling_getIdx_cpp`, r, z)
}

Append_Index <- function(index, new_index) {
.Call(`_dbsubsampling_Append_Index`, index, new_index)
}

GetQuantile <- function(temp, r) {
.Call(`_dbsubsampling_GetQuantile`, temp, r)
}

Get_Candi_Quan <- function(x, next_x, candi, lower, upper, r) {
.Call(`_dbsubsampling_Get_Candi_Quan`, x, next_x, candi, lower, upper, r)
}

#' IBOSS with `Rcpp-C++-style` by myself (`myRcpp_cstyle_IBOSS` core c++ code)
#' @param n Subsample size.
#' @param X A data.frame or matrix consists of explanatory variables.
#' @return Subsample index.
rcpp_cstyle_IBOSS <- function(X, n) {
.Call(`_dbsubsampling_rcpp_cstyle_IBOSS`, X, n)
}

#' IBOSS with `Rcpp` (`myRcppIBOSS` core c++ code)
#' @param n Subsample size.
#' @param X A data.frame or matrix consists of explanatory variables.
Expand Down
2 changes: 1 addition & 1 deletion man/IBOSS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/OSS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/getIdxR_cpp.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/getIdx_cpp.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/myArma_IBOSS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/myArma_OSS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/myR_IBOSS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/myR_OSS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 98f987c

Please sign in to comment.