From 58908bc7262aaaf3719b77ad981829318748c7f6 Mon Sep 17 00:00:00 2001
From: JieYinStat <yjabcdhot@hotmail.com>
Date: Fri, 16 Feb 2024 11:18:22 +0800
Subject: [PATCH] Add OSS

---
 DESCRIPTION                   |   4 +-
 NAMESPACE                     |   1 +
 R/IBOSS.R                     |   4 +-
 R/OSMAC.R                     |   6 +-
 R/OSS.R                       | 135 +++++++++++++++++++++++++++++++
 R/RcppExports.R               |  96 ++++++++++++++++++++++
 R/Unif.R                      |   4 +-
 R/subsampling.R               |  10 ++-
 README.Rmd                    |   8 +-
 README.md                     |  18 ++++-
 man/ComputeLoss.Rd            |  23 ++++++
 man/IBOSS.Rd                  |   4 +-
 man/L2norm.Rd                 |  17 ++++
 man/OSS.Rd                    |  31 ++++++++
 man/Unif.Rd                   |   4 +-
 man/armaComputeLoss.Rd        |  25 ++++++
 man/armaOSS.Rd                |  21 +++++
 man/armaScaleMatrix.Rd        |  17 ++++
 man/armabottom_k.Rd           |  19 +++++
 man/bottom_t_index.Rd         |  19 +++++
 man/getIdxR_cpp.Rd            |  21 +++++
 man/getIdx_cpp.Rd             |  19 +++++
 man/get_Logistic_MLE.Rd       |   6 +-
 man/rComputeLoss.Rd           |  25 ++++++
 man/rL2norm.Rd                |  17 ++++
 man/rOSS.Rd                   |  19 +++++
 man/rbottom_t_index.Rd        |  19 +++++
 man/rcppOSS.Rd                |  19 +++++
 man/subsampling.Rd            |   6 +-
 src/IBOSS.cpp                 |  49 ++----------
 src/Makevars                  |   2 +
 src/Makevars.win              |   2 +
 src/OSS.cpp                   | 146 ++++++++++++++++++++++++++++++++++
 src/RcppExports.cpp           | 109 +++++++++++++++++++++++++
 src/armaOSS.cpp               | 132 ++++++++++++++++++++++++++++++
 tests/testthat/test-OSS.R     |  12 +++
 tests/testthat/test-armaOSS.R |  11 +++
 vignettes/Subsampling.Rmd     |  20 +++--
 38 files changed, 1026 insertions(+), 74 deletions(-)
 create mode 100644 R/OSS.R
 create mode 100644 man/ComputeLoss.Rd
 create mode 100644 man/L2norm.Rd
 create mode 100644 man/OSS.Rd
 create mode 100644 man/armaComputeLoss.Rd
 create mode 100644 man/armaOSS.Rd
 create mode 100644 man/armaScaleMatrix.Rd
 create mode 100644 man/armabottom_k.Rd
 create mode 100644 man/bottom_t_index.Rd
 create mode 100644 man/getIdxR_cpp.Rd
 create mode 100644 man/getIdx_cpp.Rd
 create mode 100644 man/rComputeLoss.Rd
 create mode 100644 man/rL2norm.Rd
 create mode 100644 man/rOSS.Rd
 create mode 100644 man/rbottom_t_index.Rd
 create mode 100644 man/rcppOSS.Rd
 create mode 100644 src/Makevars
 create mode 100644 src/Makevars.win
 create mode 100644 src/OSS.cpp
 create mode 100644 src/armaOSS.cpp
 create mode 100644 tests/testthat/test-OSS.R
 create mode 100644 tests/testthat/test-armaOSS.R

diff --git a/DESCRIPTION b/DESCRIPTION
index ac31561..56a78a5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -15,6 +15,7 @@ BugReports: https://github.com/JieYinStat/dbsubsampling/issues
 Suggests: 
     knitr,
     mvtnorm,
+    RcppArmadillo,
     rmarkdown,
     testthat (>= 3.0.0)
 Config/testthat/edition: 3
@@ -26,4 +27,5 @@ Depends:
 LazyData: true
 VignetteBuilder: knitr
 LinkingTo: 
-    Rcpp
+    Rcpp,
+    RcppArmadillo
diff --git a/NAMESPACE b/NAMESPACE
index e82fb7d..9c53540 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(IBOSS)
 export(OSMAC)
+export(OSS)
 export(Unif)
 export(subsampling)
 importFrom(Rcpp,sourceCpp)
diff --git a/R/IBOSS.R b/R/IBOSS.R
index e0a373c..33354bc 100644
--- a/R/IBOSS.R
+++ b/R/IBOSS.R
@@ -3,10 +3,10 @@
 #' A subsampling method based on D-optiaml criterion inspired by optimal experimental design
 #' used for linear regression.
 #'
-#' @param n subsample size.
+#' @param n Subsample size.
 #' @param X A data.frame or matrix consists of explanatory variables.
 #'
-#' @return subsample index.
+#' @return Subsample index.
 #' @references HaiYing Wang, Min Yang & John Stufken (2019)
 #' \emph{Information-Based Optimal Subdata Selection for Big Data Linear Regression,
 #' Journal of the American Statistical Association, 114:525, 393-405},
diff --git a/R/OSMAC.R b/R/OSMAC.R
index 81bc212..357a55b 100644
--- a/R/OSMAC.R
+++ b/R/OSMAC.R
@@ -7,9 +7,9 @@
 #' @param w A numeric vector. The weight of each sample.
 #'
 #' @return A list.
-#'  * `par` : parameter estimation.
-#'  * `message` : message during iteration.
-#'  * `iter` : iteration times.
+#'  * `par` : Parameter estimation.
+#'  * `message` : Message during iteration.
+#'  * `iter` : Iteration times.
 get_Logistic_MLE <- function(x, y, w) {
   d <- ncol(x)
   beta <- rep(0, d)
diff --git a/R/OSS.R b/R/OSS.R
new file mode 100644
index 0000000..c564d5b
--- /dev/null
+++ b/R/OSS.R
@@ -0,0 +1,135 @@
+#' Orthogonal subsampling for big data linear regression(OSS)
+#'
+#' A subsampling method based on orthogonal array for linear model.
+#'
+#' @param n Subsample size.
+#' @param X A matrix or data frame.
+#'
+#' @return Subsample index.
+#'
+#' @examples
+#' data_numeric_regression["y"] <- NULL
+#' X <- as.matrix(data_numeric_regression)
+#' OSS(100, X)
+#'
+#' @references Lin Wang, Jake Elmstedt, Weng Kee Wong & Hongquan Xu (2021)
+#' \emph{Orthogonal subsampling for big data linear regression,
+#' The Annals of Applied Statistics, 15(3), 1273-1290},
+#' \url{https://projecteuclid.org/journals/annals-of-applied-statistics/volume-15/issue-3/Orthogonal-subsampling-for-big-data-linear-regression/10.1214/21-AOAS1462.short?tab=ArticleLink}.
+#'
+#' @export
+OSS <- function(n, X){
+  # Standardise columns; keep only "dim" so rcppOSS() gets a plain matrix.
+  scaled <- scale(as.matrix(X))
+  attributes(scaled) <- attributes(scaled)["dim"]
+  rcppOSS(X = scaled, n = n)
+}
+
+#' Get L2 norm (r-version)
+#'
+#' Get L2 norm of a matrix or data frame.
+#' @param X A matrix or data.frame.
+#'
+#' @return L2 norm of `X`(every row).
+#'
+# @examples
+# X <- matrix(1:12, 4, 3)
+# X <- scale(X)
+# rL2norm(X)
+rL2norm <- function(X) {
+  rowSums(X^2) # per-row sum of squares (no square root is taken)
+}
+
+#' Compute loss function for OSS (r-version)
+#'
+#' @param candi The index of the candidate set.
+#' @param last_index The index of the selected point in the last iteration.
+#' @param X The whole data.
+#' @param norm Norm of the whole data.
+#' @param p Numbers of columns of the data.
+#'
+#' @return Loss of every point in candidate set.
+# @examples
+# X <- matrix(1:20, 5, 4)
+# X <- scale(X)
+# norm <- rL2norm(X)
+# rComputeLoss(c(1,3,4), 2, X, norm)
+rComputeLoss <- function(candi, last_index, X, norm, p = ncol(X)){
+  # Transposed block: one column per candidate; drop = FALSE keeps a lone candidate a matrix.
+  delta <- colSums(sign(t(X[candi, , drop = FALSE])) == sign(X[last_index, ]))
+  (p - norm[candi]/2  -  norm[last_index]/2 + delta)^2
+}
+
+#' Find t smallest index of a vector.
+#'
+#' @param loss A vector.
+#' @param t An integer.
+#'
+#' @return The index of the t smallest element of the vector.
+#'
+# @examples
+# loss <- rnorm(10)
+# rbottom_t_index(loss, 3)
+rbottom_t_index <- function(loss, t){
+  sort(order(loss)[seq_len(min(floor(t), length(loss)))]) # exactly t indices, ascending; ties by position
+}
+
+
+#' OSS (r-version)
+#'
+#' @param n Subsample size.
+#' @param X A matrix.
+#'
+#' @return Subsample index.
+#'
+# @examples
+# data_numeric_regression["y"] <- NULL
+# X <- as.matrix(data_numeric_regression)
+# rOSS(100, X)
+rOSS <- function(n, X){
+  X <- scale(as.matrix(X))
+  attributes(X) <- attributes(X)["dim"] # keep only "dim": a plain matrix
+  N <- nrow(X)
+  stopifnot(length(n) == 1, n >= 1, n <= N)
+
+  norm <- rL2norm(X)
+  # Exponent of the elimination schedule used when N <= n^2.
+  r <- log(N)/log(n)
+
+  # Initial: start from the row with the largest norm.
+  index <- numeric(n)
+  index[1] <- which.max(norm)
+  candi <- seq_len(N)[-index[1]]
+  loss <- rComputeLoss(candi, index[1], X, norm)
+
+  # seq_len(n)[-1] is empty when n == 1, whereas 2:n would count down.
+  for (i in seq_len(n)[-1]) {
+    # Election: take the candidate with the smallest accumulated loss.
+    tmp <- which.min(loss)
+    index[i] <- candi[tmp]
+    candi <- candi[-tmp]
+    loss <- loss[-tmp]
+
+    # The subsample is complete; the bookkeeping below would never be read.
+    if (i == n) {
+      break
+    }
+
+    # Elimination: keep roughly `keep_n` candidates with the smallest loss.
+    keep_n <- if (N > n^2) N/i else N/(i^(r-1))
+    if (length(candi) > keep_n) {
+      keep <- rbottom_t_index(loss, keep_n)
+      candi <- candi[keep]
+      loss <- loss[keep]
+    }
+
+    # Update loss with the contribution of the point just selected.
+    loss <- loss + rComputeLoss(candi, index[i], X, norm)
+  }
+
+  index
+}
+
+
+
+
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 803384c..e84eb44 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -1,11 +1,107 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
+#' Get subsample index of other column(except the first column) (IBOSS)
+#'
+#' @param r Subsample size of the column.
+#' @param z A numeric vector. the column.
+#' @param rdel Subsample index of the first column.
+#' @return Subsample index of the column.
 getIdxR_cpp <- function(r, z, rdel) {
     .Call(`_dbsubsampling_getIdxR_cpp`, r, z, rdel)
 }
 
+#' Get subsample index of the first column(IBOSS)
+#'
+#' @param r Subsample size of the first column.
+#' @param z A numeric vector. the first column.
+#' @return Subsample index of the first column.
 getIdx_cpp <- function(r, z) {
     .Call(`_dbsubsampling_getIdx_cpp`, r, z)
 }
 
+#' Get L2 norm
+#'
+#' Get L2 norm of a matrix or data frame.
+#'
+#' @param X A matrix or data.frame.
+#'
+#' @return L2 norm of `X`(every row).
+L2norm <- function(X) {
+    .Call(`_dbsubsampling_L2norm`, X)
+}
+
+#' Find t smallest index of a vector
+#'
+#' @param loss A vector.
+#' @param t An integer.
+#'
+#' @return The index of the t smallest element of the vector.
+bottom_t_index <- function(loss, t) {
+    .Call(`_dbsubsampling_bottom_t_index`, loss, t)
+}
+
+#' Compute loss function for OSS
+#'
+#' @param candi The index of the candidate set.
+#' @param last_index The index of the selected point in the last iteration.
+#' @param X The whole data.
+#' @param norm Norm of the whole data.
+#'
+#' @return Loss of every point in candidate set.
+ComputeLoss <- function(candi, last_index, X, norm) {
+    .Call(`_dbsubsampling_ComputeLoss`, candi, last_index, X, norm)
+}
+
+#' Rcpp version OSS (core code of `OSS`)
+#'
+#' @param X A matrix.
+#' @param n Subsample size.
+#'
+#' @return Subsample index.
+rcppOSS <- function(X, n) {
+    .Call(`_dbsubsampling_rcppOSS`, X, n)
+}
+
+#' Find t smallest index of a vector (RcppArmadillo-version)
+#'
+#' @param x A vector.
+#' @param k A int.
+#'
+#' @return The index of the t smallest element of the vector.
+armabottom_k <- function(x, k) {
+    .Call(`_dbsubsampling_armabottom_k`, x, k)
+}
+
+#' Scale a matrix (RcppArmadillo-version)
+#'
+#' @param X A matrix.
+#'
+#' @return Scaled matrix.
+armaScaleMatrix <- function(X) {
+    .Call(`_dbsubsampling_armaScaleMatrix`, X)
+}
+
+#' Compute loss function for OSS (RcppArmadillo-version)
+#'
+#' @param X Matrix of the candidate set.
+#' @param xa Norm of the candidate set.
+#' @param y A vector. The point which be selected last iteration.
+#' @param ya Norm of `y`.
+#' @param tPow The power of the loss function.
+#'
+#' @return Loss of the candidate set.
+armaComputeLoss <- function(X, xa, y, ya, tPow) {
+    .Call(`_dbsubsampling_armaComputeLoss`, X, xa, y, ya, tPow)
+}
+
+#' OSS (RcppArmadillo-version)
+#' @param x A matrix.
+#' @param k Subsample size.
+#' @param tPow The power of the loss function.
+#'
+#' @return Subsample index.
+armaOSS <- function(x, k, tPow = 2) {
+    .Call(`_dbsubsampling_armaOSS`, x, k, tPow)
+}
+
diff --git a/R/Unif.R b/R/Unif.R
index 9edae03..b61403d 100644
--- a/R/Unif.R
+++ b/R/Unif.R
@@ -5,8 +5,8 @@
 #' @param N Total sample size.
 #' @param n Subsample size.
 #' @param replace A boolean.
-#'  * `TRUE` (the default): sampling with replace.
-#'  * `FALSE`: sampling without replace
+#'  * `TRUE` (the default): Sampling with replace.
+#'  * `FALSE`: Sampling without replace
 #' @param seed Random seed which is an integer (default NULL). This random seed is only valid for this sampling and
 #'  will not affect the external environment
 #'
diff --git a/R/subsampling.R b/R/subsampling.R
index 95bb433..77f2b98 100644
--- a/R/subsampling.R
+++ b/R/subsampling.R
@@ -13,9 +13,10 @@
 #'  * `OSMAC_A`: A subsampling method based on A-optimal for logistic regression proposed by Wang et.al. (2018).
 #'  * `OSMAC_L`: A subsampling method based on L-optimal for logistic regression proposed by Wang et.al. (2018).
 #'  * `IBOSS`: A subsampling method based on D-optimal for linear regression proposed by Wang et.al. (2019).
+#'  * `OSS` : A subsampling method based on Orthogonal Array proposed by Wang et.al.(2021).
 #' @param replace A boolean.
-#'  * `TRUE` (the default): sampling with replace.
-#'  * `FALSE`: sampling without replace
+#'  * `TRUE` (the default): Sampling with replace.
+#'  * `FALSE`: Sampling without replace
 #' @param seed_1 Random seed for the first stage sampling or Unif.
 #' @param seed_2 Random seed for the second stage sampling.
 #' @param na_method Method to handle NA.
@@ -33,6 +34,7 @@
 #'
 #' data_numeric <- data_numeric_regression
 #' subsampling(y_name = "y", data = data_numeric, n = 100, method = "IBOSS")
+#' subsampling(y_name = "y", data = data_numeric, n = 30, method = "OSS")
 subsampling <- function(y_name, x_name = NULL, data, n, pilot_n = NULL, method = "Unif",
                         replace = TRUE, seed_1 = NULL, seed_2 = NULL, na_method = NULL) {
 
@@ -47,10 +49,10 @@ subsampling <- function(y_name, x_name = NULL, data, n, pilot_n = NULL, method =
          Unif = Unif(N = N, n = n, seed = seed_1, replace = TRUE),
          IBOSS = IBOSS(n = n, X = x),
          OSMAC_A = OSMAC(X = x, Y = y, r1 = pilot_n, r2 = n, method = "mmse", seed_1 = seed_1, seed_2 = seed_2),
-         OSMAC_L = OSMAC(X = x, Y = y, r1 = pilot_n, r2 = n, method = "mvc", seed_1 = seed_1, seed_2 = seed_2)
+         OSMAC_L = OSMAC(X = x, Y = y, r1 = pilot_n, r2 = n, method = "mvc", seed_1 = seed_1, seed_2 = seed_2),
+         OSS = OSS(n = n, X = x)
          # Support =
          # Lowcon =
-         # OSS =
          # DDS =
     )
   return(subsample_index)
diff --git a/README.Rmd b/README.Rmd
index 389b0b0..1a6fde5 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -32,7 +32,8 @@ devtools::install_github("JieYinStat/dbsubsampling")
 
 ## Example
 
-This is a basic example which shows you how to get subsample index, such as uniform sampling, OSMAC and IBOSS:
+This is a basic example which shows you how to get subsample index, such as uniform sampling, OSMAC, IBOSS and
+OSS:
 
 ```{r example}
 library(dbsubsampling)
@@ -52,7 +53,10 @@ subsampling(y_name = "y", data = data_binary, n = 10, pilot_n = 100, method = "O
 
 # IBOSS
 data_numeric <- data_numeric_regression
-subsampling(y_name = "y", data = data_numeric, n = 30, method = "IBOSS")
+subsampling(y_name = "y", data = data_numeric, n = 100, method = "IBOSS")
+
+# OSS
+subsampling(y_name = "y", data = data_numeric, n = 30, method = "OSS")
 ```
 
 You can get more detailed examples from the article column on the [website](jieyinstat.github.io/dbsubsampling/).
diff --git a/README.md b/README.md
index f357838..bd81dba 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ devtools::install_github("JieYinStat/dbsubsampling")
 ## Example
 
 This is a basic example which shows you how to get subsample index, such
-as uniform sampling, OSMAC and IBOSS:
+as uniform sampling, OSMAC, IBOSS and OSS:
 
 ``` r
 library(dbsubsampling)
@@ -47,9 +47,19 @@ subsampling(y_name = "y", data = data_binary, n = 10, pilot_n = 100, method = "O
 
 # IBOSS
 data_numeric <- data_numeric_regression
-subsampling(y_name = "y", data = data_numeric, n = 30, method = "IBOSS")
-#>  [1]  419 1144 3395 3484 3896 5121 6203 7915 7967 8026 8156 8694 8841 9117 8438
-#> [16] 3121
+subsampling(y_name = "y", data = data_numeric, n = 100, method = "IBOSS")
+#>  [1]  183  226  395  419  584  666  711  758 1027 1144 1324 1445 1940 1946 1978
+#> [16] 2018 2673 2982 3190 3395 3484 3612 3632 3638 3696 3816 3835 3896 3921 4256
+#> [31] 4312 4405 4523 4551 4729 4938 5121 5226 5342 5410 5679 5770 5995 6089 6163
+#> [46] 6170 6203 6250 6525 6964 6979 7053 7198 7407 7564 7633 7915 7935 7967 7992
+#> [61] 8026 8088 8106 8156 8161 8267 8306 8501 8503 8521 8534 8694 8805 8841 9117
+#> [76] 9211 9302 9364 9398 9456 9676 9946 9971 9989 1173 2344 5394 8438 8567 9239
+#> [91] 1787 2104 2215 3121 7159 9133
+
+# OSS
+subsampling(y_name = "y", data = data_numeric, n = 30, method = "OSS")
+#>  [1] 8841 8961 1902 7512   48 9867 6547 9784 3392 3622 5780 6594 1890 1850 8335
+#> [16] 1254 6204 1257 4611 3831 4782 4919 1579 3404  718 7189 2060 4899  590 1800
 ```
 
 You can get more detailed examples from the article column on the
diff --git a/man/ComputeLoss.Rd b/man/ComputeLoss.Rd
new file mode 100644
index 0000000..558e5c5
--- /dev/null
+++ b/man/ComputeLoss.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{ComputeLoss}
+\alias{ComputeLoss}
+\title{Compute loss function for OSS}
+\usage{
+ComputeLoss(candi, last_index, X, norm)
+}
+\arguments{
+\item{candi}{The index of the candidate set.}
+
+\item{last_index}{The index of the selected point in the last iteration.}
+
+\item{X}{The whole data.}
+
+\item{norm}{Norm of the whole data.}
+}
+\value{
+Loss of every point in candidate set.
+}
+\description{
+Compute loss function for OSS
+}
diff --git a/man/IBOSS.Rd b/man/IBOSS.Rd
index c33af84..791c4fe 100644
--- a/man/IBOSS.Rd
+++ b/man/IBOSS.Rd
@@ -7,12 +7,12 @@
 IBOSS(n, X)
 }
 \arguments{
-\item{n}{subsample size.}
+\item{n}{Subsample size.}
 
 \item{X}{A data.frame or matrix consists of explanatory variables.}
 }
 \value{
-subsample index.
+Subsample index.
 }
 \description{
 A subsampling method based on D-optiaml criterion inspired by optimal experimental design
diff --git a/man/L2norm.Rd b/man/L2norm.Rd
new file mode 100644
index 0000000..e7f2000
--- /dev/null
+++ b/man/L2norm.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{L2norm}
+\alias{L2norm}
+\title{Get L2 norm}
+\usage{
+L2norm(X)
+}
+\arguments{
+\item{X}{A matrix or data.frame.}
+}
+\value{
+L2 norm of \code{X}(every row).
+}
+\description{
+Get L2 norm of a matrix or data frame.
+}
diff --git a/man/OSS.Rd b/man/OSS.Rd
new file mode 100644
index 0000000..f8eefaf
--- /dev/null
+++ b/man/OSS.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/OSS.R
+\name{OSS}
+\alias{OSS}
+\title{Orthogonal subsampling for big data linear regression(OSS)}
+\usage{
+OSS(n, X)
+}
+\arguments{
+\item{n}{Subsample size.}
+
+\item{X}{A matrix or data frame.}
+}
+\value{
+Subsample index.
+}
+\description{
+A subsampling method based on orthogonal array for linear model.
+}
+\examples{
+data_numeric_regression["y"] <- NULL
+X <- as.matrix(data_numeric_regression)
+OSS(100, X)
+
+}
+\references{
+Lin Wang, Jake Elmstedt, Weng Kee Wong & Hongquan Xu (2021)
+\emph{Orthogonal subsampling for big data linear regression,
+The Annals of Applied Statistics, 15(3), 1273-1290},
+\url{https://projecteuclid.org/journals/annals-of-applied-statistics/volume-15/issue-3/Orthogonal-subsampling-for-big-data-linear-regression/10.1214/21-AOAS1462.short?tab=ArticleLink}.
+}
diff --git a/man/Unif.Rd b/man/Unif.Rd
index b6270d4..9d31f08 100644
--- a/man/Unif.Rd
+++ b/man/Unif.Rd
@@ -16,8 +16,8 @@ will not affect the external environment}
 
 \item{replace}{A boolean.
 \itemize{
-\item \code{TRUE} (the default): sampling with replace.
-\item \code{FALSE}: sampling without replace
+\item \code{TRUE} (the default): Sampling with replace.
+\item \code{FALSE}: Sampling without replace
 }}
 }
 \value{
diff --git a/man/armaComputeLoss.Rd b/man/armaComputeLoss.Rd
new file mode 100644
index 0000000..e434330
--- /dev/null
+++ b/man/armaComputeLoss.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{armaComputeLoss}
+\alias{armaComputeLoss}
+\title{Compute loss function for OSS (RcppArmadillo-version)}
+\usage{
+armaComputeLoss(X, xa, y, ya, tPow)
+}
+\arguments{
+\item{X}{Matrix of the candidate set.}
+
+\item{xa}{Norm of the candidate set.}
+
+\item{y}{A vector. The point which be selected last iteration.}
+
+\item{ya}{Norm of \code{y}.}
+
+\item{tPow}{The power of the loss function.}
+}
+\value{
+Loss of the candidate set.
+}
+\description{
+Compute loss function for OSS (RcppArmadillo-version)
+}
diff --git a/man/armaOSS.Rd b/man/armaOSS.Rd
new file mode 100644
index 0000000..998cc3f
--- /dev/null
+++ b/man/armaOSS.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{armaOSS}
+\alias{armaOSS}
+\title{OSS (RcppArmadillo-version)}
+\usage{
+armaOSS(x, k, tPow = 2)
+}
+\arguments{
+\item{x}{A matrix.}
+
+\item{k}{Subsample size.}
+
+\item{tPow}{The power of the loss function.}
+}
+\value{
+Subsample index.
+}
+\description{
+OSS (RcppArmadillo-version)
+}
diff --git a/man/armaScaleMatrix.Rd b/man/armaScaleMatrix.Rd
new file mode 100644
index 0000000..884579a
--- /dev/null
+++ b/man/armaScaleMatrix.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{armaScaleMatrix}
+\alias{armaScaleMatrix}
+\title{Scale a matrix (RcppArmadillo-version)}
+\usage{
+armaScaleMatrix(X)
+}
+\arguments{
+\item{X}{A matrix.}
+}
+\value{
+Scaled matrix.
+}
+\description{
+Scale a matrix (RcppArmadillo-version)
+}
diff --git a/man/armabottom_k.Rd b/man/armabottom_k.Rd
new file mode 100644
index 0000000..b76ac53
--- /dev/null
+++ b/man/armabottom_k.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{armabottom_k}
+\alias{armabottom_k}
+\title{Find t smallest index of a vector (RcppArmadillo-version)}
+\usage{
+armabottom_k(x, k)
+}
+\arguments{
+\item{x}{A vector.}
+
+\item{k}{A int.}
+}
+\value{
+The index of the t smallest element of the vector.
+}
+\description{
+Find t smallest index of a vector (RcppArmadillo-version)
+}
diff --git a/man/bottom_t_index.Rd b/man/bottom_t_index.Rd
new file mode 100644
index 0000000..24f84fc
--- /dev/null
+++ b/man/bottom_t_index.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{bottom_t_index}
+\alias{bottom_t_index}
+\title{Find t smallest index of a vector}
+\usage{
+bottom_t_index(loss, t)
+}
+\arguments{
+\item{loss}{A vector.}
+
+\item{t}{An integer.}
+}
+\value{
+The index of the t smallest element of the vector.
+}
+\description{
+Find t smallest index of a vector
+}
diff --git a/man/getIdxR_cpp.Rd b/man/getIdxR_cpp.Rd
new file mode 100644
index 0000000..6ad8f47
--- /dev/null
+++ b/man/getIdxR_cpp.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{getIdxR_cpp}
+\alias{getIdxR_cpp}
+\title{Get subsample index of other column(except the first column) (IBOSS)}
+\usage{
+getIdxR_cpp(r, z, rdel)
+}
+\arguments{
+\item{r}{Subsample size of the column.}
+
+\item{z}{A numeric vector. the column.}
+
+\item{rdel}{Subsample index of the first column.}
+}
+\value{
+Subsample index of the column.
+}
+\description{
+Get subsample index of other column(except the first column) (IBOSS)
+}
diff --git a/man/getIdx_cpp.Rd b/man/getIdx_cpp.Rd
new file mode 100644
index 0000000..79ade97
--- /dev/null
+++ b/man/getIdx_cpp.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{getIdx_cpp}
+\alias{getIdx_cpp}
+\title{Get subsample index of the first column(IBOSS)}
+\usage{
+getIdx_cpp(r, z)
+}
+\arguments{
+\item{r}{Subsample size of the first column.}
+
+\item{z}{A numeric vector. the first column.}
+}
+\value{
+Subsample index of the first column.
+}
+\description{
+Get subsample index of the first column(IBOSS)
+}
diff --git a/man/get_Logistic_MLE.Rd b/man/get_Logistic_MLE.Rd
index 64dc541..d5e7faa 100644
--- a/man/get_Logistic_MLE.Rd
+++ b/man/get_Logistic_MLE.Rd
@@ -16,9 +16,9 @@ get_Logistic_MLE(x, y, w)
 \value{
 A list.
 \itemize{
-\item \code{par} : parameter estimation.
-\item \code{message} : message during iteration.
-\item \code{iter} : iteration times.
+\item \code{par} : Parameter estimation.
+\item \code{message} : Message during iteration.
+\item \code{iter} : Iteration times.
 }
 }
 \description{
diff --git a/man/rComputeLoss.Rd b/man/rComputeLoss.Rd
new file mode 100644
index 0000000..ea6ecf7
--- /dev/null
+++ b/man/rComputeLoss.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/OSS.R
+\name{rComputeLoss}
+\alias{rComputeLoss}
+\title{Compute loss function for OSS (r-version)}
+\usage{
+rComputeLoss(candi, last_index, X, norm, p = ncol(X))
+}
+\arguments{
+\item{candi}{The index of the candidate set.}
+
+\item{last_index}{The index of the selected point in the last iteration.}
+
+\item{X}{The whole data.}
+
+\item{norm}{Norm of the whole data.}
+
+\item{p}{Numbers of columns of the data.}
+}
+\value{
+Loss of every point in candidate set.
+}
+\description{
+Compute loss function for OSS (r-version)
+}
diff --git a/man/rL2norm.Rd b/man/rL2norm.Rd
new file mode 100644
index 0000000..a8afc98
--- /dev/null
+++ b/man/rL2norm.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/OSS.R
+\name{rL2norm}
+\alias{rL2norm}
+\title{Get L2 norm (r-version)}
+\usage{
+rL2norm(X)
+}
+\arguments{
+\item{X}{A matrix or data.frame.}
+}
+\value{
+L2 norm of \code{X}(every row).
+}
+\description{
+Get L2 norm of a matrix or data frame.
+}
diff --git a/man/rOSS.Rd b/man/rOSS.Rd
new file mode 100644
index 0000000..b0e7db0
--- /dev/null
+++ b/man/rOSS.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/OSS.R
+\name{rOSS}
+\alias{rOSS}
+\title{OSS (r-version)}
+\usage{
+rOSS(n, X)
+}
+\arguments{
+\item{n}{Subsample size.}
+
+\item{X}{A matrix.}
+}
+\value{
+Subsample index.
+}
+\description{
+OSS (r-version)
+}
diff --git a/man/rbottom_t_index.Rd b/man/rbottom_t_index.Rd
new file mode 100644
index 0000000..1012482
--- /dev/null
+++ b/man/rbottom_t_index.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/OSS.R
+\name{rbottom_t_index}
+\alias{rbottom_t_index}
+\title{Find t smallest index of a vector.}
+\usage{
+rbottom_t_index(loss, t)
+}
+\arguments{
+\item{loss}{A vector.}
+
+\item{t}{An integer.}
+}
+\value{
+The index of the t smallest element of the vector.
+}
+\description{
+Find t smallest index of a vector.
+}
diff --git a/man/rcppOSS.Rd b/man/rcppOSS.Rd
new file mode 100644
index 0000000..474f1e5
--- /dev/null
+++ b/man/rcppOSS.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{rcppOSS}
+\alias{rcppOSS}
+\title{Rcpp version OSS (core code of \code{OSS})}
+\usage{
+rcppOSS(X, n)
+}
+\arguments{
+\item{X}{A matrix.}
+
+\item{n}{Subsample size.}
+}
+\value{
+Subsample index.
+}
+\description{
+Rcpp version OSS (core code of \code{OSS})
+}
diff --git a/man/subsampling.Rd b/man/subsampling.Rd
index ace558e..8e2be3d 100644
--- a/man/subsampling.Rd
+++ b/man/subsampling.Rd
@@ -35,12 +35,13 @@ Default to all variables except the response variable}
 \item \code{OSMAC_A}: A subsampling method based on A-optimal for logistic regression proposed by Wang et.al. (2018).
 \item \code{OSMAC_L}: A subsampling method based on L-optimal for logistic regression proposed by Wang et.al. (2018).
 \item \code{IBOSS}: A subsampling method based on D-optimal for linear regression proposed by Wang et.al. (2019).
+\item \code{OSS} : A subsampling method based on Orthogonal Array proposed by Wang et.al.(2021).
 }}
 
 \item{replace}{A boolean.
 \itemize{
-\item \code{TRUE} (the default): sampling with replace.
-\item \code{FALSE}: sampling without replace
+\item \code{TRUE} (the default): Sampling with replace.
+\item \code{FALSE}: Sampling without replace
 }}
 
 \item{seed_1}{Random seed for the first stage sampling or Unif.}
@@ -65,4 +66,5 @@ subsampling(y_name = "y", data = data_binary, n = 30, pilot_n = 100, method = "O
 
 data_numeric <- data_numeric_regression
 subsampling(y_name = "y", data = data_numeric, n = 100, method = "IBOSS")
+subsampling(y_name = "y", data = data_numeric, n = 30, method = "OSS")
 }
diff --git a/src/IBOSS.cpp b/src/IBOSS.cpp
index c7e7c38..cb9e8f6 100644
--- a/src/IBOSS.cpp
+++ b/src/IBOSS.cpp
@@ -6,12 +6,10 @@ using namespace Rcpp;
 
 //' Get subsample index of other column(except the first column) (IBOSS)
 //'
-//' @param r subsample size of the column.
+//' @param r Subsample size of the column.
 //' @param z A numeric vector. the column.
-//' @param rdel subsample index of the first column.
-//' @return subsample index of the column.
-//' @export
-// [[Rcpp::plugins("cpp99")]]
+//' @param rdel Subsample index of the first column.
+//' @return Subsample index of the column.
 // [[Rcpp::export]]
 IntegerVector getIdxR_cpp(int r, NumericVector z, IntegerVector rdel) {
   int m = rdel.size(), n = z.size();
@@ -46,27 +44,7 @@ IntegerVector getIdxR_cpp(int r, NumericVector z, IntegerVector rdel) {
   std::nth_element(y, y + r - 1, y + n - m);
   double yru = -y[r-1];
   delete [] y;
-  // /********************************************/
-  // // This code use twice memory
-  // double* yl = new double [n-m];
-  // double* yu = new double [n-m];
-  // int j = 0, k=0, kl = 0, ku = 0;
-  // for ( int i = 0; i < n; i++) {
-  //   if ( j >= m) {
-  // 	yl[kl++] = z[i];
-  // 	yu[ku++] = -z[i];
-  //   }
-  //   else if ( del[j] != i + 1) {
-  // 	yl[kl++] = z[i];
-  // 	yu[ku++] = -z[i];
-  //   }
-  //   else
-  // 	j++;
-  // }
-  // std::nth_element(yl, yl + r - 1, yl + n - m);
-  // std::nth_element(yu, yu + r - 1, yu + n - m);
-  // double yrl = yl[r-1], yru = -yu[r-1];
-  // /********************************************/
+
   int jl = 0, ju = 0;
   std::vector<int> locl(r);
   std::vector<int> locu(r);
@@ -100,11 +78,9 @@ IntegerVector getIdxR_cpp(int r, NumericVector z, IntegerVector rdel) {
 
 //' Get subsample index of the first column(IBOSS)
 //'
-//' @param r subsample size of the first column.
+//' @param r Subsample size of the first column.
 //' @param z A numeric vector. the first column.
-//' @return subsample index of the first column.
-//' @export
- // [[Rcpp::plugins("cpp99")]]
+//' @return Subsample index of the first column.
 // [[Rcpp::export]]
 IntegerVector getIdx_cpp(int r, NumericVector z) {
   int n = z.size();
@@ -119,18 +95,7 @@ IntegerVector getIdx_cpp(int r, NumericVector z) {
   std::nth_element(y, y + r - 1, y + n);
   double yru = -y[r-1];
   delete [] y;
-  // /********************************************/
-  // // This code use twice memory
-  // double* yl = new double [n];
-  // double* yu = new double [n];
-  // for ( int i = 0; i < n; i++) {
-  //   yl[i] = z[i];
-  //   yu[i] = -z[i];
-  // }
-  // std::nth_element(yl, yl + r - 1, yl + n);
-  // std::nth_element(yu, yu + r - 1, yu + n);
-  // double yrl = yl[r-1], yru = -yu[r-1];
-  // /*******************************************/
+
   int jl = 0, ju = 0;
   std::vector<int> locl(r);
   std::vector<int> locu(r);
diff --git a/src/Makevars b/src/Makevars
new file mode 100644
index 0000000..3a7f8ac
--- /dev/null
+++ b/src/Makevars
@@ -0,0 +1,2 @@
+PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
+PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
diff --git a/src/Makevars.win b/src/Makevars.win
new file mode 100644
index 0000000..3a7f8ac
--- /dev/null
+++ b/src/Makevars.win
@@ -0,0 +1,2 @@
+PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
+PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
diff --git a/src/OSS.cpp b/src/OSS.cpp
new file mode 100644
index 0000000..3f695b4
--- /dev/null
+++ b/src/OSS.cpp
@@ -0,0 +1,146 @@
+#include <Rcpp.h>
+using namespace Rcpp;
+
+// Scale a matrix
+//
+// @param X A matrix.
+//
+// @return Scaled matrix.
+//
+// NumericMatrix ScaleMatrix(NumericMatrix X){
+//   int p = X.cols();
+//   for(int j = 0; j < p; j++){
+//     X(_,j) = (X(_,j) - mean(X(_,j))) / sd(X(_,j));
+//   }
+//   return X;
+// }
+
+//' Get squared L2 norm of every row
+//'
+//' Computes the sum of squared entries of each row of a numeric matrix.
+//'
+//' @param X A numeric matrix.
+//'
+//' @return Numeric vector with one value per row of `X`: the squared L2 norm (no square root is taken).
+// [[Rcpp::export]]
+NumericVector L2norm(NumericMatrix X){
+  int N = X.rows();
+  NumericVector norm(N);
+  for(int i=0; i<N; i++){
+    norm[i] = sum(X(i,_) * X(i,_));  // elementwise square of row i, then summed
+    // norm[i] = std::inner_product(X(i,_).begin(), X(i,_).end(), X(i,_).begin(),0);
+  }
+  return norm;
+}
+
+//' Find the indices of the t smallest elements of a vector
+//'
+//' @param loss A numeric vector.
+//' @param t Number of elements to keep; callers are expected to pass `1 <= t <= length(loss)`.
+//'
+//' @return Zero-based indices, in ascending order, of the t smallest elements; ties at the threshold keep the earliest positions.
+// [[Rcpp::export]]
+IntegerVector bottom_t_index(NumericVector loss, int t){
+  IntegerVector remain(t);
+  NumericVector losscopy = clone(loss);  // nth_element reorders in place, so work on a copy
+  std::nth_element(losscopy.begin(), losscopy.begin() + t - 1, losscopy.end());  // losscopy[t-1] = t-th smallest value
+  for(int i=0, ii=0; i<loss.length() && ii<t; i++){  // ii<t: values tied with the threshold must not overrun `remain`
+    if (loss[i] <= losscopy[t-1]) remain[ii++] = i;
+  }
+  return(remain);
+}
+
+//' Compute loss function for OSS
+//'
+//' @param candi Zero-based row indices (into `X` and `norm`) of the candidate set.
+//' @param last_index Zero-based row index of the point selected in the last iteration.
+//' @param X The whole data.
+//' @param norm Per-row norm values of the whole data, indexed like the rows of `X`.
+//'
+//' @return Loss of every point in the candidate set, in the order of `candi`.
+// [[Rcpp::export]]
+NumericVector ComputeLoss(IntegerVector candi, int last_index, NumericMatrix X, NumericVector norm){
+  int p = X.cols();
+  int k = candi.length();
+  NumericVector loss(k);
+  for(int i=0; i<k; i++){
+    int delta = sum(sign(X(candi[i],_)) == sign(X(last_index,_)));  // number of columns where both rows have the same sign
+    loss[i] = pow(p - norm[candi[i]]/2 - norm[last_index]/2 + delta, 2);  // squared (p - norm_i/2 - norm_last/2 + delta)
+  }
+  return loss;
+}
+
+//' Rcpp version OSS (core code of `OSS`)
+//'
+//' @param X A numeric matrix; it is not scaled here (standardization is done in R).
+//' @param n Subsample size; must satisfy `1 <= n <= nrow(X)`.
+//'
+//' @return One-based row indices of the selected subsample, in selection order.
+// [[Rcpp::export]]
+IntegerVector rcppOSS(NumericMatrix X, int n) {
+  // X = ScaleMatrix(X);  // Standardize in R
+  int N = X.rows();
+  if (n < 1 || n > N) stop("`n` must be between 1 and the number of rows of `X`.");
+  IntegerVector index(n);
+  IntegerVector candi = seq_len(N)-1;  // zero-based row indices still eligible for selection
+  NumericVector norm = L2norm(X);
+  double r = log(N) / log(n);  // exponent used below to size the retained candidate set
+
+  index[0] = which_max(norm);  // first pick: the row with the largest norm value
+  candi.erase(index[0]);  // candi is still 0..N-1 here, so position == row index
+  NumericVector loss = ComputeLoss(candi, index[0], X, norm);
+  // Rcout << "i = 0" << "\n";
+  // Rcout << "index: " << index[0] << " \n";
+  // Rcout << "length of candi: " << candi.length() << " \n";
+  // Rcout << "candi: " << candi << "\n";
+  // Rcout << "length of loss: " << loss.length() << " \n";
+  // Rcout << "loss: " << loss << "\n" << "\n";
+
+  for(int i=1; i<n; i++){
+
+    int tmp = which_min(loss);  // position (within candi/loss) of the smallest accumulated loss
+    index[i] = candi[tmp];
+    // Rcout << "i = " << i << "\n";
+    // Rcout << "tmp: " << tmp << "\n";
+    // Rcout << "index: " << index[i] << "\n";
+
+    candi.erase(tmp);  // keep candi and loss aligned position by position
+    loss.erase(tmp);
+    // Rcout << "length of candi: " << candi.length() << "\n";
+    // Rcout << "candi: " << candi << "\n";
+    // Rcout << "length of loss: " << loss.length() << "\n";
+    // Rcout << "loss: " << loss << "\n";
+
+    double t = 0;  // number of candidates to retain after this iteration
+    if (N > pow(n,2)) {
+      t = N / (i+1);  // integer division; equals floor() of the exact ratio
+    } else {
+      t = N / pow(i+1, r-1);
+    }
+    // Rcout << "t = " << t << "\n";
+    if (candi.length() > t) {
+      IntegerVector remain = bottom_t_index(loss, floor(t));  // positions of the floor(t) smallest losses
+      //   Rcout << "length of remain: " << remain.length() << "\n";
+      //   Rcout << "remain:" << remain << "\n";
+      candi = candi[remain];
+      loss = loss[remain];
+    }
+
+//    if (candi.length() == 0) {
+//      index = index[seq(0,i)];
+//      break;
+//    }
+    // Rcout << "After eliminate:" << "\n";
+    // Rcout << "length of candi: " << candi.length() << "\n";
+    // Rcout << "candi: " << candi << "\n";
+    // Rcout << "length of loss: " << loss.length() << "\n";
+    // Rcout << "loss: " << loss << "\n";
+    loss = loss + ComputeLoss(candi, index[i], X, norm); // accumulate the loss contributed by the newly selected point
+    // Rcout << "After Update loss:" << "\n";
+    // Rcout << "length of loss: " << loss.length() << "\n";
+    // Rcout << "loss: " << loss << "\n" << "\n";
+  }
+  return index + 1;  // convert zero-based C++ indices to one-based R indices
+}
+
+
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 746aa78..fb208b6 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -1,6 +1,7 @@
 // Generated by using Rcpp::compileAttributes() -> do not edit by hand
 // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
+#include <RcppArmadillo.h>
 #include <Rcpp.h>
 
 using namespace Rcpp;
@@ -35,10 +36,118 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// L2norm
+NumericVector L2norm(NumericMatrix X);
+RcppExport SEXP _dbsubsampling_L2norm(SEXP XSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< NumericMatrix >::type X(XSEXP);
+    rcpp_result_gen = Rcpp::wrap(L2norm(X));
+    return rcpp_result_gen;
+END_RCPP
+}
+// bottom_t_index
+IntegerVector bottom_t_index(NumericVector loss, int t);
+RcppExport SEXP _dbsubsampling_bottom_t_index(SEXP lossSEXP, SEXP tSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< NumericVector >::type loss(lossSEXP);
+    Rcpp::traits::input_parameter< int >::type t(tSEXP);
+    rcpp_result_gen = Rcpp::wrap(bottom_t_index(loss, t));
+    return rcpp_result_gen;
+END_RCPP
+}
+// ComputeLoss
+NumericVector ComputeLoss(IntegerVector candi, int last_index, NumericMatrix X, NumericVector norm);
+RcppExport SEXP _dbsubsampling_ComputeLoss(SEXP candiSEXP, SEXP last_indexSEXP, SEXP XSEXP, SEXP normSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< IntegerVector >::type candi(candiSEXP);
+    Rcpp::traits::input_parameter< int >::type last_index(last_indexSEXP);
+    Rcpp::traits::input_parameter< NumericMatrix >::type X(XSEXP);
+    Rcpp::traits::input_parameter< NumericVector >::type norm(normSEXP);
+    rcpp_result_gen = Rcpp::wrap(ComputeLoss(candi, last_index, X, norm));
+    return rcpp_result_gen;
+END_RCPP
+}
+// rcppOSS
+IntegerVector rcppOSS(NumericMatrix X, int n);
+RcppExport SEXP _dbsubsampling_rcppOSS(SEXP XSEXP, SEXP nSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< NumericMatrix >::type X(XSEXP);
+    Rcpp::traits::input_parameter< int >::type n(nSEXP);
+    rcpp_result_gen = Rcpp::wrap(rcppOSS(X, n));
+    return rcpp_result_gen;
+END_RCPP
+}
+// armabottom_k
+arma::vec armabottom_k(arma::vec x, unsigned int k);
+RcppExport SEXP _dbsubsampling_armabottom_k(SEXP xSEXP, SEXP kSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< arma::vec >::type x(xSEXP);
+    Rcpp::traits::input_parameter< unsigned int >::type k(kSEXP);
+    rcpp_result_gen = Rcpp::wrap(armabottom_k(x, k));
+    return rcpp_result_gen;
+END_RCPP
+}
+// armaScaleMatrix
+arma::mat armaScaleMatrix(arma::mat X);
+RcppExport SEXP _dbsubsampling_armaScaleMatrix(SEXP XSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< arma::mat >::type X(XSEXP);
+    rcpp_result_gen = Rcpp::wrap(armaScaleMatrix(X));
+    return rcpp_result_gen;
+END_RCPP
+}
+// armaComputeLoss
+arma::vec armaComputeLoss(arma::mat X, arma::vec xa, arma::mat y, double ya, double tPow);
+RcppExport SEXP _dbsubsampling_armaComputeLoss(SEXP XSEXP, SEXP xaSEXP, SEXP ySEXP, SEXP yaSEXP, SEXP tPowSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< arma::mat >::type X(XSEXP);
+    Rcpp::traits::input_parameter< arma::vec >::type xa(xaSEXP);
+    Rcpp::traits::input_parameter< arma::mat >::type y(ySEXP);
+    Rcpp::traits::input_parameter< double >::type ya(yaSEXP);
+    Rcpp::traits::input_parameter< double >::type tPow(tPowSEXP);
+    rcpp_result_gen = Rcpp::wrap(armaComputeLoss(X, xa, y, ya, tPow));
+    return rcpp_result_gen;
+END_RCPP
+}
+// armaOSS
+arma::uvec armaOSS(arma::mat x, int k, double tPow);
+RcppExport SEXP _dbsubsampling_armaOSS(SEXP xSEXP, SEXP kSEXP, SEXP tPowSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< arma::mat >::type x(xSEXP);
+    Rcpp::traits::input_parameter< int >::type k(kSEXP);
+    Rcpp::traits::input_parameter< double >::type tPow(tPowSEXP);
+    rcpp_result_gen = Rcpp::wrap(armaOSS(x, k, tPow));
+    return rcpp_result_gen;
+END_RCPP
+}
 
 static const R_CallMethodDef CallEntries[] = {
     {"_dbsubsampling_getIdxR_cpp", (DL_FUNC) &_dbsubsampling_getIdxR_cpp, 3},
     {"_dbsubsampling_getIdx_cpp", (DL_FUNC) &_dbsubsampling_getIdx_cpp, 2},
+    {"_dbsubsampling_L2norm", (DL_FUNC) &_dbsubsampling_L2norm, 1},
+    {"_dbsubsampling_bottom_t_index", (DL_FUNC) &_dbsubsampling_bottom_t_index, 2},
+    {"_dbsubsampling_ComputeLoss", (DL_FUNC) &_dbsubsampling_ComputeLoss, 4},
+    {"_dbsubsampling_rcppOSS", (DL_FUNC) &_dbsubsampling_rcppOSS, 2},
+    {"_dbsubsampling_armabottom_k", (DL_FUNC) &_dbsubsampling_armabottom_k, 2},
+    {"_dbsubsampling_armaScaleMatrix", (DL_FUNC) &_dbsubsampling_armaScaleMatrix, 1},
+    {"_dbsubsampling_armaComputeLoss", (DL_FUNC) &_dbsubsampling_armaComputeLoss, 5},
+    {"_dbsubsampling_armaOSS", (DL_FUNC) &_dbsubsampling_armaOSS, 3},
     {NULL, NULL, 0}
 };
 
diff --git a/src/armaOSS.cpp b/src/armaOSS.cpp
new file mode 100644
index 0000000..cce7aa8
--- /dev/null
+++ b/src/armaOSS.cpp
@@ -0,0 +1,132 @@
+#include <RcppArmadillo.h>
+using namespace arma;
+
+//' Find the indices of the k smallest elements of a vector (RcppArmadillo-version)
+//'
+//' @param x A numeric vector.
+//' @param k Number of smallest elements to locate.
+//'
+//' @return Zero-based positions (stored as doubles) of the k smallest elements of `x`.
+// [[Rcpp::export]]
+arma::vec armabottom_k(arma::vec x, unsigned int k) {
+  arma::vec x2 = x; // keep the original order; `x` is partially reordered below
+  arma::vec ind(k); // positions of the k smallest values
+  std::nth_element(x.begin(), x.begin() + k - 1, x.end()); // afterwards x[k-1] is the k-th smallest value
+  for(int ii=0, i=0; i<int(x.n_elem) && ii<int(k); i++){ // ii<k stops after k hits, so ties cannot overrun `ind`
+    if(x2[i] <= x[k-1])  ind[ii++] = i;  // zero-based position; no +1 is applied here
+  }
+  return ind;
+}
+
+//' Scale a matrix (RcppArmadillo-version)
+//'
+//' @param X A numeric matrix.
+//'
+//' @return Matrix of the same size with every column centered by its mean and divided by its standard deviation.
+// [[Rcpp::export]]
+arma::mat armaScaleMatrix(arma::mat X){
+  // Scaling step newly added here (GOSS does not have it)
+  int p = X.n_cols;
+  for(int j = 0; j < p; j++){
+    X.col(j) = (X.col(j) - mean(X.col(j))) / stddev(X.col(j));  // NOTE(review): a constant column has stddev 0 -- confirm callers exclude it
+  }
+  return X;
+}
+
+//' Compute loss function for OSS (RcppArmadillo-version)
+//'
+//' @param X Matrix of the candidate set (one row per candidate); rows are compared entrywise with `y`.
+//' @param xa Norm values of the candidate set, one per row of `X`.
+//' @param y A single-row matrix. The point which was selected in the last iteration.
+//' @param ya Norm value of `y`.
+//' @param tPow The power of the loss function.
+//'
+//' @return Loss of the candidate set, one value per row of `X`.
+// [[Rcpp::export]]
+arma::vec armaComputeLoss(arma::mat X, arma::vec xa, arma::mat y, double ya, double tPow) {
+  int n=X.n_rows;
+  int p=X.n_cols;
+  arma::vec B = zeros<vec>(n);
+  for(int i=0; i<n; i++){
+    B(i) = pow(accu(X.row(i)==y)+p-xa(i)/2-ya/2,tPow);  // (number of entries equal to y + p - xa_i/2 - ya/2) ^ tPow
+  }
+  return B;
+}
+
+//' OSS (RcppArmadillo-version)
+//' @param x A numeric matrix; it is standardized column-wise inside this function.
+//' @param k Subsample size.
+//' @param tPow The power of the loss function.
+//'
+//' @return One-based row indices of the selected subsample, in selection order.
+// [[Rcpp::export]]
+arma::uvec armaOSS(arma::mat x, int k, double tPow=2){
+  x = armaScaleMatrix(x); // Standardize internally
+  int n=x.n_rows;
+  arma::uvec candi=linspace<uvec>(1,n,n); // one-based row indices still eligible for selection
+  arma::uvec ind=linspace<uvec>(1,k,k); // preallocated; every entry is overwritten below
+  arma::vec L=sum(pow(x,2),1); // row-wise sum of squares
+  arma::vec xa=L; // kept intact for all rows; L is shrunk alongside candi
+  uword mm=L.index_max();
+  ind(0)=candi(mm); // first pick: the row with the largest sum of squares
+  candi.shed_row(mm);
+  L.shed_row(mm);
+
+  // Rcout << "i = 0" << "\n";
+  // Rcout << "index: " << ind(0)-1 << "\n";
+  // Rcout << "length of candi: " << candi.n_elem << "\n";
+  // Rcout << "candi: " << candi.t()-1 << "\n";
+
+  arma::mat sx=sign(x); // the loss compares signs only, so compute them once
+
+  /* GOSS original:
+   double r=log(n/k)/log(k);
+   */
+  double r=log(n)/log(k); // modified
+  for(int i=1; i<k; i++){
+    if(i==1) // first pass replaces L; later passes accumulate onto it (candi-1 converts to zero-based rows)
+      L = armaComputeLoss(sx.rows(candi-1),xa.elem(candi-1),sx.row(ind(i-1)-1),xa(ind(i-1)-1),tPow);
+    else
+      L = L + armaComputeLoss(sx.rows(candi-1),xa.elem(candi-1),sx.row(ind(i-1)-1),xa(ind(i-1)-1),tPow);
+
+    mm=L.index_min(); // candidate with the smallest accumulated loss
+    ind(i)=candi(mm);
+    candi.shed_row(mm); // keep candi and L aligned position by position
+    L.shed_row(mm);
+
+    // Rcout << "i = " << i << "\n";
+    // Rcout << "index: " << ind(i)-1 << "\n";
+    // Rcout << "length of candi: " << candi.n_elem << "\n";
+    // Rcout << "candi: " << candi.t() - 1 << "\n";
+    // Rcout << "length of loss: " << L.n_elem << "\n";
+    // Rcout << "loss: " << L.t() << "\n";
+
+    /* GOSS original:
+     int nc=floor(n/pow(i,r));
+     */
+    double nc = n/pow(i+1,r-1); // modified
+
+    /* GOSS original:
+     if((i>1) & (L.n_elem>double(nc))){
+     arma::uvec tt=arma::conv_to<arma::uvec>::from(bottom_k(L,nc));
+     L=L.elem(tt);
+     candi=candi.elem(tt);
+     */
+    if( L.n_elem > nc ){ //modified
+      arma::uvec tt=arma::conv_to<arma::uvec>::from(armabottom_k(L,floor(nc))); // positions of the floor(nc) smallest losses
+      L=L.elem(tt);
+      candi=candi.elem(tt);
+
+    // Rcout << "t = " << nc << "\n";
+    // Rcout << "length of remain: " << tt.n_elem << "\n";
+    // Rcout << "remain: " << tt.t() << "\n";
+    // Rcout << "After eliminatie" << "\n";
+    // Rcout << "length of candi: " << candi.n_elem << "\n";
+    // Rcout << "candi: " << candi.t()-1 << "\n";
+    // Rcout << "length of loss: " << L.n_elem << "\n";
+    // Rcout << "loss: " << L.t() << "\n";
+
+    }
+  }
+  return ind;
+}
diff --git a/tests/testthat/test-OSS.R b/tests/testthat/test-OSS.R
new file mode 100644
index 0000000..10b4c78
--- /dev/null
+++ b/tests/testthat/test-OSS.R
@@ -0,0 +1,12 @@
+test_that("OSS with Rcpp works well and get the same result with r-version", {
+  data_numeric_regression["y"] <- NULL  # drop the `y` column
+  X <- data_numeric_regression
+
+  # X <- scale(as.matrix(data_numeric_regression))
+  # attributes(X) <- attributes(X)["dim"]
+  # expect_equal(L2norm(X), rowSums(X^2))
+  # expect_equal(bottom_t_index(X[,1], 20) + 1, which(X[,1] <= sort(X[,1])[20]))
+
+  expect_equal(OSS(100, X), rOSS(100, X))  # OSS must agree with rOSS
+  expect_equal(OSS(100, X), OSS(100, X))  # repeated calls must agree
+})
diff --git a/tests/testthat/test-armaOSS.R b/tests/testthat/test-armaOSS.R
new file mode 100644
index 0000000..5df9bd5
--- /dev/null
+++ b/tests/testthat/test-armaOSS.R
@@ -0,0 +1,11 @@
+test_that("OSS with RcppArmadill get the same result with Rcpp", {
+  data_numeric_regression["y"] <- NULL  # drop the `y` column
+  X <- as.matrix(data_numeric_regression)
+  attributes(X) <- attributes(X)["dim"]  # keep only `dim`, dropping dimnames
+
+  expect_equal(as.vector(armaOSS(X, 100)), OSS(100, X))  # armaOSS must agree with OSS
+  expect_equal(as.vector(armaOSS(X, 100)), as.vector(armaOSS(X, 100)))  # repeated calls must agree
+})
+
+
+
diff --git a/vignettes/Subsampling.Rmd b/vignettes/Subsampling.Rmd
index 3da72f1..31179dd 100644
--- a/vignettes/Subsampling.Rmd
+++ b/vignettes/Subsampling.Rmd
@@ -45,7 +45,7 @@ x <- data_binary[-which(names(data_binary) == "y")]
 
 OSMAC(X = x, Y = y, r1 = 100, r2 = 10, method="mmse", seed_1 = 123, seed_2 = 456)
 ```
-or you can use a unified interface(recommended):
+or you can use a unified interface (recommended):
 ```{r}
 subsampling(y_name = "y", data = data_binary, n = 10, pilot_n = 100, method = "OSMAC_A", 
             seed_1 = 123, seed_2 = 456)
@@ -56,7 +56,7 @@ L-optimal minimise the trace of the covariance matrix of the linear combination
 ```{r OSMAC-L}
 OSMAC(X = x, Y = y, r1 = 100, r2 = 10, method="mvc", seed_1 = 123, seed_2 = 456)
 ```
-or you can use a unified interface(recommended):
+or you can use a unified interface (recommended):
 ```{r}
 subsampling(y_name = "y", data = data_binary, n = 10, pilot_n = 100, method = "OSMAC_L", 
             seed_1 = 123, seed_2 = 456)
@@ -72,11 +72,21 @@ A subsampling method based on D-optimal for linear regression proposed by [Wang
 ```{r}
 data_numeric <- data_numeric_regression
 X <- data_numeric[-which(names(data_numeric) == "y")]
-IBOSS(n = 30, X = X)
+IBOSS(n = 100, X = X)
 ```
-or you can use a unified interface(recommended):
+or you can use a unified interface (recommended):
 ```{r}
-subsampling(y_name = "y", data = data_numeric, n = 30, method = "IBOSS")
+subsampling(y_name = "y", data = data_numeric, n = 100, method = "IBOSS")
+```
+
+# OSS
+A subsampling method based on orthogonal arrays, proposed by [Wang et al. (2021)](https://projecteuclid.org/journals/annals-of-applied-statistics/volume-15/issue-3/Orthogonal-subsampling-for-big-data-linear-regression/10.1214/21-AOAS1462.short?tab=ArticleLink)^[Lin Wang, Jake Elmstedt, Weng Kee Wong & Hongquan Xu (2021) Orthogonal subsampling for big data linear regression, The Annals of Applied Statistics, 15(3), 1273-1290.].
+```{r}
+OSS(n = 10, X = X)
+```
+or you can use a unified interface (recommended):
+```{r}
+subsampling(y_name = "y", data = data_numeric, n = 10, method = "OSS")
 ```
 
 **We're working on more features，such as subsampling based on OSS, Lowcon, support point, etc. **