Update Proximal Gradient Argument Wrapper

Improve handling of arguments passed to penalized regression solvers - Require arguments to be passed by name - Wrap runtime arguments (iterations and convergence thresholds) into a control object - Update tests
DataSlingers · Jun 28, 2019 · ecc2e76 · ecc2e76
2 parents 08196ad + 3f81cf8
commit ecc2e76
Show file tree

Hide file tree

Showing 7 changed files with 424 additions and 167 deletions.
diff --git a/R/moma_arguments.R b/R/moma_arguments.R
@@ -15,6 +15,7 @@ empty <- function() {
 #' \deqn{\lambda \sum \| x_{i} \| ,}
 #' where \eqn{\lambda} is set by \code{lambda_u/v} in the function \code{moma_svd}.
 #'
+#' @param ... Forces users to specify all arguments by name.
 #' @param non_negative a boolean value. Set \code{TRUE} to add non-negativity
 #' constraint.
 #'
@@ -23,7 +24,11 @@ empty <- function() {
 #' @examples
 #' lasso(non_negative = FALSE)
 #' @export
-lasso <- function(non_negative = FALSE) {
+lasso <- function(..., non_negative = FALSE) {
+    if (length(list(...)) != 0) {
+        moma_error("Please specify the correct argument by name.")
+    }
+
     if (!is_logical_scalar(non_negative)) {
         moma_error(sQuote("non_negative"), " should be a boolean value.")
     }
@@ -40,6 +45,7 @@ lasso <- function(non_negative = FALSE) {
 #' determined by \eqn{\gamma}. See Zhang, Cun-Hui. "Nearly unbiased variable
 #' selection under minimax concave penalty." The Annals of statistics 38.2 (2010): 894-942.
 #'
+#' @param ... Forces users to specify all arguments by name.
 #' @param gamma non-convexity. Must be larger than 1.
 #' @param non_negative a boolean value. Set to \code{TRUE} to add non-negativity
 #' constraint.
@@ -50,7 +56,11 @@ lasso <- function(non_negative = FALSE) {
 #' @examples
 #' mcp(gamma = 3, non_negative = FALSE)
 #' @export
-mcp <- function(gamma = 3, non_negative = FALSE) {
+mcp <- function(..., gamma = 3, non_negative = FALSE) {
+    if (length(list(...)) != 0) {
+        moma_error("Please specify the correct argument by name.")
+    }
+
     if (!is_logical_scalar(non_negative)) {
         moma_error(sQuote("non_negative"), " should be a boolean value.")
     }
@@ -75,6 +85,7 @@ mcp <- function(gamma = 3, non_negative = FALSE) {
 #'  via nonconcave penalized likelihood and its oracle properties." Journal of
 #'  the American statistical Association 96.456 (2001): 1348-1360.
 #'
+#' @param ... Forces users to specify all arguments by name.
 #' @param gamma non-convexity. Must be larger than 2.
 #' @param non_negative a boolean value. Set to \code{TRUE} to add non-negativity
 #' constraint.
@@ -85,7 +96,11 @@ mcp <- function(gamma = 3, non_negative = FALSE) {
 #' @examples
 #' scad(gamma = 3.7, non_negative = FALSE)
 #' @export
-scad <- function(gamma = 3.7, non_negative = FALSE) {
+scad <- function(..., gamma = 3.7, non_negative = FALSE) {
+    if (length(list(...)) != 0) {
+        moma_error("Please specify the correct argument by name.")
+    }
+
     if (!is_logical_scalar(non_negative)) {
         moma_error(sQuote("non_negative"), " should be a boolean value.")
     }
@@ -128,6 +143,7 @@ slope <- function() {
 #' where \eqn{\lambda} is set by \code{lambda_u/v} in the function \code{moma_svd}, \eqn{\|x_g\|} is
 #' the vector comprised of elements of \eqn{x} picked out by indeces set \eqn{g}.
 #'
+#' @param ... Forces users to specify all arguments by name.
 #' @param g a vector of integer or characters, or a factor itself. It gets transformed
 #' to factor eventually to indicate grouping.
 #' @param non_negative a boolean value. Set to \code{TRUE} to add non-negativity
@@ -140,7 +156,11 @@ slope <- function() {
 #' # This sets every three adjacent parameters as a group.
 #' grplasso(g = rep(1:10, each = 3), non_negative = FALSE)
 #' @export
-grplasso <- function(g, non_negative = FALSE) {
+grplasso <- function(..., g, non_negative = FALSE) {
+    if (length(list(...)) != 0) {
+        moma_error("Please specify the correct argument by name.")
+    }
+
     if (!is_logical_scalar(non_negative)) {
         moma_error(sQuote("non_negative"), " should be a boolean value.")
     }
@@ -157,6 +177,7 @@ grplasso <- function(g, non_negative = FALSE) {
 #' Use this function to set the penalty function as fused lasso
 #' \deqn{\lambda \sum \| x_{i} - x_{i-1} \|,}
 #' where \eqn{\lambda} is set by \code{lambda_u/v} in the function \code{moma_svd}.
+#' @param ... Forces users to specify all arguments by name.
 #' @param algo a string being either "path" or "dp". Defaults to "path". Partial matching
 #' is supported. Two solving algorithms
 #' are provided. When "path" is chosen, the algorithm by
@@ -173,7 +194,10 @@ grplasso <- function(g, non_negative = FALSE) {
 #' @examples
 #' fusedlasso()
 #' @export
-fusedlasso <- function(algo = c("path", "dp")) {
+fusedlasso <- function(..., algo = c("path", "dp")) {
+    if (length(list(...)) != 0) {
+        moma_error("Please specify the correct argument by name.")
+    }
     # fused lasso
 
     # Two options for solving the proximal operator
@@ -203,6 +227,7 @@ fusedlasso <- function(algo = c("path", "dp")) {
 #' Tibshirani, Ryan J. "Adaptive piecewise polynomial estimation via trend
 #' filtering." The Annals of Statistics 42.1 (2014): 285-323.
 #'
+#' @param ... Forces users to specify all arguments by name.
 #' @param l1tf_k use (k+1)-difference matrix in trend filtering. Note \eqn{k = 0}
 #'          implies piecewise constant, \eqn{k=1} implies piecewise linear, \eqn{k=2}
 #'          piecewise quadratic etc.
@@ -212,7 +237,10 @@ fusedlasso <- function(algo = c("path", "dp")) {
 #' @examples
 #' l1tf(l1tf_k = 1)
 #' @export
-l1tf <- function(l1tf_k = 1) {
+l1tf <- function(..., l1tf_k = 1) {
+    if (length(list(...)) != 0) {
+        moma_error("Please specify the correct argument by name.")
+    }
     # l1 linear trend filtering
     arglist <- list(P = "L1TRENDFILTERING", l1tf_k = l1tf_k)
     class(arglist) <- "moma_sparsity"
@@ -226,14 +254,18 @@ l1tf <- function(l1tf_k = 1) {
 #' where \eqn{\lambda_} is set by \code{lambda_u/v} in the function \code{moma_svd}, and \eqn{\lambda_2}
 #' is specified in here.
 #'
+#' @param ... Forces users to specify all arguments by name.
 #' @param lambda2 the level of penalty on the absolute values of the coefficients
 #'
 #' @return a \code{moma_sparsity} object, which is a list containing the value of \code{lambda_2}.
 #'
 #' @examples
 #' spfusedlasso(lambda2 = 2)
 #' @export
-spfusedlasso <- function(lambda2) {
+spfusedlasso <- function(..., lambda2) {
+    if (length(list(...)) != 0) {
+        moma_error("Please specify the correct argument by name.")
+    }
     arglist <- list(P = "SPARSEFUSEDLASSO", lambda2 = lambda2)
     class(arglist) <- "moma_sparsity"
     return(arglist)
@@ -245,6 +277,7 @@ spfusedlasso <- function(lambda2) {
 #' \deqn{\lambda \sum w_{ij} \| x_{i} - x_{j} \|,}
 #' where \eqn{\lambda} is set by \code{lambda_u/v} in the function \code{moma_svd}.
 #'
+#' @param ... Forces users to specify all arguments by name.
 #' @param w a symmetric square matrix. \code{w[i][j]} is the \eqn{w_{ij}} described above.
 #' @param ADMM a boolean value. Set to \code{TRUE} to use ADMM, set to \code{FALSE} to use AMA.
 #' @param acc a boolean value. Set to \code{TRUE} to use the accelereated version of the algorithm.
@@ -257,9 +290,12 @@ spfusedlasso <- function(lambda2) {
 #' @examples
 #' cluster(w = matrix(rep(1, 9), 3), ADMM = FALSE, acc = FALSE, eps = 1e-10)
 #' @export
-cluster <- function(w = NULL, ADMM = FALSE,
+cluster <- function(..., w = NULL, ADMM = FALSE,
                     acc = FALSE,
                     eps = 1e-10) {
+    if (length(list(...)) != 0) {
+        moma_error("Please specify the correct argument by name.")
+    }
     # fused lasso
     if (!is.matrix(w) || is.null(w) || dim(w)[1] != dim(w)[2]) {
         moma_error("`w` should be a square matrix.")
@@ -275,3 +311,19 @@ cluster <- function(w = NULL, ADMM = FALSE,
     class(arglist) <- "moma_sparsity"
     return(arglist)
 }
+
+moma_pg_settings <- function(..., EPS = 1e-10, MAX_ITER = 1000,
+                             EPS_inner = 1e-10, MAX_ITER_inner = 1e+5,
+                             solver = c("ista", "fista", "onestepista")) {
+    if (length(list(...)) != 0) {
+        moma_error("Please specify the correct argument by name.")
+    }
+    solver <- match.arg(solver)
+    arglist <- list(
+        EPS = EPS, MAX_ITER = MAX_ITER,
+        EPS_inner = EPS_inner, MAX_ITER_inner = MAX_ITER_inner,
+        solver = toupper(solver)
+    )
+    class(arglist) <- "moma_pg_settings"
+    return(arglist)
+}
diff --git a/R/moma_svd.R b/R/moma_svd.R
@@ -67,9 +67,7 @@ moma_svd <- function(
                      X,
                      u_sparsity = empty(), v_sparsity = empty(), lambda_u = 0, lambda_v = 0, # lambda_u/_v is a vector or scalar
                      Omega_u = NULL, Omega_v = NULL, alpha_u = 0, alpha_v = 0, # so is alpha_u/_v
-                     EPS = 1e-10, MAX_ITER = 1000,
-                     EPS_inner = 1e-10, MAX_ITER_inner = 1e+5,
-                     solver = "ista",
+                     pg_setting = moma_pg_settings(),
                      k = 1, # number of pairs of singular vecters
                      select = c("gridsearch", "nestedBIC")) {
     if (!inherits(alpha_u, c("numeric", "integer")) ||
@@ -116,12 +114,6 @@ moma_svd <- function(
         # smoothness
         alpha_u = alpha_u,
         alpha_v = alpha_v,
-        # algorithm parameters
-        EPS = EPS,
-        MAX_ITER = MAX_ITER,
-        EPS_inner = EPS_inner,
-        MAX_ITER_inner = MAX_ITER_inner,
-        solver = toupper(solver),
         k = k
     )
 
@@ -157,6 +149,15 @@ moma_svd <- function(
         )
     }
 
+    # PG loop settings
+    if (!inherits(pg_setting, "moma_pg_settings")) {
+        moma_error(
+            "pg_setting penalty should be of class ",
+            sQuote("moma_pg_settings"),
+            ". Try using, for example, `pg_setting = moma_pg_settings(MAX_ITER=1e+4)`."
+        )
+    }
+
     # Pack all argument into a list
     # First we check the smoothness term argument.
     algo_settings_list <- c(
@@ -166,7 +167,8 @@ moma_svd <- function(
             Omega_v = check_omega(Omega_v, alpha_v, p),
             prox_arg_list_u = add_default_prox_args(u_sparsity),
             prox_arg_list_v = add_default_prox_args(v_sparsity)
-        )
+        ),
+        pg_setting
     )
 
     if (is_multiple_para) {

diff --git a/src/moma.cpp b/src/moma.cpp
@@ -60,7 +60,9 @@ MoMA::MoMA(const arma::mat &i_X,  // Pass X_ as a reference to avoid copy
     MoMALogger::info("Initializing MoMA object:")
         << " lambda_u " << lambda_u << " lambda_v " << lambda_v << " alpha_u " << alpha_u
         << " alpha_v " << alpha_v << " P_u " << Rcpp::as<std::string>(i_prox_arg_list_u["P"])
-        << " P_v " << Rcpp::as<std::string>(i_prox_arg_list_v["P"]);
+        << " P_v " << Rcpp::as<std::string>(i_prox_arg_list_v["P"]) << " EPS " << i_EPS
+        << " MAX_ITER " << i_MAX_ITER << " EPS_inner " << i_EPS_inner << " MAX_ITER_inner "
+        << i_MAX_ITER_inner << " solver " << i_solver;
     // Step 2: Initialize to leading singular vectors
     //
     //         MoMA is a regularized SVD, which is a non-convex (bi-convex)

diff --git a/tests/testthat/test_BIC_gird_mixed.R b/tests/testthat/test_BIC_gird_mixed.R
@@ -14,7 +14,7 @@ lambda_u <- seq(0.3, 1, length.out = n_lambda_u)
 lambda_v <- seq(0.3, 1, length.out = n_lambda_v)
 
 
-arg_list <- list(
+public_arg_list <- list(
     X,
     alpha_u = alpha_u, alpha_v = alpha_v,
     Omega_u = second_diff_mat(3), Omega_v = second_diff_mat(4),
@@ -28,7 +28,7 @@ test_that("BIC search returns correct-sized grid: four grid requests", {
     result <- do.call(
         testnestedBIC,
         c(
-            arg_list,
+            public_arg_list,
             list(
                 selection_criterion_alpha_u = 0, # grid
                 selection_criterion_alpha_v = 0, # grid
@@ -58,7 +58,7 @@ test_that("BIC search returns correct-sized grid: three grid requests", {
     result2 <- do.call(
         testnestedBIC,
         c(
-            arg_list,
+            public_arg_list,
             list(
                 selection_criterion_alpha_u = 0, # grid
                 selection_criterion_alpha_v = 0, # grid
@@ -83,7 +83,7 @@ test_that("BIC search returns correct-sized grid: three grid requests", {
     result2 <- do.call(
         testnestedBIC,
         c(
-            arg_list,
+            public_arg_list,
             list(
                 selection_criterion_alpha_u = 1,
                 selection_criterion_alpha_v = 0, # grid
@@ -110,7 +110,7 @@ test_that("BIC search returns correct-sized grid: two grid requests on u", {
     result3 <- do.call(
         testnestedBIC,
         c(
-            arg_list,
+            public_arg_list,
             list(
                 selection_criterion_alpha_u = 0, # grid
                 selection_criterion_lambda_u = 0, # grid
@@ -137,7 +137,7 @@ test_that("BIC search returns correct-sized grid: two grid requests on different
     result4 <- do.call(
         testnestedBIC,
         c(
-            arg_list,
+            public_arg_list,
             list(
                 selection_criterion_alpha_u = 1,
                 selection_criterion_lambda_u = 0, # grid
@@ -164,7 +164,7 @@ test_that("BIC search returns correct-sized grid: one grid", {
     result4 <- do.call(
         testnestedBIC,
         c(
-            arg_list,
+            public_arg_list,
             list(
                 selection_criterion_alpha_u = 1,
                 selection_criterion_lambda_u = 1,
@@ -191,7 +191,7 @@ test_that("BIC search returns correct-sized grid: all BIC search", {
     result4 <- do.call(
         testnestedBIC,
         c(
-            arg_list,
+            public_arg_list,
             list(
                 selection_criterion_alpha_u = 1,
                 selection_criterion_lambda_u = 1,
@@ -214,7 +214,7 @@ test_that("BIC search returns correct-sized grid: all BIC search", {
 
 test_that("testnestedBIC receivs a vector of length 0", {
     arglist <- c(
-        arg_list,
+        public_arg_list,
         list(
             selection_criterion_alpha_u = 1,
             selection_criterion_lambda_u = 1,