From 293dc5e392580cf6c438ad0513a00fda1d1feca0 Mon Sep 17 00:00:00 2001
From: Will Beasley <wibeasley@hotmail.com>
Date: Tue, 30 Aug 2022 17:08:05 -0500
Subject: [PATCH] return tibble instead of data.frame

close #415
---
 R/metadata-utilities.R                   | 19 +++++++++--------
 R/redcap-metadata-read.R                 | 19 ++++-------------
 R/redcap-read-oneshot-eav.R              |  6 +++---
 man/metadata_utilities.Rd                | 12 +++++++----
 man/redcap_metadata_read.Rd              | 15 ++------------
 man/redcap_read_oneshot_eav.Rd           |  2 +-
 tests/testthat/test-metadata-read.R      |  5 +++++
 tests/testthat/test-metadata-utilities.R | 26 ++++++++++++++++--------
 tests/testthat/test-read-oneshot-eav.R   | 11 ++++++++++
 9 files changed, 63 insertions(+), 52 deletions(-)

diff --git a/R/metadata-utilities.R b/R/metadata-utilities.R
index 09a87cc5..6fcc4073 100644
--- a/R/metadata-utilities.R
+++ b/R/metadata-utilities.R
@@ -13,7 +13,7 @@
 #' @param perl Indicates if perl-compatible regexps should be used.
 #'   Default is `TRUE`. Optional.
 #'
-#' @return Currently, a [base::data.frame()] is returned a row for each match,
+#' @return Currently, a [tibble::tibble()] is returned a row for each match,
 #' and a column for each *named* group within a match.  For the
 #' `retrieve_checkbox_choices()` function, the columns will be.
 #' * `id`: The numeric value assigned to each choice (in the data dictionary).
@@ -22,13 +22,17 @@
 #' @details
 #' The [regex_named_captures()] function is general, and not specific to
 #' REDCap; it accepts any arbitrary regular expression.
-#' It returns a [base::data.frame()] with as many columns as named matches.
+#' It returns a [tibble::tibble()] with as many columns as named matches.
 #'
 #' The [checkbox_choices()] function is specialized, and accommodates the
 #' "select choices" for a *single* REDCap checkbox group (where multiple boxes
-#' can be selected).  It returns a [base::data.frame()] with two columns, one
+#' can be selected).  It returns a [tibble::tibble()] with two columns, one
 #' for the numeric id and one for the text label.
 #'
+#' The parse will probably fail if a label contains a pipe (*i.e.*, `|`),
+#' since that the delimiter REDCap uses to separate choices
+#' presented to the user.
+#'
 #' @author Will Beasley
 #' @references See the official documentation for permissible characters in a
 #' checkbox label.
@@ -59,7 +63,7 @@
 #' token       <- "9A81268476645C4E5F03428B8AC3AA7B"
 #'
 #' ds_metadata <- redcap_metadata_read(redcap_uri=uri, token=token)$data
-#' choices_2   <- ds_metadata[ds_metadata$field_name=="race", "select_choices_or_calculations"]
+#' choices_2   <- ds_metadata[ds_metadata$field_name=="race", ]$select_choices_or_calculations
 #'
 #' REDCapR::regex_named_captures(pattern=pattern_boxes, text=choices_2)
 #' }
@@ -70,15 +74,14 @@
 #' REDCapR::regex_named_captures(pattern=pattern_boxes, text=choices_3)
 
 #' @export
-regex_named_captures <- function(pattern, text, perl=TRUE) {
-
+regex_named_captures <- function(pattern, text, perl = TRUE) {
   checkmate::assert_character(pattern, any.missing=FALSE, min.chars=0L, len=1)
   checkmate::assert_character(text   , any.missing=FALSE, min.chars=0L, len=1)
   checkmate::assert_logical(  perl   , any.missing=FALSE)
 
   match <- gregexpr(pattern, text, perl = perl)[[1]]
   capture_names <- attr(match, "capture.names")
-  d <- as.data.frame(matrix(
+  d <- base::data.frame(matrix(
     data  = NA_character_,
     nrow  = length(attr(match, "match.length")),
     ncol  = length(capture_names)
@@ -95,7 +98,7 @@ regex_named_captures <- function(pattern, text, perl=TRUE) {
       attr(match, "capture.length")[, column_name]
     )
   }
-  d
+  tibble::as_tibble(d)
 }
 
 #' @rdname metadata_utilities
diff --git a/R/redcap-metadata-read.R b/R/redcap-metadata-read.R
index 4b8aaa71..29006ef2 100644
--- a/R/redcap-metadata-read.R
+++ b/R/redcap-metadata-read.R
@@ -1,7 +1,7 @@
 #' @title Export the metadata of a REDCap project
 #'
 #' @description Export the metadata (as a data dictionary) of a REDCap project
-#' as a [base::data.frame()]. Each row in the data dictionary corresponds to
+#' as a [tibble::tibble()]. Each row in the data dictionary corresponds to
 #' one field in the project's dataset.
 #'
 #' @param redcap_uri The
@@ -28,7 +28,7 @@
 #'
 #' @return Currently, a list is returned with the following elements:
 #'
-#' * `data`: An R [base::data.frame()] of the desired records and columns.
+#' * `data`: An R [tibble::tibble()] of the desired fields (as rows).
 #' * `success`: A boolean value indicating if the operation was apparently
 #' successful.
 #' * `status_codes`: A collection of
@@ -43,17 +43,6 @@
 #' string, separated by commas.
 #' * `elapsed_seconds`: The duration of the function.
 #'
-#' @details
-#' Specifically, it internally uses multiple calls to [redcap_read_oneshot()]
-#' to select and return data.  Initially, only primary key is queried through
-#' the REDCap API.  The long list is then subsetted into partitions, whose
-#' sizes are determined by the `batch_size` parameter.  REDCap is then queried
-#' for all variables of the subset's subjects.  This is repeated for each
-#' subset, before returning a unified [base::data.frame()].
-#'
-#' The function allows a delay between calls, which allows the server to
-#' attend to other users' requests.
-#'
 #' @author Will Beasley
 #'
 #' @references The official documentation can be found on the 'API Help Page'
@@ -142,7 +131,7 @@ redcap_metadata_read <- function(
       # Override the 'success' determination from the http status code
       #   and return an empty data.frame.
       kernel$success    <- FALSE
-      ds                <- data.frame()
+      ds                <- tibble::tibble()
       outcome_message   <- sprintf(
         "The REDCap metadata export failed.  The http status code was %i.  The 'raw_text' returned was '%s'.",
         kernel$status_code,
@@ -150,7 +139,7 @@ redcap_metadata_read <- function(
       )
     }       # nocov end
   } else {
-    ds                  <- data.frame() #Return an empty data.frame
+    ds                  <- tibble::tibble() # Return an empty data.frame
     outcome_message     <- sprintf(
       "The REDCapR metadata export operation was not successful.  The error message was:\n%s",
       kernel$raw_text
diff --git a/R/redcap-read-oneshot-eav.R b/R/redcap-read-oneshot-eav.R
index 66550845..1282d701 100644
--- a/R/redcap-read-oneshot-eav.R
+++ b/R/redcap-read-oneshot-eav.R
@@ -70,7 +70,7 @@
 #' `httr` package.  See the details below. Optional.
 #'
 #' @return Currently, a list is returned with the following elements:
-#' * `data`: An R [base::data.frame()] of the desired records and columns.
+#' * `data`: A [tibble::tibble()] of the desired records and columns.
 #' * `success`: A boolean value indicating if the operation was apparently
 #' successful.
 #' * `status_code`: The
@@ -351,7 +351,7 @@ redcap_read_oneshot_eav <- function(
     } else {
       # nocov start
       kernel$success    <- FALSE #Override the 'success' determination from the http status code.
-      ds_2              <- tibble::tibble() #Return an empty data.frame
+      ds_2              <- tibble::tibble() # Return an empty data.frame
       outcome_message   <- sprintf(
         "The REDCap read failed.  The http status code was %s.  The 'raw_text' returned was '%s'.",
         kernel$status_code,
@@ -361,7 +361,7 @@ redcap_read_oneshot_eav <- function(
     }
   } else {
     # nocov start
-    ds_2            <- tibble::tibble() #Return an empty data.frame
+    ds_2            <- tibble::tibble() # Return an empty data.frame
     outcome_message <- if (any(grepl(kernel$regex_empty, kernel$raw_text))) {
       "The REDCapR read/export operation was not successful.  The returned dataset was empty."
     } else {
diff --git a/man/metadata_utilities.Rd b/man/metadata_utilities.Rd
index 54a67f64..9aad48b6 100644
--- a/man/metadata_utilities.Rd
+++ b/man/metadata_utilities.Rd
@@ -22,7 +22,7 @@ Default is \code{TRUE}. Optional.}
 to determine the \code{id} and \code{label} values.  Required.}
 }
 \value{
-Currently, a \code{\link[base:data.frame]{base::data.frame()}} is returned a row for each match,
+Currently, a \code{\link[tibble:tibble]{tibble::tibble()}} is returned a row for each match,
 and a column for each \emph{named} group within a match.  For the
 \code{retrieve_checkbox_choices()} function, the columns will be.
 \itemize{
@@ -37,12 +37,16 @@ project metadata.
 \details{
 The \code{\link[=regex_named_captures]{regex_named_captures()}} function is general, and not specific to
 REDCap; it accepts any arbitrary regular expression.
-It returns a \code{\link[base:data.frame]{base::data.frame()}} with as many columns as named matches.
+It returns a \code{\link[tibble:tibble]{tibble::tibble()}} with as many columns as named matches.
 
 The \code{\link[=checkbox_choices]{checkbox_choices()}} function is specialized, and accommodates the
 "select choices" for a \emph{single} REDCap checkbox group (where multiple boxes
-can be selected).  It returns a \code{\link[base:data.frame]{base::data.frame()}} with two columns, one
+can be selected).  It returns a \code{\link[tibble:tibble]{tibble::tibble()}} with two columns, one
 for the numeric id and one for the text label.
+
+The parse will probably fail if a label contains a pipe (\emph{i.e.}, \code{|}),
+since that the delimiter REDCap uses to separate choices
+presented to the user.
 }
 \examples{
 # The weird ranges are to avoid the pipe character;
@@ -68,7 +72,7 @@ uri         <- "https://bbmc.ouhsc.edu/redcap/api/"
 token       <- "9A81268476645C4E5F03428B8AC3AA7B"
 
 ds_metadata <- redcap_metadata_read(redcap_uri=uri, token=token)$data
-choices_2   <- ds_metadata[ds_metadata$field_name=="race", "select_choices_or_calculations"]
+choices_2   <- ds_metadata[ds_metadata$field_name=="race", ]$select_choices_or_calculations
 
 REDCapR::regex_named_captures(pattern=pattern_boxes, text=choices_2)
 }
diff --git a/man/redcap_metadata_read.Rd b/man/redcap_metadata_read.Rd
index dfe70ea0..31f9eb22 100644
--- a/man/redcap_metadata_read.Rd
+++ b/man/redcap_metadata_read.Rd
@@ -48,7 +48,7 @@ be visible somewhere public. Optional.}
 \value{
 Currently, a list is returned with the following elements:
 \itemize{
-\item \code{data}: An R \code{\link[base:data.frame]{base::data.frame()}} of the desired records and columns.
+\item \code{data}: An R \code{\link[tibble:tibble]{tibble::tibble()}} of the desired fields (as rows).
 \item \code{success}: A boolean value indicating if the operation was apparently
 successful.
 \item \code{status_codes}: A collection of
@@ -66,20 +66,9 @@ string, separated by commas.
 }
 \description{
 Export the metadata (as a data dictionary) of a REDCap project
-as a \code{\link[base:data.frame]{base::data.frame()}}. Each row in the data dictionary corresponds to
+as a \code{\link[tibble:tibble]{tibble::tibble()}}. Each row in the data dictionary corresponds to
 one field in the project's dataset.
 }
-\details{
-Specifically, it internally uses multiple calls to \code{\link[=redcap_read_oneshot]{redcap_read_oneshot()}}
-to select and return data.  Initially, only primary key is queried through
-the REDCap API.  The long list is then subsetted into partitions, whose
-sizes are determined by the \code{batch_size} parameter.  REDCap is then queried
-for all variables of the subset's subjects.  This is repeated for each
-subset, before returning a unified \code{\link[base:data.frame]{base::data.frame()}}.
-
-The function allows a delay between calls, which allows the server to
-attend to other users' requests.
-}
 \examples{
 \dontrun{
 uri   <- "https://bbmc.ouhsc.edu/redcap/api/"
diff --git a/man/redcap_read_oneshot_eav.Rd b/man/redcap_read_oneshot_eav.Rd
index 5cfdfc64..3d681586 100644
--- a/man/redcap_read_oneshot_eav.Rd
+++ b/man/redcap_read_oneshot_eav.Rd
@@ -110,7 +110,7 @@ be visible somewhere public. Optional.}
 \value{
 Currently, a list is returned with the following elements:
 \itemize{
-\item \code{data}: An R \code{\link[base:data.frame]{base::data.frame()}} of the desired records and columns.
+\item \code{data}: A \code{\link[tibble:tibble]{tibble::tibble()}} of the desired records and columns.
 \item \code{success}: A boolean value indicating if the operation was apparently
 successful.
 \item \code{status_code}: The
diff --git a/tests/testthat/test-metadata-read.R b/tests/testthat/test-metadata-read.R
index 1408fabe..32a77786 100644
--- a/tests/testthat/test-metadata-read.R
+++ b/tests/testthat/test-metadata-read.R
@@ -30,6 +30,7 @@ test_that("Super-wide", {
   expect_equal(nrow(returned_object$data), expected=expected_row_count) # dput(returned_object$data)
   expect_equal(ncol(returned_object$data), expected=expected_column_count)
   expect_equal(sum(is.na(returned_object$data)), expected=expected_na_cells)
+  expect_s3_class(returned_object$data, "tbl")
 })
 
 test_that("Super-wide 2", {
@@ -47,6 +48,7 @@ test_that("Super-wide 2", {
   expect_equal(nrow(returned_object$data), expected=expected_row_count) # dput(returned_object$data)
   expect_equal(ncol(returned_object$data), expected=expected_column_count)
   expect_equal(sum(is.na(returned_object$data)), expected=expected_na_cells)
+  expect_s3_class(returned_object$data, "tbl")
 })
 
 test_that("Super-wide 3", {
@@ -64,6 +66,7 @@ test_that("Super-wide 3", {
   expect_equal(nrow(returned_object$data), expected=expected_row_count) # dput(returned_object$data)
   expect_equal(ncol(returned_object$data), expected=expected_column_count)
   expect_equal(sum(is.na(returned_object$data)), expected=expected_na_cells)
+  expect_s3_class(returned_object$data, "tbl")
 })
 
 test_that("Problematic Dictionary", {
@@ -81,6 +84,7 @@ test_that("Problematic Dictionary", {
   expect_equal(nrow(returned_object$data), expected=expected_row_count) # dput(returned_object$data)
   expect_equal(ncol(returned_object$data), expected=expected_column_count)
   expect_equal(sum(is.na(returned_object$data)), expected=expected_na_cells)
+  expect_s3_class(returned_object$data, "tbl")
 })
 
 test_that("normal", {
@@ -104,6 +108,7 @@ test_that("normal", {
   expect_true(returned_object$fields_collapsed=="", "A subset of fields was not requested.")
   expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE)
   expect_true(returned_object$success)
+  expect_s3_class(returned_object$data, "tbl")
 })
 
 rm(credential           )
diff --git a/tests/testthat/test-metadata-utilities.R b/tests/testthat/test-metadata-utilities.R
index 39c96042..705643a4 100644
--- a/tests/testthat/test-metadata-utilities.R
+++ b/tests/testthat/test-metadata-utilities.R
@@ -6,12 +6,17 @@ test_that("Named Captures", {
   choices_1 <- "1, American Indian/Alaska Native | 2, Asian | 3, Native Hawaiian or Other Pacific Islander | 4, Black or African American | 5, White | 6, Unknown / Not Reported"
   ds_boxes <- regex_named_captures(pattern=pattern_checkboxes, text=choices_1)
 
-  ds_expected <- structure(list(id = c("1", "2", "3", "4", "5", "6"), label = c("American Indian/Alaska Native",
-    "Asian", "Native Hawaiian or Other Pacific Islander", "Black or African American",
-    "White", "Unknown / Not Reported")), .Names = c("id", "label"
-    ), row.names = c(NA, -6L), class = "data.frame")
+  ds_expected <- structure(
+    list(
+      id = c("1", "2", "3", "4", "5", "6"),
+      label = c("American Indian/Alaska Native", "Asian", "Native Hawaiian or Other Pacific Islander", "Black or African American", "White", "Unknown / Not Reported")
+    ),
+    class = c("tbl_df", "tbl", "data.frame"),
+    row.names = c(NA, -6L)
+  )
 
   expect_equal(ds_boxes, expected=ds_expected, label="The returned data.frame should be correct") #dput(ds_boxes)
+  expect_s3_class(ds_boxes, "tbl")
 })
 
 test_that("checkbox choices", {
@@ -20,10 +25,15 @@ test_that("checkbox choices", {
   choices_1 <- "1, American Indian/Alaska Native | 2, Asian | 3, Native Hawaiian or Other Pacific Islander | 4, Black or African American | 5, White | 6, Unknown / Not Reported"
   ds_boxes <- checkbox_choices(select_choices=choices_1)
 
-  ds_expected <- structure(list(id = c("1", "2", "3", "4", "5", "6"), label = c("American Indian/Alaska Native",
-    "Asian", "Native Hawaiian or Other Pacific Islander", "Black or African American",
-    "White", "Unknown / Not Reported")), .Names = c("id", "label"
-    ), row.names = c(NA, -6L), class = "data.frame")
+  ds_expected <- structure(
+    list(
+      id = c("1", "2", "3", "4", "5", "6"),
+      label = c("American Indian/Alaska Native", "Asian", "Native Hawaiian or Other Pacific Islander", "Black or African American", "White", "Unknown / Not Reported")
+    ),
+    class = c("tbl_df", "tbl", "data.frame"),
+    row.names = c(NA, -6L)
+  )
 
   expect_equal(ds_boxes, expected=ds_expected, label="The returned data.frame should be correct") #dput(ds_boxes)
+  expect_s3_class(ds_boxes, "tbl")
 })
diff --git a/tests/testthat/test-read-oneshot-eav.R b/tests/testthat/test-read-oneshot-eav.R
index dba8fd31..2f7caf7d 100644
--- a/tests/testthat/test-read-oneshot-eav.R
+++ b/tests/testthat/test-read-oneshot-eav.R
@@ -33,6 +33,7 @@ test_that("default", {
   expect_true(returned_object$filter_logic=="", "A filter was not specified.")
   expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE)
   expect_true(returned_object$success)
+  expect_s3_class(returned_object$data, "tbl")
 })
 test_that("specify-forms", {
   testthat::skip_on_cran()
@@ -56,6 +57,7 @@ test_that("specify-forms", {
   expect_true(returned_object$filter_logic=="", "A filter was not specified.")
   expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE)
   expect_true(returned_object$success)
+  expect_s3_class(returned_object$data, "tbl")
 })
 test_that("raw", {
   testthat::skip_on_cran()
@@ -78,6 +80,7 @@ test_that("raw", {
   expect_true(returned_object$filter_logic=="", "A filter was not specified.")
   expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE)
   expect_true(returned_object$success)
+  expect_s3_class(returned_object$data, "tbl")
 })
 test_that("raw-and-dag", {
   testthat::skip_on_cran()
@@ -100,6 +103,7 @@ test_that("raw-and-dag", {
   expect_true(returned_object$filter_logic=="", "A filter was not specified.")
   expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE)
   expect_true(returned_object$success)
+  expect_s3_class(returned_object$data, "tbl")
 })
 test_that("label-and-dag", {
   testthat::skip_on_cran()
@@ -122,6 +126,7 @@ test_that("label-and-dag", {
   expect_true(returned_object$filter_logic=="", "A filter was not specified.")
   expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE)
   expect_true(returned_object$success)
+  expect_s3_class(returned_object$data, "tbl")
 })
 test_that("label-header", {
   testthat::skip_on_cran()
@@ -145,6 +150,7 @@ test_that("label-header", {
   expect_true(returned_object$filter_logic=="", "A filter was not specified.")
   expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE)
   expect_true(returned_object$success)
+  expect_s3_class(returned_object$data, "tbl")
 })
 test_that("filter-numeric", {
   testthat::skip_on_cran()
@@ -168,6 +174,7 @@ test_that("filter-numeric", {
   expect_equal(returned_object$filter_logic, filter)
   expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE)
   expect_true(returned_object$success)
+  expect_s3_class(returned_object$data, "tbl")
 })
 test_that("filter-character", {
   testthat::skip_on_cran()
@@ -194,6 +201,7 @@ test_that("filter-character", {
   expect_equal(returned_object$filter_logic, filter)
   expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE)
   expect_true(returned_object$success)
+  expect_s3_class(returned_object$data, "tbl")
 })
 test_that("blank-for-gray-status-true", {
   testthat::skip_on_cran()
@@ -221,6 +229,7 @@ test_that("blank-for-gray-status-true", {
   expect_true(returned_object$filter_logic=="", "A filter was not specified.")
   expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE)
   expect_true(returned_object$success)
+  expect_s3_class(returned_object$data, "tbl")
 })
 test_that("blank-for-gray-status-false", {
   testthat::skip_on_cran()
@@ -248,6 +257,7 @@ test_that("blank-for-gray-status-false", {
   expect_true(returned_object$filter_logic=="", "A filter was not specified.")
   expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE)
   expect_true(returned_object$success)
+  expect_s3_class(returned_object$data, "tbl")
 })
 
 test_that("date-range", {
@@ -279,6 +289,7 @@ test_that("date-range", {
   expect_equal(returned_object$filter_logic, "")
   expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE)
   expect_true(returned_object$success)
+  expect_s3_class(returned_object$data, "tbl")
 })
 test_that("bad token -Error", {
   testthat::skip_on_cran()