From 293dc5e392580cf6c438ad0513a00fda1d1feca0 Mon Sep 17 00:00:00 2001 From: Will Beasley Date: Tue, 30 Aug 2022 17:08:05 -0500 Subject: [PATCH] return tibble instead of data.frame close #415 --- R/metadata-utilities.R | 19 +++++++++-------- R/redcap-metadata-read.R | 19 ++++------------- R/redcap-read-oneshot-eav.R | 6 +++--- man/metadata_utilities.Rd | 12 +++++++---- man/redcap_metadata_read.Rd | 15 ++------------ man/redcap_read_oneshot_eav.Rd | 2 +- tests/testthat/test-metadata-read.R | 5 +++++ tests/testthat/test-metadata-utilities.R | 26 ++++++++++++++++-------- tests/testthat/test-read-oneshot-eav.R | 11 ++++++++++ 9 files changed, 63 insertions(+), 52 deletions(-) diff --git a/R/metadata-utilities.R b/R/metadata-utilities.R index 09a87cc5..6fcc4073 100644 --- a/R/metadata-utilities.R +++ b/R/metadata-utilities.R @@ -13,7 +13,7 @@ #' @param perl Indicates if perl-compatible regexps should be used. #' Default is `TRUE`. Optional. #' -#' @return Currently, a [base::data.frame()] is returned a row for each match, +#' @return Currently, a [tibble::tibble()] is returned a row for each match, #' and a column for each *named* group within a match. For the #' `retrieve_checkbox_choices()` function, the columns will be. #' * `id`: The numeric value assigned to each choice (in the data dictionary). @@ -22,13 +22,17 @@ #' @details #' The [regex_named_captures()] function is general, and not specific to #' REDCap; it accepts any arbitrary regular expression. -#' It returns a [base::data.frame()] with as many columns as named matches. +#' It returns a [tibble::tibble()] with as many columns as named matches. #' #' The [checkbox_choices()] function is specialized, and accommodates the #' "select choices" for a *single* REDCap checkbox group (where multiple boxes -#' can be selected). It returns a [base::data.frame()] with two columns, one +#' can be selected). It returns a [tibble::tibble()] with two columns, one #' for the numeric id and one for the text label. #' +#' The parse will probably fail if a label contains a pipe (*i.e.*, `|`), +#' since that the delimiter REDCap uses to separate choices +#' presented to the user. +#' #' @author Will Beasley #' @references See the official documentation for permissible characters in a #' checkbox label. @@ -59,7 +63,7 @@ #' token <- "9A81268476645C4E5F03428B8AC3AA7B" #' #' ds_metadata <- redcap_metadata_read(redcap_uri=uri, token=token)$data -#' choices_2 <- ds_metadata[ds_metadata$field_name=="race", "select_choices_or_calculations"] +#' choices_2 <- ds_metadata[ds_metadata$field_name=="race", ]$select_choices_or_calculations #' #' REDCapR::regex_named_captures(pattern=pattern_boxes, text=choices_2) #' } @@ -70,15 +74,14 @@ #' REDCapR::regex_named_captures(pattern=pattern_boxes, text=choices_3) #' @export -regex_named_captures <- function(pattern, text, perl=TRUE) { - +regex_named_captures <- function(pattern, text, perl = TRUE) { checkmate::assert_character(pattern, any.missing=FALSE, min.chars=0L, len=1) checkmate::assert_character(text , any.missing=FALSE, min.chars=0L, len=1) checkmate::assert_logical( perl , any.missing=FALSE) match <- gregexpr(pattern, text, perl = perl)[[1]] capture_names <- attr(match, "capture.names") - d <- as.data.frame(matrix( + d <- base::data.frame(matrix( data = NA_character_, nrow = length(attr(match, "match.length")), ncol = length(capture_names) @@ -95,7 +98,7 @@ regex_named_captures <- function(pattern, text, perl=TRUE) { attr(match, "capture.length")[, column_name] ) } - d + tibble::as_tibble(d) } #' @rdname metadata_utilities diff --git a/R/redcap-metadata-read.R b/R/redcap-metadata-read.R index 4b8aaa71..29006ef2 100644 --- a/R/redcap-metadata-read.R +++ b/R/redcap-metadata-read.R @@ -1,7 +1,7 @@ #' @title Export the metadata of a REDCap project #' #' @description Export the metadata (as a data dictionary) of a REDCap project -#' as a [base::data.frame()]. Each row in the data dictionary corresponds to +#' as a [tibble::tibble()]. Each row in the data dictionary corresponds to #' one field in the project's dataset. #' #' @param redcap_uri The @@ -28,7 +28,7 @@ #' #' @return Currently, a list is returned with the following elements: #' -#' * `data`: An R [base::data.frame()] of the desired records and columns. +#' * `data`: An R [tibble::tibble()] of the desired fields (as rows). #' * `success`: A boolean value indicating if the operation was apparently #' successful. #' * `status_codes`: A collection of @@ -43,17 +43,6 @@ #' string, separated by commas. #' * `elapsed_seconds`: The duration of the function. #' -#' @details -#' Specifically, it internally uses multiple calls to [redcap_read_oneshot()] -#' to select and return data. Initially, only primary key is queried through -#' the REDCap API. The long list is then subsetted into partitions, whose -#' sizes are determined by the `batch_size` parameter. REDCap is then queried -#' for all variables of the subset's subjects. This is repeated for each -#' subset, before returning a unified [base::data.frame()]. -#' -#' The function allows a delay between calls, which allows the server to -#' attend to other users' requests. -#' #' @author Will Beasley #' #' @references The official documentation can be found on the 'API Help Page' @@ -142,7 +131,7 @@ redcap_metadata_read <- function( # Override the 'success' determination from the http status code # and return an empty data.frame. kernel$success <- FALSE - ds <- data.frame() + ds <- tibble::tibble() outcome_message <- sprintf( "The REDCap metadata export failed. The http status code was %i. The 'raw_text' returned was '%s'.", kernel$status_code, @@ -150,7 +139,7 @@ redcap_metadata_read <- function( ) } # nocov end } else { - ds <- data.frame() #Return an empty data.frame + ds <- tibble::tibble() # Return an empty data.frame outcome_message <- sprintf( "The REDCapR metadata export operation was not successful. The error message was:\n%s", kernel$raw_text diff --git a/R/redcap-read-oneshot-eav.R b/R/redcap-read-oneshot-eav.R index 66550845..1282d701 100644 --- a/R/redcap-read-oneshot-eav.R +++ b/R/redcap-read-oneshot-eav.R @@ -70,7 +70,7 @@ #' `httr` package. See the details below. Optional. #' #' @return Currently, a list is returned with the following elements: -#' * `data`: An R [base::data.frame()] of the desired records and columns. +#' * `data`: A [tibble::tibble()] of the desired records and columns. #' * `success`: A boolean value indicating if the operation was apparently #' successful. #' * `status_code`: The @@ -351,7 +351,7 @@ redcap_read_oneshot_eav <- function( } else { # nocov start kernel$success <- FALSE #Override the 'success' determination from the http status code. - ds_2 <- tibble::tibble() #Return an empty data.frame + ds_2 <- tibble::tibble() # Return an empty data.frame outcome_message <- sprintf( "The REDCap read failed. The http status code was %s. The 'raw_text' returned was '%s'.", kernel$status_code, @@ -361,7 +361,7 @@ redcap_read_oneshot_eav <- function( } } else { # nocov start - ds_2 <- tibble::tibble() #Return an empty data.frame + ds_2 <- tibble::tibble() # Return an empty data.frame outcome_message <- if (any(grepl(kernel$regex_empty, kernel$raw_text))) { "The REDCapR read/export operation was not successful. The returned dataset was empty." } else { diff --git a/man/metadata_utilities.Rd b/man/metadata_utilities.Rd index 54a67f64..9aad48b6 100644 --- a/man/metadata_utilities.Rd +++ b/man/metadata_utilities.Rd @@ -22,7 +22,7 @@ Default is \code{TRUE}. Optional.} to determine the \code{id} and \code{label} values. Required.} } \value{ -Currently, a \code{\link[base:data.frame]{base::data.frame()}} is returned a row for each match, +Currently, a \code{\link[tibble:tibble]{tibble::tibble()}} is returned a row for each match, and a column for each \emph{named} group within a match. For the \code{retrieve_checkbox_choices()} function, the columns will be. \itemize{ @@ -37,12 +37,16 @@ project metadata. \details{ The \code{\link[=regex_named_captures]{regex_named_captures()}} function is general, and not specific to REDCap; it accepts any arbitrary regular expression. -It returns a \code{\link[base:data.frame]{base::data.frame()}} with as many columns as named matches. +It returns a \code{\link[tibble:tibble]{tibble::tibble()}} with as many columns as named matches. The \code{\link[=checkbox_choices]{checkbox_choices()}} function is specialized, and accommodates the "select choices" for a \emph{single} REDCap checkbox group (where multiple boxes -can be selected). It returns a \code{\link[base:data.frame]{base::data.frame()}} with two columns, one +can be selected). It returns a \code{\link[tibble:tibble]{tibble::tibble()}} with two columns, one for the numeric id and one for the text label. + +The parse will probably fail if a label contains a pipe (\emph{i.e.}, \code{|}), +since that the delimiter REDCap uses to separate choices +presented to the user. } \examples{ # The weird ranges are to avoid the pipe character; @@ -68,7 +72,7 @@ uri <- "https://bbmc.ouhsc.edu/redcap/api/" token <- "9A81268476645C4E5F03428B8AC3AA7B" ds_metadata <- redcap_metadata_read(redcap_uri=uri, token=token)$data -choices_2 <- ds_metadata[ds_metadata$field_name=="race", "select_choices_or_calculations"] +choices_2 <- ds_metadata[ds_metadata$field_name=="race", ]$select_choices_or_calculations REDCapR::regex_named_captures(pattern=pattern_boxes, text=choices_2) } diff --git a/man/redcap_metadata_read.Rd b/man/redcap_metadata_read.Rd index dfe70ea0..31f9eb22 100644 --- a/man/redcap_metadata_read.Rd +++ b/man/redcap_metadata_read.Rd @@ -48,7 +48,7 @@ be visible somewhere public. Optional.} \value{ Currently, a list is returned with the following elements: \itemize{ -\item \code{data}: An R \code{\link[base:data.frame]{base::data.frame()}} of the desired records and columns. +\item \code{data}: An R \code{\link[tibble:tibble]{tibble::tibble()}} of the desired fields (as rows). \item \code{success}: A boolean value indicating if the operation was apparently successful. \item \code{status_codes}: A collection of @@ -66,20 +66,9 @@ string, separated by commas. } \description{ Export the metadata (as a data dictionary) of a REDCap project -as a \code{\link[base:data.frame]{base::data.frame()}}. Each row in the data dictionary corresponds to +as a \code{\link[tibble:tibble]{tibble::tibble()}}. Each row in the data dictionary corresponds to one field in the project's dataset. } -\details{ -Specifically, it internally uses multiple calls to \code{\link[=redcap_read_oneshot]{redcap_read_oneshot()}} -to select and return data. Initially, only primary key is queried through -the REDCap API. The long list is then subsetted into partitions, whose -sizes are determined by the \code{batch_size} parameter. REDCap is then queried -for all variables of the subset's subjects. This is repeated for each -subset, before returning a unified \code{\link[base:data.frame]{base::data.frame()}}. - -The function allows a delay between calls, which allows the server to -attend to other users' requests. -} \examples{ \dontrun{ uri <- "https://bbmc.ouhsc.edu/redcap/api/" diff --git a/man/redcap_read_oneshot_eav.Rd b/man/redcap_read_oneshot_eav.Rd index 5cfdfc64..3d681586 100644 --- a/man/redcap_read_oneshot_eav.Rd +++ b/man/redcap_read_oneshot_eav.Rd @@ -110,7 +110,7 @@ be visible somewhere public. Optional.} \value{ Currently, a list is returned with the following elements: \itemize{ -\item \code{data}: An R \code{\link[base:data.frame]{base::data.frame()}} of the desired records and columns. +\item \code{data}: A \code{\link[tibble:tibble]{tibble::tibble()}} of the desired records and columns. \item \code{success}: A boolean value indicating if the operation was apparently successful. \item \code{status_code}: The diff --git a/tests/testthat/test-metadata-read.R b/tests/testthat/test-metadata-read.R index 1408fabe..32a77786 100644 --- a/tests/testthat/test-metadata-read.R +++ b/tests/testthat/test-metadata-read.R @@ -30,6 +30,7 @@ test_that("Super-wide", { expect_equal(nrow(returned_object$data), expected=expected_row_count) # dput(returned_object$data) expect_equal(ncol(returned_object$data), expected=expected_column_count) expect_equal(sum(is.na(returned_object$data)), expected=expected_na_cells) + expect_s3_class(returned_object$data, "tbl") }) test_that("Super-wide 2", { @@ -47,6 +48,7 @@ test_that("Super-wide 2", { expect_equal(nrow(returned_object$data), expected=expected_row_count) # dput(returned_object$data) expect_equal(ncol(returned_object$data), expected=expected_column_count) expect_equal(sum(is.na(returned_object$data)), expected=expected_na_cells) + expect_s3_class(returned_object$data, "tbl") }) test_that("Super-wide 3", { @@ -64,6 +66,7 @@ test_that("Super-wide 3", { expect_equal(nrow(returned_object$data), expected=expected_row_count) # dput(returned_object$data) expect_equal(ncol(returned_object$data), expected=expected_column_count) expect_equal(sum(is.na(returned_object$data)), expected=expected_na_cells) + expect_s3_class(returned_object$data, "tbl") }) test_that("Problematic Dictionary", { @@ -81,6 +84,7 @@ test_that("Problematic Dictionary", { expect_equal(nrow(returned_object$data), expected=expected_row_count) # dput(returned_object$data) expect_equal(ncol(returned_object$data), expected=expected_column_count) expect_equal(sum(is.na(returned_object$data)), expected=expected_na_cells) + expect_s3_class(returned_object$data, "tbl") }) test_that("normal", { @@ -104,6 +108,7 @@ test_that("normal", { expect_true(returned_object$fields_collapsed=="", "A subset of fields was not requested.") expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE) expect_true(returned_object$success) + expect_s3_class(returned_object$data, "tbl") }) rm(credential ) diff --git a/tests/testthat/test-metadata-utilities.R b/tests/testthat/test-metadata-utilities.R index 39c96042..705643a4 100644 --- a/tests/testthat/test-metadata-utilities.R +++ b/tests/testthat/test-metadata-utilities.R @@ -6,12 +6,17 @@ test_that("Named Captures", { choices_1 <- "1, American Indian/Alaska Native | 2, Asian | 3, Native Hawaiian or Other Pacific Islander | 4, Black or African American | 5, White | 6, Unknown / Not Reported" ds_boxes <- regex_named_captures(pattern=pattern_checkboxes, text=choices_1) - ds_expected <- structure(list(id = c("1", "2", "3", "4", "5", "6"), label = c("American Indian/Alaska Native", - "Asian", "Native Hawaiian or Other Pacific Islander", "Black or African American", - "White", "Unknown / Not Reported")), .Names = c("id", "label" - ), row.names = c(NA, -6L), class = "data.frame") + ds_expected <- structure( + list( + id = c("1", "2", "3", "4", "5", "6"), + label = c("American Indian/Alaska Native", "Asian", "Native Hawaiian or Other Pacific Islander", "Black or African American", "White", "Unknown / Not Reported") + ), + class = c("tbl_df", "tbl", "data.frame"), + row.names = c(NA, -6L) + ) expect_equal(ds_boxes, expected=ds_expected, label="The returned data.frame should be correct") #dput(ds_boxes) + expect_s3_class(ds_boxes, "tbl") }) test_that("checkbox choices", { @@ -20,10 +25,15 @@ test_that("checkbox choices", { choices_1 <- "1, American Indian/Alaska Native | 2, Asian | 3, Native Hawaiian or Other Pacific Islander | 4, Black or African American | 5, White | 6, Unknown / Not Reported" ds_boxes <- checkbox_choices(select_choices=choices_1) - ds_expected <- structure(list(id = c("1", "2", "3", "4", "5", "6"), label = c("American Indian/Alaska Native", - "Asian", "Native Hawaiian or Other Pacific Islander", "Black or African American", - "White", "Unknown / Not Reported")), .Names = c("id", "label" - ), row.names = c(NA, -6L), class = "data.frame") + ds_expected <- structure( + list( + id = c("1", "2", "3", "4", "5", "6"), + label = c("American Indian/Alaska Native", "Asian", "Native Hawaiian or Other Pacific Islander", "Black or African American", "White", "Unknown / Not Reported") + ), + class = c("tbl_df", "tbl", "data.frame"), + row.names = c(NA, -6L) + ) expect_equal(ds_boxes, expected=ds_expected, label="The returned data.frame should be correct") #dput(ds_boxes) + expect_s3_class(ds_boxes, "tbl") }) diff --git a/tests/testthat/test-read-oneshot-eav.R b/tests/testthat/test-read-oneshot-eav.R index dba8fd31..2f7caf7d 100644 --- a/tests/testthat/test-read-oneshot-eav.R +++ b/tests/testthat/test-read-oneshot-eav.R @@ -33,6 +33,7 @@ test_that("default", { expect_true(returned_object$filter_logic=="", "A filter was not specified.") expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE) expect_true(returned_object$success) + expect_s3_class(returned_object$data, "tbl") }) test_that("specify-forms", { testthat::skip_on_cran() @@ -56,6 +57,7 @@ test_that("specify-forms", { expect_true(returned_object$filter_logic=="", "A filter was not specified.") expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE) expect_true(returned_object$success) + expect_s3_class(returned_object$data, "tbl") }) test_that("raw", { testthat::skip_on_cran() @@ -78,6 +80,7 @@ test_that("raw", { expect_true(returned_object$filter_logic=="", "A filter was not specified.") expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE) expect_true(returned_object$success) + expect_s3_class(returned_object$data, "tbl") }) test_that("raw-and-dag", { testthat::skip_on_cran() @@ -100,6 +103,7 @@ test_that("raw-and-dag", { expect_true(returned_object$filter_logic=="", "A filter was not specified.") expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE) expect_true(returned_object$success) + expect_s3_class(returned_object$data, "tbl") }) test_that("label-and-dag", { testthat::skip_on_cran() @@ -122,6 +126,7 @@ test_that("label-and-dag", { expect_true(returned_object$filter_logic=="", "A filter was not specified.") expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE) expect_true(returned_object$success) + expect_s3_class(returned_object$data, "tbl") }) test_that("label-header", { testthat::skip_on_cran() @@ -145,6 +150,7 @@ test_that("label-header", { expect_true(returned_object$filter_logic=="", "A filter was not specified.") expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE) expect_true(returned_object$success) + expect_s3_class(returned_object$data, "tbl") }) test_that("filter-numeric", { testthat::skip_on_cran() @@ -168,6 +174,7 @@ test_that("filter-numeric", { expect_equal(returned_object$filter_logic, filter) expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE) expect_true(returned_object$success) + expect_s3_class(returned_object$data, "tbl") }) test_that("filter-character", { testthat::skip_on_cran() @@ -194,6 +201,7 @@ test_that("filter-character", { expect_equal(returned_object$filter_logic, filter) expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE) expect_true(returned_object$success) + expect_s3_class(returned_object$data, "tbl") }) test_that("blank-for-gray-status-true", { testthat::skip_on_cran() @@ -221,6 +229,7 @@ test_that("blank-for-gray-status-true", { expect_true(returned_object$filter_logic=="", "A filter was not specified.") expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE) expect_true(returned_object$success) + expect_s3_class(returned_object$data, "tbl") }) test_that("blank-for-gray-status-false", { testthat::skip_on_cran() @@ -248,6 +257,7 @@ test_that("blank-for-gray-status-false", { expect_true(returned_object$filter_logic=="", "A filter was not specified.") expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE) expect_true(returned_object$success) + expect_s3_class(returned_object$data, "tbl") }) test_that("date-range", { @@ -279,6 +289,7 @@ test_that("date-range", { expect_equal(returned_object$filter_logic, "") expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE) expect_true(returned_object$success) + expect_s3_class(returned_object$data, "tbl") }) test_that("bad token -Error", { testthat::skip_on_cran()