From 6173922adcf1a84bb434a71e8f55da3aef7aa5be Mon Sep 17 00:00:00 2001 From: Will Beasley Date: Sun, 23 Jul 2023 12:24:06 -0500 Subject: [PATCH 01/10] tasks --- .Rbuildignore | 1 + .vscode/tasks.json | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 .vscode/tasks.json diff --git a/.Rbuildignore b/.Rbuildignore index be3ce21..c9a3bf3 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -16,6 +16,7 @@ ^\.gitignore$ ^\.travis\.yml$ ^\.lintr$ +^\.vscode$ ^appveyor\.yml$ ^CONDUCT.md$ ^CODE_OF_CONDUCT.md$ diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..a7d587d --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,43 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "type": "R", + "code": [ + "devtools::build()" + ], + "group": "build", + "problemMatcher": [], + "label": "R: Build" + }, + { + "type": "R", + "code": [ + "devtools::test()" + ], + "problemMatcher": [ + "$testthat" + ], + "group": "test", + "label": "R: Test" + }, + { + "type": "R", + "code": [ + "devtools::check()" + ], + "group": "test", + "problemMatcher": [], + "label": "R: Check" + }, + { + "type": "R", + "code": [ + "devtools::document()" + ], + "group": "build", + "problemMatcher": [], + "label": "R: Document" + } + ] +} \ No newline at end of file From c8e13e8db8ab9fcfe60099890dc7c324deff5e5b Mon Sep 17 00:00:00 2001 From: Will Beasley Date: Sun, 23 Jul 2023 12:24:26 -0500 Subject: [PATCH 02/10] update spelling --- inst/WORDLIST | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/WORDLIST b/inst/WORDLIST index f9a8c71..7cf1526 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -8,6 +8,7 @@ Dev Funders HRSA MIECHV +NIGMS NSE ODBC OUHSC @@ -16,6 +17,7 @@ PatientDOB RStudio's RedcapExamplesAndPatterns SHA +Translational Visel abc alistaire @@ -24,7 +26,6 @@ camelCase codecov cryptographic csv -dbConnect devtools dplyr dsn @@ -41,7 +42,6 @@ io lettercase libcurl mrn -na nonmissing odbc openssl From d6cc92af3a470181797a8052e9c153cbbb50b6bb Mon Sep 17 00:00:00 2001 From: Will Beasley Date: Sun, 23 Jul 2023 12:24:41 -0500 Subject: [PATCH 03/10] tidy readme --- README.md | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 388ee5f..d4d91e3 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,10 @@ -| [GitHub](https://github.com/OuhscBbmc/OuhscMunge) | [Travis-CI](https://app.travis-ci.com/OuhscBbmc/OuhscMunge/builds) | [CodeCov](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/) | -| :----- | :---------------------------: | :-------: | -| [Main](https://github.com/OuhscBbmc/OuhscMunge/tree/main) | [![R-CMD-check](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml/badge.svg)](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml) | [![codecov](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/branch/main/graph/badge.svg?token=O1mkr31GRw)](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge) | -| [Dev](https://github.com/OuhscBbmc/OuhscMunge/tree/dev) | [![R-CMD-check](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml/badge.svg?branch=dev)](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml) | [![codecov](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/branch/dev/graph/badge.svg?token=O1mkr31GRw)](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/branch/dev) - | - OuhscMunge ========== + Data manipulation operations commonly used by the [Biomedical and Behavioral Methodology Core](http://www.ouhsc.edu/bbmc/) within the [Department of Pediatrics](http://www.oumedicine.com/pediatrics) of the [University of Oklahoma Health Sciences Center](http://ouhsc.edu/). ### Download and Installation Instructions + The *development* version of `OuhscMunge` can be installed from [GitHub](https://github.com/OuhscBbmc/OuhscMunge) after installing the `remotes` package. (The *release* version will be available on [CRAN](https://cran.r-project.org/) later.) + ```r install.packages("remotes") # If it's not already installed. remotes::install_github("OuhscBbmc/OuhscMunge") ``` ### Collaborative Development + We encourage input and collaboration from the overall community. If you're familiar with GitHub and R packages, feel free to submit a [pull request](https://github.com/OuhscBbmc/OuhscMunge/pulls). If you'd like to report a bug or make a suggestion, please create a GitHub [issue](https://github.com/OuhscBbmc/OuhscMunge/issues); issues are a usually a good place to ask public questions too. However, feel free to email Will (). Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms. ### Thanks to Funders -*OUHSC CCAN Independent Evaluation of the State of Oklahoma Competitive Maternal, Infant, and Early Childhood Home Visiting ([MIECHV](https://mchb.hrsa.gov/programs-impact/programs/home-visiting)) Project.*: Evaluates MIECHV expansion and enhancement of Evidence-based Home Visitation programs in four Oklahoma counties. [HRSA/ACF D89MC23154](https://perf-data.hrsa.gov/mchb/DGISReports/Abstract/AbstractDetails.aspx?Source=TVIS&GrantNo=D89MC23154&FY=2012). + +* *Oklahoma Shared Clinical and Translational Resources*, sponsored by [NIH NIGMS; U54 GM104938](https://grantome.com/grant/NIH/U54-GM104938). Judith A. James, PI, OUHSC; 2013-2018. +* *Oklahoma Shared Clinical and Translational Resources*, sponsored by [NIH U54GM104938](https://taggs.hhs.gov/Detail/AwardDetail?arg_AwardNum=U54GM104938&arg_ProgOfficeCode=127); 2020-2021. +* *OUHSC CCAN Independent Evaluation of the State of Oklahoma Competitive Maternal, Infant, and Early Childhood Home Visiting ([MIECHV](https://mchb.hrsa.gov/programs-impact/programs/home-visiting)) Project.*: Evaluates MIECHV expansion and enhancement of Evidence-based Home Visitation programs in four Oklahoma counties. [HRSA/ACF D89MC23154](https://perf-data.hrsa.gov/mchb/DGISReports/Abstract/AbstractDetails.aspx?Source=TVIS&GrantNo=D89MC23154&FY=2012). (So far) the primary developers of OuhscMunge are the external evaluators for [Oklahoma's MIECHV](https://www.ok.gov/health/Child_and_Family_Health/Family_Support_and_Prevention_Service/MIECHV_Program_-_Federal_Home_Visiting_Grant/MIECHV_Program_Resources/index.html) program. @@ -39,3 +40,10 @@ Main Branch: Dev Branch: ![codecov.io](http://codecov.io/github/OuhscBbmc/OuhscMunge/branch.svg?branch=dev) + +### Build Status and Package Characteristics + +| [GitHub](https://github.com/OuhscBbmc/OuhscMunge) | [Travis-CI](https://app.travis-ci.com/OuhscBbmc/OuhscMunge/builds) | [CodeCov](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/) | +| :----- | :---------------------------: | :-------: | +| [Main](https://github.com/OuhscBbmc/OuhscMunge/tree/main) | [![R-CMD-check](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml/badge.svg)](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml) | [![codecov](https://codecov.io/gh/OuhscBbmc/OuhscMunge/branch/main/graph/badge.svg?token=O1mkr31GRw)](https://codecov.io/gh/OuhscBbmc/OuhscMunge) | +| [Dev](https://github.com/OuhscBbmc/OuhscMunge/tree/dev) | [![R-CMD-check](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml/badge.svg?branch=dev)](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml) | [![codecov](https://codecov.io/gh/OuhscBbmc/OuhscMunge/branch/dev/graph/badge.svg?token=O1mkr31GRw)](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/tree/dev) | From a0e35dce020232c6add07abc7b40137502e3b489 Mon Sep 17 00:00:00 2001 From: Will Beasley Date: Sat, 28 Oct 2023 11:49:59 -0500 Subject: [PATCH 04/10] starting `row_sum()` ref #126 --- NAMESPACE | 1 + R/row.R | 114 +++++++++++++++++++++++++++++++++++++++++++++++++ man/row_sum.Rd | 57 +++++++++++++++++++++++++ 3 files changed, 172 insertions(+) create mode 100644 R/row.R create mode 100644 man/row_sum.Rd diff --git a/NAMESPACE b/NAMESPACE index 6a9c34f..0a85b23 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,6 +37,7 @@ export(readr_spec_aligned) export(replace_nas_with_explicit) export(replace_with_nas) export(retrieve_key_value) +export(row_sum) export(snake_case) export(trim_character) export(trim_date) diff --git a/R/row.R b/R/row.R new file mode 100644 index 0000000..9357e27 --- /dev/null +++ b/R/row.R @@ -0,0 +1,114 @@ +#' @name row_sum +#' @title Find the sum of selected columns within a row +#' +#' @description Sums across columns within a row, +#' while accounting for nonmissingness. +#' Specify the desired columns by passing their explicit column names or +#' by passing a regular expression to matches the column names. +#' +#' @param d The data.frame containing the values to sum. Required. +#' @param columns_to_average A character vector containing the columns +#' names to sum. +#' If empty, `pattern` is used to select columns. Optional. +#' @param pattern A regular expression pattern passed to [base::grep()] +#' (with `perl = TRUE`). Optional +#' @param new_column_name The name of the new column that represents the sum +#' of the specified columns. Required. +#' @param threshold_proportion Designates the minimum proportion of columns +#' that have a nonmissing values (within each row) in order to return a sum. +#' Required; defaults to to 0.75. +#' @param vebose a logical value to designate if extra information is +#' displayed in the console, +#' such as which columns are matched by `pattern`. +#' +#' @return The data.frame `d`, with the additional column containing the row sum. +#' +#' @details +#' If the specified columns are all logicals or integers, +#' the new column will be an [integer]. +#' Otherwise the new column will be a [double]. +#' +#' @note +#' @author Will Beasley +#' @examples +#' library(OuhscMunge) #Load the package into the current R session. + +#' +#' @export +row_sum <- function( + d, + columns_to_average = character(0), + pattern, + new_column_name = "row_sum", + threshold_proportion = .75, + verbose = FALSE +) { + + if (length(columns_to_average) == 0L) { + columns_to_average <- + d |> + colnames() |> + grep( + x = _, + pattern = pattern, + value = TRUE, + perl = TRUE + ) + + if (verbose) { + message( + "The following columns will be summed:\n- ", + paste(columns_to_average, collapse = "\n- ") + ) + } + } + + cast_to_integer <- + d |> + dplyr::select(!!columns_to_average) |> + purrr::every( + \(x) { + is.logical(x) | is.integer(x) + } + ) + + rs <- nonmissing_count <- NULL + d <- + d |> + dplyr::mutate( + rs = # Finding the sum (used by m4) + rowSums( + dplyr::across(!!columns_to_average), + na.rm = TRUE + ), + # rs = dplyr::if_else(cast_to_integer, as.integer(rs), rs), + nonmissing_count = + rowSums( + dplyr::across( + !!columns_to_average, + .fns = \(x) { !is.na(x) } + ) + ), + nonmissing_proportion = nonmissing_count / length(columns_to_average), + {{new_column_name}} := + dplyr::if_else( + threshold_proportion <= nonmissing_proportion, + rs, + # rs / nonmissing_count, + NA_real_ + ) + ) |> + dplyr::select( + -rs, + -nonmissing_count, + -nonmissing_proportion, + ) + # Alternatively, return just the new columns + # dplyr::pull({{new_column_name}}) + + if (cast_to_integer) { + d[[new_column_name]] <- as.integer(d[[new_column_name]]) + } + + d +} diff --git a/man/row_sum.Rd b/man/row_sum.Rd new file mode 100644 index 0000000..061712e --- /dev/null +++ b/man/row_sum.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/row.R +\name{row_sum} +\alias{row_sum} +\title{Find the sum of selected columns within a row} +\usage{ +row_sum( + d, + columns_to_average = character(0), + pattern, + new_column_name = "row_sum", + threshold_proportion = 0.75, + verbose = FALSE +) +} +\arguments{ +\item{d}{The data.frame containing the values to sum. Required.} + +\item{columns_to_average}{A character vector containing the columns +names to sum. +If empty, \code{pattern} is used to select columns. Optional.} + +\item{pattern}{A regular expression pattern passed to \code{\link[base:grep]{base::grep()}} +(with \code{perl = TRUE}). Optional} + +\item{new_column_name}{The name of the new column that represents the sum +of the specified columns. Required.} + +\item{threshold_proportion}{Designates the minimum proportion of columns +that have a nonmissing values (within each row) in order to return a sum. +Required; defaults to to 0.75.} + +\item{vebose}{a logical value to designate if extra information is +displayed in the console, +such as which columns are matched by \code{pattern}.} +} +\value{ +The data.frame \code{d}, with the additional column containing the row sum. +} +\description{ +Sums across columns within a row, +while accounting for nonmissingness. +Specify the desired columns by passing their explicit column names or +by passing a regular expression to matches the column names. +} +\details{ +If the specified columns are all logicals or integers, +the new column will be an \link{integer}. +Otherwise the new column will be a \link{double}. +} +\examples{ +library(OuhscMunge) #Load the package into the current R session. + +} +\author{ +Will Beasley +} From 4457547c61eccb9e8af2c1fced0b9eff9368500c Mon Sep 17 00:00:00 2001 From: Will Beasley Date: Sat, 28 Oct 2023 12:10:04 -0500 Subject: [PATCH 05/10] `row_sum()` checks ref #126 --- NAMESPACE | 1 + R/row.R | 17 +++++++++++++---- inst/WORDLIST | 1 + man/row_sum.Rd | 4 ++-- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 0a85b23..5e94663 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,6 +48,7 @@ export(update_packages_addin) export(upload_sqls_odbc) export(verify_value_headstart) importFrom(magrittr,"%>%") +importFrom(rlang,":=") importFrom(rlang,.data) importFrom(utils,capture.output) importFrom(utils,installed.packages) diff --git a/R/row.R b/R/row.R index 9357e27..6008259 100644 --- a/R/row.R +++ b/R/row.R @@ -17,7 +17,7 @@ #' @param threshold_proportion Designates the minimum proportion of columns #' that have a nonmissing values (within each row) in order to return a sum. #' Required; defaults to to 0.75. -#' @param vebose a logical value to designate if extra information is +#' @param vv a logical value to designate if extra information is #' displayed in the console, #' such as which columns are matched by `pattern`. #' @@ -30,6 +30,7 @@ #' #' @note #' @author Will Beasley +#' @importFrom rlang := #' @examples #' library(OuhscMunge) #Load the package into the current R session. @@ -41,8 +42,16 @@ row_sum <- function( pattern, new_column_name = "row_sum", threshold_proportion = .75, - verbose = FALSE + vv = FALSE ) { + checkmate::assert_data_frame(d) + checkmate::assert_character(columns_to_average , any.missing = FALSE) + checkmate::assert_character(pattern , min.len = 0, max.len = 1) + checkmate::assert_character(new_column_name , len = 1) + checkmate::assert_double( threshold_proportion, len = 1) + checkmate::assert_logical( vv , len = 1) + + if (length(columns_to_average) == 0L) { columns_to_average <- @@ -55,7 +64,7 @@ row_sum <- function( perl = TRUE ) - if (verbose) { + if (vv) { message( "The following columns will be summed:\n- ", paste(columns_to_average, collapse = "\n- ") @@ -72,7 +81,7 @@ row_sum <- function( } ) - rs <- nonmissing_count <- NULL + rs <- nonmissing_count <- nonmissing_proportion <- NULL d <- d |> dplyr::mutate( diff --git a/inst/WORDLIST b/inst/WORDLIST index 7cf1526..0a2bfae 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -43,6 +43,7 @@ lettercase libcurl mrn nonmissing +nonmissingness odbc openssl patientdob diff --git a/man/row_sum.Rd b/man/row_sum.Rd index 061712e..3ccd138 100644 --- a/man/row_sum.Rd +++ b/man/row_sum.Rd @@ -10,7 +10,7 @@ row_sum( pattern, new_column_name = "row_sum", threshold_proportion = 0.75, - verbose = FALSE + vv = FALSE ) } \arguments{ @@ -30,7 +30,7 @@ of the specified columns. Required.} that have a nonmissing values (within each row) in order to return a sum. Required; defaults to to 0.75.} -\item{vebose}{a logical value to designate if extra information is +\item{vv}{a logical value to designate if extra information is displayed in the console, such as which columns are matched by \code{pattern}.} } From 628fbe7257b8dd80b0a9cdb99a76dc346a94a4dc Mon Sep 17 00:00:00 2001 From: Will Beasley Date: Sat, 28 Oct 2023 12:14:24 -0500 Subject: [PATCH 06/10] change to `verbose` I'm not sure why it was producing errors before ref #126 --- R/row.R | 11 ++++------- man/row_sum.Rd | 4 ++-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/R/row.R b/R/row.R index 6008259..bde6bf7 100644 --- a/R/row.R +++ b/R/row.R @@ -17,7 +17,7 @@ #' @param threshold_proportion Designates the minimum proportion of columns #' that have a nonmissing values (within each row) in order to return a sum. #' Required; defaults to to 0.75. -#' @param vv a logical value to designate if extra information is +#' @param verbose a logical value to designate if extra information is #' displayed in the console, #' such as which columns are matched by `pattern`. #' @@ -42,16 +42,14 @@ row_sum <- function( pattern, new_column_name = "row_sum", threshold_proportion = .75, - vv = FALSE + verbose = FALSE ) { checkmate::assert_data_frame(d) checkmate::assert_character(columns_to_average , any.missing = FALSE) checkmate::assert_character(pattern , min.len = 0, max.len = 1) checkmate::assert_character(new_column_name , len = 1) checkmate::assert_double( threshold_proportion, len = 1) - checkmate::assert_logical( vv , len = 1) - - + checkmate::assert_logical( verbose , len = 1) if (length(columns_to_average) == 0L) { columns_to_average <- @@ -64,7 +62,7 @@ row_sum <- function( perl = TRUE ) - if (vv) { + if (verbose) { message( "The following columns will be summed:\n- ", paste(columns_to_average, collapse = "\n- ") @@ -90,7 +88,6 @@ row_sum <- function( dplyr::across(!!columns_to_average), na.rm = TRUE ), - # rs = dplyr::if_else(cast_to_integer, as.integer(rs), rs), nonmissing_count = rowSums( dplyr::across( diff --git a/man/row_sum.Rd b/man/row_sum.Rd index 3ccd138..e0528ed 100644 --- a/man/row_sum.Rd +++ b/man/row_sum.Rd @@ -10,7 +10,7 @@ row_sum( pattern, new_column_name = "row_sum", threshold_proportion = 0.75, - vv = FALSE + verbose = FALSE ) } \arguments{ @@ -30,7 +30,7 @@ of the specified columns. Required.} that have a nonmissing values (within each row) in order to return a sum. Required; defaults to to 0.75.} -\item{vv}{a logical value to designate if extra information is +\item{verbose}{a logical value to designate if extra information is displayed in the console, such as which columns are matched by \code{pattern}.} } From 60562c8b63f429b3c84d680fa2f4746499126b35 Mon Sep 17 00:00:00 2001 From: Will Beasley Date: Sat, 28 Oct 2023 12:40:10 -0500 Subject: [PATCH 07/10] examples ref #146 --- DESCRIPTION | 1 + R/row.R | 39 ++++++++++++++++++++++++++++++++++----- man/row_sum.Rd | 32 +++++++++++++++++++++++++++++++- 3 files changed, 66 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6a7aa71..be1026a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,6 +31,7 @@ Suggests: RODBC, spelling, testthat, + tidyr, tinytex Encoding: UTF-8 Language: en-US diff --git a/R/row.R b/R/row.R index bde6bf7..ec7d04b 100644 --- a/R/row.R +++ b/R/row.R @@ -28,28 +28,57 @@ #' the new column will be an [integer]. #' Otherwise the new column will be a [double]. #' -#' @note #' @author Will Beasley #' @importFrom rlang := #' @examples #' library(OuhscMunge) #Load the package into the current R session. - +#' mtcars |> +#' row_sum( +#' columns_to_average = c("cyl", "disp", "vs", "carb"), +#' new_column_name = "engine_sum" +#' ) +#' +#' if (require(tidyr)) +#' tidyr::billboard |> +#' row_sum( +#' pattern = "^wk\\d{1,2}$", +#' new_column_name = "week_sum", +#' threshold_proportion = .1, +#' verbose = TRUE +#' ) |> +#' dplyr::select( +#' artist, +#' date.entered, +#' week_sum, +#' ) #' +#' tidyr::billboard |> +#' row_sum( +#' pattern = "^wk\\d$", +#' new_column_name = "week_sum", +#' verbose = TRUE +#' ) |> +#' dplyr::select( +#' artist, +#' date.entered, +#' week_sum, +#' ) + #' @export row_sum <- function( d, columns_to_average = character(0), - pattern, + pattern = "", new_column_name = "row_sum", threshold_proportion = .75, verbose = FALSE ) { checkmate::assert_data_frame(d) checkmate::assert_character(columns_to_average , any.missing = FALSE) - checkmate::assert_character(pattern , min.len = 0, max.len = 1) + checkmate::assert_character(pattern , len = 1) checkmate::assert_character(new_column_name , len = 1) checkmate::assert_double( threshold_proportion, len = 1) - checkmate::assert_logical( verbose , len = 1) + checkmate::assert_logical( verbose , len = 1) if (length(columns_to_average) == 0L) { columns_to_average <- diff --git a/man/row_sum.Rd b/man/row_sum.Rd index e0528ed..0358176 100644 --- a/man/row_sum.Rd +++ b/man/row_sum.Rd @@ -7,7 +7,7 @@ row_sum( d, columns_to_average = character(0), - pattern, + pattern = "", new_column_name = "row_sum", threshold_proportion = 0.75, verbose = FALSE @@ -50,7 +50,37 @@ Otherwise the new column will be a \link{double}. } \examples{ library(OuhscMunge) #Load the package into the current R session. +mtcars |> + row_sum( + columns_to_average = c("cyl", "disp", "vs", "carb"), + new_column_name = "engine_sum" + ) +if (require(tidyr)) + tidyr::billboard |> + row_sum( + pattern = "^wk\\\\d{1,2}$", + new_column_name = "week_sum", + threshold_proportion = .1, + verbose = TRUE + ) |> + dplyr::select( + artist, + date.entered, + week_sum, + ) + + tidyr::billboard |> + row_sum( + pattern = "^wk\\\\d$", + new_column_name = "week_sum", + verbose = TRUE + ) |> + dplyr::select( + artist, + date.entered, + week_sum, + ) } \author{ Will Beasley From bef38cf11026e192dc1ca966ebb4dbbdfa5994a3 Mon Sep 17 00:00:00 2001 From: Will Beasley Date: Sat, 28 Oct 2023 13:09:00 -0500 Subject: [PATCH 08/10] tests for row_sum() ref #126 --- tests/testthat/test-row.R | 150 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 tests/testthat/test-row.R diff --git a/tests/testthat/test-row.R b/tests/testthat/test-row.R new file mode 100644 index 0000000..89d7515 --- /dev/null +++ b/tests/testthat/test-row.R @@ -0,0 +1,150 @@ +library(testthat) + +test_that("mtcars -engine_sum", { + expected <- structure( + list(mpg = c(21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, + 24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4, + 30.4, 33.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8, + 19.7, 15, 21.4), cyl = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, + 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4), + disp = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8, + 167.6, 167.6, 275.8, 275.8, 275.8, 472, 460, 440, 78.7, 75.7, + 71.1, 120.1, 318, 304, 350, 400, 79, 120.3, 95.1, 351, 145, + 301, 121), hp = c(110, 110, 93, 110, 175, 105, 245, 62, 95, + 123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 150, + 150, 245, 175, 66, 91, 113, 264, 175, 335, 109), drat = c(3.9, + 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92, + 3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76, + 3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11 + ), wt = c(2.62, 2.875, 2.32, 3.215, 3.44, 3.46, 3.57, 3.19, + 3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2, + 1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 3.845, 1.935, 2.14, + 1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46, 17.02, 18.61, + 19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3, 18.9, 17.4, 17.6, + 18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9, 20.01, 16.87, + 17.3, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6 + ), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1), am = c(1, + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), gear = c(4, 4, 4, 3, + 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3, + 3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4, 4, 1, 1, 2, 1, 4, + 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1, + 2, 2, 4, 6, 8, 2), engine_sum = c(170, 170, 114, 266, 370, + 233, 372, 153.7, 147.8, 178.6, 178.6, 286.8, 286.8, 286.8, + 484, 472, 452, 84.7, 82.7, 77.1, 126.1, 328, 314, 362, 410, + 85, 126.3, 102.1, 363, 157, 317, 128)), row.names = c("Mazda RX4", + "Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout", + "Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280", + "Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood", + "Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic", + "Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin", + "Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2", + "Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora", + "Volvo 142E"), class = "data.frame" + ) + + actual <- + mtcars |> + row_sum( + columns_to_average = c("cyl", "disp", "vs", "carb"), + new_column_name = "engine_sum" + ) + + # expected <- dput(actual) + testthat::expect_equal(actual, expected) +}) + +test_that("mtcars -all_columns", { + expected <- + c( + NA, NA, 1403, 1342, 1012, 753, NA, 1041, 533, 1355, 839, 419, + 531, 463, 1025, NA, 2328, NA, 1641, 751, 957, 647, 950, 561, + 570, 903, 647, 1250, 713, 1086, 788, NA, 1283, 1070, 1300, 626, + 1110, 768, 766, 871, 1032, 777, 1143, NA, NA, 916, 1460, NA, + 675, 946, 904, NA, 740, 994, NA, 812, 1189, 1173, 1011, NA, 1249, + 946, 2101, 1590, 1153, NA, 806, 877, 1492, 902, 1362, 968, 1371, + NA, 415, 734, 668, 1084, NA, 696, 753, 969, 759, 942, 865, 865, + 1162, 753, 996, NA, 614, 301, 1021, 624, 698, NA, NA, 1094, NA, + 974, 655, 1067, 1033, 683, 833, NA, NA, NA, 1383, 1226, 1039, + 1038, 1000, 585, 1176, 549, NA, 1166, 1426, 1499, 773, 1220, + 973, 692, NA, 1025, 1373, 616, 500, 685, 982, 668, 1181, 317, + 1039, 1078, 623, 1162, 1149, 951, 698, 753, 466, 555, 626, 734, + 1318, 494, 1140, 933, 1322, 1128, 913, 910, NA, 892, 1044, 909, + 814, NA, 780, 724, 765, NA, NA, NA, NA, 1315, NA, 683, 915, 950, + 780, 771, NA, 753, 949, 1669, 1342, 1470, 1206, 998, 1195, 1179, + NA, 1142, 855, 748, 1434, 1853, 387, 323, NA, 965, 668, 1118, + NA, 1263, 921, 1439, 1121, 994, 884, 910, 704, 882, 864, 1722, + 844, NA, 1090, 829, 1476, 552, 1013, 589, 1215, 661, 328, 542, + 510, 1172, 1050, 592, 715, NA, 811, 850, 712, NA, 1098, 1305, + NA, 588, 596, 605, NA, 1603, 945, NA, NA, 946, 1180, 1236, 1476, + 796, 816, 1659, 890, 1278, 518, 1057, 273, 931, 573, NA, 1261, + 614, NA, 1216, 718, NA, 1241, 632, 537, 1652, 748, NA, 1287, + 531, 998, 1090, 530, 720, NA, 888, 988, 1128, 1255, 1214, 894, + 1169, NA, NA, NA, 958, 569, 1324, 728, 938, 1339, 997, 991, NA, + 1052, NA, 1152, NA, 1259, 1143, 879, 777, 643, 617, 992, 1270, + NA, 867, 1433, 1077, 1283, 773, 665, NA, 1244, NA, 727 + ) + + actual <- + tidyr::billboard |> + row_sum( + pattern = "^wk\\d{1,2}$", + new_column_name = "week_sum", + threshold_proportion = .1, + verbose = TRUE + ) |> + dplyr::pull(week_sum) + + # expected <- dput(actual) + testthat::expect_equal(actual, expected) +}) + + +test_that("mtcars -all_columns", { + expected <- + c( + 598, NA, 567, 601, 319, 202, NA, 422, 259, 606, 664, 248, 241, + 207, 416, NA, 772, NA, 697, 335, 381, 302, 436, 156, 280, 390, + 342, 644, 443, 571, 788, NA, 729, 472, 537, 313, 659, 668, 514, + 585, 420, 271, 407, NA, NA, 504, 746, NA, 675, 772, 812, 434, + 398, 511, 634, 630, 506, 608, 754, NA, 473, 468, 577, 668, 809, + NA, 331, 470, 720, 708, 583, 438, 745, NA, 274, 478, 325, 616, + NA, 411, 662, 508, 234, 503, 366, 428, 772, 497, 578, NA, 174, + 112, 524, 171, 612, NA, NA, 501, NA, 489, 363, 540, 770, 443, + 496, NA, 655, NA, 614, 602, 587, 787, 376, 287, 564, 380, 362, + 881, 716, 608, 677, 423, 616, 362, NA, 648, 561, 461, 252, 685, + 412, 452, 572, 219, 529, 601, 358, 509, 579, 487, 207, 413, 270, + 555, 314, 734, 790, 247, 500, 461, 618, 565, 464, 576, NA, 447, + 604, 462, 582, NA, 324, 724, 665, NA, NA, NA, NA, 511, NA, 683, + 233, 768, 588, 674, 687, 448, 756, 845, 632, 379, 566, 509, 552, + 789, 608, 505, 474, 460, 715, 839, 387, 102, NA, 441, 180, 467, + NA, 521, 621, 625, 591, 543, 423, 572, 704, 595, 451, 736, 749, + NA, 465, 829, 406, 374, 537, 360, 554, 404, 131, 318, 170, 588, + 541, 371, 403, NA, 440, 850, 712, NA, 437, 602, NA, 331, 114, + 605, NA, 749, 215, NA, NA, 594, 607, 636, 693, 478, 431, 749, + 556, 510, 206, 424, 44, 358, 243, NA, 534, 614, NA, 699, 299, + NA, 540, 385, 296, 638, 343, NA, 653, 254, 515, 341, 344, 208, + NA, 604, 528, 752, 611, 494, 448, 384, 443, NA, NA, 673, 253, + 514, 352, 749, 498, 447, 802, NA, 600, NA, 610, 486, 517, 521, + 382, 358, 457, 617, 802, 478, NA, 525, 592, 429, 785, 675, 665, + NA, 764, NA, 240 + ) + + expected_message <- "The following columns will be summed:\n- wk1\n- wk2\n- wk3\n- wk4\n- wk5\n- wk6\n- wk7\n- wk8\n- wk9" + + expect_message( + regexp = expected_message,{ + actual <- + tidyr::billboard |> + row_sum( + pattern = "^wk\\d$", + new_column_name = "week_sum", + verbose = TRUE + ) |> + dplyr::pull(week_sum) + }) + + # expected <- dput(actual) + testthat::expect_equal(actual, expected) +}) From e1c187f4d783128426ce8b17dcb0e46eaf3fe9de Mon Sep 17 00:00:00 2001 From: Will Beasley Date: Sat, 28 Oct 2023 14:13:26 -0500 Subject: [PATCH 09/10] include count of nonmissing cells ref #126 --- R/row.R | 51 ++++++++++++++----- man/row_sum.Rd | 18 ++++++- tests/testthat/test-row.R | 103 +++++++++++++++++++++++++++++++++++++- 3 files changed, 157 insertions(+), 15 deletions(-) diff --git a/R/row.R b/R/row.R index ec7d04b..5d08051 100644 --- a/R/row.R +++ b/R/row.R @@ -17,11 +17,18 @@ #' @param threshold_proportion Designates the minimum proportion of columns #' that have a nonmissing values (within each row) in order to return a sum. #' Required; defaults to to 0.75. +#' @param nonmissing_count_name If a non-NA value is passed, +#' a second column will be added to `d` that contains the row's count +#' of nonmissing items among the selected columns. +#' Must be a valid column name. Optional. #' @param verbose a logical value to designate if extra information is #' displayed in the console, #' such as which columns are matched by `pattern`. #' -#' @return The data.frame `d`, with the additional column containing the row sum. +#' @return The data.frame `d`, +#' with the additional column containing the row sum. +#' If a valid value is passed to `nonmissing_count_name`, +#' a second column will be added as well. #' #' @details #' If the specified columns are all logicals or integers, @@ -38,6 +45,13 @@ #' new_column_name = "engine_sum" #' ) #' +#' mtcars |> +#' row_sum( +#' columns_to_average = c("cyl", "disp", "vs", "carb"), +#' new_column_name = "engine_sum", +#' nonmissing_count_name = "engine_nonmissing_count" +#' ) +#' #' if (require(tidyr)) #' tidyr::billboard |> #' row_sum( @@ -71,6 +85,7 @@ row_sum <- function( pattern = "", new_column_name = "row_sum", threshold_proportion = .75, + nonmissing_count_name = NA_character_, verbose = FALSE ) { checkmate::assert_data_frame(d) @@ -78,6 +93,7 @@ row_sum <- function( checkmate::assert_character(pattern , len = 1) checkmate::assert_character(new_column_name , len = 1) checkmate::assert_double( threshold_proportion, len = 1) + checkmate::assert_character(nonmissing_count_name, len = 1, min.chars = 1, any.missing = TRUE) checkmate::assert_logical( verbose , len = 1) if (length(columns_to_average) == 0L) { @@ -108,35 +124,46 @@ row_sum <- function( } ) - rs <- nonmissing_count <- nonmissing_proportion <- NULL + .rs <- .nonmissing_count <- .nonmissing_proportion <- NULL d <- d |> dplyr::mutate( - rs = # Finding the sum (used by m4) + .rs = rowSums( dplyr::across(!!columns_to_average), na.rm = TRUE ), - nonmissing_count = + .nonmissing_count = rowSums( dplyr::across( !!columns_to_average, .fns = \(x) { !is.na(x) } ) ), - nonmissing_proportion = nonmissing_count / length(columns_to_average), + .nonmissing_proportion = .nonmissing_count / length(columns_to_average), {{new_column_name}} := dplyr::if_else( - threshold_proportion <= nonmissing_proportion, - rs, - # rs / nonmissing_count, + threshold_proportion <= .nonmissing_proportion, + .rs, + # .rs / .nonmissing_count, NA_real_ ) - ) |> + ) + + if (!is.na(nonmissing_count_name)) { + d <- + d |> + dplyr::mutate( + {{nonmissing_count_name}} := .nonmissing_count, + ) + } + + d <- + d |> dplyr::select( - -rs, - -nonmissing_count, - -nonmissing_proportion, + -.rs, + -.nonmissing_count, + -.nonmissing_proportion, ) # Alternatively, return just the new columns # dplyr::pull({{new_column_name}}) diff --git a/man/row_sum.Rd b/man/row_sum.Rd index 0358176..7c3613c 100644 --- a/man/row_sum.Rd +++ b/man/row_sum.Rd @@ -10,6 +10,7 @@ row_sum( pattern = "", new_column_name = "row_sum", threshold_proportion = 0.75, + nonmissing_count_name = NA_character_, verbose = FALSE ) } @@ -30,12 +31,20 @@ of the specified columns. Required.} that have a nonmissing values (within each row) in order to return a sum. Required; defaults to to 0.75.} +\item{nonmissing_count_name}{If a non-NA value is passed, +a second column will be added to \code{d} that contains the row's count +of nonmissing items among the selected columns. +Must be a valid column name. Optional.} + \item{verbose}{a logical value to designate if extra information is displayed in the console, such as which columns are matched by \code{pattern}.} } \value{ -The data.frame \code{d}, with the additional column containing the row sum. +The data.frame \code{d}, +with the additional column containing the row sum. +If a valid value is passed to \code{nonmissing_count_name}, +a second column will be added as well. } \description{ Sums across columns within a row, @@ -56,6 +65,13 @@ mtcars |> new_column_name = "engine_sum" ) +mtcars |> + row_sum( + columns_to_average = c("cyl", "disp", "vs", "carb"), + new_column_name = "engine_sum", + nonmissing_count_name = "engine_nonmissing_count" + ) + if (require(tidyr)) tidyr::billboard |> row_sum( diff --git a/tests/testthat/test-row.R b/tests/testthat/test-row.R index 89d7515..dd2926a 100644 --- a/tests/testthat/test-row.R +++ b/tests/testthat/test-row.R @@ -55,7 +55,66 @@ test_that("mtcars -engine_sum", { testthat::expect_equal(actual, expected) }) -test_that("mtcars -all_columns", { +test_that("mtcars -engine_sum & nonmissing count", { + expected <- + structure( + list(mpg = c(21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, + 24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4, + 30.4, 33.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8, + 19.7, 15, 21.4), cyl = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, + 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4), + disp = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8, + 167.6, 167.6, 275.8, 275.8, 275.8, 472, 460, 440, 78.7, 75.7, + 71.1, 120.1, 318, 304, 350, 400, 79, 120.3, 95.1, 351, 145, + 301, 121), hp = c(110, 110, 93, 110, 175, 105, 245, 62, 95, + 123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 150, + 150, 245, 175, 66, 91, 113, 264, 175, 335, 109), drat = c(3.9, + 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92, + 3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76, + 3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11 + ), wt = c(2.62, 2.875, 2.32, 3.215, 3.44, 3.46, 3.57, 3.19, + 3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2, + 1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 3.845, 1.935, 2.14, + 1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46, 17.02, 18.61, + 19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3, 18.9, 17.4, 17.6, + 18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9, 20.01, 16.87, + 17.3, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6 + ), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1), am = c(1, + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), gear = c(4, 4, 4, 3, + 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3, + 3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4, 4, 1, 1, 2, 1, 4, + 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1, + 2, 2, 4, 6, 8, 2), engine_sum = c(170, 170, 114, 266, 370, + 233, 372, 153.7, 147.8, 178.6, 178.6, 286.8, 286.8, 286.8, + 484, 472, 452, 84.7, 82.7, 77.1, 126.1, 328, 314, 362, 410, + 85, 126.3, 102.1, 363, 157, 317, 128), engine_nonmissing_count = c(4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4)), row.names = c("Mazda RX4", + "Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout", + "Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280", + "Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood", + "Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic", + "Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin", + "Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2", + "Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora", + "Volvo 142E"), class = "data.frame" + ) + + actual <- + mtcars |> + row_sum( + columns_to_average = c("cyl", "disp", "vs", "carb"), + new_column_name = "engine_sum", + nonmissing_count_name = "engine_nonmissing_count" + ) + + # expected <- dput(actual) + testthat::expect_equal(actual, expected) +}) + +test_that("billboard -all weeks", { expected <- c( NA, NA, 1403, 1342, 1012, 753, NA, 1041, 533, 1355, 839, 419, @@ -100,8 +159,47 @@ test_that("mtcars -all_columns", { testthat::expect_equal(actual, expected) }) +test_that("billboard -all weeks nonmissing count", { + expected <- + c( + 7, 3, 53, 20, 18, 20, 5, 20, 32, 20, 11, 21, 22, 24, 20, 5, + 29, 3, 20, 32, 20, 20, 31, 20, 24, 15, 20, 20, 21, 15, 9, 3, + 15, 17, 20, 29, 15, 9, 23, 12, 20, 37, 20, 3, 3, 20, 19, 6, 8, + 11, 10, 7, 20, 15, 7, 11, 20, 17, 12, 6, 19, 20, 57, 47, 13, + 5, 17, 21, 20, 11, 18, 20, 20, 3, 28, 32, 32, 14, 6, 28, 10, + 20, 15, 20, 20, 20, 13, 28, 14, 2, 20, 21, 15, 19, 10, 4, 1, + 20, 5, 16, 21, 17, 12, 20, 21, 1, 7, 1, 20, 19, 15, 12, 20, 27, + 20, 11, 7, 12, 20, 20, 8, 53, 14, 14, 4, 13, 19, 11, 28, 9, 20, + 12, 18, 20, 17, 17, 20, 20, 17, 15, 20, 24, 24, 8, 20, 9, 15, + 21, 19, 44, 17, 15, 20, 32, 6, 24, 15, 20, 12, 5, 20, 9, 10, + 5, 4, 2, 3, 20, 5, 8, 20, 11, 9, 10, 7, 13, 11, 18, 17, 55, 20, + 20, 17, 14, 7, 19, 22, 12, 18, 20, 9, 24, 5, 18, 18, 20, 1, 20, + 13, 20, 20, 21, 20, 14, 8, 13, 20, 20, 10, 6, 20, 9, 23, 22, + 20, 30, 17, 20, 23, 25, 26, 16, 34, 21, 27, 5, 13, 9, 9, 4, 20, + 20, 6, 27, 32, 8, 4, 20, 20, 5, 5, 14, 20, 20, 19, 22, 20, 20, + 25, 20, 26, 20, 26, 20, 33, 2, 20, 9, 5, 15, 16, 6, 20, 26, 28, + 20, 26, 4, 26, 24, 24, 20, 11, 20, 3, 12, 26, 13, 17, 20, 20, + 20, 7, 6, 5, 12, 22, 20, 20, 11, 20, 27, 11, 4, 22, 2, 16, 7, + 19, 20, 41, 21, 12, 9, 11, 20, 6, 20, 19, 18, 15, 10, 8, 6, 14, + 2, 39 + ) + + actual <- + tidyr::billboard |> + row_sum( + pattern = "^wk\\d{1,2}$", + new_column_name = "week_sum", + threshold_proportion = .1, + nonmissing_count_name = "nonmissing_count", + verbose = FALSE + ) |> + dplyr::pull(nonmissing_count) + + # expected <- dput(actual) + testthat::expect_equal(actual, expected) +}) -test_that("mtcars -all_columns", { +test_that("billboard -subset", { expected <- c( 598, NA, 567, 601, 319, 202, NA, 422, 259, 606, 664, 248, 241, @@ -140,6 +238,7 @@ test_that("mtcars -all_columns", { row_sum( pattern = "^wk\\d$", new_column_name = "week_sum", + nonmissing_count_name = "nonmissing_count", verbose = TRUE ) |> dplyr::pull(week_sum) From 82164f4081af8523a54981a2a83c97fa7761cd36 Mon Sep 17 00:00:00 2001 From: Will Beasley Date: Sat, 28 Oct 2023 14:17:10 -0500 Subject: [PATCH 10/10] improve doc ref #126 --- R/row.R | 11 ++++++----- man/row_sum.Rd | 13 +++++++------ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/R/row.R b/R/row.R index 5d08051..fe04d12 100644 --- a/R/row.R +++ b/R/row.R @@ -17,6 +17,8 @@ #' @param threshold_proportion Designates the minimum proportion of columns #' that have a nonmissing values (within each row) in order to return a sum. #' Required; defaults to to 0.75. +#' In other words, by default, if less than 75% of the specified +#' cells are missing within a row, the row sum will be `NA`. #' @param nonmissing_count_name If a non-NA value is passed, #' a second column will be added to `d` that contains the row's count #' of nonmissing items among the selected columns. @@ -38,15 +40,14 @@ #' @author Will Beasley #' @importFrom rlang := #' @examples -#' library(OuhscMunge) #Load the package into the current R session. #' mtcars |> -#' row_sum( +#' OuhscMunge::row_sum( #' columns_to_average = c("cyl", "disp", "vs", "carb"), #' new_column_name = "engine_sum" #' ) #' #' mtcars |> -#' row_sum( +#' OuhscMunge::row_sum( #' columns_to_average = c("cyl", "disp", "vs", "carb"), #' new_column_name = "engine_sum", #' nonmissing_count_name = "engine_nonmissing_count" @@ -54,7 +55,7 @@ #' #' if (require(tidyr)) #' tidyr::billboard |> -#' row_sum( +#' OuhscMunge::row_sum( #' pattern = "^wk\\d{1,2}$", #' new_column_name = "week_sum", #' threshold_proportion = .1, @@ -67,7 +68,7 @@ #' ) #' #' tidyr::billboard |> -#' row_sum( +#' OuhscMunge::row_sum( #' pattern = "^wk\\d$", #' new_column_name = "week_sum", #' verbose = TRUE diff --git a/man/row_sum.Rd b/man/row_sum.Rd index 7c3613c..b9fbeb0 100644 --- a/man/row_sum.Rd +++ b/man/row_sum.Rd @@ -29,7 +29,9 @@ of the specified columns. Required.} \item{threshold_proportion}{Designates the minimum proportion of columns that have a nonmissing values (within each row) in order to return a sum. -Required; defaults to to 0.75.} +Required; defaults to to 0.75. +In other words, by default, if less than 75\% of the specified +cells are missing within a row, the row sum will be \code{NA}.} \item{nonmissing_count_name}{If a non-NA value is passed, a second column will be added to \code{d} that contains the row's count @@ -58,15 +60,14 @@ the new column will be an \link{integer}. Otherwise the new column will be a \link{double}. } \examples{ -library(OuhscMunge) #Load the package into the current R session. mtcars |> - row_sum( + OuhscMunge::row_sum( columns_to_average = c("cyl", "disp", "vs", "carb"), new_column_name = "engine_sum" ) mtcars |> - row_sum( + OuhscMunge::row_sum( columns_to_average = c("cyl", "disp", "vs", "carb"), new_column_name = "engine_sum", nonmissing_count_name = "engine_nonmissing_count" @@ -74,7 +75,7 @@ mtcars |> if (require(tidyr)) tidyr::billboard |> - row_sum( + OuhscMunge::row_sum( pattern = "^wk\\\\d{1,2}$", new_column_name = "week_sum", threshold_proportion = .1, @@ -87,7 +88,7 @@ if (require(tidyr)) ) tidyr::billboard |> - row_sum( + OuhscMunge::row_sum( pattern = "^wk\\\\d$", new_column_name = "week_sum", verbose = TRUE