Skip to content

Commit

Permalink
Merge pull request #132 from OuhscBbmc/dev
Browse files Browse the repository at this point in the history
`row_sum()`
  • Loading branch information
wibeasley committed Oct 28, 2023
2 parents dfe4ff2 + 82164f4 commit d56d665
Show file tree
Hide file tree
Showing 9 changed files with 595 additions and 8 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Expand Up @@ -16,6 +16,7 @@
^\.gitignore$
^\.travis\.yml$
^\.lintr$
^\.vscode$
^appveyor\.yml$
^CONDUCT.md$
^CODE_OF_CONDUCT.md$
Expand Down
43 changes: 43 additions & 0 deletions .vscode/tasks.json
@@ -0,0 +1,43 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "R",
"code": [
"devtools::build()"
],
"group": "build",
"problemMatcher": [],
"label": "R: Build"
},
{
"type": "R",
"code": [
"devtools::test()"
],
"problemMatcher": [
"$testthat"
],
"group": "test",
"label": "R: Test"
},
{
"type": "R",
"code": [
"devtools::check()"
],
"group": "test",
"problemMatcher": [],
"label": "R: Check"
},
{
"type": "R",
"code": [
"devtools::document()"
],
"group": "build",
"problemMatcher": [],
"label": "R: Document"
}
]
}
1 change: 1 addition & 0 deletions DESCRIPTION
Expand Up @@ -31,6 +31,7 @@ Suggests:
RODBC,
spelling,
testthat,
tidyr,
tinytex
Encoding: UTF-8
Language: en-US
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Expand Up @@ -37,6 +37,7 @@ export(readr_spec_aligned)
export(replace_nas_with_explicit)
export(replace_with_nas)
export(retrieve_key_value)
export(row_sum)
export(snake_case)
export(trim_character)
export(trim_date)
Expand All @@ -47,6 +48,7 @@ export(update_packages_addin)
export(upload_sqls_odbc)
export(verify_value_headstart)
importFrom(magrittr,"%>%")
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(utils,capture.output)
importFrom(utils,installed.packages)
177 changes: 177 additions & 0 deletions R/row.R
@@ -0,0 +1,177 @@
#' @name row_sum
#' @title Find the sum of selected columns within a row
#'
#' @description Sums across columns within a row,
#' while accounting for nonmissingness.
#' Specify the desired columns by passing their explicit column names or
#' by passing a regular expression to matches the column names.
#'
#' @param d The data.frame containing the values to sum. Required.
#' @param columns_to_average A character vector containing the columns
#' names to sum.
#' If empty, `pattern` is used to select columns. Optional.
#' @param pattern A regular expression pattern passed to [base::grep()]
#' (with `perl = TRUE`). Optional
#' @param new_column_name The name of the new column that represents the sum
#' of the specified columns. Required.
#' @param threshold_proportion Designates the minimum proportion of columns
#' that have a nonmissing values (within each row) in order to return a sum.
#' Required; defaults to to 0.75.
#' In other words, by default, if less than 75% of the specified
#' cells are missing within a row, the row sum will be `NA`.
#' @param nonmissing_count_name If a non-NA value is passed,
#' a second column will be added to `d` that contains the row's count
#' of nonmissing items among the selected columns.
#' Must be a valid column name. Optional.
#' @param verbose a logical value to designate if extra information is
#' displayed in the console,
#' such as which columns are matched by `pattern`.
#'
#' @return The data.frame `d`,
#' with the additional column containing the row sum.
#' If a valid value is passed to `nonmissing_count_name`,
#' a second column will be added as well.
#'
#' @details
#' If the specified columns are all logicals or integers,
#' the new column will be an [integer].
#' Otherwise the new column will be a [double].
#'
#' @author Will Beasley
#' @importFrom rlang :=
#' @examples
#' mtcars |>
#' OuhscMunge::row_sum(
#' columns_to_average = c("cyl", "disp", "vs", "carb"),
#' new_column_name = "engine_sum"
#' )
#'
#' mtcars |>
#' OuhscMunge::row_sum(
#' columns_to_average = c("cyl", "disp", "vs", "carb"),
#' new_column_name = "engine_sum",
#' nonmissing_count_name = "engine_nonmissing_count"
#' )
#'
#' if (require(tidyr))
#' tidyr::billboard |>
#' OuhscMunge::row_sum(
#' pattern = "^wk\\d{1,2}$",
#' new_column_name = "week_sum",
#' threshold_proportion = .1,
#' verbose = TRUE
#' ) |>
#' dplyr::select(
#' artist,
#' date.entered,
#' week_sum,
#' )
#'
#' tidyr::billboard |>
#' OuhscMunge::row_sum(
#' pattern = "^wk\\d$",
#' new_column_name = "week_sum",
#' verbose = TRUE
#' ) |>
#' dplyr::select(
#' artist,
#' date.entered,
#' week_sum,
#' )

#' @export
row_sum <- function(
d,
columns_to_average = character(0),
pattern = "",
new_column_name = "row_sum",
threshold_proportion = .75,
nonmissing_count_name = NA_character_,
verbose = FALSE
) {
checkmate::assert_data_frame(d)
checkmate::assert_character(columns_to_average , any.missing = FALSE)
checkmate::assert_character(pattern , len = 1)
checkmate::assert_character(new_column_name , len = 1)
checkmate::assert_double( threshold_proportion, len = 1)
checkmate::assert_character(nonmissing_count_name, len = 1, min.chars = 1, any.missing = TRUE)
checkmate::assert_logical( verbose , len = 1)

if (length(columns_to_average) == 0L) {
columns_to_average <-
d |>
colnames() |>
grep(
x = _,
pattern = pattern,
value = TRUE,
perl = TRUE
)

if (verbose) {
message(
"The following columns will be summed:\n- ",
paste(columns_to_average, collapse = "\n- ")
)
}
}

cast_to_integer <-
d |>
dplyr::select(!!columns_to_average) |>
purrr::every(
\(x) {
is.logical(x) | is.integer(x)
}
)

.rs <- .nonmissing_count <- .nonmissing_proportion <- NULL
d <-
d |>
dplyr::mutate(
.rs =
rowSums(
dplyr::across(!!columns_to_average),
na.rm = TRUE
),
.nonmissing_count =
rowSums(
dplyr::across(
!!columns_to_average,
.fns = \(x) { !is.na(x) }
)
),
.nonmissing_proportion = .nonmissing_count / length(columns_to_average),
{{new_column_name}} :=
dplyr::if_else(
threshold_proportion <= .nonmissing_proportion,
.rs,
# .rs / .nonmissing_count,
NA_real_
)
)

if (!is.na(nonmissing_count_name)) {
d <-
d |>
dplyr::mutate(
{{nonmissing_count_name}} := .nonmissing_count,
)
}

d <-
d |>
dplyr::select(
-.rs,
-.nonmissing_count,
-.nonmissing_proportion,
)
# Alternatively, return just the new columns
# dplyr::pull({{new_column_name}})

if (cast_to_integer) {
d[[new_column_name]] <- as.integer(d[[new_column_name]])
}

d
}
21 changes: 15 additions & 6 deletions README.md
@@ -1,13 +1,10 @@
| [GitHub](https://github.com/OuhscBbmc/OuhscMunge) | [Travis-CI](https://app.travis-ci.com/OuhscBbmc/OuhscMunge/builds) | [CodeCov](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/) |
| :----- | :---------------------------: | :-------: |
| [Main](https://github.com/OuhscBbmc/OuhscMunge/tree/main) | [![R-CMD-check](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml/badge.svg)](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml) | [![codecov](https://codecov.io/gh/OuhscBbmc/OuhscMunge/branch/main/graph/badge.svg?token=O1mkr31GRw)](https://codecov.io/gh/OuhscBbmc/OuhscMunge) |
| [Dev](https://github.com/OuhscBbmc/OuhscMunge/tree/dev) | [![R-CMD-check](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml/badge.svg?branch=dev)](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml) | [![codecov](https://codecov.io/gh/OuhscBbmc/OuhscMunge/branch/dev/graph/badge.svg?token=O1mkr31GRw)](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/tree/dev) |

OuhscMunge
==========

Data manipulation operations commonly used by the [Biomedical and Behavioral Methodology Core](http://www.ouhsc.edu/bbmc/) within the [Department of Pediatrics](http://www.oumedicine.com/pediatrics) of the [University of Oklahoma Health Sciences Center](http://ouhsc.edu/).

### Download and Installation Instructions

<!--
The *release* version of OuhscMunge can be installed from [CRAN](http://cran.r-project.org/web/packages/OuhscMunge/).
```r
Expand All @@ -16,16 +13,21 @@ install.packages("OuhscMunge")
-->

The *development* version of `OuhscMunge` can be installed from [GitHub](https://github.com/OuhscBbmc/OuhscMunge) after installing the `remotes` package. (The *release* version will be available on [CRAN](https://cran.r-project.org/) later.)

```r
install.packages("remotes") # If it's not already installed.
remotes::install_github("OuhscBbmc/OuhscMunge")
```

### Collaborative Development

We encourage input and collaboration from the overall community. If you're familiar with GitHub and R packages, feel free to submit a [pull request](https://github.com/OuhscBbmc/OuhscMunge/pulls). If you'd like to report a bug or make a suggestion, please create a GitHub [issue](https://github.com/OuhscBbmc/OuhscMunge/issues); issues are a usually a good place to ask public questions too. However, feel free to email Will (<wibeasley@hotmail.com>). Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.

### Thanks to Funders
*OUHSC CCAN Independent Evaluation of the State of Oklahoma Competitive Maternal, Infant, and Early Childhood Home Visiting ([MIECHV](https://mchb.hrsa.gov/programs-impact/programs/home-visiting)) Project.*: Evaluates MIECHV expansion and enhancement of Evidence-based Home Visitation programs in four Oklahoma counties. [HRSA/ACF D89MC23154](https://perf-data.hrsa.gov/mchb/DGISReports/Abstract/AbstractDetails.aspx?Source=TVIS&GrantNo=D89MC23154&FY=2012).

* *Oklahoma Shared Clinical and Translational Resources*, sponsored by [NIH NIGMS; U54 GM104938](https://grantome.com/grant/NIH/U54-GM104938). Judith A. James, PI, OUHSC; 2013-2018.
* *Oklahoma Shared Clinical and Translational Resources*, sponsored by [NIH U54GM104938](https://taggs.hhs.gov/Detail/AwardDetail?arg_AwardNum=U54GM104938&arg_ProgOfficeCode=127); 2020-2021.
* *OUHSC CCAN Independent Evaluation of the State of Oklahoma Competitive Maternal, Infant, and Early Childhood Home Visiting ([MIECHV](https://mchb.hrsa.gov/programs-impact/programs/home-visiting)) Project.*: Evaluates MIECHV expansion and enhancement of Evidence-based Home Visitation programs in four Oklahoma counties. [HRSA/ACF D89MC23154](https://perf-data.hrsa.gov/mchb/DGISReports/Abstract/AbstractDetails.aspx?Source=TVIS&GrantNo=D89MC23154&FY=2012).

(So far) the primary developers of OuhscMunge are the external evaluators for [Oklahoma's MIECHV](https://www.ok.gov/health/Child_and_Family_Health/Family_Support_and_Prevention_Service/MIECHV_Program_-_Federal_Home_Visiting_Grant/MIECHV_Program_Resources/index.html) program.

Expand All @@ -38,3 +40,10 @@ Main Branch:
Dev Branch:

![codecov.io](http://codecov.io/github/OuhscBbmc/OuhscMunge/branch.svg?branch=dev)

### Build Status and Package Characteristics

| [GitHub](https://github.com/OuhscBbmc/OuhscMunge) | [Travis-CI](https://app.travis-ci.com/OuhscBbmc/OuhscMunge/builds) | [CodeCov](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/) |
| :----- | :---------------------------: | :-------: |
| [Main](https://github.com/OuhscBbmc/OuhscMunge/tree/main) | [![R-CMD-check](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml/badge.svg)](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml) | [![codecov](https://codecov.io/gh/OuhscBbmc/OuhscMunge/branch/main/graph/badge.svg?token=O1mkr31GRw)](https://codecov.io/gh/OuhscBbmc/OuhscMunge) |
| [Dev](https://github.com/OuhscBbmc/OuhscMunge/tree/dev) | [![R-CMD-check](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml/badge.svg?branch=dev)](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml) | [![codecov](https://codecov.io/gh/OuhscBbmc/OuhscMunge/branch/dev/graph/badge.svg?token=O1mkr31GRw)](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/tree/dev) |
5 changes: 3 additions & 2 deletions inst/WORDLIST
Expand Up @@ -8,6 +8,7 @@ Dev
Funders
HRSA
MIECHV
NIGMS
NSE
ODBC
OUHSC
Expand All @@ -16,6 +17,7 @@ PatientDOB
RStudio's
RedcapExamplesAndPatterns
SHA
Translational
Visel
abc
alistaire
Expand All @@ -24,7 +26,6 @@ camelCase
codecov
cryptographic
csv
dbConnect
devtools
dplyr
dsn
Expand All @@ -41,8 +42,8 @@ io
lettercase
libcurl
mrn
na
nonmissing
nonmissingness
odbc
openssl
patientdob
Expand Down

0 comments on commit d56d665

Please sign in to comment.