Skip to content

Commit

Permalink
DS-4360 Add outcome variable name mapping to template
Browse files Browse the repository at this point in the history
  • Loading branch information
jrwishart committed Apr 11, 2023
1 parent b7dbd4b commit b729063
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 66 deletions.
18 changes: 14 additions & 4 deletions R/estimationdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,11 @@ EstimationData <- function(formula = NULL,
#' the label of the variable, the name of the variable, question type of the variable,
#' the dataset it originates from and others.
#' @param x A \code{data.frame} containing the data to be templated.
#' @param outcome.variable An optional name of the outcome variable. Should be a character(1L) string.
#' If specified, the outcome variable will be omitted from the list.
#' @return A list of lists. Each sublist contains information about each variable from the input
#' data frame. Each sublist describes the variable with the following elements:
#' data frame. If the outcome.variable argument is provided, that variable should be
#' omitted from the list. Each sublist describes the variable with the following elements:
#' \itemize{
#' \item type: The type of the variable (numeric or factor)
#' \item label: The label of the variable
Expand All @@ -233,12 +236,19 @@ EstimationData <- function(formula = NULL,
#' }
#' @export
#' @importFrom stats setNames
EstimationDataTemplate <- function(x) {
EstimationDataTemplate <- function(x, outcome.name) {
stopifnot("input must be a data.frame" = is.data.frame(x),
"input must have at least one row" = nrow(x) > 0)

outcome.name.provided <- !missing(outcome.name)
if (outcome.name.provided) {
stopifnot("outcome.name must be a string" = is.character(outcome.name),
"outcome.name should have length 1" = length(outcome.name) == 1L,
"outcome.name must be a column in the data.frame" = !is.null(x[[outcome.name]]))
}
# Use setNames explictly since lapply returns syntactic names
setNames(lapply(x, createVariableTemplate), names(x))
template <- setNames(lapply(x, createVariableTemplate), names(x))
attr(template, "outcome.name") <- if (outcome.name.provided) outcome.name else NA_character_
template
}

createVariableTemplate <- function(x,
Expand Down
8 changes: 6 additions & 2 deletions man/EstimationDataTemplate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

146 changes: 86 additions & 60 deletions tests/testthat/test-estimationdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ test_that("Check Template creation", {
z = basic.factor,
zo = basic.ordered
)
expect_equal(EstimationDataTemplate(basic.df),
expected.template <- structure(
list(
x = list(
type = "numeric",
Expand All @@ -209,8 +209,27 @@ test_that("Check Template creation", {
ordered = TRUE,
default.value = LETTERS[1]
)
)
),
outcome.name = NA_character_
)
expect_equal(EstimationDataTemplate(basic.df), expected.template)
# Outcome variable removed when requested
for (outcome in names(basic.df)) {
template.with.outcome.info <- expected.template
attr(template.with.outcome.info, "outcome.name") <- outcome
expect_equal(EstimationDataTemplate(basic.df, outcome.name = outcome),
template.with.outcome.info)
}
incorrect.name.types <- list(1L, 1.0, TRUE, matrix(1:2, ncol = 1L))
for (outcome.name in incorrect.name.types) {
expect_error(EstimationDataTemplate(basic.df, outcome.name = outcome.name),
"outcome.name must be a string")
}
expect_error(EstimationDataTemplate(basic.df, outcome.name = c("x", "y")),
"outcome.name should have length 1")
expect_error(EstimationDataTemplate(basic.df, outcome.name = "not.in.data.frame"),
"outcome.name must be a column in the data.frame")
# Using a outcome name that is not in the data.frame produces an error
# Check unobserved levels handled
factor.w.unobserved <- factor(letters[1:5], levels = letters[1:6])
ordered.w.unobserved <- factor(LETTERS[1:5], levels = LETTERS[1:6],
Expand All @@ -222,31 +241,34 @@ test_that("Check Template creation", {
zo = ordered.w.unobserved
)
expect_equal(EstimationDataTemplate(data.with.unordered),
list(
x = list(
type = "numeric",
default.value = 1L
),
y = list(
type = "numeric",
default.value = 1.0
structure(
list(
x = list(
type = "numeric",
default.value = 1L
),
y = list(
type = "numeric",
default.value = 1.0
),
z = list(
type = "factor",
levels = letters[1:6],
observed.levels = letters[1:5],
has.unobserved.levels = TRUE,
ordered = FALSE,
default.value = letters[1]
),
zo = list(
type = "factor",
levels = LETTERS[1:6],
observed.levels = LETTERS[1:5],
has.unobserved.levels = TRUE,
ordered = TRUE,
default.value = LETTERS[1]
)
),
z = list(
type = "factor",
levels = letters[1:6],
observed.levels = letters[1:5],
has.unobserved.levels = TRUE,
ordered = FALSE,
default.value = letters[1]
),
zo = list(
type = "factor",
levels = LETTERS[1:6],
observed.levels = LETTERS[1:5],
has.unobserved.levels = TRUE,
ordered = TRUE,
default.value = LETTERS[1]
)
outcome.name = NA_character_
)
)
# Check metadata (attributes) when all exist
Expand Down Expand Up @@ -278,41 +300,45 @@ test_that("Check Template creation", {
`Basic group` = basic.ordered,
check.names = FALSE
)
expected.list <- list(
`Hello World` = list(
type = "numeric",
default.value = 1L
),
`Fancy Hello` = list(
type = "numeric",
label = "A fancy numeric",
name = "q1a",
questiontype = "PickOne",
question = "Q1",
dataset = "foo.sav",
default.value = 1.0
),
`Fancy factor` = list(
type = "factor",
label = "A fancy factor",
name = "q1a",
questiontype = "PickOne",
question = "Q1",
dataset = "foo.sav",
levels = levels(basic.factor),
observed.levels = levels(basic.factor),
has.unobserved.levels = FALSE,
ordered = FALSE,
default.value = levels(basic.factor)[1L]
expected.list <-
structure(
list(
`Hello World` = list(
type = "numeric",
default.value = 1L
),
`Fancy Hello` = list(
type = "numeric",
label = "A fancy numeric",
name = "q1a",
questiontype = "PickOne",
question = "Q1",
dataset = "foo.sav",
default.value = 1.0
),
`Fancy factor` = list(
type = "factor",
label = "A fancy factor",
name = "q1a",
questiontype = "PickOne",
question = "Q1",
dataset = "foo.sav",
levels = levels(basic.factor),
observed.levels = levels(basic.factor),
has.unobserved.levels = FALSE,
ordered = FALSE,
default.value = levels(basic.factor)[1L]
),
`Basic group` = list(
type = "factor",
levels = LETTERS[1:5],
observed.levels = LETTERS[1:5],
has.unobserved.levels = FALSE,
ordered = TRUE,
default.value = LETTERS[1]
)
),
`Basic group` = list(
type = "factor",
levels = LETTERS[1:5],
observed.levels = LETTERS[1:5],
has.unobserved.levels = FALSE,
ordered = TRUE,
default.value = LETTERS[1]
)
outcome.name = NA_character_
)
expect_equal(EstimationDataTemplate(mixed.df), expected.list)
# Too many attributes, but only desired ones kept
Expand Down

0 comments on commit b729063

Please sign in to comment.