/
case-when-variable.R
184 lines (170 loc) · 6.77 KB
/
case-when-variable.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#' Create a categorical or numeric variable based on conditions
#'
#' Conditions are specified using a series of formulas: the left-hand side is
#' the condition that must be true (a `CrunchLogicalExpr`) and the right-hand
#' side is where to get the value if the condition on the left-hand side is
#' true. When creating a categorical variable, the right-hand side must be
#' a `Category` or a categorical `CrunchVariable` or `CrunchExpression`, while
#' for numeric variables it is a single number or variable or expression.
#'
#' @param ... formulas where the left hand side is a `CrunchLogicalExpression` (or `TRUE`
#' to indicate the "else" case that will be met if all the other expression are
#' not met) and the right hand side is a CrunchVariable that should be filled in,
#' a `Category` object describing the Category it should be used, a string
#' which will be the name of the `Category` or `NA` to indicate that it should
#' be replaced with the system missing value. For `makeCaseWhenVariable()`
#' non-formula arguments will be passed to `[VarDef()]`
#' @param data A CrunchDataset to use if variable aliases are left bare in the
#' formulas.
#' @param cases A list of formulas that match the description in `...` or a list of
#' lists with named items, "expression" (like the left-hand side of the formulas above),
#' "fill" for a variable to fill in, or "name", "id", and other items that describe a
#' category.
#' @param type The type of the variable to output (either "categorical" or "numeric"), only
#' required if all fills are expressions and so their type cannot be guessed automatically.
#' @param name For `makeCaseWhenVariable()` the name of the variable to create.
#' @return `makeCaseWhenVariable()` returns a `VariableDefinition` and
#' `caseWhenExpr()` returns an expression
#' @export
#' @examples
#' \dontrun{
#' # Creating categorical variables
#' ds$new_var <- makeCaseWhenVariable(
#' ds$x %in% c("a", "b") ~ ds$y, # can fill with a variable
#' ds$x %in% c("c", "d") ~ Category(name = "c or d", numeric_value = 10), # or a Category
#' # If none of the categories match, will be set to missing unless you
#' # specify an "else" case with `TRUE` in the left hand side
#' TRUE ~ Category(name = "catch all"),
#' name = "combined x and y"
#' )
#'
#' ds$brand_x_pref <- makeCaseWhenVariable(
#' ds$brand[[1]] == "Brand X" ~ ds$pref[[1]],
#' ds$brand[[2]] == "Brand X" ~ ds$pref[[2]],
#' ds$brand[[3]] == "Brand X" ~ ds$pref[[3]],
#' name = "brand x preference"
#' )
#'
#' ds$x_among_aware <- makeCaseWhenVariable(
#' ds$aware_x == "Yes" ~ ds$x,
#' TRUE ~ Category(name = "(Not aware)", missing = TRUE),
#' name = "x (among respondents aware of x)"
#' )
#'
# # Creating numeric variables
#' ds$new_num_var <- makeCaseWhenVariable(
#' ds$x %in% c("a", "b") ~ ds$z, # LHS as before, RHS can be numeric variables,
#' ds$x == "c" ~ ds$z * 10, # expressions,
#' ds$x == "d" ~ 100, # or numbers
#' name = "New numeric variable"
#' )
#'
#' ds$capped_z <- makeCaseWhenVariable(
#' ds$z > 10 ~ 10,
#' TRUE ~ ds$z,
#' name = "Capped z"
#' )
#'
#' # caseWhenExpr can be used inside other expressions
#' ds$brand_x_prefer_high <- VarDef(
#' selectCategories(
#' caseWhenExpr(
#' ds$brand_shown[[1]] == "Brand X" ~ ds$ratings[[1]],
#' ds$brand_shown[[2]] == "Brand X" ~ ds$ratings[[2]],
#' ds$brand_shown[[3]] == "Brand X" ~ ds$ratings[[3]]
#' ),
#' c("Best", "Very Good")
#' ),
#' name = "Rate X highly"
#' )
#'
#' # Using lists in `cases` argument can be helpful when working programmatically
#' source_var <- ds$x
#' inclusion_condition <- ds$skipped_x != "Yes"
#'
#' ds$x2_among_aware <- makeCaseWhenVariable(
#' cases = list(list(fill = source_var, expression = inclusion_condition)),
#' name = "x2 among aware"
#' )
#' }
makeCaseWhenVariable <- function(..., data = NULL, cases = NULL, name, type = NULL) {
dots <- list(...)
formula_dots <- vapply(dots, function(x) inherits(x, "formula"), logical(1))
args <- list(
data = caseWhenExpr(data = data, cases = c(cases, unname(dots[formula_dots]))),
name = name,
type = type
)
args <- c(args, dots[!formula_dots])
do.call(VarDef, args)
}
#' @export
#' @rdname makeCaseWhenVariable
caseWhenExpr <- function(..., data = NULL, cases = NULL, type = NULL) {
cases <- unname(c(cases, list(...)))
case_fills <- lapply(cases, parse_case_when_formula, data = data)
# Get set of unique IDs that fill in for when IDs are missing
used_ids <- vapply(case_fills, function(x) x$id %||% NA, numeric(1))
case_ids <- used_ids
case_ids[is.na(used_ids)] <- setdiff(
seq_along(case_fills),
used_ids
)[seq_len(sum(is.na(used_ids)))]
cases <- mapply(function(case_fill, case_id) {
# Make a temporary cases for expressions that will be filled in
if ("fill" %in% names(case_fill)) {
list(
expression = case_fill$expression,
id = as.integer(case_id),
name = paste0("casefill__internal", case_id)
)
} else {
case_fill
}
}, case_fills, case_ids, SIMPLIFY = FALSE)
need_fills <- vapply(case_fills, function(x) "fill" %in% names(x), logical(1))
if (!any(need_fills)) return(caseExpr(cases = cases))
fills <- lapply(which(need_fills), function(cf_num) {
case_fill <- case_fills[[cf_num]]
list(fill = case_fill$fill, id = case_ids[cf_num])
})
fillExpr(caseExpr(cases = cases), fills = fills, type = type)
}
parse_case_when_formula <- function(formula, data) {
if (is.list(formula)) {
if (identical(formula$expression, TRUE)) formula$expression <- "else"
return(formula)
}
if (length(formula) != 3) {
halt(
"The condition provided must be a proper formula: ",
deparseAndFlatten(formula)
)
}
expr <- evalLHS(formula, data)
if (!inherits(expr, c("logical", "CrunchLogicalExpr"))) {
halt(
"The left-hand side provided must be a logical or a ",
"CrunchLogicalExpr: ", dQuote(LHS_string(formula))
)
}
if (identical(expr, TRUE)) expr <- "else"
rhs <- evalRHS(formula, data)
if (is.variable(rhs)) {
rhs <- list(fill = rhs)
} else if (inherits(rhs, "Category")) {
rhs <- lapply(rhs, identity)
} else if (is.character(rhs)) {
rhs <- list(name = rhs)
} else if (is.numeric(rhs)) {
rhs <- list(fill = rhs)
} else if (is.na(rhs)) {
list(name = "No Data", missing = TRUE)
} else {
halt(
"The right-hand side provided must be a Category, CrunchVariable ",
"string, number, or `NA`: ", dQuote(RHS_string(formula))
)
}
c(list(expression = expr), rhs)
}