/
change-category-id.R
119 lines (108 loc) · 4.73 KB
/
change-category-id.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#' Change the id of a category for a categorical variable
#'
#' Changes the id of a category from an existing value to a new one. The
#' variable can be a categorical, categorical array, or multiple response
#' variable. The category changed will have the same numeric value and missing
#' status as before. The one exception to this is if the numeric value is the
#' same as the id, then the new numeric value will be the same as the new id.
#'
#' It is highly recommended to disable any exclusion filter before using
#' `changeCategoryID`, especially if it is being called multiple times in quick
#' succession (e.g. as part of an automated script). If a problematic exclusion
#' is encountered `changeCategoryID` will attempt to disable and re-enable the
#' exclusion, but that process will be repeated for every call made which could
#' have adverse consequences (not to mention slow down processing time).
#'
#' @param variable the variable in a crunch dataset that will be changed (note:
#' the variable must be categorical, categorical array, or multiple response)
#' @param from the (old) id identifying the category you want to change
#' @param to the (new) id for the category
#' @return `variable` with category `from` and all associated data values mapped
#' to id `to`
#' @examples
#' \dontrun{
#' ds$country <- changeCategoryID(ds$country, 2, 6)
#' }
#' @export
changeCategoryID <- function(variable, from, to) {
if (!has.categories(variable)) {
halt("The variable ", name(variable), " doesn't have categories.")
}
if (!(is.numeric(from) & length(from) == 1)) {
halt("from should be a single numeric")
}
if (!(is.numeric(to) & length(to) == 1)) {
halt("to should be a single numeric")
}
pos.from <- match(from, ids(categories(variable)))
if (is.na(pos.from)) {
halt("No category with id ", from)
}
if (to %in% ids(categories(variable))) {
halt("Id ", to, " is already a category, please provide a new category id.")
}
## Add new category
newcat <- categories(variable)[[pos.from]]
# if the old id matches the old numeric value, likely the user wants these
# to be the same, so change the new numeric value to be the same as the
# new id.
if (newcat$id == newcat$numeric_value %||% FALSE) {
newcat$numeric_value <- to
}
newcat$id <- to
names(categories(variable))[pos.from] <- "__TO_DELETE__"
categories(variable) <- c(categories(variable), newcat)
## Move data to that new id
moveTheData <- function(variable, from, to) {
if (is.Categorical(variable)) {
variable[variable == from] <- to
} else if (is.Array(variable)) {
# If the variable is an array, then lapply over the subvariables
# TODO: change iteration over shojicatalogs to allow iterating over
# the variable directly
lapply(names(variable), function(subvarname) {
variable[[subvarname]][variable[[subvarname]] == from] <- to
})
}
return(variable)
}
moveTheData(variable, from, to)
## Delete old category
keep <- seq_along(categories(variable))
keep[pos.from] <- length(keep)
keep <- keep[-length(keep)]
tryCatch(categories(variable) <- categories(variable)[keep],
error = function(e) {
msg <- conditionMessage(e)
if (grepl("Cannot delete categories", msg)) {
# If dropping the to-be-dropped category failed, it could
# be because there's an exclusion. If so unset it, or else
# the updated values below will not include all rows
ds <- loadDataset(datasetReference(variable))
if (!is.null(exclusion(ds))) {
old_exclusion <- exclusion(ds)
exclusion(ds) <- NULL
on.exit(exclusion(ds) <- old_exclusion)
} else {
# if there is no exclusion, we don't have an easy fix,
# so return the error
stop(e)
}
# warn the user what's up
warning(
"Temporarily disabling the exclusion while ",
"changing category IDs. See `?changeCategoryID` ",
"for more information."
)
# retry the move and category update without the exclusion
moveTheData(variable, from, to)
categories(variable) <- categories(variable)[keep]
} else {
# if the error was not that a category couldn't be deleted,
# we don't have an easy fix, so return the error.
stop(e)
}
}
)
invisible(variable)
}