/
set_fct.R
78 lines (61 loc) · 2.25 KB
/
set_fct.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#' @rdname set_fct.data.frame
#' @export
set_fct <- function(.data, ..., first_level = NULL, order_fct = FALSE, labels = NULL, max_levels = Inf){
UseMethod("set_fct", .data)
}
#' set factor
#'
#' allows option to manually set the first level of the factor, for consistency with
#' yardstick which automatically considers the first level
#' as the "positive class" when evaluating classification.
#'
#' @method set_fct data.frame
#' @param .data dataframe
#' @param ... tidyselect (default selection: all character columns)
#' @param first_level character string to set the first level of the factor
#' @param labels chr vector of labels, length equal to factor levels
#' @param order_fct logical. ordered factor?
#' @param max_levels integer. uses \code{\link[forcats]{fct_lump_n}} to limit the number of categories. Only the top \code{max_levels} are preserved, and the rest being lumped into "other"
#'
#' @return tibble
#' @export
#'
#' @examples
#'
#' ## simply set the first level of a factor
#'
#' iris$Species %>% levels
#'
#' iris %>%
#' set_fct(Species, first_level = "virginica") %>%
#' dplyr::pull(Species) %>%
#' levels()
set_fct.data.frame <- function(.data, ..., first_level = NULL, order_fct = FALSE, max_levels = Inf){
.data %>%
select_otherwise(..., otherwise = where(is.character), return_type = "names") -> nms
if (!is.null(first_level)) {
first_level <- as.character(first_level)
}
.data %>%
dplyr::mutate(dplyr::across(tidyselect::any_of(nms), .fns = ~set_fct(., first_level = first_level,
order_fct = order_fct,
max_levels = max_levels) ))
}
#' @rdname set_fct.data.frame
#' @method set_fct default
#' @export
set_fct.default <- function(.data, ..., first_level = NULL, order_fct = FALSE, max_levels = Inf){
.data %>%
factor(ordered = order_fct) %>%
forcats::fct_relevel(first_level, after = 0L) %>%
forcats::fct_lump(n = max_levels, ties.method = "first")
}
#' is_probability
#'
#' @param x numeric vector
#' @keywords internal
#'
#' @return logical
is_probability <- function(x){
is.double(x) && all(dplyr::between(x, 0, 1), na.rm = T) & dplyr::n_distinct(x) > 2
}