-
Notifications
You must be signed in to change notification settings - Fork 6
/
filter_obs.R
165 lines (151 loc) · 5.43 KB
/
filter_obs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#' @title Filter observation list to exclude situations, variables or dates
#'
#' @inheritParams estim_param
#' @param var (optional, if not given all variables will be kept) Vector containing the names of the variables to include or exclude
#' @param situation (optional, if not given all situations will be kept) Vector containing the names of the situations to include or exclude
#' @param dates (optional, if not given all dates will be kept) Vector containing the dates (POSIXct format) to include or exclude
#' @param include (optional, FALSE by default) Flag indicating if the variables / situations / dates listed in inputs must be included (TRUE) or not (FALSE) in the resulting list
#' @param var_names `r lifecycle::badge("deprecated")` `var_names` is no
#' longer supported, use `var` instead.
#' @param sit_names `r lifecycle::badge("deprecated")` `sit_names` is no
#' longer supported, use `situation` instead.
#'
#' @return obs_list List of filtered observed values (same format as `obs_list` input argument)
#'
#' @seealso For more detail and examples, see the different vignettes in
#' [CroptimizR website](https://sticsrpacks.github.io/CroptimizR/)
#'
#' @export
#'
#' @importFrom rlang .data
#'
#' @examples
#'
#' obs_list <- list(
#' sit1 = data.frame(
#' Date = as.POSIXct(c("2009-11-30", "2009-12-10")),
#' var1 = c(1.1, 1.5), var2 = c(NA, 2.1)
#' ),
#' sit2 = data.frame(
#' Date = as.POSIXct(c("2009-11-30", "2009-12-5")),
#' var1 = c(1.3, 2)
#' )
#' )
#'
#' # Keep only var1
#' filter_obs(obs_list, var = c("var1"), include = TRUE)
#'
#' # Exclude observations at date "2009-11-30"
#' filter_obs(obs_list, dates = as.POSIXct(c("2009-11-30")))
#'
filter_obs <- function(obs_list, var = NULL, situation = NULL, dates = NULL,
include = FALSE, var_names = lifecycle::deprecated(),
sit_names = lifecycle::deprecated()) {
# Managing parameter names changes between versions:
if (lifecycle::is_present(sit_names)) {
lifecycle::deprecate_warn("0.5.0", "filter_obs(sit_names)", "filter_obs(situation)")
} else {
sit_names <- situation # to remove when we update inside the function
}
if (lifecycle::is_present(var_names)) {
lifecycle::deprecate_warn("0.5.0", "filter_obs(var_names)", "filter_obs(var)")
} else {
var_names <- var # to remove when we update inside the function
}
# Check obs_list format
if (is.null(obs_list)) {
stop("obs_list is NULL.")
}
if (!is.obs(obs_list)) {
stop("Incorrect format for argument obs_list.")
}
# Filter Situations
## check that sit_names are in obs_list
if (!is.null(sit_names)) {
tmp <- intersect(sit_names, names(obs_list))
if (is.null(tmp) || !setequal(tmp, sit_names)) {
warning("Argument sit_names contains situations that are not included in obs_list. \n obs_list contains: ", paste(names(obs_list), collapse = " "))
sit_names <- tmp
}
## Filter
if (include) {
obs_list <- obs_list[sit_names]
} else {
obs_list[sit_names] <- NULL
if (length(obs_list) == 0) {
warning("All situations have been excluded from the list")
return(NULL)
}
}
}
# Transform obs_list in a data.frame for easier filtering of var and dates
df <- dplyr::bind_rows(obs_list, .id = "id")
# Filter Variables
## check that var_names are in obs_list
if (!is.null(var_names)) {
tmp <- intersect(var_names, names(df))
if (is.null(tmp) || !setequal(tmp, var_names)) {
warning("Argument var_names contains variables that are not included in obs_list. \n obs_list contains: ", paste(colnames(df), collapse = " "))
if (length(tmp) == 0) {
return(list())
} # If variable does not exist at all, return empty df
var_names <- tmp
}
## Filter
if (include) {
keep <- c("id", "Date", intersect("Plant", names(df)))
df <- df[, c(keep, var_names)]
} else {
df[var_names] <- NULL
if (ncol(df) == 2) {
warning("All variables have been excluded from the list")
return(NULL)
}
}
}
# Filter Dates
## check that dates are in obs_list
if (!is.null(dates)) {
included <- sapply(dates, function(x) any(df$Date == x))
if (!all(c = included)) {
warning("Argument dates contains dates that are not included in obs_list: ", paste(dates[!included], collapse = " "))
dates <- dates[included]
}
## Filter
if (include) {
df <- dplyr::filter(df, .data$Date == dates)
} else {
df <- dplyr::filter(df, .data$Date != dates)
if (nrow(df) == 0) {
warning("All dates have been excluded from the list.")
return(NULL)
}
}
}
if ("Plant" %in% colnames(df)) {
skeepcols <- 3
} else {
skeepcols <- 2
}
# Remove rows with only NAs:
df <- df[rowSums(is.na(df[, (skeepcols + 1):ncol(df), drop = FALSE])) != (ncol(df) - skeepcols), ]
if (!all(names(obs_list) %in% unique(df$id))) {
warning(
"No observations found in situation(s) ",
paste(names(obs_list)[!(names(obs_list) %in% unique(df$id))], collapse = ", ")
)
}
# Re-transform the df into a list
obs_list <- split(df, df$id)
# Remove column "id" and remove columns with only NAs:
obs_list <- lapply(
obs_list,
function(x) {
select(x, !.data$id & where(~ !all(is.na(.x))))
}
)
return(obs_list)
}
# Remove when tidyselect exports where() (very soon),
# see https://github.com/r-lib/tidyselect/issues/244
utils::globalVariables("where")