-
Notifications
You must be signed in to change notification settings - Fork 1
/
monitor_loadLatest.R
176 lines (142 loc) · 5.66 KB
/
monitor_loadLatest.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#' @export
#'
#' @title Load most recent monitoring data from all sources
#'
#' @param archiveBaseUrl Base URL for monitoring v2 data files.
#' @param archiveBaseDir Local base directory for monitoring v2 data files.
#' @param QC_negativeValues Type of QC to apply to negative values.
#'
#' @return A \emph{mts_monitor} object with PM2.5 monitoring data. (A list with
#' \code{meta} and \code{data} dataframes.)
#'
#' @description Combine recent data from AirNow, AIRSIS and WRCC:
#'
#' If \code{archiveBaseDir} is defined, data will be loaded from this local
#' archive. Otherwise, data will be loaded from the monitoring data repository
#' maintained by the USFS AirFire team.
#'
#' The files loaded by this function are updated multiple times an hour and
#' contain data for the previous 10 days.
#'
#' For daily updates covering the most recent 45 days, use \code{monitor_loadDaily()}.
#'
#' For data extending more than 45 days into the past, use \code{monitor_load()}.
#'
#' @note This function guarantees that only a single time series will be
#' associated with each \code{locationID} using the following logic:
#' \enumerate{
#' \item{AirNow data takes precedence over data from AIRSIS or WRCC}
#' \item{more recent data takes precedence over older data}
#' }
#' This is relevant mostly for "temporary" monitors which may be replaced after they
#' are initially deployed. If you want access to all device deployments associated
#' with a specific \code{locationID}, you can use the provider specific functions:
#' \code{\link{airnow_loadLatest}},
#' \code{\link{airsis_loadLatest}} and
#' \code{\link{wrcc_loadLatest}}
#'
# #' @seealso \code{\link{monitor_load}}
#' @seealso \code{\link{monitor_loadAnnual}}
#' @seealso \code{\link{monitor_loadDaily}}
#' @examples
#' \dontrun{
#' library(AirMonitor)
#' # Fail gracefully if any resources are not available
#' try({
#'
#' monitor_loadLatest() %>%
#' monitor_filter(stateCode %in% CONUS) %>%
#' monitor_leaflet()
#'
#' }, silent = FALSE)
#' }
monitor_loadLatest <- function(
  archiveBaseUrl = paste0(
    "https://airfire-data-exports.s3.us-west-2.amazonaws.com/",
    "monitoring/v2"
  ),
  archiveBaseDir = NULL,
  QC_negativeValues = c("zero", "na", "ignore")
) {

  # Only PM2.5 is loaded; this is passed on to airnow_loadLatest() below.
  parameterName <- "PM2.5"

  # ----- Validate parameters --------------------------------------------------

  QC_negativeValues <- match.arg(QC_negativeValues)

  if ( is.null(archiveBaseUrl) && is.null(archiveBaseDir) )
    stop("one of 'archiveBaseUrl' or 'archiveBaseDir' must be defined")

  # ----- Load data ------------------------------------------------------------

  # NOTE:  Each provider is loaded on a best-effort basis. If a load fails, the
  # NOTE:  error is silenced and that provider has no entry in 'monitorList'.
  # NOTE:  Insertion order matters: "airnow" must come first (see the
  # NOTE:  "Remove duplicate locations" section below).

  monitorList <- list()

  try({
    monitorList[["airnow"]] <-
      airnow_loadLatest(archiveBaseUrl, archiveBaseDir, QC_negativeValues, parameterName) %>%
      monitor_dropEmpty()
  }, silent = TRUE)

  try({
    monitorList[["airsis"]] <-
      airsis_loadLatest(archiveBaseUrl, archiveBaseDir, QC_negativeValues) %>%
      monitor_dropEmpty()
  }, silent = TRUE)

  try({
    monitorList[["wrcc"]] <-
      wrcc_loadLatest(archiveBaseUrl, archiveBaseDir, QC_negativeValues) %>%
      monitor_dropEmpty()
  }, silent = TRUE)

  # Fail with a descriptive message when every provider failed to load, rather
  # than handing an empty list to monitor_combine().
  if ( length(monitorList) == 0 )
    stop("no data could be loaded from AirNow, AIRSIS or WRCC")

  # ----- Remove older deployments ---------------------------------------------

  for ( name in names(monitorList) ) {

    monitor <- monitorList[[name]]

    # Find locations with multiple deployments
    duplicateLocationIDs <-
      monitor$meta$locationID[duplicated(monitor$meta$locationID)] %>%
      unique()

    # Filter to include only locations with multiple deployments
    monitor <-
      monitor %>%
      monitor_filter(.data$locationID %in% duplicateLocationIDs)

    # Find last valid datum for each deployment (see monitor_getCurrentStatus.R)
    # NOTE(review): this assigns one value per 'data' column to the rows of
    # NOTE(review): 'meta', so it assumes the non-datetime columns of 'data' are
    # NOTE(review): in the same order as the rows of 'meta' -- confirm.
    monitor$meta$lastValidIndex <-
      # Start with data
      monitor$data %>%
      # Ensure rows are arranged by datetime and then remove 'datetime'
      dplyr::arrange(.data$datetime) %>%
      # NOTE:  Select by name; '.data$' inside select() is deprecated.
      dplyr::select(-dplyr::all_of("datetime")) %>%
      # Find last non-NA row index in each column (NA when a column is all NA)
      vapply(function(x) { rev(which(!is.na(x)))[1] }, FUN.VALUE = integer(1))

    # Find deployments to be removed
    deploymentList <- list()

    for ( locationID in duplicateLocationIDs ) {

      # Largest "last valid" row index among deployments at this location
      latestValid <-
        monitor$meta %>%
        dplyr::filter(.data$locationID == !!locationID) %>%
        dplyr::pull(.data$lastValidIndex) %>%
        max()

      # Every deployment at this location whose data ends earlier is dropped
      deploymentList[[locationID]] <-
        monitor$meta %>%
        dplyr::filter(.data$locationID == !!locationID) %>%
        dplyr::filter(.data$lastValidIndex != !!latestValid) %>%
        dplyr::pull(.data$deviceDeploymentID)

    }

    deploymentsToRemove <- unlist(deploymentList)

    # Replace monitor object with only the most recent deployments
    deploymentsToRetain <-
      setdiff(monitorList[[name]]$meta$deviceDeploymentID, deploymentsToRemove)

    monitorList[[name]] <-
      monitorList[[name]] %>%
      monitor_select(deploymentsToRetain)

    # NOTE:  Some locations like the Rocky Mtn Fire Cache will have multiple
    # NOTE:  monitors all producing data at the same time. In this case, we
    # NOTE:  rely on dplyr::distinct() below to simply pick the first one.

  }

  # ----- Remove duplicate locations -------------------------------------------

  # NOTE:  Whenever we have multiple monitors reporting from the same location,
  # NOTE:  we always favor the data from AirNow over AIRSIS and WRCC.
  # NOTE:  Because airnow comes first in monitorList, AirNow data
  # NOTE:  will be preferentially retained.

  monitor_all <-
    monitor_combine(monitorList)

  # Keep the first deviceDeploymentID encountered for each locationID
  ids <-
    monitor_all$meta %>%
    dplyr::distinct(.data$locationID, .keep_all = TRUE) %>%
    dplyr::pull(.data$deviceDeploymentID)

  monitor <- monitor_all %>% monitor_select(ids)

  # ----- Return ---------------------------------------------------------------

  return(monitor)

}