/
addEsriAddress.R
175 lines (142 loc) · 5.98 KB
/
addEsriAddress.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#' @keywords internal
#' @export
#' @import MazamaCoreUtils
#'
#' @title Add address information to a dataframe
#'
#' @param df dataframe with geolocation information (\emph{e.g.} those created
#' by \code{wrcc_qualityControl()} or \code{airsis_qualityControl()})
#' @param lonVar name of longitude variable in the incoming dataframe
#' @param latVar name of the latitude variable in the incoming dataframe
#' @param existingMeta existing 'meta' dataframe from which to obtain metadata
#' for known monitor deployments
#' @description ESRI APIs are used to determine
#' address information associated with the locations specified by the
#' \code{lonVar} and \code{latVar} columns of the incoming dataframe.
#'
#' For each row, the following steps are taken:
#' \enumerate{
#' \item{If \code{existingMeta} is supplied and contains a record with the same
#' \code{monitorID} and a non-empty \code{siteName}, the \code{siteName} (and
#' \code{countyName}, when present) from that record are reused and no web
#' request is made.}
#' \item{Otherwise, if both coordinates are available, the ESRI reverse
#' geocoding service is queried and \code{siteName} is built as
#' "City-Address" (or whichever of the two is non-empty). \code{countyName} is
#' the returned "Subregion" with " County" removed.}
#' }
#'
#' Rows with missing coordinates, or for which the service returns no usable
#' fields, are left unchanged. A failure of an individual request is logged
#' and processing continues with the next row. If the service responds with an
#' HTTP error status, processing stops and the dataframe is returned as
#' populated so far.
#' @return Input dataframe with additional columns: \code{siteName, countyName}.
#' @references \url{https://developers.arcgis.com/rest/geocode/api-reference/geocoding-reverse-geocode.htm}
addEsriAddress <- function(
  df,
  lonVar = "longitude",
  latVar = "latitude",
  existingMeta = NULL
) {

  logger.debug(" ----- addEsriAddress() ----- ")

  # Sanity check -- make sure df does not have class "tbl_df" or "tibble"
  df <- as.data.frame(df, stringsAsFactors = FALSE)

  # Sanity check -- names
  if ( !lonVar %in% names(df) || !latVar %in% names(df) ) {
    logger.error("Dataframe does not contain columns lonVar='%s' or latVar='%s'", lonVar, latVar)
    logger.error("Please specify lonVar and latVar arguments")
    stop(paste0("Dataframe does not contain columns lonVar='",lonVar,"' or latVar='",latVar,"'"))
  }

  # Initialize siteName and countyName columns
  # NOTE: Test against names() rather than df$name to avoid partial matching
  # NOTE: and size the new column by nrow(df) so zero-row input is accepted.
  if ( !"siteName" %in% names(df) ) df$siteName <- rep(NA_character_, nrow(df))
  if ( !"countyName" %in% names(df) ) df$countyName <- rep(NA_character_, nrow(df))

  # TRUE only for a single, non-missing, non-empty character value
  hasText <- function(x) {
    is.character(x) && length(x) == 1L && !is.na(x) && nzchar(x)
  }

  # Existing metadata can only be reused when both tables can be joined on
  # monitorID and the existing table actually carries site names.
  canReuseMeta <-
    !is.null(existingMeta) &&
    "monitorID" %in% names(df) &&
    all(c("monitorID", "siteName") %in% names(existingMeta))

  # ----- Add siteName from ESRI API ---------------------------

  # NOTE: URL looks like:
  # NOTE:
  # NOTE: https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/reverseGeocode?outSR=4326&returnIntersection=false&location=-121.06092453002931%2C47.255990221115475&f=json
  # NOTE:
  # NOTE: Successful return looks like:
  # {
  #   "address": {
  #     "AddNum": "90",
  #     "Addr_type": "PointAddress",
  #     "Address": "90 Guzzie Ln",
  #     "Block": "",
  #     "City": "Ronald",
  #     "CountryCode": "USA",
  #     "District": "",
  #     "LongLabel": "90 Guzzie Ln, Ronald, WA, 98940, USA",
  #     "Match_addr": "90 Guzzie Ln, Ronald, Washington, 98940",
  #     "MetroArea": "",
  #     "Neighborhood": "",
  #     "PlaceName": "",
  #     "Postal": "98940",
  #     "PostalExt": "",
  #     "Region": "Washington",
  #     "Sector": "",
  #     "ShortLabel": "90 Guzzie Ln",
  #     "Subregion": "Kittitas County",
  #     "Territory": "",
  #     "Type": ""
  #   },
  #   "location": {
  #     "spatialReference": {
  #       "latestWkid": 4326,
  #       "wkid": 4326
  #     },
  #     "x": -121.06043450738626,
  #     "y": 47.2559559888915
  #   }
  # }

  urlBase <- "https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/reverseGeocode?"

  logger.trace("Getting site names for %s location(s)", nrow(df))

  # When siteName is missing, create one similar to AirNow with "locality-route"
  for ( i in seq_len(nrow(df)) ) {

    # NOTE: monitorID for AIRSIS and WRCC contains location information and will always
    # NOTE: be associated with a unique siteName. Reusing metadata will dramatically
    # NOTE: decrease the number of API requests we make.

    # Check for existing metadata for this monitorID
    if ( canReuseMeta ) {

      monitorID <- df[i, "monitorID"]

      # Look the record up by the monitorID column so that the result does not
      # depend on how the row names of existingMeta happen to be set.
      metaIndex <- NA_integer_
      if ( !is.na(monitorID) ) {
        metaIndex <- match(monitorID, existingMeta$monitorID)
      }

      if ( !is.na(metaIndex) ) {
        existingSiteName <- as.character(existingMeta$siteName[metaIndex])
        if ( hasText(existingSiteName) ) {
          # Use existing siteName and countyName if they already exist
          logger.trace("\tusing existing metadata for %s", monitorID)
          df$siteName[i] <- existingSiteName
          if ( "countyName" %in% names(existingMeta) ) {
            df$countyName[i] <- as.character(existingMeta$countyName[metaIndex])
          }
          next
        }
      }

    }

    # Query ESRI for siteName and countyName
    lon <- df[i, lonVar]
    lat <- df[i, latVar]
    logger.trace("\tESRI address request for location = %s, %s", lon, lat)

    if ( anyNA(c(lon, lat)) ) next

    # NOTE: 'url' is built before try() so that it is always defined, and always
    # NOTE: refers to the current row, when it is logged after a failure. The
    # NOTE: access token is deliberately kept out of it so that credentials
    # NOTE: never end up in log files.
    url <- paste0(urlBase, "location=", lon, ",", lat, "&f=json")

    serviceFailed <- FALSE

    # Always wrap any webservice request
    result <- try({

      requestUrl <- url
      esriToken <- getEsriToken()
      if ( !is.null(esriToken) ) {
        requestUrl <- paste0(requestUrl, "&token=", esriToken)
      }

      # Get and parse the return
      r <- httr::GET(requestUrl)

      if ( httr::http_error(r) ) {

        # Log the raw body; a parsed list is not suitable for a "%s" format
        logger.error(
          "ESRI address service failed with: %s",
          httr::content(r, as = "text", encoding = "UTF-8")
        )
        logger.error("ESRI address service failed for URL: %s", url)
        serviceFailed <- TRUE

      } else {

        returnObj <- httr::content(r)

        # NOTE: ArcGIS REST services may report a failure inside an "error"
        # NOTE: element of an otherwise successful response.
        if ( is.list(returnObj) && !is.null(returnObj$error) ) {
          logger.warn("ESRI address service returned an error for URL: %s", url)
        }

        address <- if ( is.list(returnObj) ) returnObj$address else NULL

        # Create siteName similar to AirNow with "City-Address"
        # NOTE: ESRI returns "" for fields it cannot determine so empty strings
        # NOTE: are treated the same as absent fields.
        city <- address$City
        street <- address$Address
        if ( hasText(city) && hasText(street) ) {
          df$siteName[i] <- paste0(city, "-", street)
        } else if ( hasText(city) ) {
          df$siteName[i] <- city
        } else if ( hasText(street) ) {
          df$siteName[i] <- street
        }

        # Create county
        subregion <- address$Subregion
        if ( hasText(subregion) ) {
          df$countyName[i] <- stringr::str_replace(subregion, ' County', '')
        }

      }

    }, silent = TRUE)

    if ( inherits(result, "try-error") ) {
      logger.trace("\t%s", url)
      logger.warn("Unable to add ESRI address: %s", geterrmessage())
    }

    # An HTTP error status means the service itself is failing: stop making
    # requests and return what has been gathered so far.
    if ( serviceFailed ) break

  }

  return(df)

}