-
Notifications
You must be signed in to change notification settings - Fork 0
/
ReadNWIS.jl
358 lines (310 loc) · 11.9 KB
/
ReadNWIS.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
# Functions to go from NWIS URL to data
"""
    FunctionNotDefinedException(var)

Exception thrown by functions in this file whose functionality has not been
implemented yet. The `var` field holds the explanatory message.
"""
struct FunctionNotDefinedException <: Exception
    var::String
end

# Show the stored message rather than the default constructor-style dump of the
# struct, so that an uncaught exception reads as a sentence.
function Base.showerror(io::IO, err::FunctionNotDefinedException)
    print(io, "FunctionNotDefinedException: ", err.var)
    return nothing
end
"""
readNWISdv(siteNumbers, parameterCd;
startDate="", endDate="", statCd="00003", format="rdb")
Function to obtain daily value data from the NWIS web service.
# Examples
```jldoctest
julia> df, response = readNWISdv("01646500", "00060",
startDate="2010-10-01", endDate="2010-10-01");
julia> df # df contains the formatted data as a DataFrame
1×5 DataFrame
Row │ agency_cd site_no datetime 68478_00060_00003 68478_00060_00003_c ⋯
│ String7 String15 String15 String7 String3 ⋯
─────┼──────────────────────────────────────────────────────────────────────────
1 │ USGS 01646500 2010-10-01 13100 A ⋯
1 column omitted
julia> typeof(response) # response is the unmodified HTTP GET response object
HTTP.Messages.Response
```
"""
function readNWISdv(siteNumbers, parameterCd;
                    startDate="", endDate="", statCd="00003", format="rdb")
    # Build the URL for the "dv" service. Caller-supplied values are forwarded
    # unchanged; the remaining options are pinned to the literals below.
    dv_url = constructNWISURL(
        siteNumbers;
        parameterCd=parameterCd,
        startDate=startDate,
        endDate=endDate,
        service="dv",
        statCd=statCd,
        format=format,
        expanded=true,
        ratingType="base",
        statReportType="daily",
        statType="mean",
    )
    # readNWIS runs the query and yields the (data frame, response) pair,
    # which is returned to the caller as-is.
    return readNWIS(dv_url)
end
"""
readNWISpCode(parameterCd)
Function to obtain parameter code information from the NWIS web service.
As currently implemented, support for multiple parameter codes is not included.
# Examples
```jldoctest
julia> df, response = readNWISpCode("00060");
julia> df # df contains the formatted data as a DataFrame
1×13 DataFrame
Row │ parameter_cd group parm_nm epa_equivalen ⋯
│ String7 String15 String String15 ⋯
─────┼──────────────────────────────────────────────────────────────────────────
1 │ 00060 Physical Discharge, cubic feet per second Not checked ⋯
10 columns omitted
julia> typeof(response) # response is the unmodified HTTP GET response object
HTTP.Messages.Response
```
"""
function readNWISpCode(parameterCd)
    # Only `parameterCd` comes from the caller. The site argument and the other
    # text options are passed as empty strings, and the format is fixed to "rdb".
    pcode_options = (
        parameterCd=parameterCd,
        startDate="",
        endDate="",
        service="pCode",
        statCd="",
        format="rdb",
        expanded=true,
        ratingType="",
        statReportType="",
        statType="",
    )
    pcode_url = constructNWISURL(""; pcode_options...)
    # readNWIS runs the query and yields the (data frame, response) pair
    return readNWIS(pcode_url)
end
"""
readNWISqw(siteNumbers;
startDate="", endDate="", format="rdb", expanded=true)
Function to obtain water quality data from the NWIS web service.
Not implemented yet: every call currently throws a `FunctionNotDefinedException`.
"""
function readNWISqw(siteNumbers;
                    startDate="", endDate="", format="rdb", expanded=true)
    # Querying the qwdata service is not implemented. Every call throws a
    # FunctionNotDefinedException; no URL is built and no request is sent.
    # The arguments are accepted but not used.
    # TODO: once the service is supported, build the URL with
    # constructNWISURL (service "qw") and pass it to readNWIS, as the other
    # readNWIS* functions in this file do.
    throw(FunctionNotDefinedException(
        "qwdata service querying functionality has not been developed yet."))
end
"""
readNWISqwdata(siteNumbers;
startDate="", endDate="", format="rdb", expanded=true)
Alias to `readNWISqw()`.
"""
function readNWISqwdata(siteNumbers;
                        startDate="", endDate="", format="rdb", expanded=true)
    # Pure alias: bundle the keyword options and hand everything to readNWISqw.
    forwarded = (startDate=startDate, endDate=endDate, format=format,
                 expanded=expanded)
    return readNWISqw(siteNumbers; forwarded...)
end
"""
readNWISsite(siteNumbers)
Function to obtain site information from the NWIS web service.
# Examples
```jldoctest
julia> df, response = readNWISsite("05114000");
julia> df # df contains the formatted data as a DataFrame
1×12 DataFrame
Row │ agency_cd site_no station_nm site_tp_cd dec_lat_ ⋯
│ String7 String15 String31 String3 String15 ⋯
─────┼──────────────────────────────────────────────────────────────────────────
1 │ USGS 05114000 SOURIS RIVER NR SHERWOOD, ND ST 48.99001 ⋯
8 columns omitted
julia> typeof(response) # response is the unmodified HTTP GET response object
HTTP.Messages.Response
```
"""
function readNWISsite(siteNumbers)
    # Build the URL for the "site" service. Only the site numbers come from the
    # caller; the format is fixed to "rdb" and the other text options are empty.
    site_url = constructNWISURL(
        siteNumbers;
        parameterCd="",
        startDate="",
        endDate="",
        service="site",
        statCd="",
        format="rdb",
        expanded=true,
        ratingType="",
        statReportType="",
        statType="",
    )
    # readNWIS runs the query and yields the (data frame, response) pair
    return readNWIS(site_url)
end
"""
readNWISunit(siteNumbers, parameterCd;
startDate="", endDate="", format="rdb")
Function to obtain instantaneous value data from the NWIS web service.
# Examples
```jldoctest
julia> df, response = readNWISunit("01646500", "00060",
startDate="2022-12-29",
endDate="2022-12-29");
julia> first(df) # df contains the formatted data as a DataFrame
DataFrameRow
Row │ agency_cd site_no datetime tz_cd 69928_00060 69928_0006 ⋯
│ String7 String15 String31 String3 String7 String3 ⋯
─────┼──────────────────────────────────────────────────────────────────────────
1 │ USGS 01646500 2022-12-29 00:00 EST 12700 P ⋯
1 column omitted
julia> typeof(response) # response is the unmodified HTTP GET response object
HTTP.Messages.Response
```
"""
function readNWISunit(siteNumbers, parameterCd;
                      startDate="", endDate="", format="rdb")
    # Build the URL for the "uv" service. Sites, parameter code, date range and
    # format are forwarded from the caller; the other text options are empty.
    uv_options = (
        parameterCd=parameterCd,
        startDate=startDate,
        endDate=endDate,
        service="uv",
        statCd="",
        format=format,
        expanded=true,
        ratingType="",
        statReportType="",
        statType="",
    )
    uv_url = constructNWISURL(siteNumbers; uv_options...)
    # readNWIS runs the query and yields the (data frame, response) pair
    return readNWIS(uv_url)
end
"""
readNWISuv(siteNumbers, parameterCd;
startDate="", endDate="", format="rdb")
Alias for `readNWISunit()`.
"""
function readNWISuv(siteNumbers, parameterCd;
                    startDate="", endDate="", format="rdb")
    # Pure alias: every argument is passed through to readNWISunit unchanged.
    forwarded = (startDate=startDate, endDate=endDate, format=format)
    return readNWISunit(siteNumbers, parameterCd; forwarded...)
end
"""
readNWISiv(siteNumbers, parameterCd;
startDate="", endDate="", format="rdb")
Alias for `readNWISunit()`.
"""
function readNWISiv(siteNumbers, parameterCd;
                    startDate="", endDate="", format="rdb")
    # Pure alias: every argument is passed through to readNWISunit unchanged.
    forwarded = (startDate=startDate, endDate=endDate, format=format)
    return readNWISunit(siteNumbers, parameterCd; forwarded...)
end
"""
readNWIS(obs_url)
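Function to take an NWIS url (typically constructed using the
`constructNWISURL()` function) and return the associated data as a
`(df, response)` tuple. The parser is chosen by looking for `rdb`, `json`, or
`waterml` in the url (checked in that order); an `ArgumentError` is thrown when
the url specifies any other format.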
"""
function readNWIS(obs_url)
    # Purpose: fetch `obs_url` and parse the body into a data frame.
    # Returns: the tuple (df, response), where `response` is whatever
    #          `_custom_get` returned.
    # Throws:  ArgumentError when the URL names no supported return format.

    # The parser is chosen from substrings of the URL; precedence is
    # rdb, then json, then waterml.
    wants_rdb = occursin("rdb", obs_url)
    wants_json = occursin("json", obs_url)
    wants_waterml = occursin("waterml", obs_url)

    # Reject unsupported formats before spending a network round trip.
    if !(wants_rdb || wants_json || wants_waterml)
        # Capture only the value of the format parameter (without the "=").
        # The parameter may be absent altogether; that case gets its own
        # message instead of failing with a BoundsError while building one.
        fmt_match = match(r"format=([^&]*)", obs_url)
        if fmt_match === nothing
            throw(ArgumentError(
                "No return format could be identified in the URL: $obs_url"
            ))
        end
        fmt_str = fmt_match.captures[1]
        throw(ArgumentError(
            "Format, $fmt_str, is not currently recognized or handled by DataRetrieval.jl"
        ))
    end

    # do the API GET query
    response = _custom_get(obs_url)

    # parse the body with the reader that matches the detected format
    df = if wants_rdb
        _readRDB(response)
    elseif wants_json
        _readJSON(response)
    else
        _readWaterML(response)
    end
    return df, response
end
"""
_readRDB(response)
Private function to parse the API response from an RDB query.
The R implementation can additionally apply a user-specified timezone when the
data are granular enough to carry times; that option could be added here too.
"""
function _readRDB(response)
    # Purpose: turn the body of an RDB response into a data frame.
    # Argument: `response` must expose the raw payload as `response.body`.
    # Returns: the (possibly row-filtered) data frame.

    # read the response body into a data frame, ignoring "#" comment lines
    df = DataFrame(CSV.File(response.body; comment="#"))
    columns = names(df)

    # The first of these columns that is present decides the row filter: rows
    # whose value in that column is shorter than the minimum length are dropped.
    # NOTE(review): presumably this removes RDB column-format rows such as
    # "5s"/"20d" - confirm against a raw response.
    length_rules = (("datetime", 10), ("dec_lat_va", 4), ("parameter_cd", 4))
    for (column, min_length) in length_rules
        if column in columns
            # `length(missing)` is a MethodError, so missing cells are tested
            # explicitly; such rows are kept rather than discarded.
            keep = x -> ismissing(x) || length(x) >= min_length
            return filter(column => keep, df)
        end
    end

    # none of the known columns exist, so nothing can be filtered
    @warn "no datetime, latitude, or parameter_cd column found, returning all data"
    return df
end
"""
_readWaterML(response)
Private function to parse the response body buffer object from a WaterML query.
Not implemented yet: every call currently throws a `FunctionNotDefinedException`.
"""
function _readWaterML(response)
    # WaterML parsing is not implemented. `response` is ignored and every call
    # throws a FunctionNotDefinedException.
    # TODO: parse the XML in `response.body` and convert it into a data frame.
    throw(FunctionNotDefinedException("WaterML format not yet supported."))
end
"""
_readJSON(response)
Private function to parse the response body buffer object from a JSON query.
"""
function _readJSON(response)
    # Purpose: turn the body of a JSON response into one data frame.
    # Argument: `response` must expose the raw payload as `response.body`.
    # Returns: a data frame with one row per record, a `site_no` column, a
    #          `datetime` column, a comma-joined `qualifiers` column and one
    #          Float64 column per parameter code (named after the code). An
    #          empty DataFrame is returned when there are no records at all.

    # read JSON
    dict = JSON.parse(String(response.body))

    # One frame per non-empty block of records; they are concatenated once at
    # the end instead of growing a data frame inside the loop.
    frames = DataFrame[]
    for timeseries in dict["value"]["timeSeries"]
        site_no = timeseries["sourceInfo"]["siteCode"][1]["value"]
        param_cd = timeseries["variable"]["variableCode"][1]["value"]
        for parameter in timeseries["values"]
            record_json = parameter["value"]
            # Skip blocks without records. `isempty` covers "" as well as an
            # empty list; the latter would otherwise produce a frame that has
            # no `qualifiers`/`value` columns to process.
            isempty(record_json) && continue
            record_df = DataFrame(record_json)
            # assign the site number
            record_df[!, :site_no] .= site_no
            # adjust qualifiers to be the string
            record_df[!, :qualifiers] = [join(x, ",") for x in record_df.qualifiers]
            # convert the values to floats
            record_df[!, :value] = [parse(Float64, x) for x in record_df.value]
            # rename the columns; the value column takes the parameter code
            rename!(record_df, :value => param_cd)
            rename!(record_df, :dateTime => :datetime)
            push!(frames, record_df)
        end
    end

    isempty(frames) && return DataFrame()
    # Frames for different parameter codes have differently named value
    # columns, so take the union of columns (absent cells become `missing`)
    # rather than requiring identical column sets.
    return reduce(vcat, frames; cols=:union)
end