This repository has been archived by the owner on Jan 20, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathcached_get.R
98 lines (94 loc) · 4.36 KB
/
cached_get.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# HTTP GET with caching
#
# Convenience wrapper for web GET operations. The content is always
# downloaded to a file via download_to_file() and then, depending on `type`,
# either read back in or returned as a file name.
#
# @param url string: the url of the page to retrieve. Newlines and runs of
#   spaces are collapsed to a single space before the request is made
# @param type string: the expected content type. Either "text" (default),
#   "json", "filename" (this caches the content directly to a file and
#   returns the filename without attempting to read it in), or
#   "binary_filename" (as for "filename", but download_to_file() is called
#   with binary_file = TRUE). Matching is case-insensitive
# @param caching string: caching behaviour, one of "on", "off" or "refresh"
#   (case-insensitive), by default from ala_config()$caching
# @param verbose logical: if TRUE, progress messages are shown. By default
#   from ala_config()$verbose
# @param on_redirect, on_server_error, on_client_error function: passed
#   through to download_to_file() (which is expected to hand them on to
#   check_status_code())
# @param encoding string: passed as the `encoding` argument of readLines()
#   when type == "text". By default from ala_config()$text_encoding
# @return for type == "text" the content is returned as a character vector
#   with one element per line. For type == "json", the content is parsed
#   using jsonlite::fromJSON. For "filename" and "binary_filename", the name
#   of the stored file is returned. NULL is returned if the downloaded file
#   does not exist, or (for "text" and "json") if it is empty.
# @details When caching is "off" and the content is to be read in ("text" or
#   "json"), the download goes to a temporary file; in every other case it
#   goes to the cache file given by ala_cache_filename(url). Setting the
#   user-agent string and checking the response status are not done here;
#   they are expected to be handled by download_to_file().
# @references \url{https://api.ala.org.au/}
# @examples
#
# out <- cached_get(url = "https://biocache-ws.ala.org.au/ws/index/fields",
#                   type = "json")
#
cached_get <- function(url, type = "text", caching = ala_config()$caching,
                       verbose = ala_config()$verbose, on_redirect = NULL,
                       on_client_error = NULL, on_server_error = NULL,
                       encoding = ala_config()$text_encoding) {
  assert_that(is.notempty.string(url))
  assert_that(is.string(type))
  type <- match.arg(tolower(type),
                    c("text", "json", "filename", "binary_filename"))
  assert_that(is.string(caching))
  caching <- match.arg(tolower(caching), c("on", "off", "refresh"))
  assert_that(is.flag(verbose))

  ## collapse newlines and runs of spaces in the url to a single space:
  ## these seem to cause unexpected behaviour
  url <- str_replace_all(url, "[\r\n ]+", " ")

  ## the length check is advisory only, so it is skipped (rather than
  ## failing with "argument is of length zero") when the server config
  ## option or its max-length entry has not been set
  max_url_length <- getOption("ALA4R_server_config")$server_max_url_length
  if (isTRUE(nchar(url) > max_url_length)) {
    warning("URL length may be longer than is allowed by the server")
  }

  ## For consistency and simplicity we always download to file. With caching
  ## "off" and content that will be read straight back in, a throwaway
  ## temporary file is used; otherwise the cache file for this url is used.
  is_filename_type <- type %in% c("filename", "binary_filename")
  if (identical(caching, "off") && !is_filename_type) {
    if (verbose) message(sprintf("GETting URL %s", url))
    this_outfile <- tempfile()
  } else {
    this_outfile <- ala_cache_filename(url)
  }

  ## NOTE(review): `caching` is validated above and selects the output file,
  ## but is not forwarded to download_to_file() - presumably that helper
  ## reads ala_config()$caching itself; confirm against its definition
  thisfile <- download_to_file(url, outfile = this_outfile,
                               binary_file = identical(type,
                                                       "binary_filename"),
                               verbose = verbose, on_redirect = on_redirect,
                               on_client_error = on_client_error,
                               on_server_error = on_server_error)

  if (!file.exists(thisfile)) {
    ## file does not exist
    return(NULL)
  }
  if (is_filename_type) {
    return(thisfile)
  }
  if (!(type %in% c("json", "text"))) {
    ## should not be here! did we add an allowed type to the arguments
    ## without adding handler code down here?
    stop(sprintf("unrecognized type %s in cached_get", type))
  }
  if (!(file.info(thisfile)$size > 0)) {
    ## empty file: nothing to parse or read
    return(NULL)
  }
  if (type == "json") {
    ## convert directly from file rather than reading it as a string first:
    ## this lets jsonlite handle encoding issues that readLines introduced
    return(jsonlite::fromJSON(thisfile))
  }

  ## type == "text"
  fid <- file(thisfile, "rt")
  ## registered straight away so the connection is released even if
  ## readLines() fails part-way through
  on.exit(close(fid), add = TRUE)
  readLines(fid, warn = FALSE, encoding = encoding)
}