S2_download() enhancements:
* `tries` parameter added, enabling automatic retries on download failures (see the usage sketch below)
* `skipExisting` parameter is tri-state now:
  * `always` downloads the data only if a local copy doesn't exist (regardless of its size)
  * `samesize` downloads the data if the local copy doesn't exist or has a different size
  * `never` downloads the data regardless of whether a local copy exists
* `timeout` now defaults to 1800 (seconds)
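
A minimal usage sketch of the new interface (the image URL below is a placeholder; in practice it comes from `S2_query_image()$url`):

```r
library(sentinel2)

# placeholder URL - a real one is obtained via S2_query_image()$url
url = 'https://s2.boku.eodc.eu/image/12345678'

# retry up to 3 times, re-download only when the local file size
# differs from the remote one, give up on a single file after 30 min
S2_download(url, 'image.jp2', skipExisting = 'samesize', tries = 3, timeout = 1800)
```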
zozlak committed Apr 29, 2019
1 parent 696d664 commit 82fd5c0
Showing 6 changed files with 104 additions and 36 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
@@ -1,9 +1,9 @@
Package: sentinel2
Title: Tools to access Sentinel-2 data pre-processed by IVFL, BOKU Vienna
Version: 0.4.3
Version: 0.5.0
Authors@R: c(
person("Sebastian", "Boeck", email = "sebastian.boeck@boku.ac.at", role = c("aut", "cre")),
person("Mateusz", "Zoltak", email = "mateusz.zoltak@boku.ac.at", role = c("ctb"))
person("Mateusz", "Zoltak", email = "mateusz.zoltak@boku.ac.at", role = c("aut"))
)
Description: Tools to conveniently query and access pre-processed Sentinel-2
data. Registration to 'https://s2.boku.eodc.eu' is required for most operations.
@@ -29,4 +29,4 @@ License: GPL-3
Encoding: UTF-8
LazyData: true
VignetteBuilder: knitr
RoxygenNote: 6.1.0
RoxygenNote: 6.1.1
14 changes: 14 additions & 0 deletions NEWS.md
@@ -1,3 +1,17 @@
# 0.5.0 (2019-04-29)

* `S2_download()` enhancements:
    * `tries` parameter added, enabling automatic retries on download failures
    * `skipExisting` parameter is tri-state now:
        * `always` downloads the data only if a local copy doesn't exist
          (regardless of its size)
        * `samesize` downloads the data if the local copy doesn't exist or has a different
          size (be aware it doesn't work for full granule zip downloads; in such a case
          it's equivalent to `never`)
        * `never` downloads the data regardless of whether a local copy exists
    * `timeout` now defaults to 1800 (seconds), which seems large enough and prevents
      download jobs from getting stalled

# 0.4.3 (2019-04-26)

* Fixes in `S2_download()`:
76 changes: 50 additions & 26 deletions R/S2_download.R
@@ -4,12 +4,15 @@
#'
#' @param url character (valid) url to download file from.
#' @param destfile character download destination.
#' @param skipExisting logical skip if file already exists.
#' @param skipExisting character should locally existing files be skipped - one
#' of "always", "samesize" (only if remote and local file size matches),
#' "never".
#' @param zip logical if \code{TRUE}, the url will be downloaded as zip archive
#' and (automatically) unzipped in the parent directory of 'destfile'
#'   (plays a role only when downloading granules).
#' @param progressBar should a progress bar be displayed?
#' @param timeout single file download timeout in seconds (0 means no timeout)
#' @param tries how many times to try the download in case of failures
#' @param ... further arguments not implemented directly - see
#' the \href{https://s2.boku.eodc.eu/wiki/#!granule.md#GET_https://s2.boku.eodc.eu/granule/{granuleId}}{granule API doc}
#' and the \href{https://s2.boku.eodc.eu/wiki/#!image.md#GET_https://s2.boku.eodc.eu/image/{imageId}}{image API doc}.
@@ -44,15 +47,16 @@
#' )
#' }

S2_download = function(url, destfile, zip = TRUE, skipExisting = TRUE, progressBar = TRUE, timeout = 0, ...){
S2_download = function(url, destfile, zip = TRUE, skipExisting = 'samesize', progressBar = TRUE, timeout = 1800, tries = 1, ...){
url = as.character(url)
destfile = as.character(destfile)
stopifnot(
is.vector(url), length(url) > 0, is.vector(destfile),
is.logical(skipExisting),
is.vector(skipExisting), is.character(skipExisting), length(skipExisting) == 1, all(!is.na(skipExisting)),
is.vector(zip), is.logical(zip), length(zip) == 1, all(!is.na(zip)),
is.vector(progressBar), is.logical(progressBar), length(progressBar) == 1, all(!is.na(progressBar)),
is.vector(timeout), is.numeric(timeout), length(timeout) == 1, all(!is.na(timeout)),
is.vector(tries), is.numeric(tries), length(tries) == 1, all(!is.na(tries) & tries > 0),
length(url) == length(destfile)
)
filter = !is.na(url)
@@ -75,55 +79,75 @@ S2_download = function(url, destfile, zip = TRUE, skipExisting = TRUE, progressB
url = paste0(url, '?', addParam)
}

ch = curl::new_handle()
chGet = curl::new_handle()
chHead = curl::new_handle(nobody = TRUE)
if (timeout > 0) {
curl::handle_setopt(ch, timeout = timeout)
curl::handle_setopt(chGet, timeout = timeout)
curl::handle_setopt(chHead, timeout = timeout)
}

success = rep(FALSE, length(url))
if (progressBar) {
pb = utils::txtProgressBar(0, length(url), style = 3)
}
breakLoop = FALSE
for (i in seq_along(url)) {
if (isFALSE(skipExisting) | !file.exists(destfile[i])) {
breakLoop = FALSE
# short tracks
if (breakLoop) {
break
}
if (file.exists(destfile[i]) & skipExisting == 'always') {
if (progressBar) {
utils::setTxtProgressBar(pb, i)
}
next
}
# full track
toGo = tries
while (toGo > 0 & !success[i] & !breakLoop) {
toGo = toGo - 1
tryCatch(
{
curl::curl_download(url = url[i], destfile = destfile[i], handle = ch, quiet = TRUE)
# get expected download length
resp = curl::curl_fetch_memory(url[i], chHead)$headers
headers = curl::parse_headers(resp)
contentLength = c(as.integer(sub('^.* ', '', grep('^content-length: [0-9]+$', headers, value = TRUE, ignore.case = TRUE))), -1L)[1]

# when needed, perform a download
if (!file.exists(destfile[i]) | skipExisting == 'never' | file.size(destfile[i]) != contentLength) {
curl::curl_download(url = url[i], destfile = destfile[i], handle = chGet, quiet = TRUE)

if (file.size(destfile[i]) != contentLength & contentLength >= 0L) {
unlink(destfile[i])
stop('downloaded file size does not match remote file size')
}

signature = readBin(destfile[i], 'raw', 4)
if (all(signature == as.raw(c(80L, 75L, 3L, 4L))) & zip) {
destfile[i] = sub('[.]zip$', '', destfile[i])
zipfile = paste0(destfile[i], '.zip')
file.rename(destfile[i], zipfile)
utils::unzip(zipfile = zipfile, exdir = destfile[i])
# unpacking zip files
signature = readBin(destfile[i], 'raw', 4)
if (all(signature == as.raw(c(80L, 75L, 3L, 4L))) & zip) {
destfile[i] = sub('[.]zip$', '', destfile[i])
zipfile = paste0(destfile[i], '.zip')
file.rename(destfile[i], zipfile)
utils::unzip(zipfile = zipfile, exdir = destfile[i])
}
}

success[i] = TRUE
},
warning = function(w) {
# download stopped by a keyboard interrupt
if (all(w$message == 'Operation was aborted by an application callback')) {
if (file.exists(destfile[i])) {
unlink(destfile[i])
}
breakLoop <<- TRUE
}
},
error = function(e) {
if (file.exists(destfile[i])) {
unlink(destfile[i])
}
}
error = function(e) {}
)
if (breakLoop) {
break
}
}

if (progressBar) {
utils::setTxtProgressBar(pb, i)
}
}

return(invisible(success))
}
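
The `skipExisting = 'samesize'` logic above hinges on reading the remote file size from an HTTP HEAD request (the separate `nobody = TRUE` curl handle) before deciding whether to download. A standalone sketch of the same technique, assuming only the `curl` package (the helper names and fallback value are illustrative, not part of the package API):

```r
library(curl)

# remote file size via a HEAD request; -1L when the server
# doesn't report a content-length header
remote_size = function(url) {
  h = curl::new_handle(nobody = TRUE)  # HEAD request - skip the response body
  headers = curl::parse_headers(curl::curl_fetch_memory(url, h)$headers)
  size = grep('^content-length: [0-9]+$', headers, value = TRUE, ignore.case = TRUE)
  c(as.integer(sub('^.* ', '', size)), -1L)[1]
}

# re-download only when local and remote sizes disagree
needs_download = function(url, destfile) {
  !file.exists(destfile) || file.size(destfile) != remote_size(url)
}
```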

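Granule downloads arrive as ZIP archives, which the code above detects by the four-byte magic number `80 75 3 4` (ASCII `PK` followed by `\x03\x04`) rather than by file extension. The same check in isolation (the helper name is hypothetical):

```r
# TRUE when the file starts with the ZIP magic number "PK\x03\x04"
is_zip_archive = function(path) {
  signature = readBin(path, 'raw', n = 4)
  length(signature) == 4 && all(signature == as.raw(c(0x50, 0x4B, 0x03, 0x04)))
}

# e.g. decide whether a freshly downloaded granule needs unzipping
# if (is_zip_archive('granule.zip')) utils::unzip('granule.zip', exdir = 'granule')
```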
12 changes: 9 additions & 3 deletions man/S2_download.Rd

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion tests/testthat/test-S2_generate_RGB.R
@@ -5,7 +5,7 @@ test_that('S2_generate_RGB() works', {
dplyr::filter(band %in% c('B02', 'B03', 'B04')) %>%
dplyr::arrange(date, band)
file = tempfile()
S2_generate_RGB(imgs$granuleId[1], destfile = file, overwrite = TRUE)
S2_generate_RGB(imgs$granuleId[1], atmCorr = TRUE, resolution = 'lowest', destfile = file, overwrite = TRUE)
expect_true(file.exists(file))
expect_gt(file.size(file), 1000000)
unlink(file)
30 changes: 27 additions & 3 deletions tests/testthat/test-downloads.R
@@ -10,10 +10,34 @@ test_that('S2 downloads images', {
}
})

data = S2_query_image(imageId = 29392766)
data = S2_query_image(imageId = 29392766, granuleId = 1380347)
S2_download(data$url, 'test.jp2')
expect_true(file.exists('test.jp2'))
expect_equal(file.info('test.jp2')$size, 3190469)
expect_equal(file.size('test.jp2'), 3190469)

writeLines('foo', 'test.jp2')
S2_download(data$url, 'test.jp2', skipExisting = 'always')
expect_equal(file.size('test.jp2'), 4)

writeLines('foo', 'test.jp2')
S2_download(data$url, 'test.jp2', skipExisting = 'samesize')
expect_equal(file.size('test.jp2'), 3190469)
})

test_that('S2 downloads timeout works', {
on.exit({
if (file.exists('test.jp2')) {
unlink('test.jp2')
}
})

data = S2_query_image(imageId = 30135471, granuleId = 1437243)

results = S2_download(data$url, 'test.jp2', timeout = 1, skipExisting = 'never')
expect_false(results)

results = S2_download(data$url, 'test.jp2', timeout = 1, tries = 3, skipExisting = 'never')
expect_false(results)
})

test_that('S2 downloads granules', {
@@ -25,7 +49,7 @@ test_that('S2 downloads granules', {
}
tryCatch(
{
S2_download('https://test%40s2.boku.eodc.eu:test@s2.boku.eodc.eu/granule/2920000', destfile = 'testDir', zip = TRUE, skipExisting = FALSE)
S2_download('https://test%40s2.boku.eodc.eu:test@s2.boku.eodc.eu/granule/2920000', destfile = 'testDir', zip = TRUE, skipExisting = 'never')
expect_true(file.exists('testDir/MTD_TL.xml'))
},
finally = {
