S2_download() enhancements:
* `tries` parameter added, enabling automatic retries on download failures (see the usage sketch below)
* `skipExisting` parameter is tri-state now:
  * `always` downloads the data only if a local copy doesn't exist (regardless of its size)
  * `samesize` downloads the data if the local copy doesn't exist or has a different size
  * `never` downloads the data regardless of whether a local copy exists
* `timeout` now defaults to 1800 (seconds)
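
A minimal usage sketch of the new interface (the image URL below is a placeholder; in practice it comes from `S2_query_image()$url`):

```r
library(sentinel2)

# placeholder URL - a real one is obtained via S2_query_image()$url
url = 'https://s2.boku.eodc.eu/image/12345678'

# retry up to 3 times, re-download only when the local file size
# differs from the remote one, give up on a single file after 30 min
S2_download(url, 'image.jp2', skipExisting = 'samesize', tries = 3, timeout = 1800)
```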
zozlak committed Apr 29, 2019
1 parent 696d664 commit 82fd5c0
Showing 6 changed files with 104 additions and 36 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
@@ -1,9 +1,9 @@
Package: sentinel2
Title: Tools to access Sentinel-2 data pre-processed by IVFL, BOKU Vienna
Version: 0.4.3
Version: 0.5.0
Authors@R: c(
person("Sebastian", "Boeck", email = "sebastian.boeck@boku.ac.at", role = c("aut", "cre")),
person("Mateusz", "Zoltak", email = "mateusz.zoltak@boku.ac.at", role = c("ctb"))
person("Mateusz", "Zoltak", email = "mateusz.zoltak@boku.ac.at", role = c("aut"))
)
Description: Tools to conveniently query and access pre-processed Sentinel-2
data. Registration to 'https://s2.boku.eodc.eu' is required for most operations.
@@ -29,4 +29,4 @@ License: GPL-3
Encoding: UTF-8
LazyData: true
VignetteBuilder: knitr
RoxygenNote: 6.1.0
RoxygenNote: 6.1.1
14 changes: 14 additions & 0 deletions NEWS.md
@@ -1,3 +1,17 @@
# 0.5.0 (2019-04-29)

* `S2_download()` enhancements:
    * `tries` parameter added, enabling automatic retries on download failures
    * `skipExisting` parameter is tri-state now:
        * `always` downloads the data only if a local copy doesn't exist
          (regardless of its size)
        * `samesize` downloads the data if the local copy doesn't exist or has a different
          size (be aware it doesn't work for full granule zip downloads; in such a case
          it's equivalent to `never`)
        * `never` downloads the data regardless of whether a local copy exists
    * `timeout` now defaults to 1800 (seconds), which seems large enough and prevents
      download jobs from getting stalled

# 0.4.3 (2019-04-26)

* Fixes in `S2_download()`:
76 changes: 50 additions & 26 deletions R/S2_download.R
@@ -4,12 +4,15 @@
#'
#' @param url character (valid) url to download file from.
#' @param destfile character download destination.
#' @param skipExisting logical skip if file already exists.
#' @param skipExisting character should locally existing files be skipped - one
#' of "always", "samesize" (only if remote and local file size matches),
#' "never".
#' @param zip logical if \code{TRUE}, the url will be downloaded as zip archive
#' and (automatically) unzipped in the parent directory of 'destfile'
#'   (plays a role only when downloading granules).
#' @param progressBar should a progress bar be displayed?
#' @param timeout single file download timeout in seconds (0 means no timeout)
#' @param tries how many times to try the download in case of failures
#' @param ... further arguments not implemented directly - see
#' the \href{https://s2.boku.eodc.eu/wiki/#!granule.md#GET_https://s2.boku.eodc.eu/granule/{granuleId}}{granule API doc}
#' and the \href{https://s2.boku.eodc.eu/wiki/#!image.md#GET_https://s2.boku.eodc.eu/image/{imageId}}{image API doc}.
@@ -44,15 +47,16 @@
#' )
#' }

S2_download = function(url, destfile, zip = TRUE, skipExisting = TRUE, progressBar = TRUE, timeout = 0, ...){
S2_download = function(url, destfile, zip = TRUE, skipExisting = 'samesize', progressBar = TRUE, timeout = 1800, tries = 1, ...){
url = as.character(url)
destfile = as.character(destfile)
stopifnot(
is.vector(url), length(url) > 0, is.vector(destfile),
is.logical(skipExisting),
is.vector(skipExisting), is.character(skipExisting), length(skipExisting) == 1, all(!is.na(skipExisting)),
is.vector(zip), is.logical(zip), length(zip) == 1, all(!is.na(zip)),
is.vector(progressBar), is.logical(progressBar), length(progressBar) == 1, all(!is.na(progressBar)),
is.vector(timeout), is.numeric(timeout), length(timeout) == 1, all(!is.na(timeout)),
is.vector(tries), is.numeric(tries), length(tries) == 1, all(!is.na(tries) & tries > 0),
length(url) == length(destfile)
)
filter = !is.na(url)
@@ -75,55 +79,75 @@ S2_download = function(url, destfile, zip = TRUE, skipExisting = TRUE, progressB
url = paste0(url, '?', addParam)
}

ch = curl::new_handle()
chGet = curl::new_handle()
chHead = curl::new_handle(nobody = TRUE)
if (timeout > 0) {
curl::handle_setopt(ch, timeout = timeout)
curl::handle_setopt(chGet, timeout = timeout)
curl::handle_setopt(chHead, timeout = timeout)
}

success = rep(FALSE, length(url))
if (progressBar) {
pb = utils::txtProgressBar(0, length(url), style = 3)
}
breakLoop = FALSE
for (i in seq_along(url)) {
if (isFALSE(skipExisting) | !file.exists(destfile[i])) {
breakLoop = FALSE
# short tracks
if (breakLoop) {
break
}
if (file.exists(destfile[i]) & skipExisting == 'always') {
if (progressBar) {
utils::setTxtProgressBar(pb, i)
}
next
}
# full track
toGo = tries
while (toGo > 0 & !success[i] & !breakLoop) {
toGo = toGo - 1
tryCatch(
{
curl::curl_download(url = url[i], destfile = destfile[i], handle = ch, quiet = TRUE)
# get expected download length
resp = curl::curl_fetch_memory(url[i], chHead)$headers
headers = curl::parse_headers(resp)
contentLength = c(as.integer(sub('^.* ', '', grep('^content-length: [0-9]+$', headers, value = TRUE, ignore.case = TRUE))), -1L)[1]

# when needed, perform a download
if (!file.exists(destfile[i]) | skipExisting == 'never' | file.size(destfile[i]) != contentLength) {
curl::curl_download(url = url[i], destfile = destfile[i], handle = chGet, quiet = TRUE)

if (file.size(destfile[i]) != contentLength & contentLength >= 0L) {
unlink(destfile[i])
stop('downloaded file size does not match remote file size')
}

signature = readBin(destfile[i], 'raw', 4)
if (all(signature == as.raw(c(80L, 75L, 3L, 4L))) & zip) {
destfile[i] = sub('[.]zip$', '', destfile[i])
zipfile = paste0(destfile[i], '.zip')
file.rename(destfile[i], zipfile)
utils::unzip(zipfile = zipfile, exdir = destfile[i])
# unpacking zip files
signature = readBin(destfile[i], 'raw', 4)
if (all(signature == as.raw(c(80L, 75L, 3L, 4L))) & zip) {
destfile[i] = sub('[.]zip$', '', destfile[i])
zipfile = paste0(destfile[i], '.zip')
file.rename(destfile[i], zipfile)
utils::unzip(zipfile = zipfile, exdir = destfile[i])
}
}

success[i] = TRUE
},
warning = function(w) {
# download stopped by a keyboard interrupt
if (all(w$message == 'Operation was aborted by an application callback')) {
if (file.exists(destfile[i])) {
unlink(destfile[i])
}
breakLoop <<- TRUE
}
},
error = function(e) {
if (file.exists(destfile[i])) {
unlink(destfile[i])
}
}
error = function(e) {}
)
if (breakLoop) {
break
}
}

if (progressBar) {
utils::setTxtProgressBar(pb, i)
}
}

return(invisible(success))
}
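
The `skipExisting = 'samesize'` logic above hinges on reading the remote file size from an HTTP HEAD request (the separate `nobody = TRUE` curl handle) before deciding whether to download. A standalone sketch of the same technique, assuming only the `curl` package (the helper names and fallback value are illustrative, not part of the package API):

```r
library(curl)

# remote file size via a HEAD request; -1L when the server
# doesn't report a content-length header
remote_size = function(url) {
  h = curl::new_handle(nobody = TRUE)  # HEAD request - skip the response body
  headers = curl::parse_headers(curl::curl_fetch_memory(url, h)$headers)
  size = grep('^content-length: [0-9]+$', headers, value = TRUE, ignore.case = TRUE)
  c(as.integer(sub('^.* ', '', size)), -1L)[1]
}

# re-download only when local and remote sizes disagree
needs_download = function(url, destfile) {
  !file.exists(destfile) || file.size(destfile) != remote_size(url)
}
```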

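Granule downloads arrive as ZIP archives, which the code above detects by the four-byte magic number `80 75 3 4` (ASCII `PK` followed by `\x03\x04`) rather than by file extension. The same check in isolation (the helper name is hypothetical):

```r
# TRUE when the file starts with the ZIP magic number "PK\x03\x04"
is_zip_archive = function(path) {
  signature = readBin(path, 'raw', n = 4)
  length(signature) == 4 && all(signature == as.raw(c(0x50, 0x4B, 0x03, 0x04)))
}

# e.g. decide whether a freshly downloaded granule needs unzipping
# if (is_zip_archive('granule.zip')) utils::unzip('granule.zip', exdir = 'granule')
```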
12 changes: 9 additions & 3 deletions man/S2_download.Rd

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion tests/testthat/test-S2_generate_RGB.R
@@ -5,7 +5,7 @@ test_that('S2_generate_RGB() works', {
dplyr::filter(band %in% c('B02', 'B03', 'B04')) %>%
dplyr::arrange(date, band)
file = tempfile()
S2_generate_RGB(imgs$granuleId[1], destfile = file, overwrite = TRUE)
S2_generate_RGB(imgs$granuleId[1], atmCorr = TRUE, resolution = 'lowest', destfile = file, overwrite = TRUE)
expect_true(file.exists(file))
expect_gt(file.size(file), 1000000)
unlink(file)
30 changes: 27 additions & 3 deletions tests/testthat/test-downloads.R
@@ -10,10 +10,34 @@ test_that('S2 downloads images', {
}
})

data = S2_query_image(imageId = 29392766)
data = S2_query_image(imageId = 29392766, granuleId = 1380347)
S2_download(data$url, 'test.jp2')
expect_true(file.exists('test.jp2'))
expect_equal(file.info('test.jp2')$size, 3190469)
expect_equal(file.size('test.jp2'), 3190469)

writeLines('foo', 'test.jp2')
S2_download(data$url, 'test.jp2', skipExisting = 'always')
expect_equal(file.size('test.jp2'), 4)

writeLines('foo', 'test.jp2')
S2_download(data$url, 'test.jp2', skipExisting = 'samesize')
expect_equal(file.size('test.jp2'), 3190469)
})

test_that('S2 downloads timeout works', {
on.exit({
if (file.exists('test.jp2')) {
unlink('test.jp2')
}
})

data = S2_query_image(imageId = 30135471, granuleId = 1437243)

results = S2_download(data$url, 'test.jp2', timeout = 1, skipExisting = 'never')
expect_false(results)

results = S2_download(data$url, 'test.jp2', timeout = 1, tries = 3, skipExisting = 'never')
expect_false(results)
})

test_that('S2 downloads granules', {
@@ -25,7 +49,7 @@ test_that('S2 downloads granules', {
}
tryCatch(
{
S2_download('https://test%40s2.boku.eodc.eu:test@s2.boku.eodc.eu/granule/2920000', destfile = 'testDir', zip = TRUE, skipExisting = FALSE)
S2_download('https://test%40s2.boku.eodc.eu:test@s2.boku.eodc.eu/granule/2920000', destfile = 'testDir', zip = TRUE, skipExisting = 'never')
expect_true(file.exists('testDir/MTD_TL.xml'))
},
finally = {
