Skip to content

Commit

Permalink
Merge branch 'master' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
wibeasley committed Feb 2, 2020
2 parents 9d22dad + 0b3d67c commit ffd786f
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 35 deletions.
22 changes: 17 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,33 @@ Authors@R: c(
person(
"Will", "Beasley",
role=c("aut", "cre"),
email="wibeasley@hotmail.com",
email="wibeasley@hotmail.com",
comment = c(ORCID = "0000-0002-5613-5006")
),
person(
"Thomas J.", "Leeper",
role = c("aut"),
role = c("aut"),
email = "thosjleeper@gmail.com",
comment = c(ORCID = "0000-0003-4097-6326")
),
person(
"Philip", "Durbin",
role = c("aut"),
role = c("aut"),
email = "philipdurbin@gmail.com",
comment = c(ORCID = "0000-0002-9528-9470")
),
person(
"Shiro", "Kuriwaki",
role = c("aut"),
email = "shirokuriwaki@gmail.com",
comment = c(ORCID = "0000-0002-5687-2647")
),
person(
"Sebastian", "Karcher",
role=c("aut"),
email="karcher@u.northwestern.edu",
comment = c(ORCID = "0000-0001-8249-7388")
),
person(
"Jan", "Kanis", role = "ctb"
))
Expand All @@ -39,8 +51,8 @@ Suggests:
testthat,
UNF,
yaml
Description: Provides access to Dataverse version 4 APIs <https://dataverse.org/>,
enabling data search, retrieval, and deposit. For Dataverse versions <= 4.0,
Description: Provides access to Dataverse version 4 APIs <https://dataverse.org/>,
enabling data search, retrieval, and deposit. For Dataverse versions <= 4.0,
use the deprecated 'dvn' package <https://cran.r-project.org/package=dvn>.
License: GPL-2
URL: https://github.com/iqss/dataverse-client-r
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# CHANGES TO dataverse 0.2.2 (upcoming)

* Make filter queries (fq) work in `dataverse_search` (#36 @adam3smith)
* Update maintainer to Will Beasley (wibeasley@hotmail.com) (#38)
* More robust file retrieval (#39 @kuriwaki)
* Tests use https://demo.dataverse.org/dataverse/dataverse-client-r/. (#40)
* Fix most `get_file()` errors by removing query argument (#33 @kuriwaki)
* Fix getting multiple files by id in `get_file()` (#47 @adam3smith)

# CHANGES TO dataverse 0.2.1

Expand Down
7 changes: 4 additions & 3 deletions R/dataverse_search.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,10 @@ function(...,
query[["show_relevance"]] <- show_relevance
## show_facets
query[["show_facets"]] <- show_facets
## fq
if (!is.null(start)) {
query[["fq"]] <- match.arg(fq)
## fq
# we're passing the unencoded fq string on to the API using I() as the API doesn't handle encoded strings properly
if (!is.null(fq)) {
query[["fq"]] <- I(fq)
}

# setup URL
Expand Down
69 changes: 43 additions & 26 deletions R/get_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#' @title Download File(s)
#' @description Download Dataverse File(s)
#' @details This function provides access to data files from a Dataverse entry.
#' @param file An integer specifying a file identifier; or, if \code{doi} is specified, a character string specifying a file name within the DOI-identified dataset; or an object of class \dQuote{dataverse_file} as returned by \code{\link{dataset_files}}.
#' @param file An integer specifying a file identifier; or a vector of integers specifying file identifiers; or, if \code{doi} is specified, a character string specifying a file name within the DOI-identified dataset; or an object of class \dQuote{dataverse_file} as returned by \code{\link{dataset_files}}.
#' @template ds
#' @param format A character string specifying a file format. For \code{get_file}: by default, this is \dQuote{original} (the original file format). If \dQuote{RData} or \dQuote{prep} is used, an alternative is returned. If \dQuote{bundle}, a compressed directory containing a bundle of file formats is returned. For \code{get_file_metadata}, this is \dQuote{ddi}.
#' @param vars A character vector specifying one or more variable names, used to extract a subset of the data.
Expand Down Expand Up @@ -30,6 +30,9 @@
#' flist <- dataset_files(2692151)
#' get_file(flist[[2]])
#'
#' # retrieve all files in a dataset in their original format (returns a list of raw vectors)
#' file_ids <- get_dataset("doi:10.7910/DVN/CXOB4K")[['files']]$id
#' f3 <- get_file(file_ids, format = "original")
#' # read file as data.frame
#' if (require("rio")) {
#' tmp <- tempfile(fileext = ".dta")
Expand All @@ -54,6 +57,10 @@ get_file <-
server = Sys.getenv("DATAVERSE_SERVER"),
...) {
format <- match.arg(format)

# single file ID
if (is.numeric(file))
fileid <- file

# get file ID from 'dataset'
if (!is.numeric(file)) {
Expand All @@ -67,35 +74,37 @@ get_file <-
} else {
fileid <- file
}


# request multiple files -----
if (length(fileid) > 1) {
fileid <- paste0(fileid, collapse = ",")
u <- paste0(api_url(server), "access/datafiles/", file)
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
httr::stop_for_status(r)
tempf <- tempfile(fileext = ".zip")
tempd <- tempfile()
dir.create(tempd)
on.exit(unlink(tempf), add = TRUE)
on.exit(unlink(tempd), add = TRUE)
writeBin(httr::content(r, as = "raw"), tempf)
to_extract <- utils::unzip(tempf, list = TRUE)
out <- lapply(to_extract$Name[to_extract$Name != "MANIFEST.TXT"], function(zipf) {
utils::unzip(zipfile = tempf, files = zipf, exdir = tempd)
readBin(file.path(tempd, zipf), "raw", n = 1e8)
})
return(out)
}
# # request multiple files -----
# if (length(fileid) > 1) {
# fileid <- paste0(fileid, collapse = ",")
# u <- paste0(api_url(server), "access/datafiles/", fileid)
# r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
# httr::stop_for_status(r)
# tempf <- tempfile(fileext = ".zip")
# tempd <- tempfile()
# dir.create(tempd)
# on.exit(unlink(tempf), add = TRUE)
# on.exit(unlink(tempd), add = TRUE)
# writeBin(httr::content(r, as = "raw"), tempf)
# to_extract <- utils::unzip(tempf, list = TRUE)
# out <- lapply(to_extract$Name[to_extract$Name != "MANIFEST.TXT"], function(zipf) {
# utils::unzip(zipfile = tempf, files = zipf, exdir = tempd)
# readBin(file.path(tempd, zipf), "raw", n = 1e8)
# })
# return(out)
# }

# request single file -----
if (length(fileid) == 1) {
# downloading files sequentially and add the raw vectors to a list
out <- vector("list", length(fileid))
for (i in 1:length(fileid)) {
if (format == "bundle") {
u <- paste0(api_url(server), "access/datafile/bundle/", fileid)
u <- paste0(api_url(server), "access/datafile/bundle/", fileid[i])
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
}
if (format != "bundle") {
u <- paste0(api_url(server), "access/datafile/", fileid)
u <- paste0(api_url(server), "access/datafile/", fileid[i])
query <- list()
if (!is.null(vars)) {
query$vars <- paste0(vars, collapse = ",")
Expand All @@ -106,15 +115,23 @@ get_file <-

# request single file in non-bundle format ----
# add query if ingesting a tab (detect from original file name)
if (length(query) == 1 & grepl("\\.tab$", file)) {
if (length(query) == 1 & grepl("\\.tab$", file[i])) {
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), query = query, ...)
} else {
# do not add query if not an ingestion file
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
}
}
httr::stop_for_status(r)
return(httr::content(r, as = "raw"))
out[[i]] <- httr::content(r, as = "raw")
}
# return the raw vector if there's a single file
if (length(out) == 1) {
return (out[[1]])
}
else {
# return a list of raw vectors otherwise
return (out)
}
}

Expand Down
5 changes: 4 additions & 1 deletion man/files.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions tests/testthat/tests-get_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,22 @@ test_that("download file from file id", {
expect_true(is.raw(actual))
expect_true(1000 < object.size(actual)) # Should be 1+ KB
})

# Requests every file id of a demo dataset in one get_file() call and checks
# that the result has one entry per file and that an entry is a raw vector.
test_that("download multiple files with file id - no folder", {
  dataset <- get_dataset("doi:10.70122/FK2/LZAJEQ", server = "demo.dataverse.org")
  file_ids <- dataset[["files"]]$id
  actual <- get_file(
    file_ids,
    format = "original",
    server = "demo.dataverse.org"
  )
  # expect_length() reports the actual length on failure, unlike
  # expect_true(length(x) == n).
  expect_length(actual, 2) # two files in the dataset
  expect_true(is.raw(actual[[2]]))
  expect_true(object.size(actual[[2]]) > 300) # Should be >300 B
})

# Same multi-file download check as the "no folder" case, against a second
# demo dataset (per the test name, one whose files sit in folders).
test_that("download multiple files with file id - with folders", {
  dataset <- get_dataset("doi:10.70122/FK2/V54HGA", server = "demo.dataverse.org")
  file_ids <- dataset[["files"]]$id
  actual <- get_file(file_ids, format = "original", server = "demo.dataverse.org")
  # expect_length() reports the actual length on failure, unlike
  # expect_true(length(x) == n).
  expect_length(actual, 2) # two files in the dataset
  expect_true(is.raw(actual[[2]]))
  expect_true(object.size(actual[[2]]) > 70) # Should be >70 B
})
16 changes: 16 additions & 0 deletions tests/testthat/tests-search.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,19 @@ test_that("named argument search", {
test_that("simple search w/type argument", {
expect_true(is.data.frame(dataverse_search(author = "Gary King", type = "dataset", key = "", server = "dataverse.harvard.edu")))
})

test_that("date range search using fq", {
  # fq carries a dateSort range from 2018-01-01 to 2019-01-01.
  hits <- dataverse_search(
    "*",
    fq = "dateSort:[2018-01-01T00:00:00Z+TO+2019-01-01T00:00:00Z]",
    type = "dataset",
    key = "",
    server = "dataverse.harvard.edu"
  )
  expect_true(is.data.frame(hits))
})

test_that("publication year using fq", {
  # fq filters on the publicationDate field with the value 2018.
  hits <- dataverse_search(
    "*",
    fq = "publicationDate:2018",
    type = "dataset",
    key = "",
    server = "dataverse.harvard.edu"
  )
  expect_true(is.data.frame(hits))
})

test_that("filter dataverses by subject using fq", {
  # Searches for dataverses (not datasets) with a subject_ss filter query.
  hits <- dataverse_search(
    "*",
    fq = "subject_ss:Social+Sciences",
    type = "dataverse",
    key = "",
    server = "dataverse.harvard.edu"
  )
  expect_true(is.data.frame(hits))
})

test_that("empty fq search", {
  # The range start (2019-02-01) is later than its end (2019-01-01); the test
  # expects the search to come back with length 0.
  hits <- dataverse_search(
    "*",
    fq = "dateSort:[2019-02-01T00:00:00Z+TO+2019-01-01T00:00:00Z]",
    type = "dataset",
    key = "",
    server = "dataverse.harvard.edu"
  )
  expect_length(hits, 0)
})

0 comments on commit ffd786f

Please sign in to comment.