Skip to content

Commit

Permalink
Merge pull request #55 from IQSS/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
wibeasley authored Mar 20, 2020
2 parents 0b3d67c + 2ed1a11 commit ff70181
Show file tree
Hide file tree
Showing 17 changed files with 368 additions and 93 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: dataverse
Version: 0.2.1.9001
Version: 0.2.1.9002
Title: Client for Dataverse 4 Repositories
Authors@R: c(
person(
Expand Down Expand Up @@ -48,6 +48,7 @@ Suggests:
covr,
foreign,
knitr,
purrr,
testthat,
UNF,
yaml
Expand All @@ -59,4 +60,4 @@ URL: https://github.com/iqss/dataverse-client-r
BugReports: https://github.com/iqss/dataverse-client-r/issues
VignetteBuilder: knitr
Encoding: UTF-8
RoxygenNote: 7.0.2
RoxygenNote: 7.1.0
18 changes: 9 additions & 9 deletions R/SWORD.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,10 @@ print.sword_service_document <- function(x, ...) {
#' @return A list.
#' @examples
#' \dontrun{
#' # retrieve your service document
#' d <- service_document()
#'
#' # list available datasets in first dataverse
#' list_datasets(d[[2]])
#' Sys.setenv("DATAVERSE_SERVER" = "demo.dataverse.org")
#' Sys.setenv("DATAVERSE_KEY" = "c7208dd2-6ec5-469a-bec5-f57e164888d4")
#' dv <- get_dataverse("dataverse-client-r")
#' list_datasets(dv)
#' }
#' @seealso Managing a Dataverse: \code{\link{publish_dataverse}}; Managing a dataset: \code{\link{dataset_atom}}, \code{\link{list_datasets}}, \code{\link{create_dataset}}, \code{\link{delete_dataset}}, \code{\link{publish_dataset}}; Managing files within a dataset: \code{\link{add_file}}, \code{\link{delete_file}}
#' @export
Expand All @@ -80,11 +79,12 @@ list_datasets <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server =

# clean up response structure
x <- xml2::as_list(xml2::read_xml(r$content))
out <- list(title = x[["title"]][[1L]],
generator = x[["generator"]],
dataverseHasBeenReleased = x[["dataverseHasBeenReleased"]][[1L]])
feed <- x[["feed"]]
out <- list(title = feed[["title"]][[1L]],
generator = feed[["generator"]],
dataverseHasBeenReleased = feed[["dataverseHasBeenReleased"]][[1L]])
out[["datasets"]] <- do.call("rbind.data.frame",
lapply(x[which(names(x) == "entry")], function(ds) {
lapply(feed[which(names(feed) == "entry")], function(ds) {
list(title = ds[["title"]][[1L]],
id = ds[["id"]][[1L]])
})
Expand Down
4 changes: 2 additions & 2 deletions R/SWORD_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ create_zip.character <- function(x, ...) {
}
create_zip.data.frame <- function(x, ...) {
  # Write the data frame with save() to an intermediate temp file, zip that
  # file into a second temp file, and return the path of the archive.
  # NOTE(review): the intermediate file holds save() output but carries a
  # ".zip" extension; kept as-is because it determines the entry name inside
  # the archive -- confirm this is intended.
  tmpdf <- tempfile(fileext = ".zip")
  # Register the cleanup exactly once. Registering it twice makes the second
  # file.remove() run against an already-deleted file and emit a warning.
  on.exit(file.remove(tmpdf), add = TRUE)
  tmp <- tempfile(fileext = ".zip")
  save(x, file = tmpdf)
  # utils::zip() returns the external command's status; 0 means success.
  stopifnot(!utils::zip(tmp, tmpdf))
  tmp
}
create_zip.list <- function(x, ...) {
tmpdf <- sapply(seq_along(x), tempfile(fileext = ".zip"))
on.exit(file.remove(tmpdf))
on.exit(file.remove(tmpdf), add = TRUE)
mapply(x, tmpdf, function(x, f) save(x, file = f), SIMPLIFY = TRUE)
tmp <- tempfile(fileext = ".zip")
stopifnot(!utils::zip(tmp, tmpdf))
Expand Down
118 changes: 82 additions & 36 deletions R/get_dataset.R
Original file line number Diff line number Diff line change
@@ -1,61 +1,107 @@
#' @rdname get_dataset
#' @title Get dataset
#' @description Retrieve a Dataverse dataset or its metadata
#' @details \code{get_dataset} retrieves details about a Dataverse dataset. \code{dataset_metadata} returns a named metadata block for a dataset. This is already returned by \code{\link{get_dataset}}, but this function allows you to retrieve just a specific block of metadata, such as citation information. \code{dataset_files} returns a list of files in a dataset, similar to \code{\link{get_dataset}}. The difference is that this returns only a list of \dQuote{dataverse_dataset} objects, whereas \code{\link{get_dataset}} returns metadata and a data.frame of files (rather than a list of file objects).
#'
#' @details
#' \code{get_dataset} retrieves details about a Dataverse dataset.
#'
#' \code{dataset_metadata} returns a named metadata block for a dataset.
#' This is already returned by \code{\link{get_dataset}}, but this function allows
#' you to retrieve just a specific block of metadata, such as citation information.
#'
#' \code{dataset_files} returns a list of files in a dataset, similar to
#' \code{\link{get_dataset}}. The difference is that this returns only a list of
#' \dQuote{dataverse_file} objects, whereas \code{\link{get_dataset}} returns
#' metadata and a data.frame of files (rather than a list of file objects).
#'
#' @template ds
#' @template version
#' @template envvars
#' @template dots
#' @return A list of class \dQuote{dataverse_dataset} or a list of a form dependent on the specific metadata block retrieved. \code{dataset_files} returns a list of objects of class \dQuote{dataverse_file}.
#' @examples
#' \dontrun{
#' # download file from:
#' # https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/ARKOTI
#' monogan <- get_dataverse("monogan")
#' monogan_data <- dataverse_contents(monogan)
#' d1 <- get_dataset(monogan_data[[1]])
#' dataset_files(monogan_data[[1]])
#' f <- get_file(d1$files$datafile$id[3])
#' Sys.setenv("DATAVERSE_SERVER" = "demo.dataverse.org")
#' Sys.setenv("DATAVERSE_KEY" = "c7208dd2-6ec5-469a-bec5-f57e164888d4")
#'
#' # Download file from: https://demo.dataverse.org/file.xhtml?fileId=769385
#' dv <- get_dataverse("dataverse-client-r")
#' contents <- dataverse_contents(dv)
#'
#' dataset_files(contents[[1]]) # Dataset contains 2 files
#' dataset_metadata(contents[[1]]) # Easier to query later
#'
#' set <- get_dataset(contents[[1]]) # 1st dataset w/n dataverse
#' f <- get_file(set$files$id[2]) # 2nd file w/n dataset
#'
#' # Check the *binary* representation of the file.
#' length(f)
#' head(f)
#'
#' # Examine the plain-text representation.
#' tmp <- tempfile(fileext = ".svg")
#' writeBin(as.vector(f), tmp)
#' svg_lines <- readLines(tmp)
#' head(svg_lines)
#' }
#' @seealso \code{\link{create_dataset}}, \code{\link{update_dataset}}, \code{\link{delete_dataset}}, \code{\link{publish_dataset}}, \code{\link{dataset_files}}, \code{\link{dataset_metadata}}
#' @export
get_dataset <- function(
  dataset,
  version = ":latest",
  key = Sys.getenv("DATAVERSE_KEY"),
  server = Sys.getenv("DATAVERSE_SERVER"),
  ...
) {
  # Pass the caller's `dataset` through dataset_id() before it is spliced
  # into the request path.
  dataset <- dataset_id(dataset, key = key, server = server, ...)

  # A NULL `version` requests the dataset resource itself; any other value
  # selects that version beneath it.
  u <- paste0(api_url(server), "datasets/", dataset)
  if (!is.null(version)) {
    u <- paste0(u, "/versions/", version)
  }

  r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
  # Turn HTTP error statuses into R errors before attempting to parse.
  httr::stop_for_status(r)
  parse_dataset(httr::content(r, as = "text", encoding = "UTF-8"))
}

#' @rdname get_dataset
#' @param block A character string specifying a metadata block to retrieve. By default this is \dQuote{citation}. Other values may be available, depending on the dataset, such as \dQuote{geospatial} or \dQuote{socialscience}.
#' @importFrom utils str
#' @export
dataset_metadata <- function(
  dataset,
  version = ":latest",
  block = "citation",
  key = Sys.getenv("DATAVERSE_KEY"),
  server = Sys.getenv("DATAVERSE_SERVER"),
  ...
) {
  # Pass the caller's `dataset` through dataset_id() before it is spliced
  # into the request path.
  dataset <- dataset_id(dataset, key = key, server = server, ...)

  # A NULL `block` requests the version's whole metadata resource; otherwise
  # only the named block beneath it.
  u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version, "/metadata")
  if (!is.null(block)) {
    u <- paste0(u, "/", block)
  }

  # Issue the request once, fail on HTTP error statuses, then return the
  # `data` element of the decoded JSON body.
  r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
  httr::stop_for_status(r)
  out <- httr::content(r, as = "text", encoding = "UTF-8")
  jsonlite::fromJSON(out)[["data"]]
}

#' @rdname get_dataset
#' @export
dataset_files <- function(
  dataset,
  version = ":latest",
  key = Sys.getenv("DATAVERSE_KEY"),
  server = Sys.getenv("DATAVERSE_SERVER"),
  ...
) {
  # Pass the caller's `dataset` through dataset_id() before it is spliced
  # into the request path.
  dataset <- dataset_id(dataset, key = key, server = server, ...)
  u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version, "/files")

  r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
  # Turn HTTP error statuses into R errors before attempting to parse.
  httr::stop_for_status(r)

  # simplifyDataFrame = FALSE keeps each file as its own list element, so
  # every entry can be tagged individually with the "dataverse_file" class.
  body <- httr::content(r, as = "text", encoding = "UTF-8")
  out <- jsonlite::fromJSON(body, simplifyDataFrame = FALSE)$data
  lapply(out, `class<-`, "dataverse_file")
}
25 changes: 20 additions & 5 deletions R/get_dataverse.R
Original file line number Diff line number Diff line change
@@ -1,28 +1,43 @@
#' @rdname get_dataverse
#' @title Get Dataverse
#' @description Retrieve details of a Dataverse
#'
#' @details \code{get_dataverse} function retrieves basic information about a Dataverse from a Dataverse server. To see the contents of the Dataverse, use \code{\link{dataverse_contents}} instead. Contents might include one or more \dQuote{datasets} and/or further Dataverses that themselves contain Dataverses and/or datasets. To view the file contents of a single Dataset, use \code{\link{get_dataset}}.
#'
#' @template dv
#' @template envvars
#' @param check A logical indicating whether to check that the value of \code{dataverse} is actually numeric.
#' @template dots
#' @return A list of class \dQuote{dataverse}.
#'
#' @examples
#' \dontrun{
#' # view the root dataverse for a server
#' get_dataverse(":root")
#' dataverse_contents(":root")
#'
#' Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
#' # download file from:
#' # https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/ARKOTI
#' monogan <- get_dataverse("monogan")
#' (monogan_data <- dataverse_contents(monogan))
#' dv <- get_dataverse("monogan")
#' (contents <- dataverse_contents(dv))
#'
#' # get a dataset from the dataverse
#' d1 <- get_dataset(monogan_data[[1]])
#' f <- get_file(d1$files$datafile$id[3])
#' d1 <- get_dataset(contents[[1]])
#' f <- get_file(d1$files$id[3])
#' }
#' @seealso To manage Dataverses: \code{\link{create_dataverse}}, \code{\link{delete_dataverse}}, \code{\link{publish_dataverse}}, \code{\link{dataverse_contents}}; to get datasets: \code{\link{get_dataset}}; to search for Dataverses, datasets, or files: \code{\link{dataverse_search}}
#' @seealso To manage Dataverses:
#' \code{\link{create_dataverse}},
#' \code{\link{delete_dataverse}},
#' \code{\link{publish_dataverse}},
#' \code{\link{dataverse_contents}};
#'
#' To get datasets:
#' \code{\link{get_dataset}};
#'
#' To search for Dataverses, datasets, or files:
#' \code{\link{dataverse_search}}
#'
#' @export
get_dataverse <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), check = TRUE, ...) {
if (isTRUE(check)) {
Expand Down
38 changes: 20 additions & 18 deletions R/get_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,18 @@
#' }
#' @importFrom utils unzip
#' @export
get_file <-
function(file,
dataset = NULL,
format = c("original", "RData", "prep", "bundle"),
# thumb = TRUE,
vars = NULL,
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...) {
get_file <- function(
file,
dataset = NULL,
format = c("original", "RData", "prep", "bundle"),
# thumb = TRUE,
vars = NULL,
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...
) {
format <- match.arg(format)

# single file ID
if (is.numeric(file))
fileid <- file
Expand All @@ -74,7 +75,7 @@ get_file <-
} else {
fileid <- file
}


# # request multiple files -----
# if (length(fileid) > 1) {
Expand Down Expand Up @@ -142,13 +143,14 @@ get_file_name_from_header <- function(x) {
#' @rdname files
#' @import xml2
#' @export
get_file_metadata <-
function(file,
dataset = NULL,
format = c("ddi", "preprocessed"),
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...) {
get_file_metadata <- function(
file,
dataset = NULL,
format = c("ddi", "preprocessed"),
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...
) {
# get file ID from doi
if (!is.numeric(file)) {
if (inherits(file, "dataverse_file")) {
Expand Down
40 changes: 32 additions & 8 deletions man/get_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ff70181

Please sign in to comment.