finish update_dataset_file() func (closes #10)
leeper committed Jun 15, 2017
1 parent 08aa65d commit 27d08a1
Showing 7 changed files with 138 additions and 49 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: dataverse
Version: 0.1.23
Date: 2017-06-14
Version: 0.1.24
Date: 2017-06-15
Title: Client for Dataverse 4 Repositories
Authors@R: c(person("Thomas J.", "Leeper", role = c("aut", "cre"),
email = "thosjleeper@gmail.com"),
6 changes: 5 additions & 1 deletion NEWS.md
@@ -1,4 +1,8 @@
# CHANGES TO dataverse 0.1.22
# CHANGES TO dataverse 0.1.24

* Added an `update_dataset_file()` function and improved associated documentation. (#10)

# CHANGES TO dataverse 0.1.23

* Added a provisional `add_dataset_file()` function. (#10)
* Reorganized some code.
51 changes: 43 additions & 8 deletions R/add_dataset_file.R
@@ -1,11 +1,15 @@
#' @rdname add_dataset_file
#' @title Add or update a file in a dataset
#' @description Add or update a file in a dataset
#' @details From Dataverse v4.6.1, the \dQuote{native} API provides endpoints to add and update files without going through the SWORD workflow. To use SWORD instead, see \code{\link{add_file}}. \code{add_dataset_file} adds a new file to a specified dataset. \code{update_dataset_file} can be used to replace/update a published file.
#' @details From Dataverse v4.6.1, the \dQuote{native} API provides endpoints to add and update files without going through the SWORD workflow. To use SWORD instead, see \code{\link{add_file}}. \code{add_dataset_file} adds a new file to a specified dataset.
#'
#' \code{update_dataset_file} can be used to replace or update a published file. Note that it works only on published files; an unpublished draft cannot be updated, so the dataset must first either be published (\code{\link{publish_dataset}}) or deleted (\code{\link{delete_dataset}}).
#'
#' @param file A character string giving the path of the local file to upload.
#' @template ds
#' @param id An integer specifying a file identifier; or, if \code{doi} is specified, a character string specifying a file name within the DOI-identified dataset; or an object of class \dQuote{dataverse_file} as returned by \code{\link{dataset_files}}.
#' @param description Optionally, a character string providing a description of the file.
#' @param force A logical indicating whether to force the update even if the file types differ. Default is \code{TRUE}.
#' @template envvars
#' @template dots
#' @return \code{add_dataset_file} returns the new file ID.
@@ -16,34 +20,58 @@
#' ds <- create_dataset("mydataverse", body = meta)
#'
#' saveRDS(mtcars, tmp <- tempfile(fileext = ".rds"))
#' f <- add_dataset_file(tmp, dataset = ds)
#' f <- add_dataset_file(tmp, dataset = ds, description = "mtcars")
#'
#' # publish dataset
#' publish_dataset(ds)
#'
#' # update file and republish
#' saveRDS(iris, tmp)
#' update_dataset_file(tmp, dataset = ds, id = f)
#' update_dataset_file(tmp, dataset = ds, id = f,
#' description = "Actually iris")
#' publish_dataset(ds)
#'
#' # cleanup
#' unlink(tmp)
#' delete_dataset(ds)
#' }
#' @export
add_dataset_file <- function(file, dataset, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) {
add_dataset_file <-
function(file,
dataset,
description = NULL,
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...) {
dataset <- dataset_id(dataset)

bod2 <- list()
if (!is.null(description)) {
bod2$description <- description
}
jsondata <- as.character(jsonlite::toJSON(bod2, auto_unbox = TRUE))

u <- paste0(api_url(server), "datasets/", dataset, "/add")
r <- httr::POST(u, httr::add_headers("X-Dataverse-key" = key), ...,
body = list(file = httr::upload_file(file)), encode = "multipart")
body = list(file = httr::upload_file(file),
jsonData = jsondata),
encode = "multipart")
httr::stop_for_status(r)
out <- jsonlite::fromJSON(httr::content(r, "text", encoding = "UTF-8"))
out$data$files$dataFile$id[1L]
}

#' @rdname add_dataset_file
#' @export
update_dataset_file <- function(file, dataset = NULL, id, body = NULL, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) {
update_dataset_file <-
function(file,
dataset = NULL,
id,
description = NULL,
force = TRUE,
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...) {
dataset <- dataset_id(dataset)

# get file ID from 'dataset'
@@ -57,11 +85,18 @@ update_dataset_file <- function(file, dataset = NULL, id, body = NULL, key = Sys
}
}

bod2 <- list(forceReplace = force)
if (!is.null(description)) {
bod2$description <- description
}
jsondata <- as.character(jsonlite::toJSON(bod2, auto_unbox = TRUE))

u <- paste0(api_url(server), "files/", id, "/replace")
r <- httr::POST(u, httr::add_headers("X-Dataverse-key" = key), ...,
body = list(file = httr::upload_file(file),
jsonData = list(forceReplace = TRUE)),
jsonData = jsondata
),
encode = "multipart")
httr::stop_for_status(r)
httr::content(r, as = "text", encoding = "UTF-8")
structure(jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"), simplifyDataFrame = FALSE)$data$files[[1L]], class = "dataverse_file")
}
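
Both functions above are thin wrappers around Dataverse's "native" HTTP API. As a minimal sketch of the underlying requests (the endpoint paths `datasets/{id}/add` and `files/{id}/replace` are taken from the function bodies; the server URL, file paths, and numeric IDs are placeholder assumptions):

```R
library("httr")
library("jsonlite")

server <- "https://demo.dataverse.org/api/"  # hypothetical installation
key    <- Sys.getenv("DATAVERSE_KEY")        # API token assumed to be set

# add a file to a dataset: multipart POST to datasets/{id}/add
saveRDS(mtcars, "mtcars.rds")
jsondata <- as.character(toJSON(list(description = "example file"),
                                auto_unbox = TRUE))
r <- POST(paste0(server, "datasets/", 12345, "/add"),  # 12345 = hypothetical dataset ID
          add_headers("X-Dataverse-key" = key),
          body = list(file = upload_file("mtcars.rds"),
                      jsonData = jsondata),
          encode = "multipart")
stop_for_status(r)

# replace a published file: multipart POST to files/{id}/replace
saveRDS(iris, "iris.rds")
jsondata <- as.character(toJSON(list(description = "updated file",
                                     forceReplace = TRUE),
                                auto_unbox = TRUE))
r <- POST(paste0(server, "files/", 67890, "/replace"),  # 67890 = hypothetical file ID
          add_headers("X-Dataverse-key" = key),
          body = list(file = upload_file("iris.rds"),
                      jsonData = jsondata),
          encode = "multipart")
stop_for_status(r)
```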
17 changes: 12 additions & 5 deletions README.Rmd
@@ -1,5 +1,10 @@
# R Client for Dataverse 4 Repositories

```{r knitr_options, echo=FALSE, results="hide"}
options(width = 120)
knitr::opts_chunk$set(results = "hold")
```

[![Dataverse Project logo](http://dataverse.org/files/dataverseorg/files/dataverse_project_logo-hp.png "Dataverse Project")](http://dataverse.org)

The **dataverse** package provides access to [Dataverse 4](http://dataverse.org/) APIs, enabling data search, retrieval, and deposit, thus allowing R users to integrate public data sharing into the reproducible research workflow. **dataverse** is the next-generation iteration of [the **dvn** package](https://cran.r-project.org/package=dvn), which works with Dataverse 3 ("Dataverse Network") applications. **dataverse** includes numerous improvements for data search, retrieval, and deposit, including use of the (currently in development) **sword** package for data deposit and the **UNF** package for data fingerprinting.
@@ -24,20 +29,20 @@ Currently, the package wraps the data management features of the Dataverse API.

Dataverse supplies a pretty robust search API to discover Dataverses, datasets, and files. The simplest searches simply consist of a query string:

```{r}
```{r search1}
library("dataverse")
str(dataverse_search("Gary King"), 1)
```

More complicated searches might specify metadata fields:

```{r}
```{r search2}
str(dataverse_search(author = "Gary King", title = "Ecological Inference"), 1)
```

And searches can be restricted to specific types of objects (Dataverse, dataset, or file):

```{r}
```{r search3}
str(dataverse_search(author = "Gary King", type = "dataset"), 1)
```

@@ -48,13 +53,13 @@ The results are paginated using `per_page` argument. To retrieve subsequent page

The easiest way to access data from Dataverse is to use a persistent identifier (typically a DOI). You can retrieve the contents of a Dataverse dataset:

```{r}
```{r get_dataset}
get_dataset("doi:10.7910/DVN/ARKOTI")
```

Knowing a file name, you can also access that file (e.g., a Stata dataset) directly in R:

```{r}
```{r get_file}
f <- get_file("constructionData.tab", "doi:10.7910/DVN/ARKOTI")
# load it into memory
@@ -112,6 +117,8 @@ publish_dataset(ds)
get_dataverse("mydataverse")
```

Through the native API it is possible to update a dataset by modifying its metadata with `update_dataset()` or its file contents with `update_dataset_file()`, and then to publish the new version with `publish_dataset()`.
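
A minimal sketch of that update cycle, assuming a dataset `ds` and file handle `f` created as in the deposit example above (the path and description are illustrative):

```R
# replace the file contents, creating a new draft version
saveRDS(iris, tmp <- tempfile(fileext = ".rds"))
update_dataset_file(tmp, dataset = ds, id = f, description = "Updated data")

# republish so the replacement becomes the current released version
publish_dataset(ds)
```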

## Installation

[![CRAN Version](https://www.r-pkg.org/badges/version/dataverse)](https://cran.r-project.org/package=dataverse)
85 changes: 60 additions & 25 deletions README.md
@@ -1,5 +1,7 @@
# R Client for Dataverse 4 Repositories



[![Dataverse Project logo](http://dataverse.org/files/dataverseorg/files/dataverse_project_logo-hp.png "Dataverse Project")](http://dataverse.org)

The **dataverse** package provides access to [Dataverse 4](http://dataverse.org/) APIs, enabling data search, retrieval, and deposit, thus allowing R users to integrate public data sharing into the reproducible research workflow. **dataverse** is the next-generation iteration of [the **dvn** package](https://cran.r-project.org/package=dvn), which works with Dataverse 3 ("Dataverse Network") applications. **dataverse** includes numerous improvements for data search, retrieval, and deposit, including use of the (currently in development) **sword** package for data deposit and the **UNF** package for data fingerprinting.
@@ -125,28 +127,43 @@ get_dataset("doi:10.7910/DVN/ARKOTI")

```
## Dataset (75170):
## Version (75170): 1.0, RELEASED
## Version: 1.0, RELEASED
## Release Date: 2015-07-07T02:57:02Z
## License: CC0
## 17 Files:
## 'data.frame': 32 obs. of 17 variables:
## $ description : chr "Salta, Argentina field experiment on e-voting versus traditional voting. Citation: Alvarez, R. Michael, Ines Le"| __truncated__ "National Survey of High School Biology Teachers. Citation: Berkman, Michael and Eric Plutzer. 2010. Evolution, "| __truncated__ "Replication code for Chapter 1 (Obtaining R and Downloading Packages). No additional data required." "Replication code for Chapter 2 (Loading and Manipulating Data). Required data files: hmnrghts.txt, sen113kh.ord"| __truncated__ ...
## $ label : chr "alpl2013.tab" "BPchap7.tab" "chapter01.R" "chapter02.R" ...
## $ version : int 2 2 2 2 2 2 2 2 2 2 ...
## $ datasetVersionId : int 75170 75170 75170 75170 75170 75170 75170 75170 75170 75170 ...
## $ categories :List of 32
## $ id : int 2692294 2692295 2692202 2692206 2692210 2692204 2692205 2692212 2692209 2692208 ...
## $ filename : chr "alpl2013.tab" "BPchap7.tab" "chapter01.R" "chapter02.R" ...
## $ contentType : chr "text/tab-separated-values" "text/tab-separated-values" "text/plain; charset=US-ASCII" "text/plain; charset=US-ASCII" ...
## $ filesize : int 210991 61284 1293 5591 5766 1938 2327 4064 7228 6433 ...
## $ description : chr "Salta, Argentina field experiment on e-voting versus traditional voting. Citation: Alvarez, R. Michael, Ines Le"| __truncated__ "National Survey of High School Biology Teachers. Citation: Berkman, Michael and Eric Plutzer. 2010. Evolution, "| __truncated__ "Replication code for Chapter 1 (Obtaining R and Downloading Packages). No additional data required." "Replication code for Chapter 2 (Loading and Manipulating Data). Required data files: hmnrghts.txt, sen113kh.ord"| __truncated__ ...
## $ storageIdentifier : chr "14e664cd3c7-d64f88cca576" "14e664cd409-7a2dc0c380f9" "14e66326932-5c24bd6e6707" "14e663269e2-61fc90d7afec" ...
## $ originalFileFormat : chr "application/x-stata" "application/x-stata" NA NA ...
## $ originalFormatLabel: chr "Stata Binary" "Stata Binary" "UNKNOWN" "UNKNOWN" ...
## $ UNF : chr "UNF:6:d9ZNXvmiPfiunSAiXRpVfg==" "UNF:6:B3/HJbnzktaX5eEJA2ItiA==" NA NA ...
## $ rootDataFileId : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ md5 : chr "2132170a713e5a213ab87dcaea287250" "e8c62465ef6a1a8451a21a43ce7b264e" "cfd66db2e50b3142bcda576cf78dc057" "e9c536034e029450a79ce830e47dd463" ...
## $ checksum :'data.frame': 32 obs. of 2 variables:
## label version id contentType
## 1 alpl2013.tab 2 2692294 text/tab-separated-values
## 2 BPchap7.tab 2 2692295 text/tab-separated-values
## 3 chapter01.R 2 2692202 text/plain; charset=US-ASCII
## 4 chapter02.R 2 2692206 text/plain; charset=US-ASCII
## 5 chapter03.R 2 2692210 text/plain; charset=US-ASCII
## 6 chapter04.R 2 2692204 text/plain; charset=US-ASCII
## 7 chapter05.R 2 2692205 text/plain; charset=US-ASCII
## 8 chapter06.R 2 2692212 text/plain; charset=US-ASCII
## 9 chapter07.R 2 2692209 text/plain; charset=US-ASCII
## 10 chapter08.R 2 2692208 text/plain; charset=US-ASCII
## 11 chapter09.R 2 2692211 text/plain; charset=US-ASCII
## 12 chapter10.R 1 2692203 text/plain; charset=US-ASCII
## 13 chapter11.R 1 2692207 text/plain; charset=US-ASCII
## 14 comprehensiveJapanEnergy.tab 2 2692296 text/tab-separated-values
## 15 constructionData.tab 2 2692293 text/tab-separated-values
## 16 drugCoverage.csv 1 2692233 text/plain; charset=US-ASCII
## 17 hanmerKalkanANES.tab 2 2692290 text/tab-separated-values
## 18 hmnrghts.tab 2 2692298 text/tab-separated-values
## 19 hmnrghts.txt 1 2692238 text/plain
## 20 levant.tab 2 2692289 text/tab-separated-values
## 21 LL.csv 1 2692228 text/plain; charset=US-ASCII
## 22 moneyDem.tab 2 2692292 text/tab-separated-values
## 23 owsiakJOP2013.tab 2 2692297 text/tab-separated-values
## 24 PESenergy.csv 1 2692230 text/plain; charset=US-ASCII
## 25 pts1994.csv 1 2692229 text/plain; charset=US-ASCII
## 26 pts1995.csv 1 2692231 text/plain; charset=US-ASCII
## 27 sen113kh.ord 1 2692239 text/plain; charset=US-ASCII
## 28 SinghEJPR.tab 2 2692299 text/tab-separated-values
## 29 SinghJTP.tab 2 2692288 text/tab-separated-values
## 30 stdSingh.tab 2 2692291 text/tab-separated-values
## 31 UN.csv 1 2692232 text/plain; charset=US-ASCII
## 32 war1800.tab 2 2692300 text/tab-separated-values
```

Knowing a file name, you can also access that file (e.g., a Stata dataset) directly in R:
@@ -166,7 +183,7 @@ If you don't know the file name in advance, you can parse the available files re

### Data Archiving

The data archiving (or "deposit") workflow is built on [SWORD v2.0](http://swordapp.org/sword-v2/). This means that to create a new dataset listing, you will first have to initialize a dataset entry with some metadata, add one or more files to the dataset, and then publish it. This looks something like the following:
Dataverse provides two largely unrelated workflows for managing (adding, documenting, and publishing) datasets. The first is built on [SWORD v2.0](http://swordapp.org/sword-v2/). This means that to create a new dataset listing, you will first have to initialize a dataset entry with some metadata, add one or more files to the dataset, and then publish it. This looks something like the following:

```R
# retrieve your service document
@@ -178,21 +195,39 @@ metadat <- list(title = "My Study",
description = "An example study")

# create the dataset
dat <- initiate_sword_dataset("mydataverse", body = metadat)
ds <- initiate_sword_dataset("mydataverse", body = metadat)

# add files to dataset
tmp <- tempfile()
write.csv(iris, file = tmp)
f <- add_file(dat, file = tmp)
f <- add_file(ds, file = tmp)

# publish new dataset
publish_dataset(dat)
publish_sword_dataset(ds)

# dataset will now be published
list_datasets("mydataverse")
```

The second workflow is called the "native" API and is similar but uses slightly different functions:

```R
# create the dataset
ds <- create_dataset("mydataverse")

# add files
tmp <- tempfile()
write.csv(iris, file = tmp)
f <- add_dataset_file(file = tmp, dataset = ds)

# publish dataset
publish_dataset(ds)

# dataset will now be published
get_dataverse("mydataverse")
```

Dataverse actually implements two ways to release datasets: the SWORD API and the "native" API. Documentation of the latter is forthcoming.
Through the native API it is possible to update a dataset by modifying its metadata with `update_dataset()` or its file contents with `update_dataset_file()`, and then to publish the new version with `publish_dataset()`.
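
A rough sketch of that versioning cycle, reusing `ds` and `f` from the native-API example above (the metadata body shown is an assumption, mirroring the form accepted by `create_dataset()`):

```R
# revise the dataset-level metadata
update_dataset(ds, body = list(title = "My Study (revised)"))

# replace a file's contents
saveRDS(iris, tmp <- tempfile(fileext = ".rds"))
update_dataset_file(tmp, dataset = ds, id = f, description = "Updated data")

# publish the new version
publish_dataset(ds)
```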

## Installation

16 changes: 12 additions & 4 deletions man/add_dataset_file.Rd
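
GitHub does not render the regenerated `.Rd` file, but the Usage section implied by the roxygen signatures above would look roughly like this (a reconstruction from the diff, not the rendered file):

```R
add_dataset_file(file, dataset, description = NULL,
                 key = Sys.getenv("DATAVERSE_KEY"),
                 server = Sys.getenv("DATAVERSE_SERVER"), ...)

update_dataset_file(file, dataset = NULL, id, description = NULL,
                    force = TRUE, key = Sys.getenv("DATAVERSE_KEY"),
                    server = Sys.getenv("DATAVERSE_SERVER"), ...)
```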

