diff --git a/conf/docker-aio/run-test-suite.sh b/conf/docker-aio/run-test-suite.sh index 988250efada..9cbc90a152a 100755 --- a/conf/docker-aio/run-test-suite.sh +++ b/conf/docker-aio/run-test-suite.sh @@ -8,4 +8,4 @@ fi # Please note the "dataverse.test.baseurl" is set to run for "all-in-one" Docker environment. # TODO: Rather than hard-coding the list of "IT" classes here, add a profile to pom.xml. -mvn test -Dtest=DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT -Ddataverse.test.baseurl=$dvurl +mvn test -Dtest=DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT -Ddataverse.test.baseurl=$dvurl diff --git a/doc/release-notes/4529-download-by-dataset.md b/doc/release-notes/4529-download-by-dataset.md new file mode 100644 index 00000000000..22227e149f0 --- /dev/null +++ b/doc/release-notes/4529-download-by-dataset.md @@ -0,0 +1,6 @@ +In previous versions of Dataverse, downloading all files from a dataset via API was a two-step process: + +- Find all the database ids of the files. +- Download all the files, using those ids (comma-separated). + +Now you can download all files from a dataset (assuming you have access to them) via API by passing the dataset persistent ID (PID such as DOI or Handle) or the dataset's database id. Versions are also supported: as with the "download metadata" API, you can pass :draft, :latest, :latest-published, or numbers (1.1, 2.0). 
diff --git a/doc/sphinx-guides/source/api/dataaccess.rst b/doc/sphinx-guides/source/api/dataaccess.rst index a3fbdfd73f4..690445852b9 100755 --- a/doc/sphinx-guides/source/api/dataaccess.rst +++ b/doc/sphinx-guides/source/api/dataaccess.rst @@ -7,6 +7,75 @@ More advanced features of the Access API include format-specific transformations .. contents:: |toctitle| :local: +.. _download-by-dataset-api: + +Downloading All Files in a Dataset +---------------------------------- + +The "download by dataset" API downloads as many files as possible from a dataset as a zipped bundle. + +By default, tabular files are downloaded in their "archival" form (tab-separated values). To download the original files (Stata, for example), add ``format=original`` as a query parameter. + +There are a number of reasons why not all of the files can be downloaded: + +- Some of the files are restricted and your API token doesn't have access (you will still get the unrestricted files). +- The Dataverse installation has limited how large the zip bundle can be. + +In the curl example below, the flags ``-O`` and ``-J`` are used. When there are no errors, this has the effect of saving the file as "dataverse_files.zip" (just like the web interface). The flags force errors to be downloaded as a file. + +Please note that in addition to the files from the dataset, an additional file called "MANIFEST.TXT" will be included in the zipped bundle. It has additional information about the files. + +There are two forms of the "download by dataset" API, a basic form and one that supports dataset versions. + +Basic Download By Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The basic form downloads files from the latest accessible version of the dataset. If you are not using an API token, this means the most recently published version. If you are using an API token with full access to the dataset, this means the draft version or the most recently published version if no draft exists. 
+ +A curl example using a DOI (no version): + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.70122/FK2/N2XGBJ + + curl -O -J -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/access/dataset/:persistentId/?persistentId=$PERSISTENT_ID + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -O -J -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/access/dataset/:persistentId/?persistentId=doi:10.70122/FK2/N2XGBJ + +Download By Dataset By Version +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The second form of the "download by dataset" API allows you to specify which version you'd like to download files from. As with the ``datasets`` API endpoints described in the :doc:`native-api` section, the following identifiers can be used. + +* ``:draft`` the draft version, if any +* ``:latest`` either a draft (if exists) or the latest published version. +* ``:latest-published`` the latest published version +* ``x.y`` a specific version, where ``x`` is the major version number and ``y`` is the minor version number. +* ``x`` same as ``x.0`` + +A curl example using a DOI (with version): + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.70122/FK2/N2XGBJ + export VERSION=2.0 + + curl -O -J -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/access/dataset/:persistentId/versions/$VERSION?persistentId=$PERSISTENT_ID + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl -O -J -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/access/dataset/:persistentId/versions/2.0?persistentId=doi:10.70122/FK2/N2XGBJ + Basic File Access ----------------- diff --git a/doc/sphinx-guides/source/api/getting-started.rst b/doc/sphinx-guides/source/api/getting-started.rst index 3c9bfd9b137..1c96a0af542 100644 --- a/doc/sphinx-guides/source/api/getting-started.rst +++ b/doc/sphinx-guides/source/api/getting-started.rst @@ -106,7 +106,9 @@ Downloading Files The :doc:`dataaccess` section explains how to download files. -In order to download files, you must know their database IDs which you can get from the ``dataverse_json`` metadata at the dataset level. See :ref:`export-dataset-metadata-api`. +To download all the files in a dataset, see :ref:`download-by-dataset-api`. + +In order to download individual files, you must know their database IDs which you can get from the ``dataverse_json`` metadata at the dataset level. See :ref:`export-dataset-metadata-api`. 
Downloading Metadata ~~~~~~~~~~~~~~~~~~~~ diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index ea2d512f98f..e5bcc9e7f3b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -28,6 +28,7 @@ import edu.harvard.iq.dataverse.UserNotification; import edu.harvard.iq.dataverse.UserNotificationServiceBean; import static edu.harvard.iq.dataverse.api.AbstractApiBean.error; +import static edu.harvard.iq.dataverse.api.Datasets.handleVersion; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.RoleAssignee; @@ -44,10 +45,16 @@ import edu.harvard.iq.dataverse.dataaccess.StoredOriginalFile; import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; +import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.AssignRoleCommand; import edu.harvard.iq.dataverse.engine.command.impl.CreateExplicitGroupCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetDraftDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetLatestAccessibleDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand; import edu.harvard.iq.dataverse.engine.command.impl.RequestAccessCommand; import edu.harvard.iq.dataverse.engine.command.impl.RevokeRoleCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; @@ 
-541,12 +548,73 @@ public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") bool return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); } + + @Path("dataset/{id}") + @GET + @Produces({"application/zip"}) + public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + try { + DataverseRequest req = createDataverseRequest(findUserOrDie()); + final Dataset retrieved = execCommand(new GetDatasetCommand(req, findDatasetOrDie(datasetIdOrPersistentId))); + final DatasetVersion latest = execCommand(new GetLatestAccessibleDatasetVersionCommand(req, retrieved)); + String fileIds = getFileIdsAsCommaSeparated(latest.getFileMetadatas()); + return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + + @Path("dataset/{id}/versions/{versionId}") + @GET + @Produces({"application/zip"}) + public Response downloadAllFromVersion(@PathParam("id") String datasetIdOrPersistentId, @PathParam("versionId") String versionId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + try { + DataverseRequest req = createDataverseRequest(findUserOrDie()); + final Dataset ds = execCommand(new GetDatasetCommand(req, findDatasetOrDie(datasetIdOrPersistentId))); + DatasetVersion dsv = execCommand(handleVersion(versionId, new Datasets.DsVersionHandler>() { + + @Override + public Command handleLatest() { + return new GetLatestAccessibleDatasetVersionCommand(req, ds); + } + + @Override + public Command handleDraft() { + return new GetDraftDatasetVersionCommand(req, ds); + } + + 
@Override + public Command handleSpecific(long major, long minor) { + return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, minor); + } + + @Override + public Command handleLatestPublished() { + return new GetLatestPublishedDatasetVersionCommand(req, ds); + } + })); + if (dsv == null) { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.exception.version.not.found")); + } + String fileIds = getFileIdsAsCommaSeparated(dsv.getFileMetadatas()); + return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + + private static String getFileIdsAsCommaSeparated(List fileMetadatas) { + List ids = new ArrayList<>(); + for (FileMetadata fileMetadata : fileMetadatas) { + Long fileId = fileMetadata.getDataFile().getId(); + ids.add(String.valueOf(fileId)); + } + return String.join(",", ids); + } + /* * API method for downloading zipped bundles of multiple files: */ - - // TODO: Rather than only supporting looking up files by their database IDs, - // consider supporting persistent identifiers. @Path("datafiles/{fileIds}") @GET @Produces({"application/zip"}) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 628db853e5c..61844ab6b9c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -219,7 +219,7 @@ public class Datasets extends AbstractApiBean { * Used to consolidate the way we parse and handle dataset versions. 
* @param */ - private interface DsVersionHandler { + public interface DsVersionHandler { T handleLatest(); T handleDraft(); T handleSpecific( long major, long minor ); @@ -1684,7 +1684,7 @@ private void msgt(String m){ } - private T handleVersion( String versionId, DsVersionHandler hdl ) + public static T handleVersion( String versionId, DsVersionHandler hdl ) throws WrappedResponse { switch (versionId) { case ":latest": return hdl.handleLatest(); diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 97d40798485..fc0695aa00d 100755 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2329,6 +2329,7 @@ access.api.requestList.fileNotFound=Could not find datafile with id {0}. access.api.requestList.noKey=You must provide a key to get list of access requests for a file. access.api.requestList.noRequestsFound=There are no access requests for this file {0}. access.api.exception.metadata.not.available.for.nontabular.file=This type of metadata is only available for tabular files. +access.api.exception.version.not.found=Could not find requested dataset version. 
#permission permission.AddDataverse.label=AddDataverse diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java new file mode 100644 index 00000000000..4f4c34c1c8b --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java @@ -0,0 +1,387 @@ +package edu.harvard.iq.dataverse.api; + +import com.jayway.restassured.RestAssured; +import com.jayway.restassured.path.json.JsonPath; +import com.jayway.restassured.response.Response; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.HashSet; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.FORBIDDEN; +import static javax.ws.rs.core.Response.Status.OK; +import static javax.ws.rs.core.Response.Status.UNAUTHORIZED; +import static org.hamcrest.CoreMatchers.equalTo; +import org.junit.Assert; +import static org.junit.Assert.assertTrue; +import org.junit.BeforeClass; +import org.junit.Test; + +public class DownloadFilesIT { + + @BeforeClass + public static void setUpClass() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + } + + /** + * This test is focused on downloading all files by their version. All files + * are public. 
+ */ + @Test + public void downloadAllFilesByVersion() throws IOException { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + createUser.then().assertThat() + .statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + Path pathtoReadme = Paths.get(Files.createTempDirectory(null) + File.separator + "README.md"); + Files.write(pathtoReadme, "In the beginning...".getBytes()); + + Response uploadReadme = UtilIT.uploadFileViaNative(datasetId.toString(), pathtoReadme.toString(), apiToken); + uploadReadme.prettyPrint(); + uploadReadme.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("README.md")); + + Path pathtoContribFile = Paths.get(Files.createTempDirectory(null) + File.separator + "CONTRIBUTING.md"); + Files.write(pathtoContribFile, "Patches welcome!".getBytes()); + + Response uploadContribFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathtoContribFile.toString(), apiToken); + uploadContribFile.prettyPrint(); + uploadContribFile.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("CONTRIBUTING.md")); + + // The creator gets the files from the draft. 
+ Response downloadFiles1 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles1.then().assertThat() + .statusCode(OK.getStatusCode()); + + HashSet filenamesFound1 = gatherFilenames(downloadFiles1.getBody().asInputStream()); + + // Note that a MANIFEST.TXT file is added. + HashSet expectedFiles1 = new HashSet<>(Arrays.asList("MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); + Assert.assertEquals(expectedFiles1, filenamesFound1); + + // A guest user can't download unpublished files. + Response downloadFiles2 = UtilIT.downloadFiles(datasetPid, null); + downloadFiles2.prettyPrint(); + downloadFiles2.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()) + .body("status", equalTo("ERROR")) + .body("message", equalTo("User :guest is not permitted to perform requested action.")); + + UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken) + .then().assertThat().statusCode(OK.getStatusCode()); + + // Publishing version 1.0. + UtilIT.publishDatasetViaNativeApi(datasetPid, "major", apiToken) + .then().assertThat().statusCode(OK.getStatusCode()); + + // Now a guest user can download files (published now) + Response downloadFiles3 = UtilIT.downloadFiles(datasetPid, null); + downloadFiles3.then().assertThat() + .statusCode(OK.getStatusCode()); + + Path pathtoLicenseFile = Paths.get(Files.createTempDirectory(null) + File.separator + "LICENSE.md"); + Files.write(pathtoLicenseFile, "Apache".getBytes()); + + Response uploadLicenseFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathtoLicenseFile.toString(), apiToken); + uploadLicenseFile.prettyPrint(); + uploadLicenseFile.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("LICENSE.md")); + + Response downloadFiles4 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles4.then().assertThat() + .statusCode(OK.getStatusCode()); + + HashSet filenamesFound2 = gatherFilenames(downloadFiles4.getBody().asInputStream()); + + // The creator gets the draft version 
with an extra file. + HashSet expectedFiles2 = new HashSet<>(Arrays.asList("LICENSE.md", "MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); + Assert.assertEquals(expectedFiles2, filenamesFound2); + + Response downloadFiles5 = UtilIT.downloadFiles(datasetPid, null); + downloadFiles5.then().assertThat() + .statusCode(OK.getStatusCode()); + + HashSet filenamesFound3 = gatherFilenames(downloadFiles5.getBody().asInputStream()); + + // A guest user gets the 1.0 version with only 3 files. + HashSet expectedFiles3 = new HashSet<>(Arrays.asList("MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); + Assert.assertEquals(expectedFiles3, filenamesFound3); + + // Publishing version 2.0 + UtilIT.publishDatasetViaNativeApi(datasetPid, "major", apiToken) + .then().assertThat().statusCode(OK.getStatusCode()); + + Response downloadFiles6 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles6.then().assertThat() + .statusCode(OK.getStatusCode()); + + HashSet filenamesFound4 = gatherFilenames(downloadFiles6.getBody().asInputStream()); + + // By not specifying a version, the creator gets the latest version. In this case, 2.0 (published) with 4 files. + HashSet expectedFiles4 = new HashSet<>(Arrays.asList("LICENSE.md", "MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); + Assert.assertEquals(expectedFiles4, filenamesFound4); + + String datasetVersion = "1.0"; + Response downloadFiles7 = UtilIT.downloadFiles(datasetPid, datasetVersion, apiToken); + downloadFiles7.then().assertThat() + .statusCode(OK.getStatusCode()); + + HashSet filenamesFound5 = gatherFilenames(downloadFiles7.getBody().asInputStream()); + + // Creator specifies the 1.0 version and gets the expected 3 files. 
+ HashSet expectedFiles5 = new HashSet<>(Arrays.asList("MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); + Assert.assertEquals(expectedFiles5, filenamesFound5); + + // Add Code of Conduct file + Path pathtoCocFile = Paths.get(Files.createTempDirectory(null) + File.separator + "CODE_OF_CONDUCT.md"); + Files.write(pathtoCocFile, "Be excellent to each other.".getBytes()); + + // This Code of Conduct file will be in version 3.0 once it's published. For now it's a draft. + Response uploadCocFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathtoCocFile.toString(), apiToken); + uploadCocFile.prettyPrint(); + uploadCocFile.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("CODE_OF_CONDUCT.md")); + + Response downloadFiles8 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles8.then().assertThat() + .statusCode(OK.getStatusCode()); + + HashSet filenamesFound6 = gatherFilenames(downloadFiles8.getBody().asInputStream()); + + // If the creator doesn't specify a version, they get the latest draft with 5 files. + HashSet expectedFiles6 = new HashSet<>(Arrays.asList("CODE_OF_CONDUCT.md", "LICENSE.md", "MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); + Assert.assertEquals(expectedFiles6, filenamesFound6); + + String datasetVersionLatestPublished = ":latest-published"; + Response downloadFiles9 = UtilIT.downloadFiles(datasetPid, datasetVersionLatestPublished, apiToken); + downloadFiles9.then().assertThat() + .statusCode(OK.getStatusCode()); + + HashSet filenamesFound7 = gatherFilenames(downloadFiles9.getBody().asInputStream()); + + // The creator requested "latest published" and got version 2.0 with 4 files. + HashSet expectedFiles7 = new HashSet<>(Arrays.asList("LICENSE.md", "MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); + Assert.assertEquals(expectedFiles7, filenamesFound7); + + // Guests cannot download draft versions. 
+ String datasetVersionDraft = ":draft"; + Response downloadFiles10 = UtilIT.downloadFiles(datasetPid, datasetVersionDraft, null); + downloadFiles10.prettyPrint(); + downloadFiles10.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()) + .body("status", equalTo("ERROR")) + .body("message", equalTo("User :guest is not permitted to perform requested action.")); + + // Users are told about bad API tokens. + Response downloadFiles11 = UtilIT.downloadFiles(datasetPid, "junkToken"); + downloadFiles11.prettyPrint(); + downloadFiles11.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()) + .body("status", equalTo("ERROR")) + .body("message", equalTo("Bad api key ")); + + } + + /** + * This test is focused on downloading all files that are restricted. + */ + @Test + public void downloadAllFilesRestricted() throws IOException { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + createUser.then().assertThat() + .statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + Path pathToSecrets = Paths.get(Files.createTempDirectory(null) + File.separator + "secrets.md"); + Files.write(pathToSecrets, "The Nobel Prize will be mine.".getBytes()); + + Response uploadSecrets = 
UtilIT.uploadFileViaNative(datasetId.toString(), pathToSecrets.toString(), apiToken); + uploadSecrets.prettyPrint(); + uploadSecrets.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("secrets.md")); + + Long origFileId = JsonPath.from(uploadSecrets.body().asString()).getLong("data.files[0].dataFile.id"); + + Response restrictResponse = UtilIT.restrictFile(origFileId.toString(), true, apiToken); + restrictResponse.prettyPrint(); + restrictResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", equalTo("File secrets.md restricted.")); + + Response downloadFiles1 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles1.then().assertThat() + .statusCode(OK.getStatusCode()); + + // The creator can download a restricted file from a draft. + Assert.assertEquals(new HashSet<>(Arrays.asList("secrets.md", "MANIFEST.TXT")), gatherFilenames(downloadFiles1.getBody().asInputStream())); + + Response downloadFiles2 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles2.then().assertThat() + .statusCode(OK.getStatusCode()); + + // The creator can download a restricted file and an unrestricted file from a draft. + Assert.assertEquals(new HashSet<>(Arrays.asList("secrets.md", "MANIFEST.TXT")), gatherFilenames(downloadFiles2.getBody().asInputStream())); + + UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken) + .then().assertThat().statusCode(OK.getStatusCode()); + + UtilIT.publishDatasetViaNativeApi(datasetPid, "major", apiToken) + .then().assertThat().statusCode(OK.getStatusCode()); + + // A guest user can't download the only file because it's restricted. 
+ Response downloadFiles3 = UtilIT.downloadFiles(datasetPid, null); + downloadFiles3.prettyPrint(); + downloadFiles3.then().assertThat() + .statusCode(FORBIDDEN.getStatusCode()) + .body("status", equalTo("ERROR")) + .body("message", equalTo("'/api/v1/access/dataset/%3ApersistentId' you are not authorized to access this object via this api endpoint. Please check your code for typos, or consult our API guide at http://guides.dataverse.org.")); + + // The creator uploads a README that will be public. + Path pathToReadme = Paths.get(Files.createTempDirectory(null) + File.separator + "README.md"); + Files.write(pathToReadme, "My findings.".getBytes()); + + Response uploadReadme = UtilIT.uploadFileViaNative(datasetId.toString(), pathToReadme.toString(), apiToken); + uploadReadme.prettyPrint(); + uploadReadme.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("README.md")); + + UtilIT.publishDatasetViaNativeApi(datasetPid, "major", apiToken) + .then().assertThat().statusCode(OK.getStatusCode()); + + // Now a guest user can download files and get the public one. + Response downloadFiles4 = UtilIT.downloadFiles(datasetPid, null); + downloadFiles4.then().assertThat() + .statusCode(OK.getStatusCode()); + + // The guest can only get the unrestricted file (and the manifest). + Assert.assertEquals(new HashSet<>(Arrays.asList("README.md", "MANIFEST.TXT")), gatherFilenames(downloadFiles4.getBody().asInputStream())); + + Response downloadFiles5 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles5.then().assertThat() + .statusCode(OK.getStatusCode()); + + // The creator can download both files (and the manifest). + Assert.assertEquals(new HashSet<>(Arrays.asList("secrets.md", "README.md", "MANIFEST.TXT")), gatherFilenames(downloadFiles5.getBody().asInputStream())); + + } + + /** + * This test is focused on downloading all files when tabular files are + * present (original vs archival). 
+ */ + @Test + public void downloadAllFilesTabular() throws IOException { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + createUser.then().assertThat() + .statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + String pathToFile = "scripts/search/data/tabular/50by1000.dta"; + + Response uploadTabular = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + uploadTabular.prettyPrint(); + uploadTabular.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("50by1000.dta")); + + // UtilIT.MAXIMUM_INGEST_LOCK_DURATION is 3 but not long enough. + assertTrue("Failed test if Ingest Lock exceeds max duration " + pathToFile, UtilIT.sleepForLock(datasetId.longValue(), "Ingest", apiToken, 4)); + + Response downloadFiles1 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles1.then().assertThat() + .statusCode(OK.getStatusCode()); + + // By default we get the archival version (.tab). 
+ Assert.assertEquals(new HashSet<>(Arrays.asList("50by1000.tab", "MANIFEST.TXT")), gatherFilenames(downloadFiles1.getBody().asInputStream())); + + Response downloadFiles2 = UtilIT.downloadFiles(datasetPid, UtilIT.DownloadFormat.original, apiToken); + downloadFiles2.then().assertThat() + .statusCode(OK.getStatusCode()); + + // By passing format=original we get the original version, Stata (.dta) in this case. + Assert.assertEquals(new HashSet<>(Arrays.asList("50by1000.dta", "MANIFEST.TXT")), gatherFilenames(downloadFiles2.getBody().asInputStream())); + } + + private HashSet gatherFilenames(InputStream inputStream) throws IOException { + HashSet filenamesFound = new HashSet<>(); + try (ZipInputStream zipStream = new ZipInputStream(inputStream)) { + ZipEntry entry = null; + while ((entry = zipStream.getNextEntry()) != null) { + String entryName = entry.getName(); + filenamesFound.add(entryName); + zipStream.closeEntry(); + } + } + return filenamesFound; + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index ab9b9e05073..4f8351d3beb 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -750,6 +750,53 @@ static Response downloadFilesOriginal(Integer[] fileIds, String apiToken) { return given().get(getString + "?format=original&key=" + apiToken); } + public enum DownloadFormat { + original + } + + static Response downloadFiles(String datasetIdOrPersistentId, String apiToken) { + String datasetVersion = null; + DownloadFormat format = null; + return downloadFiles(datasetIdOrPersistentId, datasetVersion, format, apiToken); + } + + static Response downloadFiles(String datasetIdOrPersistentId, DownloadFormat format, String apiToken) { + String datasetVersion = null; + return downloadFiles(datasetIdOrPersistentId, datasetVersion, format, apiToken); + } + + static Response downloadFiles(String 
datasetIdOrPersistentId, String datasetVersion, String apiToken) { + DownloadFormat format = null; + return downloadFiles(datasetIdOrPersistentId, datasetVersion, format, apiToken); + } + + static Response downloadFiles(String datasetIdOrPersistentId, String datasetVersion, DownloadFormat format, String apiToken) { + String idInPath = datasetIdOrPersistentId; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. + if (!NumberUtils.isNumber(datasetIdOrPersistentId)) { + idInPath = ":persistentId"; + optionalQueryParam = "?persistentId=" + datasetIdOrPersistentId; + } + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken); + } + String optionalVersion = ""; + if (datasetVersion != null) { + optionalVersion = "/versions/" + datasetVersion; + } + String optionalFormat = ""; + if (format != null) { + if (!"".equals(optionalQueryParam)) { + optionalFormat = "&format=" + format; + } else { + optionalFormat = "?format=" + format; + } + } + return requestSpecification.get("/api/access/dataset/" + idInPath + optionalVersion + optionalQueryParam + optionalFormat); + } + static Response subset(String fileId, String variables, String apiToken) { return given() .header(API_TOKEN_HTTP_HEADER, apiToken)