Extend Datasets getDownloadSize API endpoint to support file search criteria and deaccessioned datasets #10014

Merged (5 commits) on Oct 19, 2023
@@ -0,0 +1,12 @@
Extended the getDownloadSize API endpoint (/api/datasets/{id}/versions/{versionId}/downloadsize) with the following new features:

- The endpoint now accepts a new optional boolean query parameter "includeDeaccessioned", which, when enabled, causes the endpoint to also consider deaccessioned dataset versions when resolving the version whose total file download size is returned.


- The endpoint now supports filtering by file search criteria. In particular, it accepts the following optional query parameters (an illustrative combined request follows the list):

- contentType
- accessStatus
- categoryName
- tabularTagName
- searchText
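
For example, an illustrative request combining some of the new filtering parameters with "includeDeaccessioned" (using the demo server URL from the guides):

    curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?contentType=text/plain&categoryName=Data&includeDeaccessioned=true"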
61 changes: 58 additions & 3 deletions doc/sphinx-guides/source/api/native-api.rst
@@ -964,7 +964,7 @@ The fully expanded example above (without environment variables) looks like this

curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files"

-This endpoint supports optional pagination, through the ``limit`` and ``offset`` query params:
+This endpoint supports optional pagination, through the ``limit`` and ``offset`` query parameters:

.. code-block:: bash

@@ -1044,7 +1044,7 @@ Usage example:

curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files?includeDeaccessioned=true"

-.. note:: Keep in mind that you can combine all of the above query params depending on the results you are looking for.
+.. note:: Keep in mind that you can combine all of the above query parameters depending on the results you are looking for.
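
For instance, an illustrative request combining pagination with ``includeDeaccessioned`` on this endpoint:

.. code-block:: bash

curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files?limit=10&includeDeaccessioned=true"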

Get File Counts in a Dataset
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1132,7 +1132,7 @@ Usage example:

Please note that filtering values are case sensitive and must be correctly typed for the endpoint to recognize them.

-Keep in mind that you can combine all of the above query params depending on the results you are looking for.
+Keep in mind that you can combine all of the above query parameters depending on the results you are looking for.

View Dataset Files and Folders as a Directory Index
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1888,6 +1888,61 @@ Usage example:

curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?mode=Archival"

Category name filtering is also optionally supported, to return the size of all files available for download that match the requested category name.

Usage example:

.. code-block:: bash

curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?categoryName=Data"

Tabular tag name filtering is also optionally supported, to return the size of all files available for download to which the requested tabular tag has been added.

Usage example:

.. code-block:: bash

curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?tabularTagName=Survey"

Content type filtering is also optionally supported, to return the size of all files available for download that match the requested content type.

Review comment (Contributor): Does it make sense to list some or all of the possible content types here?

Reply (Contributor, author): There are many different content types (https://github.com/IQSS/dataverse/blob/develop/src/main/java/propertyFiles/MimeTypeFacets.properties). Since the list is so extensive and variable, I'm not sure it would be really useful.

Usage example:

.. code-block:: bash

curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?contentType=image/png"

Filtering by search text is also optionally supported. The search is applied to the labels and descriptions of the dataset files, returning the size of all files available for download that contain the searched text in either of those fields.

Usage example:

.. code-block:: bash

curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?searchText=word"

File access filtering is also optionally supported, using the following possible values:

* ``Public``
* ``Restricted``
* ``EmbargoedThenRestricted``
* ``EmbargoedThenPublic``

If no filter is specified, files with any of the above access statuses are included.
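
Usage example (illustrative, filtering by the ``Public`` access status via the ``accessStatus`` query parameter):

.. code-block:: bash

curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?accessStatus=Public"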

Please note that filtering query parameters are case sensitive and must be correctly typed for the endpoint to recognize them.

By default, deaccessioned dataset versions are not included in the search when applying the ``:latest`` or ``:latest-published`` version identifiers. Additionally, when filtering by a specific version tag, you will get a "not found" error if the version is deaccessioned and you do not enable the ``includeDeaccessioned`` option described below.

If you want to include deaccessioned dataset versions, you must set the ``includeDeaccessioned`` query parameter to ``true``.

Usage example:

.. code-block:: bash

curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?includeDeaccessioned=true"

.. note:: Keep in mind that you can combine all of the above query parameters depending on the results you are looking for.
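
As an illustration, the following request combines several of the query parameters described above (download size mode, category name filtering, and deaccessioned versions); other combinations work the same way:

.. code-block:: bash

curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?mode=Original&categoryName=Data&includeDeaccessioned=true"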

Submit a Dataset for Review
~~~~~~~~~~~~~~~~~~~~~~~~~~~

@@ -53,7 +53,7 @@ public enum FileOrderCriteria {
}

/**
- * Mode to base the search in {@link DatasetVersionFilesServiceBean#getFilesDownloadSize(DatasetVersion, FileDownloadSizeMode)}
+ * Mode to base the search in {@link DatasetVersionFilesServiceBean#getFilesDownloadSize(DatasetVersion, FileSearchCriteria, FileDownloadSizeMode)}
* <p>
* All: Includes both archival and original sizes for tabular files
* Archival: Includes only the archival size for tabular files
@@ -191,16 +191,17 @@ public List<FileMetadata> getFileMetadatas(DatasetVersion datasetVersion, Intege
* Returns the total download size of all files for a particular DatasetVersion
*
* @param datasetVersion the DatasetVersion to access
* @param searchCriteria for retrieving only files matching this criteria
* @param mode a FileDownloadSizeMode to base the search on
* @return long value of total file download size
*/
- public long getFilesDownloadSize(DatasetVersion datasetVersion, FileDownloadSizeMode mode) {
+ public long getFilesDownloadSize(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria, FileDownloadSizeMode mode) {
return switch (mode) {
case All ->
- Long.sum(getOriginalTabularFilesSize(datasetVersion), getArchivalFilesSize(datasetVersion, false));
+ Long.sum(getOriginalTabularFilesSize(datasetVersion, searchCriteria), getArchivalFilesSize(datasetVersion, false, searchCriteria));
case Original ->
- Long.sum(getOriginalTabularFilesSize(datasetVersion), getArchivalFilesSize(datasetVersion, true));
- case Archival -> getArchivalFilesSize(datasetVersion, false);
+ Long.sum(getOriginalTabularFilesSize(datasetVersion, searchCriteria), getArchivalFilesSize(datasetVersion, true, searchCriteria));
+ case Archival -> getArchivalFilesSize(datasetVersion, false, searchCriteria);
};
}

@@ -301,22 +302,24 @@ private void applyOrderCriteriaToGetFileMetadatasQuery(JPAQuery<FileMetadata> qu
}
}

- private long getOriginalTabularFilesSize(DatasetVersion datasetVersion) {
+ private long getOriginalTabularFilesSize(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria) {
JPAQueryFactory queryFactory = new JPAQueryFactory(em);
- Long result = queryFactory
+ JPAQuery<?> baseQuery = queryFactory
.from(fileMetadata)
.where(fileMetadata.datasetVersion.id.eq(datasetVersion.getId()))
.from(dataTable)
- .where(dataTable.dataFile.eq(fileMetadata.dataFile))
- .select(dataTable.originalFileSize.sum()).fetchFirst();
+ .where(dataTable.dataFile.eq(fileMetadata.dataFile));
+ applyFileSearchCriteriaToQuery(baseQuery, searchCriteria);
+ Long result = baseQuery.select(dataTable.originalFileSize.sum()).fetchFirst();
return (result == null) ? 0 : result;
}

- private long getArchivalFilesSize(DatasetVersion datasetVersion, boolean ignoreTabular) {
+ private long getArchivalFilesSize(DatasetVersion datasetVersion, boolean ignoreTabular, FileSearchCriteria searchCriteria) {
JPAQueryFactory queryFactory = new JPAQueryFactory(em);
JPAQuery<?> baseQuery = queryFactory
.from(fileMetadata)
.where(fileMetadata.datasetVersion.id.eq(datasetVersion.getId()));
+ applyFileSearchCriteriaToQuery(baseQuery, searchCriteria);
Long result;
if (ignoreTabular) {
result = baseQuery.where(fileMetadata.dataFile.dataTables.isEmpty()).select(fileMetadata.dataFile.filesize.sum()).fetchFirst();
22 changes: 20 additions & 2 deletions src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -2992,19 +2992,37 @@ public Response getStorageSize(@Context ContainerRequestContext crc, @PathParam(
public Response getDownloadSize(@Context ContainerRequestContext crc,
@PathParam("identifier") String dvIdtf,
@PathParam("versionId") String version,
@QueryParam("contentType") String contentType,
@QueryParam("accessStatus") String accessStatus,
@QueryParam("categoryName") String categoryName,
@QueryParam("tabularTagName") String tabularTagName,
@QueryParam("searchText") String searchText,
@QueryParam("mode") String mode,
@QueryParam("includeDeaccessioned") boolean includeDeaccessioned,
@Context UriInfo uriInfo,
@Context HttpHeaders headers) {

return response(req -> {
FileSearchCriteria fileSearchCriteria;
try {
fileSearchCriteria = new FileSearchCriteria(
contentType,
accessStatus != null ? FileSearchCriteria.FileAccessStatus.valueOf(accessStatus) : null,
categoryName,
tabularTagName,
searchText
);
} catch (IllegalArgumentException e) {
return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.access.status", List.of(accessStatus)));
}
DatasetVersionFilesServiceBean.FileDownloadSizeMode fileDownloadSizeMode;
try {
fileDownloadSizeMode = mode != null ? DatasetVersionFilesServiceBean.FileDownloadSizeMode.valueOf(mode) : DatasetVersionFilesServiceBean.FileDownloadSizeMode.All;
} catch (IllegalArgumentException e) {
return error(Response.Status.BAD_REQUEST, "Invalid mode: " + mode);
}
- DatasetVersion datasetVersion = getDatasetVersionOrDie(req, version, findDatasetOrDie(dvIdtf), uriInfo, headers);
- long datasetStorageSize = datasetVersionFilesServiceBean.getFilesDownloadSize(datasetVersion, fileDownloadSizeMode);
+ DatasetVersion datasetVersion = getDatasetVersionOrDie(req, version, findDatasetOrDie(dvIdtf), uriInfo, headers, includeDeaccessioned);
+ long datasetStorageSize = datasetVersionFilesServiceBean.getFilesDownloadSize(datasetVersion, fileSearchCriteria, fileDownloadSizeMode);
String message = MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize.download"), datasetStorageSize);
JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder();
jsonObjectBuilder.add("message", message);
46 changes: 39 additions & 7 deletions src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
@@ -3863,7 +3863,7 @@ public void getDownloadSize() throws IOException, InterruptedException {
int expectedTextFilesStorageSize = testFileSize1 + testFileSize2;

// Get the total size when there are no tabular files
- Response getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken);
+ Response getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken);
getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode())
.body("data.storageSize", equalTo(expectedTextFilesStorageSize));

@@ -3878,7 +3878,7 @@
Thread.sleep(2000);

// Get the total size ignoring the original tabular file sizes
- getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Archival.toString(), apiToken);
+ getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Archival.toString(), false, apiToken);
getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode());

int actualSizeIgnoringOriginalTabularSizes = Integer.parseInt(getDownloadSizeResponse.getBody().jsonPath().getString("data.storageSize"));
@@ -3889,21 +3889,21 @@
// Get the total size including only original sizes and ignoring archival sizes for tabular files
int expectedSizeIncludingOnlyOriginalForTabular = tabularOriginalSize + expectedTextFilesStorageSize;

- getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), apiToken);
+ getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), false, apiToken);
getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode())
.body("data.storageSize", equalTo(expectedSizeIncludingOnlyOriginalForTabular));

// Get the total size including both the original and archival tabular file sizes
int tabularArchivalSize = actualSizeIgnoringOriginalTabularSizes - expectedTextFilesStorageSize;
int expectedSizeIncludingAllSizes = tabularArchivalSize + tabularOriginalSize + expectedTextFilesStorageSize;

- getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken);
+ getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken);
getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode())
.body("data.storageSize", equalTo(expectedSizeIncludingAllSizes));

// Get the total size sending invalid file download size mode
String invalidMode = "invalidMode";
- getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, invalidMode, apiToken);
+ getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, invalidMode, false, apiToken);
getDownloadSizeResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode())
.body("message", equalTo("Invalid mode: " + invalidMode));

@@ -3917,15 +3917,47 @@
// Get the total size including only original sizes and ignoring archival sizes for tabular files
expectedSizeIncludingOnlyOriginalForTabular = tabularOriginalSize + expectedSizeIncludingOnlyOriginalForTabular;

- getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), apiToken);
+ getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), false, apiToken);
getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode())
.body("data.storageSize", equalTo(expectedSizeIncludingOnlyOriginalForTabular));

// Get the total size including both the original and archival tabular file sizes
expectedSizeIncludingAllSizes = tabularArchivalSize + tabularOriginalSize + expectedSizeIncludingAllSizes;

- getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken);
+ getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken);
getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode())
.body("data.storageSize", equalTo(expectedSizeIncludingAllSizes));

// Get the total size including both the original and archival tabular file sizes with search criteria
getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, "text/plain", FileSearchCriteria.FileAccessStatus.Public.toString(), null, null, "test_", DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken);
// We exclude tabular sizes from the expected result since the search criteria filters by content type "text/plain" and search text "test_"
int expectedSizeIncludingAllSizesAndApplyingCriteria = testFileSize1 + testFileSize2;
getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode())
.body("data.storageSize", equalTo(expectedSizeIncludingAllSizesAndApplyingCriteria));

// Test Deaccessioned
Response publishDataverseResponse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken);
publishDataverseResponse.then().assertThat().statusCode(OK.getStatusCode());
Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken);
publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode());

Response deaccessionDatasetResponse = UtilIT.deaccessionDataset(datasetId, DS_VERSION_LATEST_PUBLISHED, "Test deaccession reason.", null, apiToken);
deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode());

// includeDeaccessioned false
Response getVersionFileCountsResponseNoDeaccessioned = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST_PUBLISHED, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken);
getVersionFileCountsResponseNoDeaccessioned.then().assertThat().statusCode(NOT_FOUND.getStatusCode());

// includeDeaccessioned true
Response getVersionFileCountsResponseDeaccessioned = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST_PUBLISHED, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), true, apiToken);
getVersionFileCountsResponseDeaccessioned.then().assertThat().statusCode(OK.getStatusCode());

// Test that the download size of a deaccessioned dataset cannot be accessed by a guest
// By latest published version
Response getVersionFileCountsGuestUserResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST_PUBLISHED, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), true, null);
getVersionFileCountsGuestUserResponse.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
// By specific version 1.0
getVersionFileCountsGuestUserResponse = UtilIT.getDownloadSize(datasetId, "1.0", null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), true, null);
getVersionFileCountsGuestUserResponse.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
}
}