From a018055f2ed981b634175a135f676f0d3ec5cc0e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 11 Apr 2020 15:02:19 -0400 Subject: [PATCH 001/189] cleanup - add explicit type --- src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index a4370c7b38f..593a7f1c7e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1646,7 +1646,7 @@ public static boolean isPackageFile(DataFile dataFile) { return DataFileServiceBean.MIME_TYPE_PACKAGE_FILE.equalsIgnoreCase(dataFile.getContentType()); } - public static S3AccessIO getS3AccessForDirectUpload(Dataset dataset) { + public static S3AccessIO getS3AccessForDirectUpload(Dataset dataset) { String driverId = dataset.getDataverseContext().getEffectiveStorageDriverId(); boolean directEnabled = Boolean.getBoolean("dataverse.files." + driverId + ".upload-redirect"); //Should only be requested when it is allowed, but we'll log a warning otherwise From d31443171b37d72976a1dcf2b7263c319aca1815 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 11 Apr 2020 15:03:47 -0400 Subject: [PATCH 002/189] add multipart upload api calls and add to S3StorageIO class also refactor S3StorageIO to re-use single client per store, use more static methods --- .../harvard/iq/dataverse/api/Datasets.java | 111 +++++++ .../iq/dataverse/dataaccess/S3AccessIO.java | 275 ++++++++++++------ 2 files changed, 301 insertions(+), 85 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 0b2e25a7f02..52661e0a152 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -127,8 +127,10 @@ import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonArrayBuilder; +import javax.json.JsonException; import javax.json.JsonObject; import javax.json.JsonObjectBuilder; +import javax.json.JsonReader; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.Consumes; @@ -152,6 +154,8 @@ import org.glassfish.jersey.media.multipart.FormDataContentDisposition; import org.glassfish.jersey.media.multipart.FormDataParam; +import com.amazonaws.services.s3.model.PartETag; + @Path("datasets") public class Datasets extends AbstractApiBean { @@ -1514,6 +1518,113 @@ public Response getUploadUrl(@PathParam("id") String idSupplied) { return wr.getResponse(); } } + +@GET +@Path("{id}/mpupload") +public Response getMPUploadUrls(@PathParam("id") String idSupplied, @QueryParam("size") long fileSize) { + try { + Dataset dataset = findDatasetOrDie(idSupplied); + + boolean canUpdateDataset = false; + try { + canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetVersionCommand.class); + } catch (WrappedResponse ex) { + logger.info("Exception thrown while trying to figure out permissions while getting upload URL for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); + } + if (!canUpdateDataset) { + return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); + } + S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); + if(s3io == null) { + return error(Response.Status.NOT_FOUND,"Direct upload not 
supported for files in this dataset: " + dataset.getId()); + } + JsonObjectBuilder urlsBuilder = null; + String storageIdentifier = null; + try { + urlsBuilder = s3io.generateTemporaryS3UploadUrls(fileSize); + storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); + } catch (IOException io) { + logger.warning(io.getMessage()); + throw new WrappedResponse(io, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not create process direct upload request")); + } + + JsonObjectBuilder response = Json.createObjectBuilder() + .add("urls", urlsBuilder) + .add("storageIdentifier", storageIdentifier ); + return ok(response); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } +} + +@DELETE +@Path("{id}/mpupload") +public Response abortMPUpload(@PathParam("id") String idSupplied, @QueryParam("storageidentifier") String storageidentifier, @QueryParam("uploadid") String uploadId) { + try { + Dataset dataset = findDatasetOrDie(idSupplied); + + boolean canUpdateDataset = false; + try { + canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetVersionCommand.class); + } catch (WrappedResponse ex) { + logger.info("Exception thrown while trying to figure out permissions while getting upload URL for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); + } + if (!canUpdateDataset) { + return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); + } + try { + S3AccessIO.abortMultipartUpload(dataset, storageidentifier, uploadId); + } catch (IOException io) { + logger.warning("Multipart upload abort failed for uploadId: " + uploadId +" storageidentifier=" + storageidentifier + " dataset Id: " + dataset.getId()); + logger.warning(io.getMessage()); + throw new WrappedResponse(io, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not abort multipart upload")); + } + return ok("Multipart Upload aborted"); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } +} + +@PUT +@Path("{id}/mpupload") +public Response completeMPUpload(String partETagBody, @PathParam("id") String idSupplied, @QueryParam("storageidentifier") String storageidentifier, @QueryParam("uploadid") String uploadId) { + try { + Dataset dataset = findDatasetOrDie(idSupplied); + List eTagList = new ArrayList(); + + try { + JsonReader jsonReader = Json.createReader(new StringReader(partETagBody)); + JsonObject object = jsonReader.readObject(); + jsonReader.close(); + for(String partNo : object.keySet()) { + eTagList.add(new PartETag(Integer.parseInt(partNo), object.getString(partNo))); + } + } catch (JsonException je) { + logger.info("Unable to parse eTags from: " + partETagBody); + throw new WrappedResponse(je, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not abort multipart upload")); + } + boolean canUpdateDataset = false; + try { + canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetVersionCommand.class); + } catch (WrappedResponse ex) { + logger.info("Exception thrown while trying to figure out permissions while getting upload URL for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); + } + if (!canUpdateDataset) { + return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); + } + try { + S3AccessIO.completeMultipartUpload(dataset, storageidentifier, uploadId, eTagList); + } catch (IOException io) { + logger.warning("Multipart upload abort failed for 
uploadId: " + uploadId +" storageidentifier=" + storageidentifier + " dataset Id: " + dataset.getId()); + logger.warning(io.getMessage()); + throw new WrappedResponse(io, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not abort multipart upload")); + } + return ok("Multipart Upload aborted"); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } +} + /** * Add a File to an existing Dataset * diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index b12013b8f8f..08cce80ad96 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -9,7 +9,10 @@ import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.s3.Headers; import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.PartETag; import com.amazonaws.services.s3.model.PutObjectRequest; +import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; +import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; import com.amazonaws.services.s3.model.CopyObjectRequest; import com.amazonaws.services.s3.model.DeleteObjectRequest; import com.amazonaws.services.s3.model.DeleteObjectTaggingRequest; @@ -17,6 +20,8 @@ import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion; import com.amazonaws.services.s3.model.GeneratePresignedUrlRequest; import com.amazonaws.services.s3.model.GetObjectRequest; +import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; +import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; import com.amazonaws.services.s3.model.ListObjectsRequest; import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.ResponseHeaderOverrides; @@ -45,11 +50,14 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Random; import java.util.logging.Logger; import org.apache.commons.io.IOUtils; +import javax.json.Json; +import javax.json.JsonObjectBuilder; import javax.validation.constraints.NotNull; /** @@ -67,34 +75,18 @@ public class S3AccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.S3AccessIO"); + private static HashMap driverClientMap = new HashMap(); + private static HashMap driverTMMap = new HashMap(); + public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { super(dvObject, req, driverId); - readSettings(); this.setIsLocalFile(false); try { - // get a standard client, using the standard way of configuration the credentials, etc. - AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard(); - // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. - if (!s3CEUrl.isEmpty()) { - s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); - } - // some custom S3 implementations require "PathStyleAccess" as they us a path, not a subdomain. default = false - s3CB.withPathStyleAccessEnabled(s3pathStyleAccess); - // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false - s3CB.setPayloadSigningEnabled(s3payloadSigning); - // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. 
default = true - // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled - s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); - - s3CB.setCredentials(new ProfileCredentialsProvider(s3profile)); - // let's build the client :-) - this.s3 = s3CB.build(); - - // building a TransferManager instance to support multipart uploading for files over 4gb. - this.tm = TransferManagerBuilder.standard() - .withS3Client(this.s3) - .build(); + bucketName=getBucketName(driverId); + s3=getClient(driverId); + tm=getTransferManager(driverId); + } catch (Exception e) { throw new AmazonClientException( "Cannot instantiate a S3 client; check your AWS credentials and region", @@ -102,7 +94,6 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { } } - public S3AccessIO(String storageLocation, String driverId) { this(null, null, driverId); // TODO: validate the storage location supplied @@ -110,22 +101,16 @@ public S3AccessIO(String storageLocation, String driverId) { key = storageLocation.substring(storageLocation.indexOf('/')+1); } + //Used for tests only public S3AccessIO(T dvObject, DataAccessRequest req, @NotNull AmazonS3 s3client, String driverId) { super(dvObject, req, driverId); - readSettings(); + bucketName = getBucketName(driverId); this.setIsLocalFile(false); this.s3 = s3client; } private AmazonS3 s3 = null; private TransferManager tm = null; - //See readSettings() for the source of these values - private String s3CEUrl = null; - private String s3CERegion = null; - private boolean s3pathStyleAccess = false; - private boolean s3payloadSigning = false; - private boolean s3chunkedEncoding = true; - private String s3profile = "default"; private String bucketName = null; private String key = null; @@ -793,28 +778,36 @@ String getDestinationKey(String auxItemTag) throws IOException { */ String getMainFileKey() throws IOException { if (key == null) { - // TODO: (?) - should we worry here about the datafile having null for the owner here? - // or about the owner dataset having null for the authority and/or identifier? - // we should probably check for that and throw an exception. (unless we are - // super positive that this condition would have been intercepted by now) - String baseKey = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" + this.getDataFile().getOwner().getIdentifierForFileStorage(); - String storageIdentifier = dvObject.getStorageIdentifier(); - - if (storageIdentifier == null || "".equals(storageIdentifier)) { - throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); - } - - if (storageIdentifier.startsWith(this.driverId + "://")) { - bucketName = storageIdentifier.substring((this.driverId + "://").length(), storageIdentifier.lastIndexOf(":")); - key = baseKey + "/" + storageIdentifier.substring(storageIdentifier.lastIndexOf(":") + 1); - } else { - throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") does not appear to be an S3 object."); - } + DataFile df = this.getDataFile(); + // TODO: (?) - should we worry here about the datafile having null for the owner here? + key = getMainFileKey(df.getOwner(), df.getStorageIdentifier()); } - return key; } + static String getMainFileKey(Dataset owner, String storageIdentifier) throws IOException { + String key = null; + // or about the owner dataset having null for the authority and/or identifier? + // we should probably check for that and throw an exception. 
(unless we are + // super positive that this condition would have been intercepted by now) + String baseKey = owner.getAuthorityForFileStorage() + "/" + owner.getIdentifierForFileStorage(); + + if (storageIdentifier == null || "".equals(storageIdentifier)) { + throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); + } + + if (storageIdentifier.indexOf("://")>0) { + //String driverId = storageIdentifier.substring(0, storageIdentifier.indexOf("://")+3); + //As currently implemented (v4.20), the bucket is part of the identifier and we could extract it and compare it with getBucketName() as a check - + //Only one bucket per driver is supported (though things might work if the profile creds work with multiple buckets, then again it's not clear when logic is reading from the driver property or from the DataFile). + //String bucketName = storageIdentifier.substring(driverId.length() + 3, storageIdentifier.lastIndexOf(":")); + key = baseKey + "/" + storageIdentifier.substring(storageIdentifier.lastIndexOf(":") + 1); + } else { + throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") does not appear to be an S3 object."); + } + return key; + } + public boolean downloadRedirectEnabled() { String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); if ("true".equalsIgnoreCase(optionValue)) { @@ -917,12 +910,61 @@ public String generateTemporaryS3UploadUrl() throws IOException { return urlString; } + public JsonObjectBuilder generateTemporaryS3UploadUrls(long fileSize) { + + long chunkSize=512l; + JsonObjectBuilder response = Json.createObjectBuilder(); + try { + key = getMainFileKey(); + java.util.Date expiration = new java.util.Date(); + long msec = expiration.getTime(); + msec += 60 * 1000 * getUrlExpirationMinutes(); + expiration.setTime(msec); + InitiateMultipartUploadRequest initiationRequest = new InitiateMultipartUploadRequest(bucketName, key); + InitiateMultipartUploadResult initiationResponse = s3.initiateMultipartUpload(initiationRequest); + String uploadId = initiationResponse.getUploadId(); + for(int i=1;i<=(fileSize/chunkSize) + (fileSize%chunkSize > 0 ? 1: 0);i++) { + GeneratePresignedUrlRequest uploadPartUrlRequest = + new GeneratePresignedUrlRequest(bucketName, key).withMethod(HttpMethod.PUT).withExpiration(expiration); + uploadPartUrlRequest.addRequestParameter("uploadId", uploadId); + uploadPartUrlRequest.addRequestParameter("partNumber", Integer.toString(i)); + + //Require user to add this header to indicate a temporary file + // generatePresignedUrlRequest.putCustomRequestHeader(Headers.S3_TAGGING, "dv-state=temp"); + + URL presignedUrl; + try { + presignedUrl = s3.generatePresignedUrl(uploadPartUrlRequest); + } catch (SdkClientException sce) { + logger.warning("SdkClientException generating temporary S3 url for "+key+" ("+sce.getMessage()+")"); + presignedUrl = null; + } + String urlString = null; + if (presignedUrl != null) { + String endpoint = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-url"); + String proxy = System.getProperty("dataverse.files." 
+ driverId + ".proxy-url"); + if(proxy!=null) { + urlString = presignedUrl.toString().replace(endpoint, proxy); + } else { + urlString = presignedUrl.toString(); + } + } + response.add(Integer.toString(i), urlString); + } + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return response; + } + + int getUrlExpirationMinutes() { String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); if (optionValue != null) { Integer num; try { - num = new Integer(optionValue); + num = Integer.parseInt(optionValue); } catch (NumberFormatException ex) { num = null; } @@ -933,42 +975,85 @@ int getUrlExpirationMinutes() { return 60; } - private void readSettings() { - /** - * Pass in a URL pointing to your S3 compatible storage. - * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html - */ - s3CEUrl = System.getProperty("dataverse.files." + this.driverId + ".custom-endpoint-url", ""); - /** - * Pass in a region to use for SigV4 signing of requests. - * Defaults to "dataverse" as it is not relevant for custom S3 implementations. - */ - s3CERegion = System.getProperty("dataverse.files." + this.driverId + ".custom-endpoint-region", "dataverse"); - /** - * Pass in a boolean value if path style access should be used within the S3 client. - * Anything but case-insensitive "true" will lead to value of false, which is default value, too. - */ - s3pathStyleAccess = Boolean.parseBoolean(System.getProperty("dataverse.files." + this.driverId + ".path-style-access", "false")); - /** - * Pass in a boolean value if payload signing should be used within the S3 client. - * Anything but case-insensitive "true" will lead to value of false, which is default value, too. - */ - s3payloadSigning = Boolean.parseBoolean(System.getProperty("dataverse.files." + this.driverId + ".payload-signing","false")); - /** - * Pass in a boolean value if chunked encoding should not be used within the S3 client. - * Anything but case-insensitive "false" will lead to value of true, which is default value, too. - */ - s3chunkedEncoding = Boolean.parseBoolean(System.getProperty("dataverse.files." + this.driverId + ".chunked-encoding","true")); - /** - * Pass in a string value if this storage driver should use a non-default AWS S3 profile. - * The default is "default" which should work when only one profile exists. - */ - s3profile = System.getProperty("dataverse.files." + this.driverId + ".profile","default"); - - bucketName = System.getProperty("dataverse.files." + this.driverId + ".bucket-name"); - } + private static String getBucketName(String driverId) { + return System.getProperty("dataverse.files." + driverId + ".bucket-name"); + } + private static TransferManager getTransferManager(String driverId) { + if(driverTMMap.containsKey(driverId)) { + return driverTMMap.get(driverId); + } else { + // building a TransferManager instance to support multipart uploading for files over 4gb. + TransferManager manager = TransferManagerBuilder.standard() + .withS3Client(getClient(driverId)) + .build(); + driverTMMap.put(driverId, manager); + return manager; + } + } + + + private static AmazonS3 getClient(String driverId) { + if(driverClientMap.containsKey(driverId)) { + return driverClientMap.get(driverId); + } else { + // get a standard client, using the standard way of configuration the credentials, etc. 
+ AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard(); + + /** + * Pass in a URL pointing to your S3 compatible storage. + * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html + */ + String s3CEUrl = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-url", ""); + /** + * Pass in a region to use for SigV4 signing of requests. + * Defaults to "dataverse" as it is not relevant for custom S3 implementations. + */ + String s3CERegion = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-region", "dataverse"); + + // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. + if (!s3CEUrl.isEmpty()) { + s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); + } + /** + * Pass in a boolean value if path style access should be used within the S3 client. + * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + */ + Boolean s3pathStyleAccess = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".path-style-access", "false")); + // some custom S3 implementations require "PathStyleAccess" as they us a path, not a subdomain. default = false + s3CB.withPathStyleAccessEnabled(s3pathStyleAccess); + + /** + * Pass in a boolean value if payload signing should be used within the S3 client. + * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + */ + Boolean s3payloadSigning = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".payload-signing","false")); + /** + * Pass in a boolean value if chunked encoding should not be used within the S3 client. + * Anything but case-insensitive "false" will lead to value of true, which is default value, too. + */ + Boolean s3chunkedEncoding = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".chunked-encoding","true")); + // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false + s3CB.setPayloadSigningEnabled(s3payloadSigning); + // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. default = true + // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled + s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); + + /** + * Pass in a string value if this storage driver should use a non-default AWS S3 profile. + * The default is "default" which should work when only one profile exists. + */ + String s3profile = System.getProperty("dataverse.files." 
+ driverId + ".profile","default"); + + s3CB.setCredentials(new ProfileCredentialsProvider(s3profile)); + // let's build the client :-) + AmazonS3 client = s3CB.build(); + driverClientMap.put(driverId, client); + return client; + } + } + public void removeTempTag() throws IOException { if (!(dvObject instanceof DataFile)) { logger.warning("Attempt to remove tag from non-file DVObject id: " + dvObject.getId()); @@ -996,4 +1081,24 @@ public void removeTempTag() throws IOException { } + public static void abortMultipartUpload(Dataset owner, String storageIdentifier, String uploadId) throws IOException { + String[] info = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + String driverId = info[0]; + AmazonS3 s3Client = getClient(driverId); + String bucketName = getBucketName(driverId); + String key = getMainFileKey(owner, storageIdentifier); + AbortMultipartUploadRequest req = new AbortMultipartUploadRequest(bucketName, key, uploadId); + s3Client.abortMultipartUpload(req); + } + + public static void completeMultipartUpload(Dataset owner, String storageIdentifier, String uploadId, List etags) throws IOException { + String[] info = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + String driverId = info[0]; + AmazonS3 s3Client = getClient(driverId); + String bucketName = getBucketName(driverId); + String key = getMainFileKey(owner, storageIdentifier); + CompleteMultipartUploadRequest req = new CompleteMultipartUploadRequest(bucketName, key, uploadId, etags); + s3Client.completeMultipartUpload(req); + } + } From 61448a8d6a04a3a017a67fa19d2861c9b0d394fa Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 17 Apr 2020 12:04:16 -0400 Subject: [PATCH 003/189] update error msgs --- .../edu/harvard/iq/dataverse/api/Datasets.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 52661e0a152..d3c9d342812 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1529,7 +1529,7 @@ public Response getMPUploadUrls(@PathParam("id") String idSupplied, @QueryParam( try { canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetVersionCommand.class); } catch (WrappedResponse ex) { - logger.info("Exception thrown while trying to figure out permissions while getting upload URL for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); + logger.info("Exception thrown while trying to figure out permissions while getting upload URLs for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); } if (!canUpdateDataset) { return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); @@ -1567,7 +1567,7 @@ public Response abortMPUpload(@PathParam("id") String idSupplied, @QueryParam("s try { canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetVersionCommand.class); } catch (WrappedResponse ex) { - logger.info("Exception thrown while trying to figure out permissions while getting upload URL for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); + logger.info("Exception thrown while trying to figure out permissions while getting aborting upload for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); } if (!canUpdateDataset) { return error(Response.Status.FORBIDDEN, 
"You are not permitted to upload files to this dataset."); @@ -1601,13 +1601,13 @@ public Response completeMPUpload(String partETagBody, @PathParam("id") String id } } catch (JsonException je) { logger.info("Unable to parse eTags from: " + partETagBody); - throw new WrappedResponse(je, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not abort multipart upload")); + throw new WrappedResponse(je, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not complete multipart upload")); } boolean canUpdateDataset = false; try { canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetVersionCommand.class); } catch (WrappedResponse ex) { - logger.info("Exception thrown while trying to figure out permissions while getting upload URL for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); + logger.info("Exception thrown while trying to figure out permissions while completing upload for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); } if (!canUpdateDataset) { return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); @@ -1615,11 +1615,11 @@ public Response completeMPUpload(String partETagBody, @PathParam("id") String id try { S3AccessIO.completeMultipartUpload(dataset, storageidentifier, uploadId, eTagList); } catch (IOException io) { - logger.warning("Multipart upload abort failed for uploadId: " + uploadId +" storageidentifier=" + storageidentifier + " dataset Id: " + dataset.getId()); + logger.warning("Multipart upload completion failed for uploadId: " + uploadId +" storageidentifier=" + storageidentifier + " dataset Id: " + dataset.getId()); logger.warning(io.getMessage()); - throw new WrappedResponse(io, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not abort multipart upload")); + throw new WrappedResponse(io, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not complete multipart upload")); } - return ok("Multipart Upload aborted"); + return ok("Multipart Upload completed"); } catch (WrappedResponse wr) { return wr.getResponse(); } From cd7e34544cfbaeb1204ca65d5f01a34ecb85517c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 17 Apr 2020 16:52:47 -0400 Subject: [PATCH 004/189] try test update --- .../edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java index f92b44862e2..41916da9de7 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java @@ -80,12 +80,12 @@ void keyNullstorageIdNullOrEmpty_getMainFileKey() throws IOException { // given dataFile.setStorageIdentifier(null); // when & then - assertThrows(FileNotFoundException.class, () -> {dataFileAccess.getMainFileKey(); }); + assertThrows(FileNotFoundException.class, () -> {S3AccessIO.getMainFileKey(dataSet, dataFile.getStorageIdentifier()); }); // given dataFile.setStorageIdentifier(""); // when & then - assertThrows(FileNotFoundException.class, () -> {dataFileAccess.getMainFileKey(); }); + assertThrows(FileNotFoundException.class, () -> {S3AccessIO.getMainFileKey(dataSet, dataFile.getStorageIdentifier()); }); } @Test From 8b88b393bdedfc24f81e0925895f5fa36a01e33d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 17 Apr 2020 18:10:54 -0400 Subject: [PATCH 005/189] Restore driverId check 
in getMainKey to pass tests Nominally useful if code ever changes the storageIdentifier of the dvObject after the S3AccessIO instance is created, but real code shouldn't do that :-) --- .../harvard/iq/dataverse/dataaccess/S3AccessIO.java | 12 ++++++------ .../iq/dataverse/dataaccess/S3AccessIOTest.java | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 08cce80ad96..7441c6a72e1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -780,12 +780,12 @@ String getMainFileKey() throws IOException { if (key == null) { DataFile df = this.getDataFile(); // TODO: (?) - should we worry here about the datafile having null for the owner here? - key = getMainFileKey(df.getOwner(), df.getStorageIdentifier()); + key = getMainFileKey(df.getOwner(), df.getStorageIdentifier(), driverId); } return key; } - static String getMainFileKey(Dataset owner, String storageIdentifier) throws IOException { + static String getMainFileKey(Dataset owner, String storageIdentifier, String driverId) throws IOException { String key = null; // or about the owner dataset having null for the authority and/or identifier? // we should probably check for that and throw an exception. (unless we are @@ -796,14 +796,14 @@ static String getMainFileKey(Dataset owner, String storageIdentifier) throws IOE throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); } - if (storageIdentifier.indexOf("://")>0) { + if (storageIdentifier.indexOf(driverId + "://")>0) { //String driverId = storageIdentifier.substring(0, storageIdentifier.indexOf("://")+3); //As currently implemented (v4.20), the bucket is part of the identifier and we could extract it and compare it with getBucketName() as a check - //Only one bucket per driver is supported (though things might work if the profile creds work with multiple buckets, then again it's not clear when logic is reading from the driver property or from the DataFile). //String bucketName = storageIdentifier.substring(driverId.length() + 3, storageIdentifier.lastIndexOf(":")); key = baseKey + "/" + storageIdentifier.substring(storageIdentifier.lastIndexOf(":") + 1); } else { - throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") does not appear to be an S3 object."); + throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") does not appear to be an S3 object associated with driver: ." 
+ driverId); } return key; } @@ -1086,7 +1086,7 @@ public static void abortMultipartUpload(Dataset owner, String storageIdentifier, String driverId = info[0]; AmazonS3 s3Client = getClient(driverId); String bucketName = getBucketName(driverId); - String key = getMainFileKey(owner, storageIdentifier); + String key = getMainFileKey(owner, storageIdentifier, driverId); AbortMultipartUploadRequest req = new AbortMultipartUploadRequest(bucketName, key, uploadId); s3Client.abortMultipartUpload(req); } @@ -1096,7 +1096,7 @@ public static void completeMultipartUpload(Dataset owner, String storageIdentifi String driverId = info[0]; AmazonS3 s3Client = getClient(driverId); String bucketName = getBucketName(driverId); - String key = getMainFileKey(owner, storageIdentifier); + String key = getMainFileKey(owner, storageIdentifier, driverId); CompleteMultipartUploadRequest req = new CompleteMultipartUploadRequest(bucketName, key, uploadId, etags); s3Client.completeMultipartUpload(req); } diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java index 41916da9de7..1f118a0ea68 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java @@ -80,16 +80,16 @@ void keyNullstorageIdNullOrEmpty_getMainFileKey() throws IOException { // given dataFile.setStorageIdentifier(null); // when & then - assertThrows(FileNotFoundException.class, () -> {S3AccessIO.getMainFileKey(dataSet, dataFile.getStorageIdentifier()); }); + assertThrows(FileNotFoundException.class, () -> {dataFileAccess.getMainFileKey(); }); // given dataFile.setStorageIdentifier(""); // when & then - assertThrows(FileNotFoundException.class, () -> {S3AccessIO.getMainFileKey(dataSet, dataFile.getStorageIdentifier()); }); + assertThrows(FileNotFoundException.class, () -> {dataFileAccess.getMainFileKey(); }); } @Test - void keyNullstorageIdNull_getMainFileKey() throws IOException { + void keyNullstorageIdInvalid_getMainFileKey() throws IOException { // given dataFile.setStorageIdentifier("invalid://abcd"); // when & then From 7b91c07c692f8ced2fd99320edb8b0ff16a9519c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Apr 2020 11:46:49 -0400 Subject: [PATCH 006/189] remove unused imports --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index d3c9d342812..7f16c61b703 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -78,10 +78,8 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.S3PackageImporter; -import static edu.harvard.iq.dataverse.api.AbstractApiBean.error; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; -import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; From e707519a1362707c6337de8c8f0859f41f4a1c0b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Apr 2020 14:47:09 -0400 Subject: [PATCH 007/189] report the exception 
message when can't access bucket --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 7441c6a72e1..474b0d96bfa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -125,7 +125,7 @@ public void open(DataAccessOption... options) throws IOException { throw new IOException("ERROR: S3AccessIO - You must create and configure a bucket before creating datasets."); } } catch (SdkClientException sce) { - throw new IOException("ERROR: S3AccessIO - Failed to look up bucket "+bucketName+" (is AWS properly configured?)"); + throw new IOException("ERROR: S3AccessIO - Failed to look up bucket "+bucketName+" (is AWS properly configured?): " + sce.getMessage()); } DataAccessRequest req = this.getRequest(); From dab4af6a255699171de67628ba95b91115426971 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Apr 2020 15:40:34 -0400 Subject: [PATCH 008/189] typo on comparison --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 474b0d96bfa..51e7141eab5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -796,14 +796,14 @@ static String getMainFileKey(Dataset owner, String storageIdentifier, String dri throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); } - if (storageIdentifier.indexOf(driverId + "://")>0) { + if (storageIdentifier.indexOf(driverId + "://")>=0) { //String driverId = storageIdentifier.substring(0, storageIdentifier.indexOf("://")+3); //As currently implemented (v4.20), the bucket is part of the identifier and we could extract it and compare it with getBucketName() as a check - //Only one bucket per driver is supported (though things might work if the profile creds work with multiple buckets, then again it's not clear when logic is reading from the driver property or from the DataFile). //String bucketName = storageIdentifier.substring(driverId.length() + 3, storageIdentifier.lastIndexOf(":")); key = baseKey + "/" + storageIdentifier.substring(storageIdentifier.lastIndexOf(":") + 1); } else { - throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") does not appear to be an S3 object associated with driver: ." 
+ driverId); + throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") does not appear to be an S3 object associated with driver: " + driverId); } return key; } From d322809485777d92dd3fbbbab0e284bfe97bf996 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Apr 2020 17:22:20 -0400 Subject: [PATCH 009/189] IQSS/6829 - account for file failures --- src/main/webapp/editFilesFragment.xhtml | 2 +- src/main/webapp/resources/js/fileupload.js | 23 ++++++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 04a6d8fd5f1..0fda62fb87c 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -95,7 +95,7 @@ $(document).ready(function () { uploadWidgetDropMsg(); - setupDirectUpload(#{EditDatafilesPage.directUploadEnabled()}, #{EditDatafilesPage.workingVersion.dataset.id}); + setupDirectUpload(#{EditDatafilesPage.directUploadEnabled()}); }); //]]> diff --git a/src/main/webapp/resources/js/fileupload.js b/src/main/webapp/resources/js/fileupload.js index 126d5d47c87..7c368c84e4f 100644 --- a/src/main/webapp/resources/js/fileupload.js +++ b/src/main/webapp/resources/js/fileupload.js @@ -1,6 +1,6 @@ var fileList = []; var observer2=null; -var datasetId=null; +var directUploadEnabled=false; //How many files have started being processed but aren't yet being uploaded var filesInProgress=0; //The # of the current file being processed (total number of files for which upload has at least started) @@ -17,9 +17,9 @@ var finishFile = (function () { })(); -function setupDirectUpload(enabled, theDatasetId) { +function setupDirectUpload(enabled) { if(enabled) { - datasetId=theDatasetId; + directUploadEnabled=true; $('.ui-fileupload-upload').hide(); $('.ui-fileupload-cancel').hide(); //Catch files entered via upload dialog box. Since this 'select' widget is replaced by PF, we need to add a listener again when it is replaced @@ -28,7 +28,7 @@ function setupDirectUpload(enabled, theDatasetId) { fileInput.addEventListener('change', function(event) { fileList=[]; for(var i=0;i Date: Fri, 24 Apr 2020 17:41:54 -0400 Subject: [PATCH 010/189] remove ~duplicate code --- src/main/webapp/resources/js/fileupload.js | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/main/webapp/resources/js/fileupload.js b/src/main/webapp/resources/js/fileupload.js index 7c368c84e4f..c66df2049aa 100644 --- a/src/main/webapp/resources/js/fileupload.js +++ b/src/main/webapp/resources/js/fileupload.js @@ -242,9 +242,6 @@ function uploadFailure(jqXHR, upid, filename) { // but others, (Firefox) don't support this. The calls below retrieve the status and other info // from the call stack instead (arguments to the fail() method that calls onerror() that calls this function - //Update count of files processed so that direct uploads will complete correctly - finishFile(); - //Retrieve the error number (status) and related explanation (statusText) var status = null; var statusText =null; From b0557aa374c2ec20fc68c521c6641f821685811a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 27 Apr 2020 10:18:10 -0400 Subject: [PATCH 011/189] IQSS-6829 - avoid race with 2+ files uploading and the dataset pid being set multiple times. 
--- .../harvard/iq/dataverse/EditDatafilesPage.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index e0c4d3d2357..481e8400513 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -480,6 +480,13 @@ public String init() { // that the dataset id is mandatory... But 404 will do for now. return permissionsWrapper.notFound(); } + + //Need to assign an identifier prior to calls to requestDirectUploadUrl if direct upload is used. + if ( isEmpty(dataset.getIdentifier()) && directUploadEnabled() ) { + CommandContext ctxt = commandEngine.getContext(); + GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); + dataset.setIdentifier(ctxt.datasets().generateDatasetIdentifier(dataset, idServiceBean)); + } this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSizeForStore(dataset.getOwner().getEffectiveStorageDriverId()); this.multipleUploadFilesLimit = systemConfig.getMultipleUploadFilesLimit(); @@ -1745,12 +1752,7 @@ public String getRsyncScriptFilename() { public void requestDirectUploadUrl() { - //Need to assign an identifier at this point if direct upload is used. - if ( isEmpty(dataset.getIdentifier()) ) { - CommandContext ctxt = commandEngine.getContext(); - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); - dataset.setIdentifier(ctxt.datasets().generateDatasetIdentifier(dataset, idServiceBean)); - } + S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); if(s3io == null) { From beb419ee143bc974ba2008c013e55b859c2d4535 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 27 Apr 2020 11:47:38 -0400 Subject: [PATCH 012/189] IQSS/6829 - create mode uses DatasetPage - initialize identifier there for directUpload case --- .../edu/harvard/iq/dataverse/DatasetPage.java | 16 ++++++++++++++-- .../harvard/iq/dataverse/EditDatafilesPage.java | 11 +---------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index cb5f9896a9d..0263ebbc428 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -19,6 +19,7 @@ import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; import edu.harvard.iq.dataverse.engine.command.Command; +import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.CreatePrivateUrlCommand; import edu.harvard.iq.dataverse.engine.command.impl.CuratePublishedDatasetVersionCommand; @@ -50,6 +51,8 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import static edu.harvard.iq.dataverse.util.JsfHelper.JH; +import static edu.harvard.iq.dataverse.util.StringUtil.isEmpty; + import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; @@ -1763,6 +1766,10 @@ public void updateOwnerDataverse() { } } + public boolean directUploadEnabled() { + return Boolean.getBoolean("dataverse.files." 
+ this.dataset.getDataverseContext().getEffectiveStorageDriverId() + ".upload-redirect"); + } + private String init(boolean initFull) { //System.out.println("_YE_OLDE_QUERY_COUNTER_"); // for debug purposes @@ -1930,14 +1937,19 @@ private String init(boolean initFull) { dataset.setOwner(dataverseService.find(ownerId)); dataset.setProtocol(protocol); dataset.setAuthority(authority); - //Wait until the create command before actually getting an identifier if (dataset.getOwner() == null) { return permissionsWrapper.notFound(); } else if (!permissionService.on(dataset.getOwner()).has(Permission.AddDataset)) { return permissionsWrapper.notAuthorized(); } - + //Wait until the create command before actually getting an identifier, except if we're using directUpload + //Need to assign an identifier prior to calls to requestDirectUploadUrl if direct upload is used. + if ( isEmpty(dataset.getIdentifier()) && directUploadEnabled() ) { + CommandContext ctxt = commandEngine.getContext(); + GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); + dataset.setIdentifier(ctxt.datasets().generateDatasetIdentifier(dataset, idServiceBean)); + } dataverseTemplates.addAll(dataverseService.find(ownerId).getTemplates()); if (!dataverseService.find(ownerId).isTemplateRoot()) { dataverseTemplates.addAll(dataverseService.find(ownerId).getParentTemplates()); diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 481e8400513..b8fc7f0716f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -346,10 +346,6 @@ public boolean doesSessionUserHaveDataSetPermission(Permission permissionToCheck return hasPermission; } - public boolean directUploadEnabled() { - return Boolean.getBoolean("dataverse.files." + this.dataset.getDataverseContext().getEffectiveStorageDriverId() + ".upload-redirect"); - } - public void reset() { // ? } @@ -481,12 +477,7 @@ public String init() { return permissionsWrapper.notFound(); } - //Need to assign an identifier prior to calls to requestDirectUploadUrl if direct upload is used. 
- if ( isEmpty(dataset.getIdentifier()) && directUploadEnabled() ) { - CommandContext ctxt = commandEngine.getContext(); - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); - dataset.setIdentifier(ctxt.datasets().generateDatasetIdentifier(dataset, idServiceBean)); - } + this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSizeForStore(dataset.getOwner().getEffectiveStorageDriverId()); this.multipleUploadFilesLimit = systemConfig.getMultipleUploadFilesLimit(); From 8400c2071ab9f4f6f2a273d037b0c90752483ea8 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 27 Apr 2020 12:21:13 -0400 Subject: [PATCH 013/189] move directUploadEnabled to systemconfig used in DatasetPage and editFilesFragment.xhtml --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 6 +----- .../java/edu/harvard/iq/dataverse/util/SystemConfig.java | 5 +++++ src/main/webapp/editFilesFragment.xhtml | 3 +-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 0263ebbc428..060fe08d453 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -1766,10 +1766,6 @@ public void updateOwnerDataverse() { } } - public boolean directUploadEnabled() { - return Boolean.getBoolean("dataverse.files." + this.dataset.getDataverseContext().getEffectiveStorageDriverId() + ".upload-redirect"); - } - private String init(boolean initFull) { //System.out.println("_YE_OLDE_QUERY_COUNTER_"); // for debug purposes @@ -1945,7 +1941,7 @@ private String init(boolean initFull) { } //Wait until the create command before actually getting an identifier, except if we're using directUpload //Need to assign an identifier prior to calls to requestDirectUploadUrl if direct upload is used. - if ( isEmpty(dataset.getIdentifier()) && directUploadEnabled() ) { + if ( isEmpty(dataset.getIdentifier()) && systemConfig.directUploadEnabled(dataset) ) { CommandContext ctxt = commandEngine.getContext(); GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); dataset.setIdentifier(ctxt.datasets().generateDatasetIdentifier(dataset, idServiceBean)); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 1012cdc37c0..0286fb4b6b9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -2,6 +2,7 @@ import com.ocpsoft.pretty.PrettyContext; import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; @@ -1047,4 +1048,8 @@ public boolean isDatafileValidationOnPublishEnabled() { boolean safeDefaultIfKeyNotFound = true; return settingsService.isTrueForKey(SettingsServiceBean.Key.FileValidationOnPublishEnabled, safeDefaultIfKeyNotFound); } + + public boolean directUploadEnabled(Dataset dataset) { + return Boolean.getBoolean("dataverse.files." 
+ dataset.getDataverseContext().getEffectiveStorageDriverId() + ".upload-redirect"); + } } diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 0fda62fb87c..4a144443c2b 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -95,7 +95,6 @@ $(document).ready(function () { uploadWidgetDropMsg(); - setupDirectUpload(#{EditDatafilesPage.directUploadEnabled()}); }); //]]> @@ -127,7 +126,7 @@ Date: Mon, 27 Apr 2020 13:02:34 -0400 Subject: [PATCH 014/189] typo/fix method calls --- src/main/webapp/editFilesFragment.xhtml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 4a144443c2b..18937c0014b 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -95,6 +95,7 @@ $(document).ready(function () { uploadWidgetDropMsg(); + setupDirectUpload(#{systemConfig.directUploadEnabled(EditDatafilesPage.dataset)}); }); //]]> @@ -126,7 +127,7 @@ Date: Mon, 27 Apr 2020 15:31:17 -0400 Subject: [PATCH 015/189] set minimum part size --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 51e7141eab5..fb4e7bbf101 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -912,7 +912,7 @@ public String generateTemporaryS3UploadUrl() throws IOException { public JsonObjectBuilder generateTemporaryS3UploadUrls(long fileSize) { - long chunkSize=512l; + long chunkSize=5*1024*1024l; //5 MB minimum part size for AWS S3 (confirmed that they use base 2 definitions) JsonObjectBuilder response = Json.createObjectBuilder(); try { key = getMainFileKey(); From f844914884da53c4aeeebcae0611a8a4c47e2b2f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 27 Apr 2020 15:49:46 -0400 Subject: [PATCH 016/189] add convenience urls --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 5 +++-- .../edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 9d65f70df93..ad4eb65373d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1544,8 +1544,9 @@ public Response getMPUploadUrls(@PathParam("id") String idSupplied, @QueryParam( JsonObjectBuilder urlsBuilder = null; String storageIdentifier = null; try { - urlsBuilder = s3io.generateTemporaryS3UploadUrls(fileSize); - storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); + storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); + urlsBuilder = s3io.generateTemporaryS3UploadUrls(dataset.getId(), storageIdentifier, fileSize); + } catch (IOException io) { logger.warning(io.getMessage()); throw new WrappedResponse(io, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not create process direct upload request")); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index fb4e7bbf101..b78850195e9 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -910,7 +910,7 @@ public String generateTemporaryS3UploadUrl() throws IOException { return urlString; } - public JsonObjectBuilder generateTemporaryS3UploadUrls(long fileSize) { + public JsonObjectBuilder generateTemporaryS3UploadUrls(long datasetId, String storageIdentifier, long fileSize) { long chunkSize=5*1024*1024l; //5 MB minimum part size for AWS S3 (confirmed that they use base 2 definitions) JsonObjectBuilder response = Json.createObjectBuilder(); @@ -951,7 +951,9 @@ public JsonObjectBuilder generateTemporaryS3UploadUrls(long fileSize) { } response.add(Integer.toString(i), urlString); } - } catch (IOException e) { + response.add("abort", "/api/datasets/" + datasetId + "/mpupload?uploadid=" + uploadId + "&storageidentifier=" + storageIdentifier ); + response.add("complete", "/api/datasets/" + datasetId + "/mpupload?uploadid=" + uploadId + "&storageidentifier=" + storageIdentifier ); + } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } From db00c6f0371b5f2abd7cadb54f2c64380e2be786 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 29 Apr 2020 15:22:58 -0400 Subject: [PATCH 017/189] add dv_status=temp tag in multipart uploads --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index b78850195e9..20e4d0659df 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -921,6 +921,7 @@ public JsonObjectBuilder generateTemporaryS3UploadUrls(long datasetId, String st msec += 60 * 1000 * getUrlExpirationMinutes(); expiration.setTime(msec); InitiateMultipartUploadRequest initiationRequest = new InitiateMultipartUploadRequest(bucketName, key); + initiationRequest.putCustomRequestHeader(Headers.S3_TAGGING, "dv-state=temp"); InitiateMultipartUploadResult initiationResponse = s3.initiateMultipartUpload(initiationRequest); String uploadId = initiationResponse.getUploadId(); for(int i=1;i<=(fileSize/chunkSize) + (fileSize%chunkSize > 0 ? 
1: 0);i++) { @@ -928,10 +929,6 @@ public JsonObjectBuilder generateTemporaryS3UploadUrls(long datasetId, String st new GeneratePresignedUrlRequest(bucketName, key).withMethod(HttpMethod.PUT).withExpiration(expiration); uploadPartUrlRequest.addRequestParameter("uploadId", uploadId); uploadPartUrlRequest.addRequestParameter("partNumber", Integer.toString(i)); - - //Require user to add this header to indicate a temporary file - // generatePresignedUrlRequest.putCustomRequestHeader(Headers.S3_TAGGING, "dv-state=temp"); - URL presignedUrl; try { presignedUrl = s3.generatePresignedUrl(uploadPartUrlRequest); From f473fabc0653faab89688df0bccad8aa87761d3f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 30 Apr 2020 17:23:53 -0400 Subject: [PATCH 018/189] return no content for delete call --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index ad4eb65373d..50de02e37a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1583,7 +1583,7 @@ public Response abortMPUpload(@PathParam("id") String idSupplied, @QueryParam("s logger.warning(io.getMessage()); throw new WrappedResponse(io, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not abort multipart upload")); } - return ok("Multipart Upload aborted"); + return Response.noContent().build(); } catch (WrappedResponse wr) { return wr.getResponse(); } From 088efb0b022d21cf347c38fe6529426121995226 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 30 Apr 2020 17:24:53 -0400 Subject: [PATCH 019/189] add request for upload in parts method --- .../iq/dataverse/EditDatafilesPage.java | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index b8fc7f0716f..0818797e7a9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -65,6 +65,7 @@ import org.primefaces.model.file.UploadedFile; import javax.json.Json; import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; import javax.json.JsonArray; import javax.json.JsonReader; import org.apache.commons.httpclient.HttpClient; @@ -1762,6 +1763,29 @@ public void requestDirectUploadUrl() { PrimeFaces.current().executeScript("uploadFileDirectly('" + url + "','" + storageIdentifier + "')"); } +public void requestMultipartDirectUploadUrls(long fileSize) { + + + + S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); + if(s3io == null) { + FacesContext.getCurrentInstance().addMessage(uploadComponentId, new FacesMessage(FacesMessage.SEVERITY_ERROR, BundleUtil.getStringFromBundle("dataset.file.uploadWarning"), "Direct upload not supported for this dataset")); + } + JsonObjectBuilder urls = null; + String storageIdentifier = null; + try { + storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); + urls = s3io.generateTemporaryS3UploadUrls(dataset.getId(), storageIdentifier, fileSize); + + } catch (IOException io) { + logger.warning(io.getMessage()); + FacesContext.getCurrentInstance().addMessage(uploadComponentId, new FacesMessage(FacesMessage.SEVERITY_ERROR, BundleUtil.getStringFromBundle("dataset.file.uploadWarning"), "Issue in connecting to S3 store for direct 
upload")); + } + + PrimeFaces.current().executeScript("uploadFileDirectlyInParts('" + urls.build().toString() + "','" + storageIdentifier + "','" + fileSize + "')"); + } + + public void uploadFinished() { // This method is triggered from the page, by the Date: Fri, 1 May 2020 12:11:06 -0400 Subject: [PATCH 020/189] handle net:ERR_NETWORK_CHANGED errors w/o 'undefined' error --- src/main/webapp/resources/js/fileupload.js | 38 +++++++++++++++++++--- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/main/webapp/resources/js/fileupload.js b/src/main/webapp/resources/js/fileupload.js index c66df2049aa..6fdec2b3a6c 100644 --- a/src/main/webapp/resources/js/fileupload.js +++ b/src/main/webapp/resources/js/fileupload.js @@ -1,4 +1,24 @@ +class Datafile { + constructor(id, file) { + this.fileToUpload = file; + this.localId = id; + } + + setStorageIdentifier(si) { + this.storageIdentifier = si; + } + + getStorageIdentifier() { + return this.storageIdentifier; + } + + + +} + var fileList = []; + + var observer2=null; var directUploadEnabled=false; //How many files have started being processed but aren't yet being uploaded @@ -71,7 +91,11 @@ function queueFileForDirectUpload(file) { //Fire off the first 4 to start (0,1,2,3) if(filesInProgress < 4 ) { filesInProgress= filesInProgress+1; - requestDirectUploadUrl(); + if(file.size > 5*1024*1024) { + requestMultipartDirectUploadUrls(fike,size); + } else { + requestDirectUploadUrl(); + } } } @@ -241,9 +265,9 @@ function uploadFailure(jqXHR, upid, filename) { // On some browsers, the status is available in an event: window.event.srcElement.status // but others, (Firefox) don't support this. The calls below retrieve the status and other info // from the call stack instead (arguments to the fail() method that calls onerror() that calls this function - + //Retrieve the error number (status) and related explanation (statusText) - var status = null; + var status = 0; var statusText =null; // There are various metadata available about which file the error pertains to @@ -262,10 +286,14 @@ function uploadFailure(jqXHR, upid, filename) { id = upid; name=filename; } else { - status=arguments.callee.caller.caller.arguments[1].jqXHR.status; - statusText = arguments.callee.caller.caller.arguments[1].jqXHR.statusText; + try { name = arguments.callee.caller.caller.arguments[1].files[0].name; id = arguments.callee.caller.caller.arguments[1].files[0].row[0].attributes.upid.value; + status=arguments.callee.caller.caller.arguments[1].jqXHR.status; + statusText = arguments.callee.caller.caller.arguments[1].jqXHR.statusText; + } catch { + console.log("Unable to determine status for error - assuming network issue"); + } } //statusText for error 0 is the unhelpful 'error' From ef6ef134b2d93ab6d25e1bf9d67da4fe40d3b100 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 May 2020 12:19:57 -0400 Subject: [PATCH 021/189] remove draft code from other branch --- src/main/webapp/resources/js/fileupload.js | 26 +--------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/src/main/webapp/resources/js/fileupload.js b/src/main/webapp/resources/js/fileupload.js index 6fdec2b3a6c..f2e88f81f0f 100644 --- a/src/main/webapp/resources/js/fileupload.js +++ b/src/main/webapp/resources/js/fileupload.js @@ -1,24 +1,4 @@ -class Datafile { - constructor(id, file) { - this.fileToUpload = file; - this.localId = id; - } - - setStorageIdentifier(si) { - this.storageIdentifier = si; - } - - getStorageIdentifier() { - return this.storageIdentifier; - } - - - -} - var 
fileList = []; - - var observer2=null; var directUploadEnabled=false; //How many files have started being processed but aren't yet being uploaded @@ -91,11 +71,7 @@ function queueFileForDirectUpload(file) { //Fire off the first 4 to start (0,1,2,3) if(filesInProgress < 4 ) { filesInProgress= filesInProgress+1; - if(file.size > 5*1024*1024) { - requestMultipartDirectUploadUrls(fike,size); - } else { - requestDirectUploadUrl(); - } + requestDirectUploadUrl(); } } From a666a7b98af3961a299249c306fb5bb565a2b574 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 May 2020 12:38:08 -0400 Subject: [PATCH 022/189] handle dataverse change - keep GlobalId in direct upload case --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 060fe08d453..2c00089b7a7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -1757,8 +1757,14 @@ public void updateOwnerDataverse() { if (dataset.getOwner() != null && dataset.getOwner().getId() != null) { ownerId = dataset.getOwner().getId(); logger.info("New host dataverse id: "+ownerId); - // discard the dataset already created: + // discard the dataset already created + //If a global ID was already assigned, as is true for direct upload, keep it (if files were already uploaded, they are at the path corresponding to the existing global id) + GlobalId gid = dataset.getGlobalId(); dataset = new Dataset(); + if(gid!=null) { + dataset.setGlobalId(gid); + } + // initiate from scratch: (isolate the creation of a new dataset in its own method?) init(true); // rebuild the bred crumbs display: From 4bb1b660141f28353994146342b636b496b3e53f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 May 2020 17:09:25 -0400 Subject: [PATCH 023/189] IQSS/6881 delete temp files when cancelling dataset create --- .../edu/harvard/iq/dataverse/DatasetPage.java | 26 +++++- .../iq/dataverse/EditDatafilesPage.java | 88 ++++--------------- .../harvard/iq/dataverse/util/FileUtil.java | 61 +++++++++++++ src/main/webapp/dataset.xhtml | 2 +- 4 files changed, 102 insertions(+), 75 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 2c00089b7a7..add2fb58fe3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -11,6 +11,7 @@ import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.SwiftAccessIO; import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; @@ -59,6 +60,9 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -246,6 +250,8 @@ public enum DisplayMode { private Long versionId; private int selectedTabIndex; private List newFiles = new ArrayList<>(); + private List uploadedFiles = new ArrayList<>(); + private DatasetVersion workingVersion; private 
DatasetVersion clone; private int releaseRadio = 1; @@ -1114,6 +1120,14 @@ public void setNewFiles(List newFiles) { this.newFiles = newFiles; } + public List getUploadedFiles() { + return uploadedFiles; + } + + public void setUploadedFiles(List uploadedFiles) { + this.uploadedFiles = uploadedFiles; + } + public Dataverse getLinkingDataverse() { return linkingDataverse; } @@ -3553,9 +3567,19 @@ private String returnToDraftVersion(){ } public String cancel() { + //Files that have been finished and are now in the lower list on the page + for (DataFile newFile : newFiles) { + FileUtil.deleteTempFile(newFile, dataset, ingestService); + } + + //Files in the upload process but not yet finished + for (DataFile newFile : uploadedFiles) { + FileUtil.deleteTempFile(newFile, dataset, ingestService); + } + return returnToLatestVersion(); } - + private HttpClient getClient() { // TODO: // cache the http client? -- L.A. 4.0 alpha diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 0818797e7a9..fdd235c27c8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -420,7 +420,7 @@ public void setVersionId(Long versionId) { this.versionId = versionId; } - public String initCreateMode(String modeToken, DatasetVersion version, List newFilesList, List selectedFileMetadatasList) { + public String initCreateMode(String modeToken, DatasetVersion version, List newFilesList, List uploadedFilesList, List selectedFileMetadatasList) { if (modeToken == null) { logger.fine("Request to initialize Edit Files page with null token (aborting)."); return null; @@ -441,7 +441,7 @@ public String initCreateMode(String modeToken, DatasetVersion version, List(); + uploadedFiles = uploadedFilesList; selectedFiles = selectedFileMetadatasList; this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSizeForStore(dataset.getOwner().getEffectiveStorageDriverId()); @@ -940,7 +940,7 @@ public void deleteFiles() { removeDataFileFromList(dataset.getFiles(), markedForDelete.getDataFile()); removeDataFileFromList(newFiles, markedForDelete.getDataFile()); - deleteTempFile(markedForDelete.getDataFile()); + FileUtil.deleteTempFile(markedForDelete.getDataFile(), dataset, ingestService); // Also remove checksum from the list of newly uploaded checksums (perhaps odd // to delete and then try uploading the same file again, but it seems like it // should be allowed/the checksum list is part of the state to clean-up @@ -956,65 +956,6 @@ public void deleteFiles() { } } - private void deleteTempFile(DataFile dataFile) { - // Before we remove the file from the list and forget about - // it: - // The physical uploaded file is still sitting in the temporary - // directory. If it were saved, it would be moved into its - // permanent location. But since the user chose not to save it, - // we have to delete the temp file too. - // - // Eventually, we will likely add a dedicated mechanism - // for managing temp files, similar to (or part of) the storage - // access framework, that would allow us to handle specialized - // configurations - highly sensitive/private data, that - // has to be kept encrypted even in temp files, and such. 
- // But for now, we just delete the file directly on the - // local filesystem: - - try { - List generatedTempFiles = ingestService.listGeneratedTempFiles( - Paths.get(FileUtil.getFilesTempDirectory()), dataFile.getStorageIdentifier()); - if (generatedTempFiles != null) { - for (Path generated : generatedTempFiles) { - logger.fine("(Deleting generated thumbnail file " + generated.toString() + ")"); - try { - Files.delete(generated); - } catch (IOException ioex) { - logger.warning("Failed to delete generated file " + generated.toString()); - } - } - } - String si = dataFile.getStorageIdentifier(); - if (si.contains("://")) { - //Direct upload files will already have a store id in their storageidentifier - //but they need to be associated with a dataset for the overall storagelocation to be calculated - //so we temporarily set the owner - if(dataFile.getOwner()!=null) { - logger.warning("Datafile owner was not null as expected"); - } - dataFile.setOwner(dataset); - //Use one StorageIO to get the storageLocation and then create a direct storage storageIO class to perform the delete - // (since delete is forbidden except for direct storage) - String sl = DataAccess.getStorageIO(dataFile).getStorageLocation(); - DataAccess.getDirectStorageIO(sl).delete(); - dataFile.setOwner(null); - } else { - //Temp files sent to this method have no prefix, not even "tmp://" - Files.delete(Paths.get(FileUtil.getFilesTempDirectory() + "/" + dataFile.getStorageIdentifier())); - } - } catch (IOException ioEx) { - // safe to ignore - it's just a temp file. - logger.warning(ioEx.getMessage()); - if(dataFile.getStorageIdentifier().contains("://")) { - logger.warning("Failed to delete temporary file " + dataFile.getStorageIdentifier()); - } else { - logger.warning("Failed to delete temporary file " + FileUtil.getFilesTempDirectory() + "/" - + dataFile.getStorageIdentifier()); - } - } - } - private void removeFileMetadataFromList(List fmds, FileMetadata fmToDelete) { Iterator fmit = fmds.iterator(); while (fmit.hasNext()) { @@ -1366,17 +1307,18 @@ private String returnToFileLandingPageAfterReplace(DataFile newFile) { public String cancel() { uploadInProgress = false; - if (mode == FileEditMode.SINGLE || mode == FileEditMode.SINGLE_REPLACE ) { - return returnToFileLandingPage(); - } //Files that have been finished and are now in the lower list on the page for (DataFile newFile : newFiles) { - deleteTempFile(newFile); + FileUtil.deleteTempFile(newFile, dataset, ingestService); } //Files in the upload process but not yet finished for (DataFile newFile : uploadedFiles) { - deleteTempFile(newFile); + FileUtil.deleteTempFile(newFile, dataset, ingestService); + } + + if (mode == FileEditMode.SINGLE || mode == FileEditMode.SINGLE_REPLACE ) { + return returnToFileLandingPage(); } if (workingVersion.getId() != null) { return returnToDraftVersion(); @@ -1633,7 +1575,7 @@ public void handleDropBoxUpload(ActionEvent event) { if(!uploadInProgress) { logger.warning("Upload in progress cancelled"); for (DataFile newFile : datafiles) { - deleteTempFile(newFile); + FileUtil.deleteTempFile(newFile, dataset, ingestService); } } } @@ -1817,7 +1759,7 @@ public void uploadFinished() { if(uploadInProgress) { - uploadedFiles = new ArrayList<>(); + uploadedFiles.clear(); uploadInProgress = false; } // refresh the warning message below the upload component, if exists: @@ -2039,7 +1981,7 @@ public void handleFileUpload(FileUploadEvent event) throws IOException { if(!uploadInProgress) { logger.warning("Upload in progress cancelled"); for 
(DataFile newFile : dFileList) { - deleteTempFile(newFile); + FileUtil.deleteTempFile(newFile, dataset, ingestService); } } } @@ -2135,7 +2077,7 @@ public void handleExternalUpload() { if(!uploadInProgress) { logger.warning("Upload in progress cancelled"); for (DataFile newFile : datafiles) { - deleteTempFile(newFile); + FileUtil.deleteTempFile(newFile, dataset, ingestService); } } } @@ -2206,7 +2148,7 @@ private String processUploadedFileList(List dFileList) { multipleDupesExisting = true; } // remove temp file - deleteTempFile(dataFile); + FileUtil.deleteTempFile(dataFile, dataset, ingestService); } else if (isFileAlreadyUploaded(dataFile)) { if (dupeFileNamesNew == null) { dupeFileNamesNew = dataFile.getFileMetadata().getLabel(); @@ -2215,7 +2157,7 @@ private String processUploadedFileList(List dFileList) { multipleDupesNew = true; } // remove temp file - deleteTempFile(dataFile); + FileUtil.deleteTempFile(dataFile, dataset, ingestService); } else { // OK, this one is not a duplicate, we want it. // But let's check if its filename is a duplicate of another diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 84307af2799..7b1b99ecb2b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -35,6 +35,7 @@ import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import edu.harvard.iq.dataverse.ingest.IngestReport; +import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.ingest.IngestableDataChecker; import java.awt.image.BufferedImage; @@ -1774,5 +1775,65 @@ public static String getStorageIdentifierFromLocation(String location) { int bucketEnd = driverEnd + location.substring(driverEnd).indexOf("/"); return location.substring(0,bucketEnd) + ":" + location.substring(location.lastIndexOf("/") + 1); } + + public static void deleteTempFile(DataFile dataFile, Dataset dataset, IngestServiceBean ingestService) { + // Before we remove the file from the list and forget about + // it: + // The physical uploaded file is still sitting in the temporary + // directory. If it were saved, it would be moved into its + // permanent location. But since the user chose not to save it, + // we have to delete the temp file too. + // + // Eventually, we will likely add a dedicated mechanism + // for managing temp files, similar to (or part of) the storage + // access framework, that would allow us to handle specialized + // configurations - highly sensitive/private data, that + // has to be kept encrypted even in temp files, and such. 
+ // But for now, we just delete the file directly on the + // local filesystem: + + try { + List generatedTempFiles = ingestService.listGeneratedTempFiles( + Paths.get(getFilesTempDirectory()), dataFile.getStorageIdentifier()); + if (generatedTempFiles != null) { + for (Path generated : generatedTempFiles) { + logger.fine("(Deleting generated thumbnail file " + generated.toString() + ")"); + try { + Files.delete(generated); + } catch (IOException ioex) { + logger.warning("Failed to delete generated file " + generated.toString()); + } + } + } + String si = dataFile.getStorageIdentifier(); + if (si.contains("://")) { + //Direct upload files will already have a store id in their storageidentifier + //but they need to be associated with a dataset for the overall storagelocation to be calculated + //so we temporarily set the owner + if(dataFile.getOwner()!=null) { + logger.warning("Datafile owner was not null as expected"); + } + dataFile.setOwner(dataset); + //Use one StorageIO to get the storageLocation and then create a direct storage storageIO class to perform the delete + // (since delete is forbidden except for direct storage) + String sl = DataAccess.getStorageIO(dataFile).getStorageLocation(); + DataAccess.getDirectStorageIO(sl).delete(); + } else { + //Temp files sent to this method have no prefix, not even "tmp://" + Files.delete(Paths.get(FileUtil.getFilesTempDirectory() + "/" + dataFile.getStorageIdentifier())); + } + } catch (IOException ioEx) { + // safe to ignore - it's just a temp file. + logger.warning(ioEx.getMessage()); + if(dataFile.getStorageIdentifier().contains("://")) { + logger.warning("Failed to delete temporary file " + dataFile.getStorageIdentifier()); + } else { + logger.warning("Failed to delete temporary file " + FileUtil.getFilesTempDirectory() + "/" + + dataFile.getStorageIdentifier()); + } + } finally { + dataFile.setOwner(null); + } + } } diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 790676f2b3a..4aa9f01a069 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -72,7 +72,7 @@ - + From 45245b3ba7ed918a0d252266337136e55c3ece08 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 May 2020 11:28:00 -0400 Subject: [PATCH 024/189] IQSS 6881 cleanup temp files on cancel when creating dataset --- .../edu/harvard/iq/dataverse/DatasetPage.java | 23 ++- .../iq/dataverse/EditDatafilesPage.java | 45 +++-- .../harvard/iq/dataverse/util/FileUtil.java | 1 + src/main/webapp/dataset.xhtml | 4 +- src/main/webapp/resources/js/fileupload.js | 160 ++++++++++-------- 5 files changed, 131 insertions(+), 102 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index add2fb58fe3..4f082544aeb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -119,6 +119,7 @@ import javax.servlet.http.HttpServletResponse; import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.lang3.mutable.MutableBoolean; import org.apache.commons.io.IOUtils; import org.primefaces.component.tabview.TabView; @@ -251,6 +252,7 @@ public enum DisplayMode { private int selectedTabIndex; private List newFiles = new ArrayList<>(); private List uploadedFiles = new ArrayList<>(); + private MutableBoolean uploadInProgress; private DatasetVersion workingVersion; private DatasetVersion clone; @@ -3565,19 +3567,34 @@ private String returnToDatasetOnly(){ private String 
returnToDraftVersion(){ return "/dataset.xhtml?persistentId=" + dataset.getGlobalIdString() + "&version=DRAFT" + "&faces-redirect=true"; } + + private String returnToParentDataverse(){ + return "/dataverse.xhtml?alias=" + dataset.getOwner().getAlias() + "&faces-redirect=true"; + } + public String cancel() { + return returnToLatestVersion(); + } + + public String cancelCreate() { + //Stop any uploads in progress (so that uploadedFiles doesn't change) + uploadInProgress.setValue(false); + + logger.info("Cancelling: " + newFiles.size() + " : " + uploadedFiles.size()); + //Files that have been finished and are now in the lower list on the page - for (DataFile newFile : newFiles) { + for (DataFile newFile : newFiles.toArray(new DataFile[0])) { FileUtil.deleteTempFile(newFile, dataset, ingestService); } //Files in the upload process but not yet finished - for (DataFile newFile : uploadedFiles) { + //ToDo - if files are added to uploadFiles after we access it, those files are not being deleted. With uploadInProgress being set false above, this should be a fairly rare race condition. + for (DataFile newFile : uploadedFiles.toArray(new DataFile[0])) { FileUtil.deleteTempFile(newFile, dataset, ingestService); } - return returnToLatestVersion(); + return returnToParentDataverse(); } private HttpClient getClient() { diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index fdd235c27c8..f79d20b921e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -19,7 +19,6 @@ import edu.harvard.iq.dataverse.datacapturemodule.ScriptRequestResponse; import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; import edu.harvard.iq.dataverse.engine.command.Command; -import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.impl.RequestRsyncScriptCommand; @@ -28,7 +27,6 @@ import edu.harvard.iq.dataverse.ingest.IngestRequest; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.ingest.IngestUtil; -import edu.harvard.iq.dataverse.search.FileView; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -37,16 +35,11 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.EjbUtil; import static edu.harvard.iq.dataverse.util.JsfHelper.JH; -import static edu.harvard.iq.dataverse.util.StringUtil.isEmpty; - import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.StringReader; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -71,7 +64,6 @@ import org.apache.commons.httpclient.HttpClient; import org.apache.commons.io.IOUtils; import org.apache.commons.httpclient.methods.GetMethod; -import java.text.DateFormat; import java.util.Arrays; import java.util.Set; import java.util.logging.Level; @@ -80,6 +72,7 @@ import javax.servlet.ServletOutputStream; import javax.servlet.http.HttpServletResponse; import org.apache.commons.lang.StringUtils; +import 
org.apache.commons.lang3.mutable.MutableBoolean; import org.primefaces.PrimeFaces; /** @@ -168,6 +161,9 @@ public enum FileEditMode { private Long maxFileUploadSizeInBytes = null; private Integer multipleUploadFilesLimit = null; + //MutableBoolean so it can be passed from DatasetPage, supporting DatasetPage.cancelCreate() + private MutableBoolean uploadInProgress = null; + private final int NUMBER_OF_SCROLL_ROWS = 25; private DataFile singleFile = null; @@ -420,7 +416,7 @@ public void setVersionId(Long versionId) { this.versionId = versionId; } - public String initCreateMode(String modeToken, DatasetVersion version, List newFilesList, List uploadedFilesList, List selectedFileMetadatasList) { + public String initCreateMode(String modeToken, DatasetVersion version, MutableBoolean inProgress, List newFilesList, List uploadedFilesList, List selectedFileMetadatasList) { if (modeToken == null) { logger.fine("Request to initialize Edit Files page with null token (aborting)."); return null; @@ -440,6 +436,7 @@ public String initCreateMode(String modeToken, DatasetVersion version, List(); - uploadedFiles = new ArrayList<>(); + uploadedFiles = new ArrayList<>(); + uploadInProgress= new MutableBoolean(false); if (dataset.getId() != null){ // Set Working Version and Dataset by Datasaet Id and Version @@ -1306,7 +1304,7 @@ private String returnToFileLandingPageAfterReplace(DataFile newFile) { public String cancel() { - uploadInProgress = false; + uploadInProgress.setValue(false); //Files that have been finished and are now in the lower list on the page for (DataFile newFile : newFiles) { FileUtil.deleteTempFile(newFile, dataset, ingestService); @@ -1442,8 +1440,8 @@ private InputStream getDropBoxInputStream(String fileLink, GetMethod dropBoxMeth * @param event */ public void handleDropBoxUpload(ActionEvent event) { - if (!uploadInProgress) { - uploadInProgress = true; + if (uploadInProgress.isFalse()) { + uploadInProgress.setValue(true); } logger.fine("handleDropBoxUpload"); uploadComponentId = event.getComponent().getClientId(); @@ -1572,7 +1570,7 @@ public void handleDropBoxUpload(ActionEvent event) { } }*/ } - if(!uploadInProgress) { + if(uploadInProgress.isFalse()) { logger.warning("Upload in progress cancelled"); for (DataFile newFile : datafiles) { FileUtil.deleteTempFile(newFile, dataset, ingestService); @@ -1595,7 +1593,7 @@ public void uploadStarted() { // (either through drag-and-drop or select menu). 
logger.fine("upload started"); - uploadInProgress = true; + uploadInProgress.setValue(true); } @@ -1758,9 +1756,9 @@ public void uploadFinished() { } - if(uploadInProgress) { + if(uploadInProgress.isTrue()) { uploadedFiles.clear(); - uploadInProgress = false; + uploadInProgress.setValue(false); } // refresh the warning message below the upload component, if exists: if (uploadComponentId != null) { @@ -1912,8 +1910,8 @@ private void handleReplaceFileUpload(String fullStorageLocation, */ public void handleFileUpload(FileUploadEvent event) throws IOException { - if (!uploadInProgress) { - uploadInProgress = true; + if (uploadInProgress.isFalse()) { + uploadInProgress.setValue(true); } if (event == null){ @@ -1978,7 +1976,7 @@ public void handleFileUpload(FileUploadEvent event) throws IOException { uploadComponentId = event.getComponent().getClientId(); } - if(!uploadInProgress) { + if(uploadInProgress.isFalse()) { logger.warning("Upload in progress cancelled"); for (DataFile newFile : dFileList) { FileUtil.deleteTempFile(newFile, dataset, ingestService); @@ -2004,8 +2002,8 @@ public void handleExternalUpload() { int lastColon = fullStorageIdentifier.lastIndexOf(':'); String storageLocation= fullStorageIdentifier.substring(0,lastColon) + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + fullStorageIdentifier.substring(lastColon+1); - if (!uploadInProgress) { - uploadInProgress = true; + if (uploadInProgress.isFalse()) { + uploadInProgress.setValue(true); } logger.fine("handleExternalUpload"); @@ -2074,7 +2072,7 @@ public void handleExternalUpload() { // ----------------------------------------------------------- uploadWarningMessage = processUploadedFileList(datafiles); } - if(!uploadInProgress) { + if(uploadInProgress.isFalse()) { logger.warning("Upload in progress cancelled"); for (DataFile newFile : datafiles) { FileUtil.deleteTempFile(newFile, dataset, ingestService); @@ -2103,7 +2101,6 @@ public void handleExternalUpload() { private String dupeFileNamesNew = null; private boolean multipleDupesExisting = false; private boolean multipleDupesNew = false; - private boolean uploadInProgress = false; private String processUploadedFileList(List dFileList) { if (dFileList == null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 7b1b99ecb2b..bc7a42042d5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1777,6 +1777,7 @@ public static String getStorageIdentifierFromLocation(String location) { } public static void deleteTempFile(DataFile dataFile, Dataset dataset, IngestServiceBean ingestService) { + logger.info("Deleting " + dataFile.getStorageIdentifier()); // Before we remove the file from the list and forget about // it: // The physical uploaded file is still sitting in the temporary diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 4aa9f01a069..bd07f0df793 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -72,7 +72,7 @@ - + @@ -755,7 +755,7 @@ - +