Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix S3 provider priorities #10004

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
84 changes: 57 additions & 27 deletions src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import com.amazonaws.ClientConfiguration;
import com.amazonaws.HttpMethod;
import com.amazonaws.SdkClientException;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSCredentialsProviderChain;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
Expand Down Expand Up @@ -57,9 +58,11 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Optional;
import java.util.Random;
import java.util.function.Predicate;
import java.util.logging.Logger;
Expand Down Expand Up @@ -108,14 +111,13 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) {
if(!StringUtil.isEmpty(proxy)&&StringUtil.isEmpty(endpoint)) {
logger.severe(driverId + " config error: Must specify a custom-endpoint-url if proxy-url is specified");
}
//Not sure this is needed but moving it from the open method for now since it definitely doesn't need to run every time an object is opened.
try {
if (bucketName == null || !s3.doesBucketExistV2(bucketName)) {
throw new IOException("ERROR: S3AccessIO - You must create and configure a bucket before creating datasets.");
}
} catch (SdkClientException sce) {
throw new IOException("ERROR: S3AccessIO - Failed to look up bucket "+bucketName+" (is AWS properly configured?): " + sce.getMessage());
}

// FWIW: There used to be a check here to see if the bucket exists.
// It was very redundant (checking every time we access any file) and didn't do
// much but potentially make the failure (in the unlikely case a bucket doesn't
// exist/just disappeared) happen slightly earlier (here versus at the first
// file/metadata access).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call.

} catch (Exception e) {
throw new AmazonClientException(
"Cannot instantiate a S3 client; check your AWS credentials and region",
Expand Down Expand Up @@ -1181,29 +1183,57 @@ private static AmazonS3 getClient(String driverId) {
// Boolean is inverted, otherwise setting dataverse.files.<id>.chunked-encoding=false would result in leaving Chunked Encoding enabled
s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding);

/**
* Pass in a string value if this storage driver should use a non-default AWS S3 profile.
* The default is "default" which should work when only one profile exists.
/** Configure credentials for the S3 client. There are multiple mechanisms available.
* Role-based/instance credentials are globally defined while the other mechanisms (profile, static)
* are defined per store. The logic below assures that
* * if a store specific profile or static credentials are explicitly set, they will be used in preference to the global role-based credentials.
* * if a store specific role-based credentials are explicitly set, they will be used in preference to the global instance credentials,
* * if a profile and static credentials are both explicitly set, the profile will be used preferentially, and
* * if no store-specific credentials are set, the global credentials will be preferred over using any "default" profile credentials that are found.
*/
String s3profile = System.getProperty("dataverse.files." + driverId + ".profile","default");
ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(s3profile);

// Try to retrieve credentials via Microprofile Config API, too. For production use, you should not use env
// vars or system properties to provide these, but use the secrets config source provided by Payara.
AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider(
new BasicAWSCredentials(
config.getOptionalValue("dataverse.files." + driverId + ".access-key", String.class).orElse(""),
config.getOptionalValue("dataverse.files." + driverId + ".secret-key", String.class).orElse("")
));

//Add role-based provider as in the default provider chain
InstanceProfileCredentialsProvider instanceCredentials = InstanceProfileCredentialsProvider.getInstance();
ArrayList<AWSCredentialsProvider> providers = new ArrayList<>();

String s3profile = System.getProperty("dataverse.files." + driverId + ".profile");
boolean allowInstanceCredentials = true;
// Assume that instance credentials should not be used if the profile is
// actually set for this store or if static creds are provided (below).
if (s3profile != null) {
allowInstanceCredentials = false;
}
// Try to retrieve credentials via Microprofile Config API, too. For production
// use, you should not use env vars or system properties to provide these, but
// use the secrets config source provided by Payara.
Optional<String> accessKey = config.getOptionalValue("dataverse.files." + driverId + ".access-key", String.class);
Optional<String> secretKey = config.getOptionalValue("dataverse.files." + driverId + ".secret-key", String.class);
if (accessKey.isPresent() && secretKey.isPresent()) {
allowInstanceCredentials = false;
AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider(
new BasicAWSCredentials(
accessKey.get(),
secretKey.get()));
providers.add(staticCredentials);
} else if (s3profile == null) {
//Only use the default profile when it isn't explicitly set for this store when there are no static creds (otherwise it will be preferred).
s3profile = "default";
}
if (s3profile != null) {
providers.add(new ProfileCredentialsProvider(s3profile));
}

if (allowInstanceCredentials) {
// Add role-based provider as in the default provider chain
providers.add(InstanceProfileCredentialsProvider.getInstance());
}
// Add all providers to chain - the first working provider will be used
// (role-based is first in the default cred provider chain, so we're just
// (role-based is first in the default cred provider chain (if no profile or
// static creds are explicitly set for the store), so we're just
// reproducing that, then profile, then static credentials as the fallback)
AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(instanceCredentials, profileCredentials, staticCredentials);

// As the order is the reverse of how we added providers, we reverse the list here
Collections.reverse(providers);
poikilotherm marked this conversation as resolved.
Show resolved Hide resolved
AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(providers);
s3CB.setCredentials(providerChain);

// let's build the client :-)
AmazonS3 client = s3CB.build();
driverClientMap.put(driverId, client);
Expand Down