Skip to content

Commit 79d267c

Browse files
authored
Merge pull request #30 from trocco-io/feat/aws-credential-utils
Sync config with s3 input for aws authmethod and http_proxy
2 parents 43f43d5 + bda4c1a commit 79d267c

File tree

5 files changed

+250
-29
lines changed

5 files changed

+250
-29
lines changed

README.md

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,62 @@
2525
- **tmp_path**: temporary file directory. If null, it is associated with the default FileSystem. (string, default: null)
2626
- **tmp_path_prefix**: prefix of temporary files (string, default: 'embulk-output-s3-')
2727
- **canned_acl**: canned access control list for created objects ([enum](#cannedaccesscontrollist), default: null)
28-
- **proxy_host**: proxy host to use when accessing AWS S3 via proxy. (string, default: null )
29-
- **proxy_port**: proxy port to use when accessing AWS S3 via proxy. (string, default: null )
28+
- [Deprecated] **proxy_host**: proxy host to use when accessing AWS S3 via proxy. (string, default: null )
29+
- [Deprecated] **proxy_port**: proxy port to use when accessing AWS S3 via proxy. (string, default: null )
30+
- **http_proxy**: http proxy configuration to use when accessing AWS S3 via http proxy. (optional)
31+
- **host**: proxy host (string, required)
32+
- **port**: proxy port (int, optional)
33+
- **https**: use https or not (boolean, default: true)
34+
- **user**: proxy user (string, optional)
35+
- **password**: proxy password (string, optional)
36+
37+
- **auth_method**: name of the mechanism used to authenticate requests (basic, env, instance, profile, properties, anonymous, session, or default. default: basic)
38+
39+
- "basic": uses access_key_id and secret_access_key to authenticate.
40+
41+
- **access_key_id**: AWS access key ID (string, required)
42+
43+
- **secret_access_key**: AWS secret access key (string, required)
44+
45+
- "env": uses AWS_ACCESS_KEY_ID (or AWS_ACCESS_KEY) and AWS_SECRET_KEY (or AWS_SECRET_ACCESS_KEY) environment variables.
46+
47+
- "instance": uses EC2 instance profile.
48+
49+
- "profile": uses credentials written in a file. Format of the file is as following, where `[...]` is a name of profile.
50+
51+
- **profile_file**: path to a profiles file. (string, default: given by AWS_CREDENTIAL_PROFILES_FILE environment variable, or ~/.aws/credentials).
52+
53+
- **profile_name**: name of a profile. (string, default: `"default"`)
54+
55+
```
56+
[default]
57+
aws_access_key_id=YOUR_ACCESS_KEY_ID
58+
aws_secret_access_key=YOUR_SECRET_ACCESS_KEY
59+
60+
[profile2]
61+
...
62+
```
63+
64+
- "properties": uses aws.accessKeyId and aws.secretKey Java system properties.
65+
66+
- "anonymous": uses anonymous access. This auth method can access only public files.
67+
68+
- "session": uses temporary-generated access_key_id, secret_access_key and session_token.
69+
70+
- **access_key_id**: AWS access key ID (string, required)
71+
72+
- **secret_access_key**: AWS secret access key (string, required)
73+
74+
- **session_token**: session token (string, required)
75+
76+
- "default": uses AWS SDK's default strategy to look up available credentials from runtime environment. This method behaves like the combination of the following methods.
77+
78+
1. "env"
79+
1. "properties"
80+
1. "profile"
81+
1. "instance"
82+
83+
3084
3185
### CannedAccessControlList
3286
you can choose one of the below list.

build.gradle

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,29 @@ dependencies {
4646
exclude group: "commons-logging", module: "commons-logging"
4747
}
4848

49+
compile("com.amazonaws:aws-java-sdk-sts:1.11.466") {
50+
// They conflict with embulk-core. They are once excluded here,
51+
// and added explicitly with versions exactly the same with embulk-core:0.10.29.
52+
exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
53+
exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
54+
exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
55+
exclude group: "joda-time", module: "joda-time"
56+
57+
// commons-logging api is provided by jcl-over-slf4j below.
58+
exclude group: "commons-logging", module: "commons-logging"
59+
}
60+
61+
compile("org.embulk:embulk-util-aws-credentials:0.4.1") {
62+
// They conflict with embulk-core. They are once excluded here,
63+
// and added explicitly with versions exactly the same with embulk-core:0.10.29.
64+
exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
65+
exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
66+
exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
67+
exclude group: "com.fasterxml.jackson.datatype", module: "jackson-datatype-jdk8"
68+
exclude group: "javax.validation", module: "validation-api"
69+
exclude group: "joda-time", module: "joda-time"
70+
}
71+
4972
compile("org.embulk:embulk-util-config:0.3.1") {
5073
// Jackson libraries conflict with embulk-core before v0.10.32.
5174
// They are once excluded here, and re-added explicitly below.

gradle/dependency-locks/embulkPluginRuntime.lockfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
com.amazonaws:aws-java-sdk-core:1.11.1034
55
com.amazonaws:aws-java-sdk-kms:1.11.1034
66
com.amazonaws:aws-java-sdk-s3:1.11.1034
7+
com.amazonaws:aws-java-sdk-sts:1.11.466
78
com.amazonaws:jmespath-java:1.11.1034
89
com.fasterxml.jackson.core:jackson-annotations:2.6.7
910
com.fasterxml.jackson.core:jackson-core:2.6.7
@@ -18,6 +19,7 @@ javax.xml.bind:jaxb-api:2.2.11
1819
joda-time:joda-time:2.9.2
1920
org.apache.httpcomponents:httpclient:4.5.13
2021
org.apache.httpcomponents:httpcore:4.4.13
22+
org.embulk:embulk-util-aws-credentials:0.4.1
2123
org.embulk:embulk-util-config:0.3.1
2224
org.embulk:embulk-util-file:0.1.3
2325
org.slf4j:jcl-over-slf4j:1.7.12
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*
2+
* Copyright 2022 The Embulk project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.embulk.output.s3;
18+
19+
20+
import org.embulk.util.config.Config;
21+
import org.embulk.util.config.ConfigDefault;
22+
import org.embulk.util.config.Task;
23+
24+
import java.util.Optional;
25+
26+
/**
27+
* HttpProxy is a config unit for Input/Output plugins' configs.
28+
*
29+
* TODO: This unit will be moved to embulk/embulk-plugin-units.git.
30+
* TODO: Consider using @JsonProperty(defaultValue=...) in Jackson 2.6+.
31+
*/
32+
public interface HttpProxy
33+
extends Task
34+
{
35+
@Config("host")
36+
String getHost();
37+
void setHost(String host);
38+
39+
@Config("port")
40+
@ConfigDefault("null")
41+
Optional<Integer> getPort();
42+
void setPort(Optional<Integer> host);
43+
44+
@Config("https")
45+
@ConfigDefault("true")
46+
boolean getHttps();
47+
48+
@Config("user")
49+
@ConfigDefault("null")
50+
Optional<String> getUser();
51+
52+
@Config("password")
53+
@ConfigDefault("null")
54+
Optional<String> getPassword();
55+
}

src/main/java/org/embulk/output/s3/S3FileOutputPlugin.java

Lines changed: 114 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@
2525
import java.util.List;
2626
import java.util.Locale;
2727

28+
import com.amazonaws.Protocol;
29+
import com.amazonaws.auth.AWSCredentialsProvider;
30+
import com.amazonaws.client.builder.AwsClientBuilder;
31+
import com.amazonaws.retry.PredefinedRetryPolicies;
32+
import com.amazonaws.services.s3.AmazonS3;
33+
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
2834
import org.embulk.util.config.Config;
2935
import org.embulk.util.config.ConfigDefault;
3036
import org.embulk.config.ConfigDiff;
@@ -44,10 +50,10 @@
4450
import org.slf4j.Logger;
4551

4652
import com.amazonaws.ClientConfiguration;
47-
import com.amazonaws.auth.BasicAWSCredentials;
48-
import com.amazonaws.services.s3.AmazonS3Client;
4953
import com.amazonaws.services.s3.model.CannedAccessControlList;
5054
import com.amazonaws.services.s3.model.PutObjectRequest;
55+
import org.embulk.util.aws.credentials.AwsCredentials;
56+
import org.embulk.util.aws.credentials.AwsCredentialsTask;
5157
import org.slf4j.LoggerFactory;
5258

5359
import java.util.Optional;
@@ -63,7 +69,7 @@ public class S3FileOutputPlugin
6369
private static final ConfigMapper CONFIG_MAPPER = CONFIG_MAPPER_FACTORY.createConfigMapper();
6470

6571
public interface PluginTask
66-
extends Task
72+
extends AwsCredentialsTask, Task
6773
{
6874
@Config("path_prefix")
6975
String getPathPrefix();
@@ -82,13 +88,10 @@ public interface PluginTask
8288
@ConfigDefault("null")
8389
Optional<String> getEndpoint();
8490

85-
@Config("access_key_id")
91+
@Config("http_proxy")
8692
@ConfigDefault("null")
87-
Optional<String> getAccessKeyId();
88-
89-
@Config("secret_access_key")
90-
@ConfigDefault("null")
91-
Optional<String> getSecretAccessKey();
93+
Optional<HttpProxy> getHttpProxy();
94+
void setHttpProxy(Optional<HttpProxy> httpProxy);
9295

9396
@Config("proxy_host")
9497
@ConfigDefault("null")
@@ -109,6 +112,10 @@ public interface PluginTask
109112
@Config("canned_acl")
110113
@ConfigDefault("null")
111114
Optional<CannedAccessControlList> getCannedAccessControlList();
115+
116+
@Config("region")
117+
@ConfigDefault("null")
118+
Optional<String> getRegion();
112119
}
113120

114121
public static class S3FileOutput
@@ -124,42 +131,122 @@ public static class S3FileOutput
124131

125132
private int taskIndex;
126133
private int fileIndex;
127-
private AmazonS3Client client;
134+
private AmazonS3 client;
128135
private OutputStream current;
129136
private Path tempFilePath;
130137
private String tempPath = null;
131138

132-
private static AmazonS3Client newS3Client(PluginTask task)
139+
private AmazonS3 newS3Client(final PluginTask task)
140+
{
141+
Optional<String> endpoint = task.getEndpoint();
142+
Optional<String> region = task.getRegion();
143+
144+
final AmazonS3ClientBuilder builder = AmazonS3ClientBuilder
145+
.standard()
146+
.withCredentials(getCredentialsProvider(task))
147+
.withClientConfiguration(getClientConfiguration(task));
148+
149+
// Favor the `endpoint` configuration, then `region`, if both are absent then `s3.amazonaws.com` will be used.
150+
if (endpoint.isPresent()) {
151+
if (region.isPresent()) {
152+
logger.warn("Either configure endpoint or region, " +
153+
"if both is specified only the endpoint will be in effect.");
154+
}
155+
builder.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endpoint.get(), null));
156+
}
157+
else if (region.isPresent()) {
158+
builder.setRegion(region.get());
159+
}
160+
else {
161+
// This is to keep the AWS SDK upgrading to 1.11.x to be backward compatible with old configuration.
162+
//
163+
// On SDK 1.10.x, when neither endpoint nor region is set explicitly, the client's endpoint will be by
164+
// default `s3.amazonaws.com`. And for pre-Signature-V4, this will work fine as the bucket's region
165+
// will be resolved to the appropriate region on server (AWS) side.
166+
//
167+
// On SDK 1.11.x, a region will be computed on client side by AwsRegionProvider and the endpoint now will
168+
// be region-specific `<region>.s3.amazonaws.com` and might be the wrong one.
169+
//
170+
// So a default endpoint of `s3.amazonaws.com` when both endpoint and region configs are absent are
171+
// necessary to make old configurations won't suddenly break. The side effect is that this will render
172+
// AwsRegionProvider useless. And it's worth to note that Signature-V4 won't work with either versions with
173+
// no explicit region or endpoint as the region (inferrable from endpoint) are necessary for signing.
174+
builder.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration("s3.amazonaws.com", null));
175+
}
176+
177+
builder.withForceGlobalBucketAccessEnabled(true);
178+
return builder.build();
179+
}
180+
181+
private AWSCredentialsProvider getCredentialsProvider(PluginTask task)
133182
{
134-
AmazonS3Client client;
183+
return AwsCredentials.getAWSCredentialsProvider(task);
184+
}
185+
186+
private ClientConfiguration getClientConfiguration(PluginTask task)
187+
{
188+
ClientConfiguration clientConfig = new ClientConfiguration();
135189

136-
// TODO: Support more configurations.
137-
ClientConfiguration config = new ClientConfiguration();
190+
clientConfig.setMaxConnections(50); // SDK default: 50
191+
clientConfig.setSocketTimeout(8 * 60 * 1000); // SDK default: 50*1000
192+
clientConfig.setRetryPolicy(PredefinedRetryPolicies.NO_RETRY_POLICY);
138193

194+
// set http proxy
195+
// backward compatibility
139196
if (task.getProxyHost().isPresent()) {
140-
config.setProxyHost(task.getProxyHost().get());
197+
logger.warn("Configuration with \"proxy_host\" is deprecated. Use \"http_proxy.host\" instead.");
198+
if (!task.getHttpProxy().isPresent()) {
199+
ConfigMapper configMapper = CONFIG_MAPPER_FACTORY.createConfigMapper();
200+
ConfigSource configSource = CONFIG_MAPPER_FACTORY.newConfigSource();
201+
configSource.set("host", task.getProxyHost().get());
202+
HttpProxy httpProxy = configMapper.map(configSource, HttpProxy.class);
203+
task.setHttpProxy(Optional.of(httpProxy));
204+
} else {
205+
HttpProxy httpProxy = task.getHttpProxy().get();
206+
if (httpProxy.getHost().isEmpty()) {
207+
httpProxy.setHost(task.getProxyHost().get());
208+
task.setHttpProxy(Optional.of(httpProxy));
209+
}
210+
}
141211
}
142212

143213
if (task.getProxyPort().isPresent()) {
144-
config.setProxyPort(task.getProxyPort().get());
214+
logger.warn("Configuration with \"proxy_port\" is deprecated. Use \"http_proxy.port\" instead.");
215+
HttpProxy httpProxy = task.getHttpProxy().get();
216+
if (!httpProxy.getPort().isPresent()) {
217+
httpProxy.setPort(task.getProxyPort());
218+
task.setHttpProxy(Optional.of(httpProxy));
219+
}
145220
}
146221

147-
if (task.getAccessKeyId().isPresent()) {
148-
BasicAWSCredentials basicAWSCredentials = new BasicAWSCredentials(
149-
task.getAccessKeyId().get(), task.getSecretAccessKey().get());
150-
151-
client = new AmazonS3Client(basicAWSCredentials, config);
222+
if (task.getHttpProxy().isPresent()) {
223+
setHttpProxyInAwsClient(clientConfig, task.getHttpProxy().get());
152224
}
153-
else {
154-
// Use default credential provider chain.
155-
client = new AmazonS3Client(config);
225+
226+
return clientConfig;
227+
}
228+
229+
private void setHttpProxyInAwsClient(ClientConfiguration clientConfig, HttpProxy httpProxy) {
230+
// host
231+
clientConfig.setProxyHost(httpProxy.getHost());
232+
233+
// port
234+
if (httpProxy.getPort().isPresent()) {
235+
clientConfig.setProxyPort(httpProxy.getPort().get());
156236
}
157237

158-
if (task.getEndpoint().isPresent()) {
159-
client.setEndpoint(task.getEndpoint().get());
238+
// https
239+
clientConfig.setProtocol(httpProxy.getHttps() ? Protocol.HTTPS : Protocol.HTTP);
240+
241+
// user
242+
if (httpProxy.getUser().isPresent()) {
243+
clientConfig.setProxyUsername(httpProxy.getUser().get());
160244
}
161245

162-
return client;
246+
// password
247+
if (httpProxy.getPassword().isPresent()) {
248+
clientConfig.setProxyPassword(httpProxy.getPassword().get());
249+
}
163250
}
164251

165252
public S3FileOutput(PluginTask task, int taskIndex)

0 commit comments

Comments
 (0)