-
Notifications
You must be signed in to change notification settings - Fork 453
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
s3 sources: Add the ability to read a single object from S3
Individual objects are downloaded, and then split on newlines with individual records being sent through to the dataflow layer. There are some open questions about how MzOffsets and other things should map into an S3 world -- "partition" can be mapped to S3 objects, but we can grow to arbitrarily large numbers of S3 objects, so a naive implementation of that won't necessarily make sense. This commit still requires testing infrastructure, but ingesting an S3 object works correctly. Part of #4914
- Loading branch information
1 parent
7ba8157
commit f69e626
Showing
17 changed files
with
529 additions
and
6 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,3 +13,4 @@ | |
|
||
pub mod aws; | ||
pub mod kinesis; | ||
pub mod s3; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
// Copyright Materialize, Inc. All rights reserved. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the LICENSE file. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0. | ||
|
||
//! Utility functions for AWS S3 clients | ||
|
||
use std::time::Duration; | ||
|
||
use anyhow::Context; | ||
use log::info; | ||
use rusoto_core::{HttpClient, Region}; | ||
use rusoto_credential::{ | ||
AutoRefreshingProvider, ChainProvider, ProvideAwsCredentials, StaticProvider, | ||
}; | ||
use rusoto_s3::S3Client; | ||
|
||
/// Construct an S3Client | ||
/// | ||
/// If statically provided connection information information is not provided, | ||
/// falls back to using credentials gathered by rusoto's [`ChainProvider] | ||
/// wrapped in an [`AutoRefreshingProvider`]. | ||
/// | ||
/// The [`AutoRefreshingProvider`] caches the underlying provider's AWS credentials, | ||
/// automatically fetching updated credentials if they've expired. | ||
pub async fn client( | ||
region: Region, | ||
access_key_id: Option<String>, | ||
secret_access_key: Option<String>, | ||
token: Option<String>, | ||
) -> Result<S3Client, anyhow::Error> { | ||
let request_dispatcher = HttpClient::new().context("creating HTTP client for S3 client")?; | ||
let s3_client = match (access_key_id, secret_access_key) { | ||
(Some(access_key_id), Some(secret_access_key)) => { | ||
info!("Creating a new S3 client from provided access_key and secret_access_key"); | ||
S3Client::new_with( | ||
request_dispatcher, | ||
StaticProvider::new(access_key_id, secret_access_key, token, None), | ||
region, | ||
) | ||
} | ||
(None, None) => { | ||
info!( | ||
"AWS access_key_id and secret_access_key not provided, \ | ||
creating a new S3 client using a chain provider." | ||
); | ||
let mut provider = ChainProvider::new(); | ||
provider.set_timeout(Duration::from_secs(10)); | ||
provider.credentials().await?; // ensure that credentials exist | ||
let provider = | ||
AutoRefreshingProvider::new(provider).context("generating AWS credentials")?; | ||
|
||
S3Client::new_with(request_dispatcher, provider, region) | ||
} | ||
(_, _) => anyhow::bail!( | ||
"access_key_id and secret_access_key must either both be provided, or neither" | ||
), | ||
}; | ||
Ok(s3_client) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.