-
Notifications
You must be signed in to change notification settings - Fork 6.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
S3Express support #59965
S3Express support #59965
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,12 +21,44 @@ | |
#include <aws/s3/model/UploadPartCopyRequest.h> | ||
#include <aws/s3/model/DeleteObjectRequest.h> | ||
#include <aws/s3/model/DeleteObjectsRequest.h> | ||
#include <aws/s3/model/ChecksumAlgorithm.h> | ||
#include <aws/s3/model/CompletedPart.h> | ||
#include <aws/core/utils/HashingUtils.h> | ||
|
||
#include <base/defines.h> | ||
|
||
namespace DB::S3 | ||
{ | ||
|
||
namespace Model = Aws::S3::Model; | ||
|
||
/// Used only for S3Express | ||
namespace RequestChecksum | ||
{ | ||
inline void setPartChecksum(Model::CompletedPart & part, const std::string & checksum) | ||
{ | ||
part.SetChecksumCRC32(checksum); | ||
} | ||
|
||
inline void setRequestChecksum(Model::UploadPartRequest & req, const std::string & checksum) | ||
{ | ||
req.SetChecksumCRC32(checksum); | ||
} | ||
|
||
inline std::string calculateChecksum(Model::UploadPartRequest & req) | ||
{ | ||
chassert(req.GetChecksumAlgorithm() == Aws::S3::Model::ChecksumAlgorithm::CRC32); | ||
return Aws::Utils::HashingUtils::Base64Encode(Aws::Utils::HashingUtils::CalculateCRC32(*(req.GetBody()))); | ||
} | ||
|
||
template <typename R> | ||
inline void setChecksumAlgorithm(R & request) | ||
{ | ||
if constexpr (requires { request.SetChecksumAlgorithm(Model::ChecksumAlgorithm::CRC32); }) | ||
request.SetChecksumAlgorithm(Model::ChecksumAlgorithm::CRC32); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This template function looks like a bit of overcomplication. Which actual types of requests need this function There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. to my understanding it is needed for all requests with body. e.g. also |
||
}; | ||
|
||
template <typename BaseRequest> | ||
class ExtendedRequest : public BaseRequest | ||
{ | ||
|
@@ -49,11 +81,13 @@ class ExtendedRequest : public BaseRequest | |
|
||
Aws::String GetChecksumAlgorithmName() const override | ||
{ | ||
chassert(!is_s3express_bucket || checksum); | ||
|
||
/// Return empty string is enough to disable checksums (see | ||
/// AWSClient::AddChecksumToRequest [1] for more details). | ||
/// | ||
/// [1]: https://github.com/aws/aws-sdk-cpp/blob/b0ee1c0d336dbb371c34358b68fba6c56aae2c92/src/aws-cpp-sdk-core/source/client/AWSClient.cpp#L783-L839 | ||
if (!checksum) | ||
if (!is_s3express_bucket && !checksum) | ||
return ""; | ||
return BaseRequest::GetChecksumAlgorithmName(); | ||
} | ||
|
@@ -84,16 +118,20 @@ class ExtendedRequest : public BaseRequest | |
} | ||
|
||
/// Disable checksum to avoid extra read of the input stream | ||
void disableChecksum() const | ||
void disableChecksum() const { checksum = false; } | ||
|
||
void setIsS3ExpressBucket() | ||
{ | ||
checksum = false; | ||
is_s3express_bucket = true; | ||
RequestChecksum::setChecksumAlgorithm(*this); | ||
} | ||
|
||
protected: | ||
mutable std::string region_override; | ||
mutable std::optional<S3::URI> uri_override; | ||
mutable ApiMode api_mode{ApiMode::AWS}; | ||
mutable bool checksum = true; | ||
bool is_s3express_bucket = false; | ||
}; | ||
|
||
class CopyObjectRequest : public ExtendedRequest<Model::CopyObjectRequest> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This namespace seems to be redundant and it seems it makes the code less clear.
These functions are very simple and it seems you can just do what you want directly without these functions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yep, let me explain what problem I tried to solve.
in
Client::doRequest()
we doif (client_settings.is_s3express_bucket) request.setIsS3ExpressBucket();
that basically sets checksum algorithm. and then (much higher on the stack) in
WriteBufferFromS3
we will have to set the appropriate checksum. and because there are multiple checksum algorithms (and consequently multipleSetChecksum*
methods) it looked really random to just call somecrc
-related methods out of nowhere. so I thought that since we're gonna use only one algorithm inside CH codebase anyway - it would be a good idea to abstract it away inside some class or just a namespace.if you still think it doesn't worth it - i'm ok to get rid of it.