Skip to content

Commit

Permalink
Configurable on C++ side
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom-Newton committed Feb 11, 2024
1 parent e7a5df8 commit 4e8e2ef
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 6 deletions.
18 changes: 12 additions & 6 deletions cpp/src/arrow/filesystem/azurefs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -504,11 +504,14 @@ class ObjectInputFile final : public io::RandomAccessFile {
public:
ObjectInputFile(std::shared_ptr<Blobs::BlobClient> blob_client,
const io::IOContext& io_context, AzureLocation location,
int64_t size = kNoSize)
const AzureOptions& options, int64_t size = kNoSize)
: blob_client_(std::move(blob_client)),
io_context_(io_context),
location_(std::move(location)),
content_length_(size) {}
content_length_(size),
initial_chunk_size_(options.initial_chunk_size),
chunk_size_(options.chunk_size),
concurrency_(options.concurrency) {}

Status Init() {
if (content_length_ != kNoSize) {
Expand Down Expand Up @@ -596,8 +599,8 @@ class ObjectInputFile final : public io::RandomAccessFile {

// Read the desired range of bytes
Http::HttpRange range{position, nbytes};
Storage::Blobs::DownloadBlobToOptions download_options;
download_options.Range = range;
Storage::Blobs::DownloadBlobToOptions download_options{
range, {initial_chunk_size_, chunk_size_, concurrency_}};
try {
return blob_client_
->DownloadTo(reinterpret_cast<uint8_t*>(out), nbytes, download_options)
Expand Down Expand Up @@ -649,6 +652,9 @@ class ObjectInputFile final : public io::RandomAccessFile {
int64_t pos_ = 0;
int64_t content_length_ = kNoSize;
std::shared_ptr<const KeyValueMetadata> metadata_;
int64_t initial_chunk_size_;
int64_t chunk_size_;
int32_t concurrency_;
};

Status CreateEmptyBlockBlob(const Blobs::BlockBlobClient& block_blob_client) {
Expand Down Expand Up @@ -1556,7 +1562,7 @@ class AzureFileSystem::Impl {
GetBlobClient(location.container, location.path));

auto ptr = std::make_shared<ObjectInputFile>(blob_client, fs->io_context(),
std::move(location));
std::move(location), options_);
RETURN_NOT_OK(ptr->Init());
return ptr;
}
Expand All @@ -1575,7 +1581,7 @@ class AzureFileSystem::Impl {
GetBlobClient(location.container, location.path));

auto ptr = std::make_shared<ObjectInputFile>(blob_client, fs->io_context(),
std::move(location), info.size());
std::move(location), options_, info.size());
RETURN_NOT_OK(ptr->Init());
return ptr;
}
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/filesystem/azurefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,14 @@ struct ARROW_EXPORT AzureOptions {
/// This will be ignored if non-empty metadata is passed to OpenOutputStream.
std::shared_ptr<const KeyValueMetadata> default_metadata;

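/// \brief Transfer options applied to each read call on ObjectInputFile.
///
/// These values are passed to the Azure SDK download call, which may split a
/// read into chunks transferred in parallel. Defaults are taken from the
/// Azure SDK. See Azure::Storage::Blobs::DownloadBlobToOptions::TransferOptions.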
int64_t initial_chunk_size = 256 * 1024 * 1024;
int64_t chunk_size = 4 * 1024 * 1024;
int32_t concurrency = 5;

private:
enum class CredentialKind {
kDefault,
Expand Down

0 comments on commit 4e8e2ef

Please sign in to comment.