Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize S3 downloading for TFRecord reader #5554

Merged
merged 2 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 77 additions & 44 deletions dali/operators/reader/loader/indexed_file_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,43 +15,56 @@
#ifndef DALI_OPERATORS_READER_LOADER_INDEXED_FILE_LOADER_H_
#define DALI_OPERATORS_READER_LOADER_INDEXED_FILE_LOADER_H_

#include <vector>
#include <string>
#include <tuple>
#include <fstream>
#include <memory>
#include <queue>
#include <mutex>
#include <queue>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "dali/core/call_at_exit.h"
#include "dali/core/common.h"
#include "dali/core/mm/memory.h"
#include "dali/operators/reader/loader/loader.h"
#include "dali/util/uri.h"
#include "dali/pipeline/util/thread_pool.h"
#include "dali/util/file.h"
#include "dali/util/odirect_file.h"
#include "dali/core/call_at_exit.h"
#include "dali/util/uri.h"

namespace dali {

class IndexedFileLoader : public Loader<CPUBackend, Tensor<CPUBackend>, true> {
// A single sample handed from IndexedFileLoader to the reader: the payload
// tensor plus an optional deferred-read job.
struct IndexedFileLoaderSample {
  // Sample payload. Depending on the read path it either owns the bytes
  // (Resize + Read) or shares externally owned memory (ShareData).
  Tensor<CPUBackend> tensor;
  // Deferred work that performs the actual read into `tensor`'s storage
  // (used for non-local files, e.g. S3/remote URIs); empty when the data
  // was read eagerly. NOTE(review): executed later by the prefetch
  // thread(s) — confirm against the DataReader prefetch loop.
  std::function<void(void)> work;
};

class IndexedFileLoader : public Loader<CPUBackend, IndexedFileLoaderSample, true> {
public:
explicit IndexedFileLoader(const OpSpec& spec)
: Loader(spec),
paths_(spec.GetRepeatedArgument<std::string>("path")),
index_paths_(spec.GetRepeatedArgument<std::string>("index_path")),
current_index_(0), current_file_index_(0), current_file_(nullptr),
use_o_direct_(spec.HasArgument("use_o_direct") && spec.GetArgument<bool>("use_o_direct")) {
DALI_ENFORCE(dont_use_mmap_ || !use_o_direct_, make_string("Cannot use use_o_direct with ",
"``dont_use_mmap=False``."));
if (use_o_direct_) {
o_direct_chunk_size_ = ODirectFileStream::GetChunkSize();
o_direct_alignm_ = ODirectFileStream::GetAlignment();
o_direct_read_len_alignm_ = ODirectFileStream::GetLenAlignment();
}
: Loader(spec),
paths_(spec.GetRepeatedArgument<std::string>("path")),
index_paths_(spec.GetRepeatedArgument<std::string>("index_path")),
current_index_(0),
current_file_index_(0),
current_file_(nullptr),
use_o_direct_(spec.HasArgument("use_o_direct") && spec.GetArgument<bool>("use_o_direct")) {
DALI_ENFORCE(dont_use_mmap_ || !use_o_direct_,
make_string("Cannot use use_o_direct with ", "``dont_use_mmap=False``."));
if (use_o_direct_) {
o_direct_chunk_size_ = ODirectFileStream::GetChunkSize();
o_direct_alignm_ = ODirectFileStream::GetAlignment();
o_direct_read_len_alignm_ = ODirectFileStream::GetLenAlignment();
}
}

void ReadSample(Tensor<CPUBackend>& tensor) override {
void PrepareEmpty(IndexedFileLoaderSample &sample) override {
PrepareEmptyTensor(sample.tensor);
sample.work = {};
}

void ReadSample(IndexedFileLoaderSample& sample) override {
MoveToNextShard(current_index_);

int64 seek_pos, size;
Expand All @@ -70,21 +83,26 @@ class IndexedFileLoader : public Loader<CPUBackend, Tensor<CPUBackend>, true> {
opts.use_mmap = !copy_read_data_;
opts.use_odirect = use_o_direct_;

auto uri = URI::Parse(path, URI::ParseOpts::AllowNonEscaped);
bool local_file = !uri.valid() || uri.scheme() == "file";

if (file_index != current_file_index_) {
current_file_.reset();
current_file_ = FileStream::Open(path, opts);
current_file_sz_ = current_file_->Size();
current_file_index_ = file_index;
// invalidate the buffer
if (use_o_direct_) read_buffer_.reset();
if (use_o_direct_)
read_buffer_.reset();
}

// if image is cached, skip loading
if (ShouldSkipImage(image_key)) {
meta.SetSkipSample(true);
should_seek_ = true;
tensor.Reset();
tensor.SetMeta(meta);
tensor.Resize({0}, DALI_UINT8);
sample.tensor.Reset();
sample.tensor.SetMeta(meta);
sample.tensor.Resize({0}, DALI_UINT8);
return;
}

Expand All @@ -98,10 +116,10 @@ class IndexedFileLoader : public Loader<CPUBackend, Tensor<CPUBackend>, true> {
auto p = current_file_->Get(size);
DALI_ENFORCE(p != nullptr, "Error reading from a file " + paths_[current_file_index_]);
// Wrap the raw data in the Tensor object.
tensor.ShareData(p, size, false, {size}, DALI_UINT8, CPU_ONLY_DEVICE_ID);
sample.tensor.ShareData(p, size, false, {size}, DALI_UINT8, CPU_ONLY_DEVICE_ID);
} else {
if (tensor.shares_data()) {
tensor.Reset();
if (sample.tensor.shares_data()) {
sample.tensor.Reset();
}
if (opts.use_odirect) {
/*
Expand All @@ -118,8 +136,8 @@ class IndexedFileLoader : public Loader<CPUBackend, Tensor<CPUBackend>, true> {
*/
          // read again if there is no buffer or the requested piece lies outside of it
bool after_buffer_start = seek_pos >= static_cast<int64>(read_buffer_pos_);
bool before_buffer_end = seek_pos + size <
static_cast<int64>(read_buffer_pos_ + read_buffer_data_size_);
bool before_buffer_end =
seek_pos + size < static_cast<int64>(read_buffer_pos_ + read_buffer_data_size_);
          // the buffer needs to exist and the data we look for needs to be inside it
if (!read_buffer_ || !(after_buffer_start && before_buffer_end)) {
// check how much we need to allocate to house the required sample, but no less than
Expand Down Expand Up @@ -150,13 +168,13 @@ class IndexedFileLoader : public Loader<CPUBackend, Tensor<CPUBackend>, true> {
auto read_start = block_start + read_off;
            // we should read either the chunk size or the remainder of the file
auto min_read = std::min(o_direct_chunk_size_tmp, seek_pos + size - read_start);
auto work = [tmp_file_ptr, file, dst_ptr, o_direct_chunk_size_tmp, min_read,
read_start, file_name]() {
auto work = [tmp_file_ptr, file, dst_ptr, o_direct_chunk_size_tmp, min_read, read_start,
file_name]() {
auto ret = file->ReadAt(dst_ptr, o_direct_chunk_size_tmp, read_start);
DALI_ENFORCE(ret >= min_read && ret <= o_direct_chunk_size_tmp,
make_string("Failed to read file: ", file_name,
", read: ", ret, " while it should be in range [", min_read,
", ", o_direct_chunk_size_tmp, "]"));
make_string("Failed to read file: ", file_name, ", read: ", ret,
" while it should be in range [", min_read, ", ",
o_direct_chunk_size_tmp, "]"));
};
            // store the work lambda into the queue so the prefetch thread can pick it up later and
// execute in multiple threads
Expand All @@ -165,17 +183,30 @@ class IndexedFileLoader : public Loader<CPUBackend, Tensor<CPUBackend>, true> {
}
shared_ptr<void> tmp_mem(read_buffer_, read_buffer_.get() + (seek_pos - read_buffer_pos_));
// make sure it is a big value in signed range
tensor.ShareData(tmp_mem, size, false, {size}, DALI_UINT8, -1);
sample.tensor.ShareData(tmp_mem, size, false, {size}, DALI_UINT8, -1);
} else if (!local_file) {
sample.tensor.Resize({size}, DALI_UINT8);
auto* out_data_ptr = static_cast<uint8_t*>(sample.tensor.raw_mutable_data());
auto file_sz = current_file_sz_;
auto work = [path, out_data_ptr, seek_pos, size, opts, file_sz]() {
auto file = FileStream::Open(path, opts, file_sz);
auto file_cleanup = AtScopeExit([&file] {
if (file)
file->Close();
});
file->SeekRead(seek_pos, SEEK_SET);
int64 n_read = file->Read(out_data_ptr, size);
DALI_ENFORCE(n_read == size, "Error reading from a file: " + path);
};
sample.work = std::move(work);
} else {
tensor.Resize({size}, DALI_UINT8);

sample.tensor.Resize({size}, DALI_UINT8);
int64 n_read =
current_file_->Read(static_cast<uint8_t*>(tensor.raw_mutable_data()), size);
current_file_->Read(static_cast<uint8_t*>(sample.tensor.raw_mutable_data()), size);
DALI_ENFORCE(n_read == size, "Error reading from a file " + paths_[current_file_index_]);
}
}

tensor.SetMeta(meta);
sample.tensor.SetMeta(meta);
return;
}

Expand All @@ -189,7 +220,7 @@ class IndexedFileLoader : public Loader<CPUBackend, Tensor<CPUBackend>, true> {

virtual void ReadIndexFile(const std::vector<std::string>& index_uris) {
DALI_ENFORCE(index_uris.size() == paths_.size(),
"Number of index files needs to match the number of data files");
"Number of index files needs to match the number of data files");
for (size_t i = 0; i < index_uris.size(); ++i) {
const auto& path = index_uris[i];
auto index_file = FileStream::Open(path);
Expand All @@ -215,8 +246,7 @@ class IndexedFileLoader : public Loader<CPUBackend, Tensor<CPUBackend>, true> {

void PrepareMetadataImpl() override {
if (!dont_use_mmap_) {
mmap_reserver_ = FileStream::MappingReserver(
static_cast<unsigned int>(initial_buffer_fill_));
mmap_reserver_ = FileStream::MappingReserver(static_cast<unsigned int>(initial_buffer_fill_));
}
copy_read_data_ = dont_use_mmap_ || !mmap_reserver_.CanShareMappedData();

Expand Down Expand Up @@ -244,9 +274,11 @@ class IndexedFileLoader : public Loader<CPUBackend, Tensor<CPUBackend>, true> {
opts.use_mmap = !copy_read_data_;
opts.use_odirect = use_o_direct_;
current_file_ = FileStream::Open(path, opts);
current_file_sz_ = current_file_->Size();
current_file_index_ = file_index;
// invalidate the buffer
if (use_o_direct_) read_buffer_.reset();
if (use_o_direct_)
read_buffer_.reset();
}
current_file_->SeekRead(seek_pos);
}
Expand All @@ -269,6 +301,7 @@ class IndexedFileLoader : public Loader<CPUBackend, Tensor<CPUBackend>, true> {
size_t read_buffer_pos_ = 0;
size_t read_buffer_size_ = 0;
size_t read_buffer_data_size_ = 0;
size_t current_file_sz_ = 0;

typedef std::function<void(void)> ReadWork;
std::queue<ReadWork> jobs_;
Expand Down
4 changes: 2 additions & 2 deletions dali/operators/reader/loader/loader_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ TYPED_TEST(DataLoadStoreTest, RecordIOLoaderMmmap) {

reader->PrepareMetadata();
auto sample = reader->ReadOne(false, false);
EXPECT_EQ(sample->shares_data(), !dont_use_mmap);
EXPECT_EQ(sample->tensor.shares_data(), !dont_use_mmap);
}
}

Expand All @@ -115,7 +115,7 @@ TYPED_TEST(DataLoadStoreTest, TFRecordLoaderMmmap) {

reader->PrepareMetadata();
auto sample = reader->ReadOne(false, false);
EXPECT_EQ(sample->shares_data(), !dont_use_mmap);
EXPECT_EQ(sample->tensor.shares_data(), !dont_use_mmap);
}
}

Expand Down
22 changes: 11 additions & 11 deletions dali/operators/reader/loader/recordio_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class RecordIOLoader : public IndexedFileLoader {
index_file.close();
}

void ReadSample(Tensor<CPUBackend>& tensor) override {
void ReadSample(IndexedFileLoaderSample& sample) override {
// if we moved to next shard wrap up
MoveToNextShard(current_index_);

Expand All @@ -102,9 +102,9 @@ class RecordIOLoader : public IndexedFileLoader {
if (ShouldSkipImage(image_key)) {
meta.SetSkipSample(true);
should_seek_ = true;
tensor.Reset();
tensor.SetMeta(meta);
tensor.Resize({0}, DALI_UINT8);
sample.tensor.Reset();
sample.tensor.SetMeta(meta);
sample.tensor.Resize({0}, DALI_UINT8);
return;
}

Expand All @@ -117,27 +117,27 @@ class RecordIOLoader : public IndexedFileLoader {
int64 n_read = 0;
bool use_read = copy_read_data_ || !current_file_->CanMemoryMap();
if (use_read) {
tensor.Resize({size});
sample.tensor.Resize({size});
}
while (p == nullptr && n_read < size) {
if (!use_read) {
p = current_file_->Get(size);
// file is divided between two files, we need to fallback to read here
if (p == nullptr) {
if (tensor.shares_data()) {
tensor.Reset();
if (sample.tensor.shares_data()) {
sample.tensor.Reset();
}
tensor.Resize({size}, DALI_UINT8);
sample.tensor.Resize({size}, DALI_UINT8);
use_read = true;
} else {
n_read = size;
// Wrap the raw data in the Tensor object.
tensor.ShareData(p, size, false, {size}, DALI_UINT8, CPU_ONLY_DEVICE_ID);
sample.tensor.ShareData(p, size, false, {size}, DALI_UINT8, CPU_ONLY_DEVICE_ID);
next_seek_pos_ = seek_pos + size;
}
}
if (use_read) {
n_read += current_file_->Read(tensor.mutable_data<uint8_t>() + n_read,
n_read += current_file_->Read(sample.tensor.mutable_data<uint8_t>() + n_read,
size - n_read);
next_seek_pos_ = seek_pos + n_read;
}
Expand All @@ -155,7 +155,7 @@ class RecordIOLoader : public IndexedFileLoader {
continue;
}
}
tensor.SetMeta(meta);
sample.tensor.SetMeta(meta);
}
};

Expand Down
13 changes: 7 additions & 6 deletions dali/operators/reader/mxnet_reader_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,23 @@
#include "dali/operators/reader/parser/recordio_parser.h"

namespace dali {
class MXNetReader : public DataReader<CPUBackend, Tensor<CPUBackend>, Tensor<CPUBackend>, true> {
class MXNetReader
: public DataReader<CPUBackend, IndexedFileLoaderSample, Tensor<CPUBackend>, true> {
public:
explicit MXNetReader(const OpSpec& spec)
: DataReader<CPUBackend, Tensor<CPUBackend>, Tensor<CPUBackend>, true>(spec) {
: DataReader<CPUBackend, IndexedFileLoaderSample, Tensor<CPUBackend>, true>(spec) {
loader_ = InitLoader<RecordIOLoader>(spec);
parser_.reset(new RecordIOParser(spec));
this->SetInitialSnapshot();
}

void RunImpl(SampleWorkspace &ws) override {
const auto& tensor = GetSample(ws.data_idx());
ParseIfNeeded(tensor, &ws);
void RunImpl(SampleWorkspace& ws) override {
const auto& sample = GetSample(ws.data_idx());
ParseIfNeeded(sample.tensor, &ws);
}

protected:
USE_READER_OPERATOR_MEMBERS(CPUBackend, Tensor<CPUBackend>, Tensor<CPUBackend>, true);
USE_READER_OPERATOR_MEMBERS(CPUBackend, IndexedFileLoaderSample, Tensor<CPUBackend>, true);
};
} // namespace dali

Expand Down
8 changes: 4 additions & 4 deletions dali/operators/reader/parser/recordio_parser.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2017-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,11 +34,11 @@ class RecordIOParser : public Parser<Tensor<CPUBackend>> {
Parser<Tensor<CPUBackend>>(spec) {
}

void Parse(const Tensor<CPUBackend>& data, SampleWorkspace* ws) override {
void Parse(const Tensor<CPUBackend>& tensor, SampleWorkspace* ws) override {
auto& image = ws->Output<CPUBackend>(0);
auto& label = ws->Output<CPUBackend>(1);
ReadSingleImageRecordIO(image, label, data.data<uint8_t>());
image.SetSourceInfo(data.GetSourceInfo());
ReadSingleImageRecordIO(image, label, tensor.data<uint8_t>());
image.SetSourceInfo(tensor.GetSourceInfo());
}

private:
Expand Down
Loading
Loading