Skip to content

Commit

Permalink
Video reader label (#998)
Browse files Browse the repository at this point in the history
* Adds label support to VideoReader operator
* Removes video_example.py, adds video_label_example.py to test script
* Adds jupyter notebook VideoReader example for reading labelled videos

Signed-off-by: Abhishek Sansanwal <asansanwal@nvidia.com>
  • Loading branch information
a-sansanwal authored and JanuszL committed Jun 27, 2019
1 parent 0fd55c2 commit dcea67b
Show file tree
Hide file tree
Showing 9 changed files with 515 additions and 24 deletions.
86 changes: 83 additions & 3 deletions dali/pipeline/operators/reader/loader/video_loader.cc
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.
#include "dali/pipeline/operators/reader/loader/video_loader.h"

#include <dirent.h>
#include <unistd.h>

#include <iomanip>
Expand Down Expand Up @@ -41,6 +42,79 @@ auto codecpar(AVStream* stream) -> decltype(stream->codec) {
}
#endif

/**
 * @brief Collects the files located directly inside `path/curr_entry` and appends
 *        them to `file_label_pairs`, each paired with `label`.
 *
 * Only regular files and symlinks are accepted. The "." and ".." entries are
 * always skipped. When the directory entry does not carry a usable type
 * (no `d_type` support, or the filesystem reports DT_UNKNOWN) the entry is
 * checked with stat() and dropped if it is known not to be a regular file, so
 * that sub-directories never end up in the list of videos.
 *
 * @param path              Root directory that contains the class directories.
 * @param curr_entry        Name of the class directory (relative to `path`).
 * @param label             Label assigned to every file found in that directory.
 * @param file_label_pairs  Output list; (full path, label) pairs are appended to it.
 *
 * The directory must be openable; otherwise DALI_ENFORCE reports the failure.
 */
inline void assemble_video_list(const std::string& path, const std::string& curr_entry, int label,
                                std::vector<std::pair<std::string, int>> &file_label_pairs) {
  const std::string curr_dir_path = path + "/" + curr_entry;
  DIR *dir = opendir(curr_dir_path.c_str());
  DALI_ENFORCE(dir != nullptr, "Directory " + curr_dir_path + " could not be opened");

  struct dirent *entry;

  while ((entry = readdir(dir))) {
    // readdir() also yields the self/parent links; they are never data files.
    if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
      continue;
    }

    const std::string full_path = curr_dir_path + "/" + entry->d_name;

    bool type_known = false;
#ifdef _DIRENT_HAVE_D_TYPE
    /*
     * Regular files and symlinks supported. If FS returns DT_UNKNOWN,
     * the entry is validated with stat() below.
     */
    if (entry->d_type != DT_UNKNOWN) {
      if (entry->d_type != DT_REG && entry->d_type != DT_LNK) {
        continue;
      }
      type_known = true;
    }
#endif
    if (!type_known) {
      // Fall back to stat(); it follows symlinks, so a link to a regular file
      // is still accepted. If stat() itself fails the entry is kept, leaving
      // the error to be reported when the file is actually opened.
      struct stat s;
      if (stat(full_path.c_str(), &s) == 0 && !S_ISREG(s.st_mode)) {
        continue;
      }
    }

    file_label_pairs.emplace_back(full_path, label);
  }
  closedir(dir);
}

/**
 * @brief Builds the list of (video file, label) pairs the loader will read.
 *
 * Two modes, selected by `file_root`:
 *  - `file_root` non-empty: every immediate sub-directory of `file_root` is
 *    treated as one class. Sub-directories are sorted by name and the label of
 *    a class is its position in that sorted order. The files of each class are
 *    gathered with assemble_video_list() and the final list is sorted.
 *  - `file_root` empty: every entry of `filenames` is returned with label 0.
 *
 * @param file_root  Directory holding one sub-directory per class, or empty.
 * @param filenames  Explicit list of files, used only when `file_root` is empty.
 * @return (path, label) pairs.
 *
 * Failure to open `file_root`, or to stat() one of its entries, is reported
 * through DALI_ENFORCE.
 */
vector<std::pair<string, int>> filesystem::get_file_label_pair(
    const std::string& file_root,
    const std::vector<std::string>& filenames) {
  std::vector<std::pair<std::string, int>> file_label_pairs;
  std::vector<std::string> entry_name_list;

  if (!file_root.empty()) {
    // open the root
    DIR *dir = opendir(file_root.c_str());

    DALI_ENFORCE(dir != nullptr,
        "Directory " + file_root + " could not be opened.");

    struct dirent *entry;

    while ((entry = readdir(dir))) {
      // Skip the self/parent links before touching the filesystem again.
      if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) continue;

      const std::string entry_name(entry->d_name);
      const std::string full_path = file_root + "/" + entry_name;

      struct stat s;
      const int ret = stat(full_path.c_str(), &s);
      if (ret != 0) {
        // The enforcement below is expected to leave this function (callers
        // catch std::exception), so release the directory handle first
        // instead of leaking it.
        closedir(dir);
      }
      DALI_ENFORCE(ret == 0,
          "Could not access " + full_path + " during directory traversal.");

      if (S_ISDIR(s.st_mode)) {
        entry_name_list.push_back(entry_name);
      }
    }
    closedir(dir);

    // sort directories to preserve class alphabetic order, as readdir could
    // return unordered dir list. Otherwise file reader for training and validation
    // could return directories with the same names in completely different order
    std::sort(entry_name_list.begin(), entry_name_list.end());
    for (unsigned dir_count = 0; dir_count < entry_name_list.size(); ++dir_count) {
      assemble_video_list(file_root, entry_name_list[dir_count], dir_count, file_label_pairs);
    }

    // sort file names as well
    std::sort(file_label_pairs.begin(), file_label_pairs.end());
  } else {
    // No directory structure to derive classes from: every file gets label 0.
    file_label_pairs.reserve(filenames.size());
    for (const auto& filename : filenames) {
      file_label_pairs.emplace_back(filename, 0);
    }
  }

  LOG_LINE << "read " << file_label_pairs.size() << " files from "
           << entry_name_list.size() << " directories\n";

  return file_label_pairs;
}

// Are these good numbers? Allow them to be set?
static constexpr auto frames_used_warning_ratio = 3.0f;
Expand Down Expand Up @@ -402,10 +476,14 @@ void VideoLoader::receive_frames(SequenceWrapper& sequence) {
sequence.wait();
}

std::pair<int, int> VideoLoader::load_width_height(const std::string& filename) {
std::pair<int, int> VideoLoader::load_width_height() {
av_register_all();

AVFormatContext* raw_fmt_ctx = nullptr;

DALI_ENFORCE(!file_label_pair_.empty(), "Could not read any files.");
std::string filename = file_label_pair_[0].first;

auto ret = avformat_open_input(&raw_fmt_ctx, filename.c_str(), NULL, NULL);
if (ret < 0) {
std::stringstream ss;
Expand Down Expand Up @@ -445,11 +523,13 @@ void VideoLoader::PrepareEmpty(SequenceWrapper &tensor) {

// Produces the next sample in `tensor`: looks up the frame_starts_ entry at
// current_frame_idx_, queues `count_` frames of that sequence for reading,
// receives them into `tensor`, advances current_frame_idx_ and stores the
// sequence's label on `tensor`.
void VideoLoader::ReadSample(SequenceWrapper& tensor) {
// TODO(spanev) remove the async between the 2 following methods?
// NOTE(review): the next two statements (fileidx_frame / filenames_) look like the
// pre-change lookup that this commit replaces with the seq_meta / file_label_pair_
// variant right after them; both are visible in this view - confirm that only the
// seq_meta variant remains in the actual file.
auto& fileidx_frame = frame_starts_[current_frame_idx_];
push_sequence_to_read(filenames_[fileidx_frame.first], fileidx_frame.second, count_);
// seq_meta refers to the frame_starts_ element itself, so it is still usable for the
// label assignment below after current_frame_idx_ has been incremented.
auto& seq_meta = frame_starts_[current_frame_idx_];
push_sequence_to_read(file_label_pair_[seq_meta.filename_idx].first,
seq_meta.frame_idx, count_);
receive_frames(tensor);
++current_frame_idx_;

// Propagate the label of the sequence that was just read.
tensor.label = seq_meta.label;
MoveToNextShard(current_frame_idx_);
}

Expand Down
34 changes: 28 additions & 6 deletions dali/pipeline/operators/reader/loader/video_loader.h
Expand Up @@ -18,6 +18,9 @@
extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <dirent.h>
#include <sys/stat.h>
#include <errno.h>
}

#include <algorithm>
Expand Down Expand Up @@ -50,6 +53,13 @@ auto codecpar(AVStream* stream) -> decltype(stream->codecpar);
auto codecpar(AVStream* stream) -> decltype(stream->codec);
#endif

namespace filesystem {

/**
 * @brief Builds the list of (file path, label) pairs to be read.
 *
 * When `path` is non-empty, each immediate sub-directory of `path` is one
 * class: sub-directories are sorted by name, the label is the position in that
 * order, and the files found in each sub-directory receive its label.
 * When `path` is empty, every entry of `filenames` is returned with label 0.
 *
 * @param path       Root directory with one sub-directory per class, or empty.
 * @param filenames  Explicit file list, used only when `path` is empty.
 * @return (file path, label) pairs.
 */
std::vector<std::pair<std::string, int>> get_file_label_pair(const std::string& path,
const std::vector<std::string>& filenames);

} // namespace filesystem

struct OpenFile {
bool open = false;
AVRational frame_base_;
Expand Down Expand Up @@ -97,12 +107,19 @@ struct VideoLoaderStats {
uint64_t frames_used;
};

// Describes where one sequence starts and which label it carries.
// Instances are aggregate-initialized positionally, so the member order matters.
struct sequence_meta {
// Index of the source file in the loader's file/label list.
size_t filename_idx;
// Index of the first frame of the sequence within that file.
int frame_idx;
// Label of the source file, copied onto every sequence read from it.
int label;
};


class VideoLoader : public Loader<GPUBackend, SequenceWrapper> {
public:
explicit inline VideoLoader(const OpSpec& spec,
const std::vector<std::string>& filenames)
: Loader<GPUBackend, SequenceWrapper>(spec),
file_root_(spec.GetArgument<std::string>("file_root")),
count_(spec.GetArgument<int>("sequence_length")),
step_(spec.GetArgument<int>("step")),
stride_(spec.GetArgument<int>("stride")),
Expand All @@ -117,6 +134,9 @@ class VideoLoader : public Loader<GPUBackend, SequenceWrapper> {
stop_(false) {
if (step_ < 0)
step_ = count_ * stride_;

file_label_pair_ = filesystem::get_file_label_pair(file_root_, filenames_);

DALI_ENFORCE(cuvidInitChecked(0),
"Failed to load libnvcuvid.so, needed by the VideoReader operator. "
"If you are running in a Docker container, please refer "
Expand Down Expand Up @@ -151,17 +171,18 @@ class VideoLoader : public Loader<GPUBackend, SequenceWrapper> {
void read_file();
void push_sequence_to_read(std::string filename, int frame, int count);
void receive_frames(SequenceWrapper& sequence);
std::pair<int, int> load_width_height(const std::string& filename);
std::pair<int, int> load_width_height();

protected:
Index SizeImpl() override;

void PrepareMetadataImpl() override {
int total_count = 1 + (count_ - 1) * stride_;
for (size_t i = 0; i < filenames_.size(); ++i) {
int frame_count = get_or_open_file(filenames_[i]).frame_count_;

for (size_t i = 0; i < file_label_pair_.size(); ++i) {
int frame_count = get_or_open_file(file_label_pair_[i].first).frame_count_;
for (int s = 0; s < frame_count && s + total_count <= frame_count; s += step_) {
frame_starts_.emplace_back(i, s);
frame_starts_.emplace_back(sequence_meta{i, s, file_label_pair_[i].second});
}
}

Expand All @@ -187,6 +208,7 @@ class VideoLoader : public Loader<GPUBackend, SequenceWrapper> {
}
}
// Params
std::string file_root_;
int count_;
int step_;
int stride_;
Expand All @@ -211,11 +233,11 @@ class VideoLoader : public Loader<GPUBackend, SequenceWrapper> {

std::thread thread_file_reader_;

// pair -> (filename index, frame index)
std::vector<std::pair<int, int>> frame_starts_;
std::vector<struct sequence_meta> frame_starts_;
Index current_frame_idx_;

volatile bool stop_;
std::vector<std::pair<std::string, int>> file_label_pair_;
};

} // namespace dali
Expand Down
1 change: 1 addition & 0 deletions dali/pipeline/operators/reader/nvdecoder/sequencewrapper.h
Expand Up @@ -79,6 +79,7 @@ struct SequenceWrapper {
int height;
int width;
int channels;
int label;

private:
void wait_until_started_() const {
Expand Down
16 changes: 12 additions & 4 deletions dali/pipeline/operators/reader/video_reader_op.cc
Expand Up @@ -32,10 +32,18 @@ The video codecs can be contained in most of container file formats. FFmpeg is u
Returns a batch of sequences of `sequence_length` frames of shape [N, F, H, W, C] (N being the batch size and F the
number of frames).)code")
.NumInput(0)
.NumOutput(1)
.AddArg("filenames",
R"code(File names of the video files to load.)code",
DALI_STRING_VEC)
.OutputFn([](const OpSpec &spec) {
std::string file_root = spec.GetArgument<std::string>("file_root");
return file_root.empty() ? 1 : 2;
})
.AddOptionalArg("filenames",
R"code(File names of the video files to load.
This option is mutually exclusive with `file_root`.)code",
std::vector<std::string>{})
.AddOptionalArg("file_root",
R"code(Path to a directory containing data files.
This option is mutually exclusive with `filenames`.)code",
std::string())
.AddArg("sequence_length",
R"code(Frames to load per sequence.)code",
DALI_INT32)
Expand Down
32 changes: 31 additions & 1 deletion dali/pipeline/operators/reader/video_reader_op.h
Expand Up @@ -28,12 +28,17 @@ class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {
explicit VideoReader(const OpSpec &spec)
: DataReader<GPUBackend, SequenceWrapper>(spec),
filenames_(spec.GetRepeatedArgument<std::string>("filenames")),
file_root_(spec.GetArgument<std::string>("file_root")),
count_(spec.GetArgument<int>("sequence_length")),
channels_(spec.GetArgument<int>("channels")),
output_scale_(spec.GetArgument<float>("scale")),
dtype_(spec.GetArgument<DALIDataType>("dtype")) {
DALIImageType image_type(spec.GetArgument<DALIImageType>("image_type"));

DALI_ENFORCE(filenames_.empty() ^ file_root_.empty(),
"Either `filenames` or `file_root` argument must be specified"
" but not both");

DALI_ENFORCE(image_type == DALI_RGB || image_type == DALI_YCbCr,
"Image type must be RGB or YCbCr.");

Expand All @@ -42,20 +47,28 @@ class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {


// TODO(spanev): support rescale
// TODO(spanev): Factor out the constructor body to make VideoReader compatible with lazy_init.
try {
loader_ = InitLoader<VideoLoader>(spec, filenames_);
auto w_h = dynamic_cast<VideoLoader*>(loader_.get())->load_width_height(filenames_[0]);
auto w_h = dynamic_cast<VideoLoader*>(loader_.get())->load_width_height();
width_ = static_cast<int>(w_h.first * output_scale_);
height_ = static_cast<int>(w_h.second * output_scale_);
} catch (std::exception &e) {
DALI_FAIL(std::string(e.what()));
}

std::vector<Index> t_shape({count_, height_, width_, channels_});
enable_file_root_ = !file_root_.empty();

for (int i = 0; i < batch_size_; ++i) {
tl_shape_.push_back(t_shape);
}

if (enable_file_root_) {
for (int i = 0; i < batch_size_; ++i) {
label_shape_.push_back({1});
}
}
}

inline ~VideoReader() override = default;
Expand All @@ -66,6 +79,8 @@ class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {

void RunImpl(DeviceWorkspace *ws, const int idx) override {
auto& tl_sequence_output = ws->Output<GPUBackend>(idx);
TensorList<GPUBackend> *label_output = NULL;

if (dtype_ == DALI_FLOAT) {
tl_sequence_output.set_type(TypeInfo::Create<float>());
} else { // dtype_ == DALI_UINT8
Expand All @@ -75,6 +90,12 @@ class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {
tl_sequence_output.Resize(tl_shape_);
tl_sequence_output.SetLayout(DALI_NFHWC);

if (enable_file_root_) {
label_output = &ws->Output<GPUBackend>(idx + 1);
label_output->set_type(TypeInfo::Create<int>());
label_output->Resize(label_shape_);
}

for (int data_idx = 0; data_idx < batch_size_; ++data_idx) {
auto* sequence_output = tl_sequence_output.raw_mutable_tensor(data_idx);

Expand All @@ -83,12 +104,19 @@ class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {
prefetched_sequence.sequence.raw_data(),
prefetched_sequence.sequence.size(),
ws->stream());

if (enable_file_root_) {
auto *label = label_output->mutable_tensor<int>(data_idx);
CUDA_CALL(cudaMemcpyAsync(label, &prefetched_sequence.label, sizeof(int),
cudaMemcpyDefault, ws->stream()));
}
}
}


private:
std::vector<std::string> filenames_;
std::string file_root_;
int count_;
int height_;
int width_;
Expand All @@ -97,8 +125,10 @@ class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {
float output_scale_;

std::vector<std::vector<Index>> tl_shape_;
std::vector<std::vector<Index>> label_shape_;

DALIDataType dtype_;
bool enable_file_root_;

USE_READER_OPERATOR_MEMBERS(GPUBackend, SequenceWrapper);
};
Expand Down
1 change: 1 addition & 0 deletions docs/examples/dataloading.rst
Expand Up @@ -10,4 +10,5 @@ Data Loading
external_input.ipynb
coco_reader.ipynb
video/video_reader_simple_example.ipynb
video/video_reader_label_example.ipynb
sequence_reader_simple_example.ipynb

0 comments on commit dcea67b

Please sign in to comment.