Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Video reader resize #2097

Merged
merged 15 commits into from
Jul 10, 2020
14 changes: 13 additions & 1 deletion dali/operators/image/resize/resize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@
#include "dali/pipeline/data/views.h"

namespace dali {
namespace detail {
/**
 * @brief Builds the per-axis resampling parameters for a 2D resize.
 *
 * Axis 0 takes its output size from `meta.rsz_h` and axis 1 from `meta.rsz_w`.
 * Both axes are given the same minification and magnification filters.
 *
 * @param meta        transform metadata providing the resized extents
 * @param min_filter  filter assigned to `min_filter` of both axes
 * @param mag_filter  filter assigned to `mag_filter` of both axes
 * @return the populated two-axis parameter set
 */
kernels::ResamplingParams2D GetResamplingParams(
    const TransformMeta &meta, kernels::FilterDesc min_filter, kernels::FilterDesc mag_filter) {
  kernels::ResamplingParams2D resampling;

  auto &axis0 = resampling[0];
  auto &axis1 = resampling[1];

  axis0.output_size = meta.rsz_h;
  axis1.output_size = meta.rsz_w;

  axis1.min_filter = min_filter;
  axis0.min_filter = axis1.min_filter;

  axis1.mag_filter = mag_filter;
  axis0.mag_filter = axis1.mag_filter;

  return resampling;
}
}  // namespace detail

DALI_SCHEMA(ResizeAttr)
.AddOptionalArg("image_type",
Expand Down Expand Up @@ -83,7 +94,8 @@ template <>
void Resize<CPUBackend>::SetupSharedSampleParams(SampleWorkspace &ws) {
const int thread_idx = ws.thread_idx();
per_sample_meta_[thread_idx] = GetTransfomMeta(&ws, spec_);
resample_params_[thread_idx] = GetResamplingParams(per_sample_meta_[thread_idx]);
resample_params_[thread_idx] = detail::GetResamplingParams(
per_sample_meta_[thread_idx], min_filter_, mag_filter_);
}

template <>
Expand Down
3 changes: 2 additions & 1 deletion dali/operators/image/resize/resize.cu
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ void Resize<GPUBackend>::SetupSharedSampleParams(DeviceWorkspace &ws) {
DALI_ENFORCE(input_shape.size() == 3, "Expects 3-dimensional image input.");

per_sample_meta_[i] = GetTransformMeta(spec_, input_shape, &ws, i, ResizeInfoNeeded());
resample_params_[i] = GetResamplingParams(per_sample_meta_[i]);
resample_params_[i] = detail::GetResamplingParams(
per_sample_meta_[i], min_filter_, mag_filter_);
}
}

Expand Down
13 changes: 4 additions & 9 deletions dali/operators/image/resize/resize.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
#include "dali/kernels/imgproc/resample/params.h"

namespace dali {
namespace detail {
/**
 * @brief Creates 2D resampling parameters from transform metadata and a pair of filters.
 *
 * @param meta        transform metadata describing the sample being resized
 * @param min_filter  filter descriptor passed as the minification filter
 * @param mag_filter  filter descriptor passed as the magnification filter
 * @return resampling parameters for the two axes
 */
kernels::ResamplingParams2D GetResamplingParams(
const TransformMeta &meta, kernels::FilterDesc min_filter, kernels::FilterDesc mag_filter);
} // namespace detail

class ResizeAttr : protected ResizeCropMirrorAttr {
public:
Expand Down Expand Up @@ -61,15 +65,6 @@ class Resize : public Operator<Backend>
void RunImpl(Workspace<Backend> &ws) override;
void SetupSharedSampleParams(Workspace<Backend> &ws) override;

/**
 * @brief Builds 2D resampling parameters for one sample using this operator's filters.
 *
 * Both axes get `min_filter_` / `mag_filter_`; the output size of axis 0 is
 * `meta.rsz_h` and the output size of axis 1 is `meta.rsz_w`.
 */
kernels::ResamplingParams2D GetResamplingParams(const TransformMeta &meta) const {
  kernels::ResamplingParams2D result;
  for (int axis = 0; axis < 2; ++axis) {
    result[axis].min_filter = min_filter_;
    result[axis].mag_filter = mag_filter_;
  }
  result[0].output_size = meta.rsz_h;
  result[1].output_size = meta.rsz_w;
  return result;
}

USE_OPERATOR_MEMBERS();
using Operator<Backend>::RunImpl;
bool save_attrs_;
Expand Down
2 changes: 1 addition & 1 deletion dali/operators/image/resize/resize_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class DLL_PUBLIC ResizeBase : public ResamplingFilterAttr {
std::vector<kernels::ResamplingParams2D> resample_params_;
TensorListShape<> out_shape_;

private:
protected:
kernels::KernelManager kmgr_;

struct MiniBatch {
Expand Down
15 changes: 8 additions & 7 deletions dali/operators/image/resize/resize_crop_mirror.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ enum t_idInfo : uint32_t {
t_mirrorVert
};

/**
 * @brief Plain aggregate describing the transform computed for a single sample.
 *
 * Field order is part of the interface: the struct can be aggregate-initialized.
 */
struct TransformMeta {
  // Input extents — presumably height, width and channel count (TODO confirm).
  int H;
  int W;
  int C;

  // Resized extents; GetResamplingParams uses them as the output sizes of axis 0 and axis 1.
  int rsz_h;
  int rsz_w;

  // NOTE(review): presumably the crop position; meaning of first/second not visible here.
  std::pair<int, int> crop;

  // NOTE(review): presumably a mirroring flag/mode; confirm against the users of this struct.
  int mirror;
};


/**
* @brief Stores parameters for resize+crop+mirror
*/
Expand All @@ -61,13 +69,6 @@ class ResizeCropMirrorAttr : protected CropAttr {
}
}

/**
 * @brief Per-sample transform description (sizes, crop and mirror settings).
 */
struct TransformMeta {
  int H;      // input extent — presumably height (TODO confirm)
  int W;      // input extent — presumably width (TODO confirm)
  int C;      // input extent — presumably number of channels (TODO confirm)
  int rsz_h;  // resized extent, used as the resampling output size of axis 0
  int rsz_w;  // resized extent, used as the resampling output size of axis 1
  std::pair<int, int> crop;  // NOTE(review): presumably crop coordinates — confirm
  int mirror;                // NOTE(review): presumably mirror flag/mode — confirm
};

protected:
inline const TransformMeta GetTransformMeta(const OpSpec &spec,
const TensorShape<> &input_shape,
Expand Down
1 change: 1 addition & 0 deletions dali/operators/reader/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ list(APPEND DALI_OPERATOR_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/sequence_reader_op.c

if(BUILD_NVDEC)
list(APPEND DALI_OPERATOR_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/video_reader_op.cc")
list(APPEND DALI_OPERATOR_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/video_reader_resize_op.cc")
endif()

if (BUILD_LMDB)
Expand Down
15 changes: 15 additions & 0 deletions dali/operators/reader/nvdecoder/sequencewrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,21 @@ struct SequenceWrapper {
LOG_LINE << event_ << " synchronized!" << std::endl;
}

/**
 * @brief Exposes the frames of this sequence through `frames`.
 *
 * `frames` is pointed at the buffer of `sequence` via `ShareData` and described as
 * `count` samples, each of shape `frame_shape()`, with the element type of `sequence`.
 *
 * @param frames tensor list that will share the data of `sequence`
 */
void share_frames(TensorList<GPUBackend> &frames) {
  void *buffer = sequence.raw_mutable_data();
  auto per_frame_shapes = TensorListShape<>::make_uniform(count, frame_shape());

  // Total size in bytes: element size times the number of elements in all frames.
  auto num_bytes = sequence.type().size() * count * height * width * channels;

  frames.ShareData(buffer, num_bytes, per_frame_shapes, sequence.type());
}

/**
 * @brief Shape of a single frame: {height, width, channels}.
 */
TensorShape<3> frame_shape() const {
  TensorShape<3> single_frame{height, width, channels};
  return single_frame;
}

Tensor<GPUBackend> sequence;
int count;
int height;
Expand Down
14 changes: 1 addition & 13 deletions dali/operators/reader/video_reader_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,7 @@ The video codecs can be contained in most of container file formats. FFmpeg is u
Returns a batch of sequences of `sequence_length` frames of shape [N, F, H, W, C] (N being the batch size and F the
number of frames). Supports only constant frame rate videos.)code")
.NumInput(0)
.OutputFn([](const OpSpec &spec) {
std::string file_root = spec.GetArgument<std::string>("file_root");
std::string file_list = spec.GetArgument<std::string>("file_list");
bool enable_frame_num = spec.GetArgument<bool>("enable_frame_num");
bool enable_timestamps = spec.GetArgument<bool>("enable_timestamps");
int num_outputs = 1;
if (!file_root.empty() || !file_list.empty()) {
num_outputs++;
if (enable_frame_num) num_outputs++;
if (enable_timestamps) num_outputs++;
}
return num_outputs;
})
.OutputFn(detail::VideoReaderOutputFn)
.AddOptionalArg("filenames",
R"code(File names of the video files to load.
This option is mutually exclusive with `file_root` and `file_list`.)code",
Expand Down
141 changes: 90 additions & 51 deletions dali/operators/reader/video_reader_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,23 @@
#include "dali/operators/reader/reader_op.h"
#include "dali/operators/reader/loader/video_loader.h"


namespace dali {
namespace detail {
/**
 * @brief Computes the number of outputs of the video reader for the given spec.
 *
 * There is always one output. When `file_root` or `file_list` is non-empty a second
 * output is added, plus one more for each of `enable_frame_num` and
 * `enable_timestamps` that is set.
 *
 * @param spec operator specification the arguments are read from
 * @return number of outputs (1 to 4)
 */
inline int VideoReaderOutputFn(const OpSpec &spec) {
  // All four arguments are read up front, in the same order, regardless of the result.
  const std::string root = spec.GetArgument<std::string>("file_root");
  const std::string list = spec.GetArgument<std::string>("file_list");
  const bool with_frame_num = spec.GetArgument<bool>("enable_frame_num");
  const bool with_timestamps = spec.GetArgument<bool>("enable_timestamps");

  // The optional outputs exist only when a file root or a file list was given.
  if (root.empty() && list.empty()) {
    return 1;
  }

  return 2 + (with_frame_num ? 1 : 0) + (with_timestamps ? 1 : 0);
}
}  // namespace detail

class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {
public:
Expand All @@ -34,7 +50,6 @@ class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {
enable_timestamps_(spec.GetArgument<bool>("enable_timestamps")),
count_(spec.GetArgument<int>("sequence_length")),
channels_(spec.GetArgument<int>("channels")),
tl_shape_(batch_size_, sequence_dim),
dtype_(spec.GetArgument<DALIDataType>("dtype")) {
DALIImageType image_type(spec.GetArgument<DALIImageType>("image_type"));

Expand All @@ -58,7 +73,6 @@ class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {
"timestamps can be enabled only when "
"`file_list` or `file_root` argument is passed");

// TODO(spanev): support rescale
// TODO(spanev): Factor out the constructor body to make VideoReader compatible with lazy_init.
try {
loader_ = InitLoader<VideoLoader>(spec, filenames_);
Expand All @@ -83,75 +97,97 @@ class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {
void SetupSharedSampleParams(DeviceWorkspace &ws) override {
}

void RunImpl(DeviceWorkspace &ws) override {
auto& tl_sequence_output = ws.Output<GPUBackend>(0);
TensorList<GPUBackend> *label_output = NULL;
TensorList<GPUBackend> *frame_num_output = NULL;
TensorList<GPUBackend> *timestamp_output = NULL;

void SetOutputType(TensorList<GPUBackend> &output) {
if (dtype_ == DALI_FLOAT) {
tl_sequence_output.set_type(TypeInfo::Create<float>());
output.set_type(TypeTable::GetTypeInfoFromStatic<float>());
} else { // dtype_ == DALI_UINT8
tl_sequence_output.set_type(TypeInfo::Create<uint8>());
output.set_type(TypeTable::GetTypeInfoFromStatic<uint8>());
}
}

virtual void SetOutputShape(TensorList<GPUBackend> &output, DeviceWorkspace &ws) {
TensorListShape<> output_shape(batch_size_, sequence_dim);
for (int data_idx = 0; data_idx < batch_size_; ++data_idx) {
auto sequence_shape = GetSample(data_idx).sequence.shape();
tl_shape_.set_tensor_shape(data_idx, sequence_shape);
output_shape.set_tensor_shape(
data_idx, GetSample(data_idx).sequence.shape());
}
output.Resize(output_shape);
}

tl_sequence_output.Resize(tl_shape_);
tl_sequence_output.SetLayout("FHWC");
/**
 * @brief Configures the video output before any frames are written to it.
 *
 * Sets the element type, then the shape, then the "FHWC" layout of `output`.
 *
 * @param output tensor list receiving the decoded sequences
 * @param ws     workspace forwarded to SetOutputShape
 */
void PrepareVideoOutput(TensorList<GPUBackend> &output, DeviceWorkspace &ws) {
// The type is set before SetOutputShape resizes the output.
SetOutputType(output);
SetOutputShape(output, ws);
output.SetLayout("FHWC");
}

void PrepareAdditionalOutputs(DeviceWorkspace &ws) {
if (enable_label_output_) {
int output_index = 1;
label_output = &ws.Output<GPUBackend>(output_index++);
label_output->set_type(TypeInfo::Create<int>());
label_output->Resize(label_shape_);
label_output_ = &ws.Output<GPUBackend>(output_index++);
label_output_->set_type(TypeTable::GetTypeInfoFromStatic<int>());
label_output_->Resize(label_shape_);
if (enable_frame_num_) {
frame_num_output = &ws.Output<GPUBackend>(output_index++);
frame_num_output->set_type(TypeInfo::Create<int>());
frame_num_output->Resize(frame_num_shape_);
frame_num_output_ = &ws.Output<GPUBackend>(output_index++);
frame_num_output_->set_type(TypeTable::GetTypeInfoFromStatic<int>());
frame_num_output_->Resize(frame_num_shape_);
}

if (enable_timestamps_) {
timestamp_output = &ws.Output<GPUBackend>(output_index++);
timestamp_output->set_type(TypeInfo::Create<double>());
timestamp_output->Resize(timestamp_shape_);
timestamp_output_ = &ws.Output<GPUBackend>(output_index++);
timestamp_output_->set_type(TypeTable::GetTypeInfoFromStatic<double>());
timestamp_output_->Resize(timestamp_shape_);
}
}
}

for (int data_idx = 0; data_idx < batch_size_; ++data_idx) {
auto* sequence_output = tl_sequence_output.raw_mutable_tensor(data_idx);

auto& prefetched_sequence = GetSample(data_idx);
tl_sequence_output.type().Copy<GPUBackend, GPUBackend>(sequence_output,
prefetched_sequence.sequence.raw_data(),
prefetched_sequence.sequence.size(),
ws.stream());

if (enable_label_output_) {
auto *label = label_output->mutable_tensor<int>(data_idx);
CUDA_CALL(cudaMemcpyAsync(label, &prefetched_sequence.label, sizeof(int),
cudaMemcpyDefault, ws.stream()));
if (enable_frame_num_) {
auto *frame_num = frame_num_output->mutable_tensor<int>(data_idx);
CUDA_CALL(cudaMemcpyAsync(frame_num, &prefetched_sequence.first_frame_idx,
sizeof(int), cudaMemcpyDefault, ws.stream()));
}
if (enable_timestamps_) {
auto *timestamp = timestamp_output->mutable_tensor<double>(data_idx);
timestamp_output->type().Copy<GPUBackend, CPUBackend>(timestamp,
prefetched_sequence.timestamps.data(),
prefetched_sequence.timestamps.size(),
ws.stream());
}
}
/**
 * @brief Copies one prefetched sequence into its slot of the video output.
 *
 * The copy is a GPU-to-GPU copy issued on the stream of `ws`.
 *
 * @param data_idx         index of the sample within the batch
 * @param video_output     output tensor list; sample `data_idx` is the destination
 * @param prefetched_video source sample whose `sequence` is copied
 * @param ws               workspace providing the stream
 */
virtual void ProcessSingleVideo(
    int data_idx,
    TensorList<GPUBackend> &video_output,
    SequenceWrapper &prefetched_video,
    DeviceWorkspace &ws) {
  auto &decoded = prefetched_video.sequence;
  auto *destination = video_output.raw_mutable_tensor(data_idx);

  video_output.type().Copy<GPUBackend, GPUBackend>(
      destination, decoded.raw_data(), decoded.size(), ws.stream());
}

void ProcessAdditionalOutputs(
int data_idx, SequenceWrapper &prefetched_video, cudaStream_t stream) {
if (enable_label_output_) {
auto *label = label_output_->mutable_tensor<int>(data_idx);
CUDA_CALL(cudaMemcpyAsync(
label, &prefetched_video.label, sizeof(int), cudaMemcpyDefault, stream));
if (enable_frame_num_) {
auto *frame_num = frame_num_output_->mutable_tensor<int>(data_idx);
CUDA_CALL(cudaMemcpyAsync(
frame_num, &prefetched_video.first_frame_idx, sizeof(int), cudaMemcpyDefault, stream));
}
if (enable_timestamps_) {
auto *timestamp = timestamp_output_->mutable_tensor<double>(data_idx);
timestamp_output_->type().Copy<GPUBackend, CPUBackend>(
timestamp,
prefetched_video.timestamps.data(),
prefetched_video.timestamps.size(),
stream);
}
}
}

/**
 * @brief Produces the outputs for the whole batch.
 *
 * Output 0 is prepared as the video output, the optional outputs are prepared next,
 * and then every sample of the batch is processed in index order.
 */
void RunImpl(DeviceWorkspace &ws) override {
  auto &frames_out = ws.Output<GPUBackend>(0);

  // Outputs are set up before any per-sample work starts.
  PrepareVideoOutput(frames_out, ws);
  PrepareAdditionalOutputs(ws);

  int sample_idx = 0;
  while (sample_idx < batch_size_) {
    auto &sample = GetSample(sample_idx);
    ProcessSingleVideo(sample_idx, frames_out, sample, ws);
    ProcessAdditionalOutputs(sample_idx, sample, ws.stream());
    ++sample_idx;
  }
}

private:
static constexpr int sequence_dim = 4;
std::vector<std::string> filenames_;
std::string file_root_;
Expand All @@ -161,11 +197,14 @@ class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {
int count_;
int channels_;

TensorListShape<> tl_shape_;
TensorListShape<> label_shape_;
TensorListShape<> timestamp_shape_;
TensorListShape<> frame_num_shape_;

// Non-owning pointers to the optional workspace outputs. They are assigned in
// PrepareAdditionalOutputs and dereferenced in ProcessAdditionalOutputs; each one
// stays null when the corresponding output is not enabled.
TensorList<GPUBackend> *label_output_ = nullptr;
TensorList<GPUBackend> *frame_num_output_ = nullptr;
TensorList<GPUBackend> *timestamp_output_ = nullptr;

DALIDataType dtype_;
bool enable_label_output_;

Expand Down
41 changes: 41 additions & 0 deletions dali/operators/reader/video_reader_resize_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <vector>

#include "dali/core/common.h"
#include "dali/core/error_handling.h"
#include "dali/pipeline/operator/common.h"
#include "dali/pipeline/operator/op_spec.h"
#include "dali/pipeline/operator/operator.h"
#include "dali/operators/reader/video_reader_op.h"
#include "dali/operators/reader/video_reader_resize_op.h"

namespace dali {

// Registers the VideoReaderResize operator for the GPU backend.
DALI_REGISTER_OPERATOR(VideoReaderResize, VideoReaderResize, GPU);

// Schema of VideoReaderResize: takes no inputs, derives its output count from
// detail::VideoReaderOutputFn and lists the VideoReader, ResizeAttr and
// ResamplingFilterAttr schemas as parents (presumably to inherit their arguments).
DALI_SCHEMA(VideoReaderResize)
.DocStr(R"code(
Load and decode H264 video codec with FFmpeg and NVDECODE, NVIDIA GPU's hardware-accelerated video decoding.
The video codecs can be contained in most of container file formats. FFmpeg is used to parse video containers.
Returns a batch of sequences of `sequence_length` frames of shape [N, F, H, W, C] (N being the batch size and F the
number of frames). Supports only constant frame rate videos. It resizes video based on provided params. It supports
features of `Resize` operator.)code")
.NumInput(0)
.OutputFn(detail::VideoReaderOutputFn)
.AddParent("VideoReader")
.AddParent("ResizeAttr")
.AddParent("ResamplingFilterAttr");
} // namespace dali
Loading