-
Notifications
You must be signed in to change notification settings - Fork 608
/
optical_flow.h
238 lines (207 loc) · 9.05 KB
/
optical_flow.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DALI_OPERATORS_SEQUENCE_OPTICAL_FLOW_OPTICAL_FLOW_H_
#define DALI_OPERATORS_SEQUENCE_OPTICAL_FLOW_OPTICAL_FLOW_H_
#include <memory>
#include <utility>
#include <vector>
#include "dali/core/cuda_event.h"
#include "dali/operators/sequence/optical_flow/optical_flow_adapter/optical_flow_stub.h"
#include "dali/operators/sequence/optical_flow/optical_flow_impl/optical_flow_impl.h"
#include "dali/pipeline/data/backend.h"
#include "dali/pipeline/data/views.h"
#include "dali/pipeline/operator/checkpointing/stateless_operator.h"
#include "dali/pipeline/operator/operator.h"
#include "dali/util/nvml.h"
namespace dali {
namespace detail {
template<typename Backend>
struct backend_to_compute {
using type = ComputeCPU;
};
template<>
struct backend_to_compute<GPUBackend> {
using type = ComputeGPU;
};
static const std::string kPresetArgName = "preset"; // NOLINT
static const std::string kOutputGridArgName = "output_grid"; // NOLINT
static const std::string kHintGridArgName = "hint_grid"; // NOLINT
static const std::string kEnableTemporalHintsArgName = "enable_temporal_hints"; // NOLINT
static const std::string kEnableExternalHintsArgName = "enable_external_hints"; // NOLINT
static const std::string kImageTypeArgName = "image_type"; // NOLINT
} // namespace detail
template<typename Backend>
class OpticalFlow : public StatelessOperator<Backend> {
using ComputeBackend = typename detail::backend_to_compute<Backend>::type;
public:
explicit OpticalFlow(const OpSpec &spec)
: StatelessOperator<Backend>(spec),
quality_factor_(spec.GetArgument<float>(detail::kPresetArgName)),
out_grid_size_(spec.GetArgument<int>(detail::kOutputGridArgName)),
hint_grid_size_(spec.GetArgument<int>(detail::kHintGridArgName)),
enable_temporal_hints_(spec.GetArgument<bool>(detail::kEnableTemporalHintsArgName)),
enable_external_hints_(spec.GetArgument<bool>(detail::kEnableExternalHintsArgName)),
of_params_({quality_factor_, out_grid_size_, hint_grid_size_, enable_temporal_hints_,
enable_external_hints_}),
optical_flow_(std::unique_ptr<optical_flow::OpticalFlowAdapter<ComputeBackend>>(
new optical_flow::OpticalFlowStub<ComputeBackend>(of_params_))),
image_type_(spec.GetArgument<DALIImageType>(detail::kImageTypeArgName)),
device_id_(spec.GetArgument<int>("device_id")) {
// In case external hints are enabled, we need 2 inputs
DALI_ENFORCE((enable_external_hints_ && spec.NumInput() == 2) || !enable_external_hints_,
"Incorrect number of inputs. Expected: 2, Obtained: " +
std::to_string(spec.NumInput()));
sync_ = CUDAEvent::Create(device_id_);
#if NVML_ENABLED
nvml_handle_ = nvml::NvmlInstance::CreateNvmlInstance();
#endif
}
~OpticalFlow() {}
DISABLE_COPY_MOVE_ASSIGN(OpticalFlow);
protected:
bool SetupImpl(std::vector<OutputDesc> &output_desc, const Workspace &ws) override {
const auto &input = ws.Input<Backend>(0);
if (enable_external_hints_) {
const auto &hints = ws.Input<Backend>(1);
// Extract calculation params
ExtractParams(input, hints);
} else {
ExtractParams(input);
}
cudaStream_t of_stream = ws.stream();
#if NVML_ENABLED
{
static float driver_version = nvml::GetDriverVersion();
if (driver_version > 460 && driver_version < 470.21)
of_stream = 0;
}
#else
{
int driver_cuda_version = 0;
CUDA_CALL(cuDriverGetVersion(&driver_cuda_version));
if (driver_cuda_version >= 11030 && driver_cuda_version < 11040)
of_stream = 0;
}
#endif
auto input_sh = input.shape();
// sort all sequences by size, to make sure that samples are processed grouped by shape to avoid
// NV OF reconfiguration as much as possible
processing_order_.resize(nsequences_);
for (int sequence_idx = 0; sequence_idx < nsequences_; sequence_idx++) {
processing_order_[sequence_idx] = {{input_sh[sequence_idx][2], input_sh[sequence_idx][1]},
sequence_idx};
}
std::sort(processing_order_.begin(), processing_order_.end());
of_lazy_init(input_sh[0][2], input_sh[0][1], depth_, image_type_, device_id_, of_stream);
TensorListShape<> new_sizes(nsequences_, 4);
for (int i = 0; i < nsequences_; i++) {
auto out_shape = optical_flow_->CalcOutputShape(input_sh[i][1], input_sh[i][2]);
auto shape = shape_cat(sequence_sizes_[i] - 1, out_shape);
new_sizes.set_tensor_shape(i, shape);
}
output_desc.resize(1);
output_desc[0] = {new_sizes, DALI_FLOAT};
return true;
}
void RunImpl(Workspace &ws) override;
bool CanInferOutputs() const override {
return true;
}
private:
/**
* Optical flow lazy initialization
*/
void of_lazy_init(size_t width, size_t height, size_t channels, DALIImageType image_type,
int device_id, cudaStream_t stream) {
std::call_once(of_initialized_,
[&]() {
optical_flow_.reset(
new optical_flow::OpticalFlowImpl(of_params_,
width,
height,
channels,
image_type,
device_id,
stream));
optical_flow_->Init(of_params_);
});
}
/**
* Use input TensorList to extract calculation params
* Only NFHWC layout is supported
*/
void ExtractParams(const TensorList<Backend> &tl) {
auto shape = tl.shape();
nsequences_ = shape.size();
DALI_ENFORCE(shape.sample_dim() == 4, "Input for Optical Flow must be a sequence of frames.");
depth_ = shape[0][3];
sequence_sizes_.reserve(nsequences_);
for (int i = 0; i < nsequences_; i++) {
sequence_sizes_[i] = shape[i][0];
}
for (auto sz : sequence_sizes_) {
DALI_ENFORCE(sz >= 2, (sz == 1
? "One-frame sequence encountered. Make sure that all input sequences "
"for Optical Flow have at least 2 frames."
: "Empty sequence encountered. Make sure that all input sequences"
" for Optical Flow have at least 2 frames."));
}
}
/**
* Overload for operator that takes also hints as input
*/
void ExtractParams(const TensorList<Backend> &input, const TensorList<Backend> &hints) {
ExtractParams(input);
auto hints_shape = hints.shape();
DALI_ENFORCE(hints_shape.sample_dim() == 3, "Hint should have a sample dim equal 3.");
DALI_ENFORCE(hints_shape.size() == nsequences_,
"Number of input sequences and hints must match");
hints_height_ = hints_shape[0][1];
hints_width_ = hints_shape[0][2];
hints_depth_ = hints_shape[0][3];
DALI_ENFORCE(hints_depth_ == 2, "Hints shall have depth of 2: flow_x and flow_y");
DALI_ENFORCE(
hints_height_ == (frames_height_ + 3) / 4 && hints_width_ == (frames_width_ + 3) / 4,
"Hints resolution has to be 4 times smaller in each dimension (4x4 grid)");
DALI_ENFORCE(is_uniform(hints_shape),
"Width, height and depth must be equal for all hints");
}
struct DimsOrder {
std::pair<int, int> dims;
int idx;
bool operator<(const DimsOrder &rhs) const {
return dims < rhs.dims;
}
};
const float quality_factor_;
const int out_grid_size_;
const int hint_grid_size_;
const bool enable_temporal_hints_;
const bool enable_external_hints_;
std::once_flag of_initialized_;
optical_flow::OpticalFlowParams of_params_;
std::unique_ptr<optical_flow::OpticalFlowAdapter<ComputeBackend>> optical_flow_;
DALIImageType image_type_;
int device_id_;
int frames_width_ = -1, frames_height_ = -1, depth_ = -1, nsequences_ = -1;
int hints_width_ = -1, hints_height_ = -1, hints_depth_ = -1;
std::vector<int> sequence_sizes_;
std::vector<DimsOrder> processing_order_;
CUDAEvent sync_;
#if NVML_ENABLED
nvml::NvmlInstance nvml_handle_;
#endif
};
} // namespace dali
#endif // DALI_OPERATORS_SEQUENCE_OPTICAL_FLOW_OPTICAL_FLOW_H_