Use true scalars + bug fixes #2318

Merged · 13 commits · Oct 4, 2020
3 changes: 2 additions & 1 deletion dali/kernels/imgproc/convolution/convolution_cpu.h
@@ -349,8 +349,9 @@ struct ConvolutionCpu {
if (axis == ndim - has_channels - 1) {
int num_channels = has_channels ? in_shape[ndim - 1] : 1;
return num_channels * window_size;
- } else {
- return kStripSize * window_size;
}
+ return kStripSize * window_size;
}
};

@@ -650,8 +650,9 @@ class PositionPredicatedTileIterator<Shape_, Element_, layout::PitchLinear, Adva
CUTLASS_DEVICE int get_distance(int abs_window_element, int lo_offset) {
if (mirrored) {
return get_mirrored_element(abs_window_element) - lo_offset;
- } else {
- return abs_window_element - lo_offset;
}
+ return abs_window_element - lo_offset;
}

struct aligned_offset_data {
6 changes: 3 additions & 3 deletions dali/kernels/reduce/reduce_all_gpu_test.cu
@@ -214,9 +214,9 @@ void ReduceAllGPUTest<Reduction>::TestReduceAllKernel(int min_size, int max_size

kernels::reduce::ReduceAllGPU<Out, In, Reduction> kernel;

- auto out_shape = TensorListShape<1>::make_uniform(nsamples, TensorShape<1>{1});
- TestTensorList<Out, 1> out;
- out.reshape(out_shape.to_static<1>());
+ auto out_shape = TensorListShape<0>(nsamples);
+ TestTensorList<Out, 0> out;
+ out.reshape(out_shape);

auto in_view_gpu = in.gpu();
auto out_view_gpu = out.gpu();
4 changes: 2 additions & 2 deletions dali/kernels/reduce/reduce_all_kernel_gpu.h
@@ -63,12 +63,12 @@ class DLL_PUBLIC ReduceAllGPU {

KernelRequirements req;
req.scratch_sizes = se.sizes;
- req.output_shapes = {TensorListShape<1>::make_uniform(num_samples, TensorShape<1>{1})};
+ req.output_shapes = {TensorListShape<0>(num_samples)};
return req;
}

DLL_PUBLIC void Run(KernelContext &context,
- const OutListGPU<Out, 1> &out,
+ const OutListGPU<Out, 0> &out,
const InListGPU<In, DynamicDimensions> &in) {
DALI_ENFORCE(out.is_contiguous(), "Reduce all kernel expects the output to be contiguous");
auto* out_start = out[0].data;
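The two reduce hunks above replace the old convention of returning a one-element 1D tensor per sample with a true 0-D scalar per sample. A minimal sketch of the difference, using the TensorListShape API exactly as it appears in the diff (the include path and function wrapper are assumptions for illustration):

#include "dali/core/tensor_shape.h"

// Sketch only: contrast the old and new descriptions of a batch of
// num_samples scalar outputs.
void DescribeScalarOutputs(int num_samples) {
  // Old convention: every sample is a 1D tensor holding exactly one element.
  auto fake_scalars = dali::TensorListShape<1>::make_uniform(
      num_samples, dali::TensorShape<1>{1});                  // sample_dim() == 1
  // New convention: every sample is a true scalar with zero dimensions.
  auto true_scalars = dali::TensorListShape<0>(num_samples);  // sample_dim() == 0
  // Both describe num_samples values in total; only the dimensionality differs.
  (void)fake_scalars;
  (void)true_scalars;
}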
2 changes: 1 addition & 1 deletion dali/kernels/signal/decibel/to_decibels_gpu.cu
@@ -71,7 +71,7 @@ KernelRequirements ToDecibelsGpu<T>::Setup(KernelContext &context,
template <typename T>
void ToDecibelsGpu<T>::Run(KernelContext &context, const OutListGPU<T, DynamicDimensions> &out,
const InListGPU<T, DynamicDimensions> &in, const ToDecibelsArgs<T> &args,
- InListGPU<T, 1> max_values) {
+ InListGPU<T, 0> max_values) {
DALI_ENFORCE(max_values.empty() || max_values.is_contiguous(),
"Reduce all kernel expects the output to be contiguous");
const T* max_values_data = max_values.empty() ? nullptr : max_values[0].data;
2 changes: 1 addition & 1 deletion dali/kernels/signal/decibel/to_decibels_gpu.h
@@ -42,7 +42,7 @@ class DLL_PUBLIC ToDecibelsGpu {
const OutListGPU<T, DynamicDimensions> &out,
const InListGPU<T, DynamicDimensions> &in,
const ToDecibelsArgs<T> &args,
- InListGPU<T, 1> max_values = {});
+ InListGPU<T, 0> max_values = {});
};

} // namespace signal
4 changes: 2 additions & 2 deletions dali/kernels/signal/decibel/to_decibels_gpu_test.cc
@@ -92,7 +92,7 @@ TEST_P(ToDecibelsGpuTest, ToDecibelsGpuTest) {

std::vector<T> max_values(batch_size, 0.0);
memory::KernelUniquePtr<T> max_values_gpu;
- InListGPU<T, 1> max_values_arg;
+ InListGPU<T, 0> max_values_arg;
if (args.ref_max) {
for (int b = 0; b < batch_size; ++b) {
int64_t sz = volume(data_shape_[b]);
@@ -106,7 +106,7 @@
cudaMemcpy(max_values_gpu.get(), max_values.data(), batch_size * sizeof(T),
cudaMemcpyHostToDevice);
max_values_arg = {max_values_gpu.get(),
- TensorListShape<1>::make_uniform(batch_size, TensorShape<1>{1})};
+ TensorListShape<0>(batch_size)};
}

kernel.Run(ctx, out.gpu(), in_.gpu(), args, max_values_arg);
2 changes: 1 addition & 1 deletion dali/operators/audio/nonsilence_op.cc
@@ -66,7 +66,7 @@ bool NonsilenceOperatorCpu::SetupImpl(std::vector<OutputDesc> &output_desc,
AcquireArgs(spec_, ws);
TypeInfo output_type;
output_type.SetType<int>(TypeTable::GetTypeID<int>());
- TensorShape<> scalar_shape = {1};
+ TensorShape<> scalar_shape = {};

output_desc.resize(detail::kNumOutputs);
for (int i = 0; i < detail::kNumOutputs; i++) {
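The nonsilence change above is the same idea at the level of a single tensor shape: an empty TensorShape<> is a zero-dimensional scalar, not a one-element vector. A short illustrative sketch (the include path is an assumption; sample_dim() and volume() are used the same way as elsewhere in this PR):

#include "dali/core/tensor_shape.h"

// Illustrative only: both shapes describe a single value, but only the
// second one is a true scalar.
dali::TensorShape<> one_element_1d = {1};  // sample_dim() == 1, volume(one_element_1d) == 1
dali::TensorShape<> true_scalar = {};      // sample_dim() == 0, volume(true_scalar) == 1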
6 changes: 3 additions & 3 deletions dali/operators/decoder/audio/audio_decoder_op.cc
@@ -73,7 +73,7 @@ AudioDecoderCpu::SetupImpl(std::vector<OutputDesc> &output_desc, const workspace
// Currently, metadata is only the sampling rate.
// On the event something else would emerge,
// this approach should be completely redefined
- TensorListShape<> shape_rate(batch_size, 1);
+ TensorListShape<> shape_rate(batch_size, 0);
TensorListShape<> shape_data(batch_size, downmix_ ? 1 : 2);

for (int i = 0; i < batch_size; i++) {
@@ -84,7 +84,7 @@ AudioDecoderCpu::SetupImpl(std::vector<OutputDesc> &output_desc, const workspace
TensorShape<> data_sample_shape = DecodedAudioShape(
meta, use_resampling_ ? target_sample_rates_[i] : -1.0f, downmix_);
shape_data.set_tensor_shape(i, data_sample_shape);
- shape_rate.set_tensor_shape(i, {1});
+ shape_rate.set_tensor_shape(i, {});
files_names_[i] = input[i].GetSourceInfo();
}

@@ -130,7 +130,7 @@ AudioDecoderCpu::DecodeSample(const TensorView<StorageCPU, OutputType, DynamicDi
template <typename OutputType, typename DecoderOutputType>
void AudioDecoderCpu::DecodeBatch(workspace_t<Backend> &ws) {
auto decoded_output = view<OutputType, DynamicDimensions>(ws.template OutputRef<Backend>(0));
- auto sample_rate_output = view<float, 1>(ws.template OutputRef<Backend>(1));
+ auto sample_rate_output = view<float, 0>(ws.template OutputRef<Backend>(1));
int batch_size = decoded_output.shape.num_samples();
auto &tp = ws.GetThreadPool();

9 changes: 5 additions & 4 deletions dali/operators/generic/constant.h
@@ -33,14 +33,15 @@ class Constant : public Operator<Backend> {
using Workspace = workspace_t<Backend>;

explicit Constant(const OpSpec &spec) : Operator<Backend>(spec) {
+ bool has_shape = spec.ArgumentDefined("shape");
spec.TryGetRepeatedArgument<int>(shape_arg_, "shape");
output_type_ = spec.GetArgument<DALIDataType>("dtype");
if (spec.HasArgument("fdata")) {
DALI_ENFORCE(!spec.HasArgument("idata"), "Constant node: `fdata` and `idata` arguments are "
"mutually exclusive");
fdata_ = spec.GetRepeatedArgument<float>("fdata");
- if (shape_arg_.empty()) {
- shape_arg_.push_back(fdata_.size());
+ if (!has_shape) {
+ shape_arg_ = { static_cast<int>(fdata_.size()) };
} else {
DALI_ENFORCE(fdata_.size() == static_cast<size_t>(volume(shape_arg_)) || fdata_.size() == 1,
"The number of values does not match the shape specified");
@@ -56,8 +57,8 @@ class Constant : public Operator<Backend> {
output_type_ = DALI_INT32;

idata_ = spec.GetRepeatedArgument<int>("idata");
- if (shape_arg_.empty()) {
- shape_arg_.push_back(idata_.size());
+ if (!has_shape) {
+ shape_arg_ = { static_cast<int>(idata_.size()) };
} else {
DALI_ENFORCE(idata_.size() == static_cast<size_t>(volume(shape_arg_)) || idata_.size() == 1,
"The number of values does not match the shape specified");
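The constant.h change is subtle: previously an empty shape_arg_ was interpreted as "no shape given" and replaced with the number of values, which made it impossible to request a scalar constant. Testing spec.ArgumentDefined("shape") keeps an explicitly empty shape intact. A rough sketch of the resulting decision, with hypothetical names rather than the operator's actual code:

#include <vector>

// Hypothetical helper mirroring the decision in Constant's constructor after
// this change; num_values stands for the size of fdata_/idata_.
std::vector<int> ResolveConstantShape(bool shape_argument_defined,
                                      const std::vector<int> &shape_arg,
                                      size_t num_values) {
  if (!shape_argument_defined) {
    // No "shape" argument at all: infer a 1D shape from the number of values
    // (the old fallback behavior).
    return {static_cast<int>(num_values)};
  }
  // "shape" was given explicitly, even if it is empty: keep it as-is, so an
  // empty shape now yields a true scalar instead of being overwritten.
  return shape_arg;
}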
6 changes: 4 additions & 2 deletions dali/operators/generic/slice/slice_attr.h
@@ -63,8 +63,8 @@ class SliceAttr {
crop_anchor.type().id(), " and ", crop_shape.type().id()));
auto args_dtype = crop_anchor.type().id();
TYPE_SWITCH(args_dtype, type2id, ArgsType, SLICE_ARGS_TYPES, (
- auto anchor_view = view<const ArgsType, 1>(crop_anchor);
- auto shape_view = view<const ArgsType, 1>(crop_shape);
+ auto anchor_view = view<const ArgsType>(crop_anchor);
+ auto shape_view = view<const ArgsType>(crop_shape);
for (size_t data_idx = 0; data_idx < batch_size__; data_idx++) {
VerifyArgsShape(anchor_view.tensor_shape(data_idx), shape_view.tensor_shape(data_idx));
ProcessArgumentsHelper(data_idx,
@@ -152,6 +152,8 @@ class SliceAttr {
void VerifyArgsShape(const TensorShape<>& crop_anchor_shape,
const TensorShape<>& crop_shape_shape) {
DALI_ENFORCE(crop_anchor_shape == crop_shape_shape);
+ DALI_ENFORCE(crop_anchor_shape.sample_dim() <= 1,
+     "Anchor and shape must be 1D tensors or scalars");
size_t args_size = volume(crop_anchor_shape);
auto axes_size = !axis_names_.empty() ? axis_names_.size() : axes_.size();
DALI_ENFORCE(args_size == axes_size,
4 changes: 2 additions & 2 deletions dali/operators/math/expressions/arithmetic.h
@@ -170,7 +170,7 @@ inline TensorListShape<> ShapePromotion(std::string op, span<const TensorListSha
*out_shape, ", ", *shapes[i], ")."));
}
}
- return out_shape ? *out_shape : uniform_list_shape(batch_size, {1});
+ return out_shape ? *out_shape : TensorListShape<0>(batch_size);
}

/**
@@ -183,7 +183,7 @@ DLL_PUBLIC inline const TensorListShape<> &PropagateShapes(ExprNode &expr,
const workspace_t<Backend> &ws,
int batch_size) {
if (expr.GetNodeType() == NodeType::Constant) {
- expr.SetShape(uniform_list_shape(batch_size, {1}));
+ expr.SetShape(TensorListShape<0>(batch_size));
return expr.GetShape();
}
if (expr.GetNodeType() == NodeType::Tensor) {
6 changes: 3 additions & 3 deletions dali/operators/math/expressions/arithmetic_meta.h
@@ -712,11 +712,11 @@ inline ArithmeticOp NameToOp(const std::string &op_name) {
/**
* @brief Check if input of given `shape` should be considered to represent (tensor of) scalars.
*
- * A tensor of scalars is uniform tensor with sample dimension equal 1 and only 1 (scalar) element
- * in every sample.
+ * As a backward compatibility, 1D 1-element tensors are considered scalars (in addition to true
+ * scalars).
*/
inline bool IsScalarLike(const TensorListShape<> &shape) {
- return is_uniform(shape) && shape.sample_dim() == 1 && shape.tensor_shape_span(0)[0] == 1;
+ return is_uniform(shape) && shape.sample_dim() <= 1 && volume(shape.tensor_shape_span(0)) == 1;
}

} // namespace dali
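With the relaxed predicate above, both true scalars and the legacy one-element 1D tensors count as scalar-like. Illustrative checks, assuming the snippet sits inside namespace dali next to the function and that the shape helpers behave as used throughout this PR:

#include <cassert>

// Sketch only: which batches the relaxed IsScalarLike() accepts.
void IsScalarLikeExamples() {
  auto true_scalars = TensorListShape<0>(4);         // 0D samples
  auto legacy_scalars = uniform_list_shape(4, {1});  // 1D samples, 1 element each
  auto vectors = uniform_list_shape(4, {2});         // 1D samples, 2 elements each
  assert(IsScalarLike(true_scalars));    // true scalars qualify
  assert(IsScalarLike(legacy_scalars));  // kept for backward compatibility
  assert(!IsScalarLike(vectors));        // more than one element per sample
}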
19 changes: 11 additions & 8 deletions dali/operators/math/expressions/arithmetic_test.cc
@@ -64,21 +64,21 @@ TEST(ArithmeticOpsTest, TreePropagation) {
EXPECT_EQ(func[1].GetOutputDesc(), "CC:int32");
}

- TEST(ArithmeticOpsTest, PropagateScalarLike) {
+
+ TEST(ArithmeticOpsTest, PropagateScalarInput) {
std::string expr_str = "sub(&0 $1:int32))";
auto expr = ParseExpressionString(expr_str);
auto &expr_ref = *expr;
HostWorkspace ws;
std::shared_ptr<TensorVector<CPUBackend>> in[1];
for (auto &ptr : in) {
ptr = std::make_shared<TensorVector<CPUBackend>>();
- ptr->Resize({{1}, {1}});
- ptr->SetLayout(TensorLayout("HW"));
+ ptr->Resize({{}, {}});
}
ws.AddInput(in[0]);

auto result_shape = PropagateShapes<CPUBackend>(expr_ref, ws, 2);
- auto expected_shpe = TensorListShape<>{{1}, {1}};
+ auto expected_shpe = TensorListShape<>{{}, {}};
EXPECT_EQ(result_shape, expected_shpe);
}

@@ -514,7 +514,7 @@ class ArithmeticOpsScalarTest : public ::testing::TestWithParam<shape_sequence>
for (int tensor_idx = 0; tensor_idx < result_shape.num_samples(); tensor_idx++) {
for (int j = 0; j < result_shape[tensor_idx].num_elements(); j++) {
auto is_scalar = [] (auto &shape, int tensor_idx) {
- return shape[tensor_idx] == TensorShape<>{1};
+ return volume(shape[tensor_idx]) == 1;
};
int expected = data0[offset_in[0] + (is_scalar(s[0], tensor_idx) ? 0 : j)] +
data1[offset_in[1] + (is_scalar(s[1], tensor_idx) ? 0 : j)];
@@ -541,9 +541,12 @@ namespace {

std::array<TensorListShape<>, 3> GetShapesForSequence(int batch_size, int left_elems,
int right_elems) {
- return {uniform_list_shape(batch_size, {left_elems}),
- uniform_list_shape(batch_size, {right_elems}),
- uniform_list_shape(batch_size, {std::max(left_elems, right_elems)})};
+ auto GetTensorOrScalar = [=](int elems) {
+   return elems != 1 ? uniform_list_shape(batch_size, {elems}) : TensorListShape<0>(batch_size);
+ };
+ return {GetTensorOrScalar(left_elems),
+         GetTensorOrScalar(right_elems),
+         GetTensorOrScalar(std::max(left_elems, right_elems))};
}

/**
2 changes: 1 addition & 1 deletion dali/operators/random/coin_flip.h
@@ -43,7 +43,7 @@ class CoinFlip : public Operator<CPUBackend> {

bool SetupImpl(std::vector<OutputDesc> &output_desc, const HostWorkspace &ws) override {
output_desc.resize(1);
- output_desc[0].shape = uniform_list_shape(batch_size_, {1});
+ output_desc[0].shape = TensorListShape<0>(batch_size_);
output_desc[0].type = TypeTable::GetTypeInfo(DALI_INT32);
return true;
}
2 changes: 1 addition & 1 deletion dali/operators/random/normal_distribution_op.h
@@ -120,7 +120,7 @@ class NormalDistribution : public Operator<Backend> {


TensorListShape<> ShapeForDefaultConfig(const workspace_t<Backend> &ws) {
- return uniform_list_shape(batch_size_, {1});
+ return TensorListShape<0>(batch_size_);
}


2 changes: 1 addition & 1 deletion dali/operators/random/uniform.h
@@ -32,7 +32,7 @@ class Uniform : public Operator<CPUBackend> {
GetSingleOrRepeatedArg(spec, range, "range", 2);
dis_ = std::uniform_real_distribution<float>(range[0], range[1]);

- std::vector<int> shape_arg{1};
+ std::vector<int> shape_arg{};
if (spec.HasArgument("shape"))
shape_arg = spec.GetRepeatedArgument<int>("shape");
shape_ = std::vector<int64_t>{std::begin(shape_arg), std::end(shape_arg)};
1 change: 0 additions & 1 deletion dali/operators/reader/loader/coco_loader.cc
@@ -227,7 +227,6 @@ void parse_annotations(
parser.NextArrayValue();
annotation.rle_.w_ = parser.GetInt();
parser.NextArrayValue();
- RAPIDJSON_ASSERT(parser.PeekType() == -1);
} else if (0 == std::strcmp(another_key, "counts")) {
annotation.rle_.rle_ = parser.GetString();
}
2 changes: 1 addition & 1 deletion dali/operators/reader/nemo_asr_reader_op.cc
@@ -163,7 +163,7 @@ void NemoAsrReader::RunImpl(SampleWorkspace &ws) {
int next_out_idx = 1;
if (read_sr_) {
auto &sample_rate = ws.Output<CPUBackend>(next_out_idx++);
- sample_rate.Resize({1});
+ sample_rate.Resize({});
sample_rate.set_type(TypeTable::GetTypeInfo(DALI_FLOAT));
sample_rate.mutable_data<float>()[0] = sample.audio_meta().sample_rate;
sample_rate.SetMeta(meta);
2 changes: 1 addition & 1 deletion dali/operators/signal/decibel/to_decibels_op_gpu.cu
@@ -94,7 +94,7 @@ void ToDecibelsImpl<T>::RunImpl(workspace_t<GPUBackend> &ws) {
if (args_.ref_max) {
max_out_.set_type(max_out_desc_[0].type);
max_out_.Resize(max_out_desc_[0].shape);
- auto max_values_view = view<T, 1>(max_out_);
+ auto max_values_view = view<T, 0>(max_out_);
kmgr_max_.Run<MaxKernel>(0, 0, ctx, max_values_view, in_view);
kmgr_todb_.Run<ToDecibelsKernel>(0, 0, ctx, out_view, in_view, args_, max_values_view);
} else {
6 changes: 3 additions & 3 deletions dali/pipeline/data/tensor_list_test.cc
@@ -270,11 +270,11 @@ TYPED_TEST(TensorListTest, TestMultipleZeroSizeResize) {
}
}

- TYPED_TEST(TensorListTest, TestScalarResize) {
+ TYPED_TEST(TensorListTest, TestFakeScalarResize) {
TensorList<TypeParam> tensor_list;

int num_scalar = this->RandInt(1, 128);
- auto shape = uniform_list_shape(num_scalar, {1});
+ auto shape = uniform_list_shape(num_scalar, {1});  // {1} on purpose
tensor_list.Resize(shape);

ASSERT_NE(tensor_list.template mutable_data<float>(), nullptr);
@@ -283,7 +283,7 @@ TYPED_TEST(TensorListTest, TestScalarResize) {
ASSERT_FALSE(tensor_list.shares_data());

for (int i = 0; i < num_scalar; ++i) {
- ASSERT_EQ(tensor_list.tensor_shape(i), TensorShape<>{1});
+ ASSERT_EQ(tensor_list.tensor_shape(i), TensorShape<>{1});  // {1} on purpose
ASSERT_EQ(tensor_list.tensor_offset(i), i);
}
}
6 changes: 3 additions & 3 deletions dali/pipeline/executor/executor.h
@@ -508,7 +508,7 @@ void Executor<WorkspacePolicy, QueuePolicy>::RunCPU() {

// Run the cpu-ops in the thread
// Process each CPU Op in batch
- for (int cpu_op_id = 0; cpu_op_id < graph_->NumOp(OpType::CPU); ++cpu_op_id) {
+ for (int cpu_op_id = 0; cpu_op_id < graph_->NumOp(OpType::CPU) && !exec_error_; ++cpu_op_id) {
OpNode &op_node = graph_->Node(OpType::CPU, cpu_op_id);
typename WorkspacePolicy::template ws_t<OpType::CPU> ws =
WorkspacePolicy::template GetWorkspace<OpType::CPU>(cpu_idxs, *graph_, cpu_op_id);
@@ -554,7 +554,7 @@ void Executor<WorkspacePolicy, QueuePolicy>::RunMixed() {

CUDA_CALL(cudaEventSynchronize(mixed_stage_event_));

- for (int i = 0; i < graph_->NumOp(OpType::MIXED); ++i) {
+ for (int i = 0; i < graph_->NumOp(OpType::MIXED) && !exec_error_; ++i) {
OpNode &op_node = graph_->Node(OpType::MIXED, i);
try {
typename WorkspacePolicy::template ws_t<OpType::MIXED> ws =
@@ -615,7 +615,7 @@ void Executor<WorkspacePolicy, QueuePolicy>::RunGPU() {
// iterations of a stage of the pipeline.
CUDA_CALL(cudaEventSynchronize(gpu_stage_event_));

- for (int i = 0; i < graph_->NumOp(OpType::GPU); ++i) {
+ for (int i = 0; i < graph_->NumOp(OpType::GPU) && !exec_error_; ++i) {
OpNode &op_node = graph_->Node(OpType::GPU, i);
try {
typename WorkspacePolicy::template ws_t<OpType::GPU> ws =
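The three executor hunks apply one bug fix: once exec_error_ is set by a failing operator, the remaining operators of that stage are no longer launched. Reduced to a self-contained sketch with placeholder names (this is not DALI's actual executor code):

#include <cstddef>
#include <cstdio>
#include <exception>
#include <functional>
#include <vector>

// Placeholder sketch of the guard added to RunCPU/RunMixed/RunGPU: the loop
// condition checks an error flag so a failure stops further scheduling.
void RunStage(const std::vector<std::function<void()>> &ops) {
  bool exec_error = false;  // plays the role of Executor's exec_error_ member
  for (std::size_t i = 0; i < ops.size() && !exec_error; ++i) {
    try {
      ops[i]();  // run one operator
    } catch (const std::exception &e) {
      exec_error = true;  // skip the remaining operators in this stage
      std::fprintf(stderr, "operator %zu failed: %s\n", i, e.what());
    }
  }
}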