Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gridmask Cpu #2582

Merged
merged 16 commits into from
Jan 25, 2021
61 changes: 61 additions & 0 deletions dali/kernels/mask/grid_mask_cpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef DALI_KERNELS_MASK_GRID_MASK_CPU_H_
#define DALI_KERNELS_MASK_GRID_MASK_CPU_H_

#include <utility>
#include "dali/core/common.h"
#include "dali/core/convert.h"
#include "dali/core/error_handling.h"
#include "dali/kernels/kernel.h"

namespace dali {
namespace kernels {

// CPU kernel that multiplies every input element by a 0/1 grid mask and stores the
// product in the output. `Type` is the element type of both the input and the output.
template<typename Type>
class GridMaskCpu {
public:
// Reports the kernel requirements: a single output whose shape is `shape`.
// `context` is not used here.
KernelRequirements Setup(KernelContext &context, const TensorShape<> &shape) {
KernelRequirements req;
req.output_shapes = { TensorListShape<>{{shape}} };
return req;
}

// Applies the mask to `in` and writes the result to `out`.
//
// The loops walk dimension 0 as `y`, dimension 1 as `x` and dimension 2 as `c`, while
// both data pointers simply advance by one element per innermost iteration.
//
// tile   - divisor applied to the rotation terms and to the shifts, so that the grid
//          coordinates below are expressed in tile units
// ratio  - threshold compared against the fractional part of each grid coordinate
// angle  - rotation angle passed to cos()/sin()
// sx, sy - shifts subtracted from the rotated coordinates (after division by `tile`)
// `context` is not used here.
void Run(KernelContext &context, const OutTensorCPU<Type> &out,
const InTensorCPU<Type> &in, int tile, float ratio, float angle,
float sx, float sy) {
auto in_ptr = in.data;
auto out_ptr = out.data;
// Pre-divide by `tile` once so the per-pixel coordinates come out in tile units.
float ca = cos(angle) / tile;
float sa = sin(angle) / tile;
sx /= tile;
sy /= tile;

for (int y = 0; y < in.shape[0]; y++) {
for (int x = 0; x < in.shape[1]; x++) {
// Rotate (x, y) and subtract the shift; the integer part selects the tile and the
// fractional part is the position inside that tile.
float fx = -sx + y * -sa + x * ca;
float fy = -sy + y * ca + x * sa;
PawelA marked this conversation as resolved.
Show resolved Hide resolved
// m is true (element kept) when either fractional coordinate is >= ratio; it is false
// (element multiplied by zero) only when both fractional coordinates are < ratio.
auto m = (fx - floor(fx) >= ratio) || (fy - floor(fy) >= ratio);
for (int c = 0; c < in.shape[2]; c++)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder what would be the perf gain when C is a template argument and we have a compile time generated variant for 1, 3, 4 and the generic one like here.
@mzient what do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The gain can be significant.

*out_ptr++ = *in_ptr++ * m;
}
}
}
};

} // namespace kernels
} // namespace dali

#endif // DALI_KERNELS_MASK_GRID_MASK_CPU_H_
1 change: 1 addition & 0 deletions dali/operators/image/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ add_subdirectory(resize)
add_subdirectory(paste)
add_subdirectory(remap)
add_subdirectory(peek_shape)
add_subdirectory(mask)

collect_headers(DALI_INST_HDRS PARENT_SCOPE)
collect_sources(DALI_OPERATOR_SRCS PARENT_SCOPE)
Expand Down
18 changes: 18 additions & 0 deletions dali/operators/image/mask/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Collect this directory's headers, sources and test sources into the
# DALI_INST_HDRS, DALI_OPERATOR_SRCS and DALI_OPERATOR_TEST_SRCS lists.
# NOTE(review): PARENT_SCOPE presumably publishes the lists to the including directory - confirm
# against the collect_* macro definitions.
collect_headers(DALI_INST_HDRS PARENT_SCOPE)
collect_sources(DALI_OPERATOR_SRCS PARENT_SCOPE)
collect_test_sources(DALI_OPERATOR_TEST_SRCS PARENT_SCOPE)
90 changes: 90 additions & 0 deletions dali/operators/image/mask/grid_mask.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "dali/core/static_switch.h"
#include "dali/operators/image/mask/grid_mask.h"
#include "dali/kernels/mask/grid_mask_cpu.h"
#include "dali/core/format.h"

// Element types accepted by the TYPE_SWITCH dispatch in SetupImpl and RunImpl below;
// any other input type ends in DALI_FAIL("Unsupported input type: ...").
#define TYPES (uint8_t, int16_t, int32_t, float)

namespace dali {

// Operator schema for GridMask: one input, one output and five optional arguments
// (tile, ratio, angle, shift_x, shift_y) with the defaults listed below.
// NOTE(review): the trailing `true` on every argument presumably enables per-sample (tensor)
// argument inputs - confirm against the AddOptionalArg signature.
// NOTE(review): "augumentation" in the DocStr below is a typo for "augmentation".
DALI_SCHEMA(GridMask)
.DocStr(R"(Performs the gridmask augumentation (https://arxiv.org/abs/2001.04086).
PawelA marked this conversation as resolved.
Show resolved Hide resolved
Zeroes out pixels of an image in a grid-like fashion. The grid
PawelA marked this conversation as resolved.
Show resolved Hide resolved
consists of squares repeating in x and y directions, with the same spacing in
both directions. Can be rotated around the origin.)")
.NumInput(1)
.NumOutput(1)
.AddOptionalArg("tile", R"(The length of a single tile, which is equal to
width of black squares plus the spacing between them.)",
100, true)
.AddOptionalArg("ratio",
"The ratio between black square width and tile width.",
0.5f, true)
.AddOptionalArg("angle",
JanuszL marked this conversation as resolved.
Show resolved Hide resolved
"Angle, in radians, by which the grid is rotated.",
0.0f, true)
.AddOptionalArg("shift_x",
"The x component of the translation vector, applied after rotation.",
0.0f, true)
.AddOptionalArg("shift_y",
"The y component of the translation vector, applied after rotation.",
0.0f, true);

// Describes the operator's single output and prepares the kernel manager for this run.
//
// output_desc - resized to one entry that repeats the shape and type of input 0
// ws          - workspace providing input 0 and the argument values read by GetArguments
//
// The per-sample arguments are refreshed through GetArguments(ws) before the output is
// described. The kernel manager is resized to num_threads_ x max_batch_size_ and initialized
// with the GridMaskCpu kernel instantiated for the input element type. An input type outside
// TYPES ends in DALI_FAIL. Returns true whenever no error is raised.
bool GridMaskCpu::SetupImpl(std::vector<OutputDesc> &output_desc,
                            const workspace_t<CPUBackend> &ws) {
  // Only the input is needed here; the output reference that used to be fetched in this
  // function was never read.
  const auto &input = ws.template InputRef<CPUBackend>(0);
  output_desc.resize(1);
  GetArguments(ws);
  output_desc[0] = {input.shape(), input.type()};
  kernel_manager_.Resize(num_threads_, max_batch_size_);
  TYPE_SWITCH(input.type().id(), type2id, Type, TYPES, (
      {
        using Kernel = kernels::GridMaskCpu<Type>;
        kernel_manager_.Initialize<Kernel>();
      }
  ), DALI_FAIL(make_string("Unsupported input type: ", input.type().id())))  // NOLINT
  return true;
}


// Runs the grid-mask kernel over every sample of input 0 using the workspace thread pool.
//
// The output takes over the input's layout. For each sample one work item is queued; it runs
// the kernel chosen for the input element type with that sample's tile, ratio, angle and
// shift values. An input type outside TYPES ends in DALI_FAIL.
void GridMaskCpu::RunImpl(workspace_t<CPUBackend> &ws) {
const auto &input = ws.template InputRef<CPUBackend>(0);
auto &output = ws.template OutputRef<CPUBackend>(0);
output.SetLayout(input.GetLayout());
auto out_shape = output.shape();
auto& tp = ws.GetThreadPool();
TYPE_SWITCH(input.type().id(), type2id, Type, TYPES, (
{
using Kernel = kernels::GridMaskCpu<Type>;
auto in_view = view<const Type>(input);
auto out_view = view<Type>(output);
for (int sid = 0; sid < input.shape().num_samples(); sid++) {
// The sample index is captured by value; everything else is captured by reference.
// NOTE(review): the second AddWork argument (the sample's element count) is presumably a
// scheduling priority / size hint - confirm against the thread pool API.
tp.AddWork([&, sid](int tid) {
JanuszL marked this conversation as resolved.
Show resolved Hide resolved
kernels::KernelContext ctx;
kernel_manager_.Run<Kernel>(tid, sid, ctx, out_view[sid], in_view[sid],
tile_[sid], ratio_[sid], angle_[sid], shift_x_[sid], shift_y_[sid]);
}, out_shape.tensor_size(sid));
}
// Called once, after all per-sample work items have been queued.
tp.RunAll();
}
), DALI_FAIL(make_string("Unsupported input type: ", input.type().id()))) // NOLINT
}

// Registers GridMaskCpu as the CPU implementation of the GridMask operator.
DALI_REGISTER_OPERATOR(GridMask, GridMaskCpu, CPU);

} // namespace dali
62 changes: 62 additions & 0 deletions dali/operators/image/mask/grid_mask.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef DALI_OPERATORS_IMAGE_MASK_GRID_MASK_H_
#define DALI_OPERATORS_IMAGE_MASK_GRID_MASK_H_

#include <vector>
#include "dali/kernels/kernel_manager.h"
#include "dali/pipeline/operator/common.h"
#include "dali/pipeline/operator/operator.h"

namespace dali {

// Backend-independent part of the GridMask operator. It stores the per-sample argument
// values and the kernel manager that backend-specific subclasses use.
template <typename Backend>
class GridMask : public Operator<Backend> {
 public:
  explicit GridMask(const OpSpec &spec)
      : Operator<Backend>(spec) {}

 protected:
  bool CanInferOutputs() const override {
    return true;
  }

  // Reads "tile", "ratio", "angle", "shift_x" and "shift_y" for every sample of input 0
  // into the member vectors, then enforces that every tile value is positive.
  void GetArguments(const workspace_t<Backend> &ws) {
    const int num_samples = ws.GetInputBatchSize(0);
    this->GetPerSampleArgument(tile_, "tile", ws, num_samples);
    this->GetPerSampleArgument(ratio_, "ratio", ws, num_samples);
    this->GetPerSampleArgument(angle_, "angle", ws, num_samples);
    this->GetPerSampleArgument(shift_x_, "shift_x", ws, num_samples);
    this->GetPerSampleArgument(shift_y_, "shift_y", ws, num_samples);
    // The condition text is left as `t > 0` in case the enforce macro embeds it in the
    // error message.
    for (auto t : tile_) {
      DALI_ENFORCE(t > 0, "Tile argument must be positive");
    }
  }

  // Per-sample argument values filled in by GetArguments.
  std::vector<int> tile_;
  std::vector<float> ratio_;
  std::vector<float> angle_;
  std::vector<float> shift_x_;
  std::vector<float> shift_y_;
  kernels::KernelManager kernel_manager_;
};


// CPU variant of the GridMask operator; SetupImpl and RunImpl are defined out of line.
class GridMaskCpu : public GridMask<CPUBackend> {
 public:
  explicit GridMaskCpu(const OpSpec &spec)
      : GridMask<CPUBackend>(spec) {}

 protected:
  bool SetupImpl(std::vector<OutputDesc> &output_desc,
                 const workspace_t<CPUBackend> &ws) override;

  void RunImpl(workspace_t<CPUBackend> &ws) override;
};

} // namespace dali

#endif // DALI_OPERATORS_IMAGE_MASK_GRID_MASK_H_
82 changes: 82 additions & 0 deletions dali/test/python/test_operator_gridmask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from nvidia.dali.pipeline import Pipeline
import nvidia.dali.fn as fn
import nvidia.dali.types as types
import numpy as np
import math
import os
import cv2
from test_utils import get_dali_extra_path

data_root = get_dali_extra_path()
img_dir = os.path.join(data_root, 'db', 'single', 'jpeg')

def get_pipeline(batch_size, tile, ratio, angle):
    """Build a CPU pipeline that outputs (grid-masked image, decoded image).

    Images are read from ``img_dir`` and decoded to RGB on the CPU, then passed
    through ``fn.grid_mask`` with the given ``tile``, ``ratio`` and ``angle``.
    The decoded image is returned as a second output so callers can compare
    the masked result against its source.
    """
    pipeline = Pipeline(batch_size, 4, None)
    with pipeline:
        encoded, _ = fn.file_reader(file_root=img_dir)
        images = fn.image_decoder(encoded, device='cpu', output_type=types.RGB)
        masked = fn.grid_mask(images, device='cpu', tile=tile, ratio=ratio, angle=angle)
        pipeline.set_outputs(masked, images)
    return pipeline

def get_mask(w, h, tile, ratio, angle):
    """Build the reference grid mask of size ``w`` x ``h`` with OpenCV.

    One ``tile`` x ``tile`` cell is filled with ones except for a zeroed square
    of side ``round(tile * ratio)`` in its corner. The cell is repeated enough
    times to cover the image diagonal in every direction, then warped with a
    rotation by ``angle`` (radians) into a ``w`` x ``h`` image.
    """
    side = round(tile * ratio)
    reps = int(math.ceil(math.sqrt(w**2 + h**2) / tile))
    offset = tile * reps

    # Rotate about the centre of the tiled grid, then shift the translation
    # column so that this centre lands on the output origin.
    rotation = cv2.getRotationMatrix2D((offset, offset), angle * 180 / math.pi, 1)
    rotation[:, 2] -= offset

    cell = np.ones((tile, tile))
    cell[:side, :side] = 0
    grid = np.tile(cell, (2 * reps, 2 * reps))
    return cv2.warpAffine(grid, rotation, (w, h))


def check(result, input, tile, ratio, angle):
    """Compare one grid-masked sample against the OpenCV reference mask.

    A 3x3 dilation/erosion of the reference mask is applied first, so that
    pixels next to the square borders are excluded from both comparisons.
    """
    output_img = np.uint8(result)
    source_img = np.uint8(input)
    height, width = output_img.shape[0], output_img.shape[1]
    mask = get_mask(width, height, tile, ratio, angle)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    # Pixels strictly inside the squares must be black.
    inside = np.uint8(1 - cv2.dilate(mask, kernel))
    assert not np.any(cv2.bitwise_and(output_img, output_img, mask=inside))

    # Pixels strictly outside the squares must equal the input.
    outside = np.uint8(cv2.erode(mask, kernel))
    kept_output = cv2.bitwise_and(output_img, output_img, mask=outside)
    kept_source = cv2.bitwise_and(source_img, source_img, mask=outside)
    assert np.all(kept_output == kept_source)

def check_grid_mask(batch_size, tile, ratio, angle):
    """Run the pipeline once and check each of the ``batch_size`` samples."""
    pipeline = get_pipeline(batch_size, tile, ratio, angle)
    pipeline.build()
    masked_batch, decoded_batch = pipeline.run()
    for sample_idx in range(batch_size):
        check(masked_batch[sample_idx], decoded_batch[sample_idx], tile, ratio, angle)

# Generator-style test: yields one check_grid_mask case (batch size 4) for every
# combination of the tile, ratio and angle values listed below.
def test_cpu_vs_cv():
for tile in [40, 100, 200]:
for ratio in [0.2, 0.5, 0.8]:
for angle in [0.0, 0.34, -0.62]:
PawelA marked this conversation as resolved.
Show resolved Hide resolved
yield check_grid_mask, 4, tile, ratio, angle