NVIDIA · JanuszL · Jan 25, 2021 · Jan 2, 2021 · Jan 2, 2021 · Jan 5, 2021
diff --git a/dali/kernels/mask/grid_mask_cpu.h b/dali/kernels/mask/grid_mask_cpu.h
@@ -0,0 +1,102 @@
+// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef DALI_KERNELS_MASK_GRID_MASK_CPU_H_
+#define DALI_KERNELS_MASK_GRID_MASK_CPU_H_
+
+#include <cmath>
+#include <utility>
+#include "dali/core/common.h"
+#include "dali/core/convert.h"
+#include "dali/core/error_handling.h"
+#include "dali/kernels/kernel.h"
+
+namespace dali {
+namespace kernels {
+
+/**
+ * @brief CPU kernel that zeroes the pixels of an image covered by a periodic grid of squares.
+ *
+ * The tensor is walked as densely packed [rows, columns, channels] data and all channels
+ * of a pixel share a single keep/zero decision.
+ *
+ * @tparam Type element type shared by the input and the output tensor
+ */
+template<typename Type>
+class GridMaskCpu {
+ public:
+  /**
+   * @brief Requests one output whose shape equals the input shape.
+   *
+   * @param context kernel context (unused)
+   * @param shape   shape of the input sample
+   * @return requirements with a single output list holding one sample of shape `shape`
+   */
+  KernelRequirements Setup(KernelContext &context, const TensorShape<> &shape) {
+    KernelRequirements req;
+    req.output_shapes = { TensorListShape<>{{shape}} };
+    return req;
+  }
+
+  /**
+   * @brief Copies `in` to `out`, writing zeros for the pixels that fall inside a grid square.
+   *
+   * Each pixel (x, y) is rotated by `angle`, the shift (`sx`, `sy`) is subtracted and the
+   * result is divided by `tile`, so one grid period maps to a unit interval. The pixel is
+   * zeroed when the fractional parts of both transformed coordinates are below `ratio`.
+   *
+   * @param context kernel context (unused)
+   * @param out     output tensor, written sequentially in the same element order as `in`
+   * @param in      input tensor, read as shape[0] rows x shape[1] columns x shape[2] channels
+   * @param tile    grid period in pixels; used as a divisor, so it must not be zero
+   * @param ratio   fraction of the period, along each axis, that is zeroed
+   * @param angle   rotation of the grid, passed to cos/sin (radians)
+   * @param sx      grid shift along x, in pixels
+   * @param sy      grid shift along y, in pixels
+   */
+  void Run(KernelContext &context, const OutTensorCPU<Type> &out,
+           const InTensorCPU<Type> &in, int tile, float ratio, float angle,
+           float sx, float sy) {
+    auto in_ptr = in.data;
+    auto out_ptr = out.data;
+    // Fold the 1/tile scale into the rotation and the shift.
+    const float ca = std::cos(angle) / tile;
+    const float sa = std::sin(angle) / tile;
+    sx /= tile;
+    sy /= tile;
+
+    const auto height = in.shape[0];
+    const auto width = in.shape[1];
+    const auto channels = in.shape[2];
+
+    for (int y = 0; y < height; y++) {
+      // The y-dependent terms are constant along a row.
+      const float row_fx = -sx + y * -sa;
+      const float row_fy = -sy + y * ca;
+      for (int x = 0; x < width; x++) {
+        const float fx = row_fx + x * ca;
+        const float fy = row_fy + x * sa;
+        const bool keep = (fx - std::floor(fx) >= ratio) || (fy - std::floor(fy) >= ratio);
+        // Select rather than multiply by the mask: NaN * 0 and Inf * 0 are NaN, not 0.
+        for (int c = 0; c < channels; c++, in_ptr++)
+          *out_ptr++ = keep ? *in_ptr : Type(0);
+      }
+    }
+  }
+};
+
+}  // namespace kernels
+}  // namespace dali
+
+#endif  // DALI_KERNELS_MASK_GRID_MASK_CPU_H_
diff --git a/dali/operators/image/CMakeLists.txt b/dali/operators/image/CMakeLists.txt
@@ -21,6 +21,7 @@ add_subdirectory(resize)
 add_subdirectory(paste)
 add_subdirectory(remap)
 add_subdirectory(peek_shape)
+add_subdirectory(mask)
 
 collect_headers(DALI_INST_HDRS PARENT_SCOPE)
 collect_sources(DALI_OPERATOR_SRCS PARENT_SCOPE)

diff --git a/dali/operators/image/mask/CMakeLists.txt b/dali/operators/image/mask/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Get all the source files and dump test files
+collect_headers(DALI_INST_HDRS PARENT_SCOPE)
+collect_sources(DALI_OPERATOR_SRCS PARENT_SCOPE)
+collect_test_sources(DALI_OPERATOR_TEST_SRCS PARENT_SCOPE)
diff --git a/dali/operators/image/mask/grid_mask.cc b/dali/operators/image/mask/grid_mask.cc
@@ -0,0 +1,95 @@
+// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dali/core/static_switch.h"
+#include "dali/operators/image/mask/grid_mask.h"
+#include "dali/kernels/mask/grid_mask_cpu.h"
+#include "dali/core/format.h"
+
+// Element types supported by the CPU operator; used by the TYPE_SWITCH dispatch below.
+#define TYPES (uint8_t, int16_t, int32_t, float)
+
+namespace dali {
+
+DALI_SCHEMA(GridMask)
+    .DocStr(R"(Performs the gridmask augmentation (https://arxiv.org/abs/2001.04086).
+Zeroes out pixels of an image in a grid-like fashion. The grid
+consists of squares repeating in x and y directions, with the same spacing in
+both directions. Can be rotated around the origin.)")
+    .NumInput(1)
+    .NumOutput(1)
+    .AddOptionalArg("tile", R"(The length of a single tile, which is equal to
+width of black squares plus the spacing between them.)",
+                    100, true)
+    .AddOptionalArg("ratio",
+                    "The ratio between black square width and tile width.",
+                    0.5f, true)
+    .AddOptionalArg("angle",
+                    "Angle, in radians, by which the grid is rotated.",
+                    0.0f, true)
+    .AddOptionalArg("shift_x",
+                    "The x component of the translation vector, applied after rotation.",
+                    0.0f, true)
+    .AddOptionalArg("shift_y",
+                    "The y component of the translation vector, applied after rotation.",
+                    0.0f, true);
+
+// Reads the per-sample arguments, declares a single output with the shape and type of
+// input 0, and initializes the kernel manager for the input's element type.
+bool GridMaskCpu::SetupImpl(std::vector<OutputDesc> &output_desc,
+                            const workspace_t<CPUBackend> &ws) {
+  const auto &input = ws.template InputRef<CPUBackend>(0);
+  output_desc.resize(1);
+  GetArguments(ws);
+  output_desc[0] = {input.shape(), input.type()};
+  kernel_manager_.Resize(num_threads_, max_batch_size_);
+  TYPE_SWITCH(input.type().id(), type2id, Type, TYPES, (
+      {
+          using Kernel = kernels::GridMaskCpu<Type>;
+          kernel_manager_.Initialize<Kernel>();
+      }
+  ), DALI_FAIL(make_string("Unsupported input type: ", input.type().id()))) // NOLINT
+  return true;
+}
+
+
+// Applies the grid mask to each sample of the batch; every sample is submitted to the
+// thread pool as a separate task.
+void GridMaskCpu::RunImpl(workspace_t<CPUBackend> &ws) {
+  const auto &input = ws.template InputRef<CPUBackend>(0);
+  auto &output = ws.template OutputRef<CPUBackend>(0);
+  output.SetLayout(input.GetLayout());
+  const auto &out_shape = output.shape();
+  auto& tp = ws.GetThreadPool();
+  TYPE_SWITCH(input.type().id(), type2id, Type, TYPES, (
+      {
+          using Kernel = kernels::GridMaskCpu<Type>;
+          auto in_view = view<const Type>(input);
+          auto out_view = view<Type>(output);
+          for (int sid = 0; sid < input.shape().num_samples(); sid++) {
+            // NOTE(review): the [&] captures assume RunAll() below waits for the tasks.
+            tp.AddWork([&, sid](int tid) {
+              kernels::KernelContext ctx;
+              kernel_manager_.Run<Kernel>(tid, sid, ctx, out_view[sid], in_view[sid],
+                tile_[sid], ratio_[sid], angle_[sid], shift_x_[sid], shift_y_[sid]);
+            }, out_shape.tensor_size(sid));
+          }
+          tp.RunAll();
+      }
+  ), DALI_FAIL(make_string("Unsupported input type: ", input.type().id()))) // NOLINT
+}
+
+DALI_REGISTER_OPERATOR(GridMask, GridMaskCpu, CPU);
+
+}  // namespace dali
diff --git a/dali/operators/image/mask/grid_mask.h b/dali/operators/image/mask/grid_mask.h
@@ -0,0 +1,78 @@
+// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef DALI_OPERATORS_IMAGE_MASK_GRID_MASK_H_
+#define DALI_OPERATORS_IMAGE_MASK_GRID_MASK_H_
+
+#include <vector>
+#include "dali/kernels/kernel_manager.h"
+#include "dali/pipeline/operator/common.h"
+#include "dali/pipeline/operator/operator.h"
+
+namespace dali {
+
+/**
+ * @brief Backend-independent part of the GridMask operator.
+ *
+ * Stores the per-sample arguments and the kernel manager used by the backend-specific
+ * subclass.
+ */
+template <typename Backend>
+class GridMask : public Operator<Backend> {
+ public:
+  explicit GridMask(const OpSpec &spec) : Operator<Backend>(spec) {}
+
+ protected:
+  bool CanInferOutputs() const override {
+    return true;
+  }
+
+  /**
+   * @brief Reads "tile", "ratio", "angle", "shift_x" and "shift_y" for every sample of
+   *        input 0 and enforces that each tile value is positive.
+   */
+  void GetArguments(const workspace_t<Backend> &ws) {
+    const int num_samples = ws.GetInputBatchSize(0);
+    this->GetPerSampleArgument(tile_, "tile", ws, num_samples);
+    this->GetPerSampleArgument(ratio_, "ratio", ws, num_samples);
+    this->GetPerSampleArgument(angle_, "angle", ws, num_samples);
+    this->GetPerSampleArgument(shift_x_, "shift_x", ws, num_samples);
+    this->GetPerSampleArgument(shift_y_, "shift_y", ws, num_samples);
+    for (const int tile : tile_) {
+      DALI_ENFORCE(tile > 0, "Tile argument must be positive");
+    }
+  }
+
+  std::vector<int> tile_;        // "tile" argument, one value per sample
+  std::vector<float> ratio_;     // "ratio" argument, one value per sample
+  std::vector<float> angle_;     // "angle" argument, one value per sample
+  std::vector<float> shift_x_;   // "shift_x" argument, one value per sample
+  std::vector<float> shift_y_;   // "shift_y" argument, one value per sample
+  kernels::KernelManager kernel_manager_;
+};
+
+
+/// @brief CPU implementation of the GridMask operator.
+class GridMaskCpu : public GridMask<CPUBackend> {
+ public:
+  explicit GridMaskCpu(const OpSpec &spec) : GridMask(spec) {}
+
+ protected:
+  bool SetupImpl(std::vector<OutputDesc> &output_desc, const workspace_t<CPUBackend> &ws) override;
+  void RunImpl(workspace_t<CPUBackend> &ws) override;
+};
+
+}  // namespace dali
+
+#endif  // DALI_OPERATORS_IMAGE_MASK_GRID_MASK_H_
diff --git a/dali/test/python/test_operator_gridmask.py b/dali/test/python/test_operator_gridmask.py
@@ -0,0 +1,93 @@
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from nvidia.dali.pipeline import Pipeline
+import nvidia.dali.fn as fn
+import nvidia.dali.types as types
+import numpy as np
+import math
+import os
+import cv2
+from test_utils import get_dali_extra_path
+
+data_root = get_dali_extra_path()
+img_dir = os.path.join(data_root, 'db', 'single', 'jpeg')
+
+# Builds a CPU pipeline whose outputs are the grid-masked images and the decoded originals.
+def get_pipeline(batch_size, tile, ratio, angle):
+  pipe = Pipeline(batch_size, 4, None)
+  with pipe:
+    encoded, _ = fn.file_reader(file_root=img_dir)
+    decoded = fn.image_decoder(encoded, device='cpu', output_type=types.RGB)
+    masked = fn.grid_mask(decoded, device='cpu', tile=tile, ratio=ratio, angle=angle)
+    pipe.set_outputs(masked, decoded)
+  return pipe
+
+# Builds the reference mask with OpenCV: a tile of ones with a zeroed top-left square is
+# repeated into a canvas, which is then warped by the rotation into a (w, h) image.
+def get_mask(w, h, tile, ratio, angle):
+  square = round(tile * ratio)
+  # Number of tiles spanning the image diagonal; the canvas is twice that per side,
+  # presumably so that the rotated canvas still covers the whole image.
+  nrep = int(math.ceil(math.sqrt(w**2 + h**2) / tile))
+  center = tile * nrep
+
+  # Rotation about the canvas centre; reducing the translation by the centre offset
+  # then moves the canvas centre onto the image origin.
+  R = cv2.getRotationMatrix2D((center, center), angle * 180 / math.pi, 1)
+  R[0,2] -= center
+  R[1,2] -= center
+
+  cell = np.ones((tile, tile))
+  cell[:square, :square] = 0
+  canvas = np.tile(cell, (2 * nrep, 2 * nrep))
+  return cv2.warpAffine(canvas, R, (w, h))
+
+
+# Compares `result` with the OpenCV reference mask. The mask is dilated/eroded with a 3x3
+# kernel before use, so pixels next to a square edge are left out of both comparisons.
+def check(result, input, tile, ratio, angle):
+  result = np.uint8(result)
+  input = np.uint8(input)
+  height, width = result.shape[0], result.shape[1]
+  mask = get_mask(width, height, tile, ratio, angle)
+  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
+
+  # inside of squares should be black
+  inside = np.uint8(1 - cv2.dilate(mask, kernel))
+  assert not np.any(cv2.bitwise_and(result, result, mask=inside))
+
+  # outside of squares should be same as input
+  outside = np.uint8(cv2.erode(mask, kernel))
+  kept_result = cv2.bitwise_and(result, result, mask=outside)
+  kept_input = cv2.bitwise_and(input, input, mask=outside)
+  assert np.all(kept_result == kept_input)
+
+# Runs one batch through the pipeline and checks every sample against the reference.
+def check_grid_mask(batch_size, tile, ratio, angle):
+  pipe = get_pipeline(batch_size, tile, ratio, angle)
+  pipe.build()
+  masked_batch, decoded_batch = pipe.run()
+  for idx in range(batch_size):
+    check(masked_batch[idx], decoded_batch[idx], tile, ratio, angle)
+
+# Test generator: yields one check per (tile, ratio, angle) combination, batch size 4.
+def test_cpu_vs_cv():
+  tiles = (40, 100, 200)
+  ratios = (0.2, 0.5, 0.8)
+  angles = (0.0, 0.34, -0.62)
+  for tile in tiles:
+    for ratio in ratios:
+      for angle in angles:
+        yield check_grid_mask, 4, tile, ratio, angle