-
Notifications
You must be signed in to change notification settings - Fork 611
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add segmentation.RandomMaskPixel operator #2445
Changes from 5 commits
2e557ac
4ca693f
75f9c22
f55d968
8f62e10
bfafcb4
f211454
fb91cf9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,203 @@ | ||||||||||||
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. | ||||||||||||
// | ||||||||||||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||||||||||||
// you may not use this file except in compliance with the License. | ||||||||||||
// You may obtain a copy of the License at | ||||||||||||
// | ||||||||||||
// http://www.apache.org/licenses/LICENSE-2.0 | ||||||||||||
// | ||||||||||||
// Unless required by applicable law or agreed to in writing, software | ||||||||||||
// distributed under the License is distributed on an "AS IS" BASIS, | ||||||||||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||||||||
// See the License for the specific language governing permissions and | ||||||||||||
// limitations under the License. | ||||||||||||
|
||||||||||||
#include <random> | ||||||||||||
#include <utility> | ||||||||||||
#include "dali/core/static_switch.h" | ||||||||||||
#include "dali/pipeline/operator/operator.h" | ||||||||||||
#include "dali/operators/segmentation/utils/searchable_rle_mask.h" | ||||||||||||
#include "dali/kernels/common/utils.h" | ||||||||||||
#include "dali/core/boundary.h" | ||||||||||||
|
||||||||||||
#define MASK_SUPPORTED_TYPES (uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t, \ | ||||||||||||
uint64_t, int64_t, float) | ||||||||||||
|
||||||||||||
namespace dali { | ||||||||||||
|
||||||||||||
// Operator schema: one mask input, one output holding the selected pixel coordinates.
// ``value`` and ``threshold`` are alternative ways of defining the foreground; the operator
// constructor rejects specs that provide both.
DALI_SCHEMA(segmentation__RandomMaskPixel)
    .DocStr(R"(Selects random pixel coordinates in a mask, sampled from a uniform distribution.

Based on run-time argument ``foreground``, it returns either only foreground pixels or any pixels.

Pixels are classified as foreground either when their value exceeds a given ``threshold`` or when
it's equal to a specific ``value``.
)")
    .AddOptionalArg<int>("value",
      R"code(All pixels equal to this value are interpreted as foreground.

This argument is mutually exclusive with ``threshold`` argument and is meant to be used only
with integer inputs.
)code", nullptr, true)
    .AddOptionalArg<float>("threshold",
      R"code(All pixels with a value above this threshold are interpreted as foreground.

This argument is mutually exclusive with ``value`` argument.
)code", 0.0f, true)
    .AddOptionalArg("foreground",
      R"code(If different than 0, the pixel position is sampled uniformly from all foreground pixels.
If 0, the pixel position is sampled uniformly from all available pixels.)code",
      0, true)
    .NumInput(1)
    .NumOutput(1);
|
||||||||||||
/**
 * @brief CPU implementation of segmentation.RandomMaskPixel.
 *
 * For every input mask it outputs a single pixel position. Depending on the per-sample
 * ``foreground`` argument the position is drawn either from the foreground pixels only or
 * from the whole mask.
 */
class RandomMaskPixelCPU : public Operator<CPUBackend> {
 public:
  explicit RandomMaskPixelCPU(const OpSpec &spec);

  bool CanInferOutputs() const override {
    return true;
  }

  bool SetupImpl(std::vector<OutputDesc> &output_desc,
                 const workspace_t<CPUBackend> &ws) override;

  void RunImpl(workspace_t<CPUBackend> &ws) override;

 private:
  /// Implementation of RunImpl for masks whose element type is T.
  template <typename T>
  void RunImplTyped(workspace_t<CPUBackend> &ws);

  /// Base seed; the generator used by thread i is seeded with seed_ + i.
  int64_t seed_;
  /// Random generators, indexed by thread id; filled on the first run.
  std::vector<std::mt19937_64> rng_;
  /// Reusable run-length-encoded masks, indexed by thread id.
  std::vector<SearchableRLEMask> rle_;

  /// Per-sample argument values, refreshed by SetupImpl on every iteration.
  std::vector<int> foreground_;
  std::vector<int> value_;
  std::vector<float> threshold_;

  /// True when the ``value`` argument was provided; ``threshold`` is used otherwise.
  bool has_value_ = false;

  USE_OPERATOR_MEMBERS();
};
|
||||||||||||
/**
 * @brief Reads the ``seed`` argument and records whether ``value`` was provided.
 *
 * Fails (DALI_ENFORCE) when both ``value`` and ``threshold`` are defined in the spec.
 */
RandomMaskPixelCPU::RandomMaskPixelCPU(const OpSpec &spec)
    : Operator<CPUBackend>(spec),
      seed_(spec.GetArgument<int64_t>("seed")),
      has_value_(spec.ArgumentDefined("value")) {
  // Without ``value`` there is nothing that could conflict with ``threshold``.
  if (!has_value_) {
    return;
  }
  DALI_ENFORCE(!spec.ArgumentDefined("threshold"),
    "Arguments ``value`` and ``threshold`` can not be provided together");
}
|
||||||||||||
bool RandomMaskPixelCPU::SetupImpl(std::vector<OutputDesc> &output_desc, | ||||||||||||
const workspace_t<CPUBackend> &ws) { | ||||||||||||
const auto &in_masks = ws.template InputRef<CPUBackend>(0); | ||||||||||||
int nsamples = in_masks.size(); | ||||||||||||
auto in_masks_shape = in_masks.shape(); | ||||||||||||
int ndim = in_masks_shape.sample_dim(); | ||||||||||||
output_desc.resize(1); | ||||||||||||
output_desc[0].shape = uniform_list_shape(nsamples, {ndim}); | ||||||||||||
output_desc[0].type = TypeTable::GetTypeInfo(DALI_INT64); | ||||||||||||
|
||||||||||||
foreground_.resize(nsamples); | ||||||||||||
value_.clear(); | ||||||||||||
threshold_.clear(); | ||||||||||||
|
||||||||||||
for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) { | ||||||||||||
foreground_[sample_idx] = spec_.template GetArgument<int>("foreground", &ws, sample_idx); | ||||||||||||
} | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sample-indexed
Suggested change
|
||||||||||||
|
||||||||||||
if (spec_.ArgumentDefined("value")) { | ||||||||||||
value_.resize(nsamples); | ||||||||||||
for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) { | ||||||||||||
value_[sample_idx] = spec_.template GetArgument<int>("value", &ws, sample_idx); | ||||||||||||
} | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
} else { | ||||||||||||
threshold_.resize(nsamples, 0.0f); | ||||||||||||
if (spec_.ArgumentDefined("threshold")) { | ||||||||||||
for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) { | ||||||||||||
threshold_[sample_idx] = spec_.template GetArgument<float>("threshold", &ws, sample_idx); | ||||||||||||
} | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
} | ||||||||||||
} | ||||||||||||
return true; | ||||||||||||
} | ||||||||||||
|
||||||||||||
template <typename T> | ||||||||||||
void RandomMaskPixelCPU::RunImplTyped(workspace_t<CPUBackend> &ws) { | ||||||||||||
const auto &in_masks = ws.template InputRef<CPUBackend>(0); | ||||||||||||
auto &out_pixel_pos = ws.template OutputRef<CPUBackend>(0); | ||||||||||||
int nsamples = in_masks.size(); | ||||||||||||
auto in_masks_shape = in_masks.shape(); | ||||||||||||
int ndim = in_masks_shape.sample_dim(); | ||||||||||||
auto masks_view = view<const T>(in_masks); | ||||||||||||
auto pixel_pos_view = view<int64_t>(out_pixel_pos); | ||||||||||||
auto& thread_pool = ws.GetThreadPool(); | ||||||||||||
|
||||||||||||
if (rng_.empty()) { | ||||||||||||
for (int i = 0; i < thread_pool.size(); i++) { | ||||||||||||
rng_.emplace_back(seed_ + i); | ||||||||||||
} | ||||||||||||
} | ||||||||||||
assert(rng_.size() == static_cast<size_t>(thread_pool.size())); | ||||||||||||
|
||||||||||||
if (rle_.empty()) { | ||||||||||||
rle_.resize(thread_pool.size()); | ||||||||||||
} | ||||||||||||
assert(rle_.size() == static_cast<size_t>(thread_pool.size())); | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nitpick: resize won't do much more than a similar check, so:
Suggested change
|
||||||||||||
|
||||||||||||
for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) { | ||||||||||||
thread_pool.AddWork( | ||||||||||||
[&, sample_idx](int thread_id) { | ||||||||||||
auto &rng = rng_[thread_id]; | ||||||||||||
auto mask = masks_view[sample_idx]; | ||||||||||||
auto pixel_pos = pixel_pos_view[sample_idx]; | ||||||||||||
const auto &mask_sh = mask.shape; | ||||||||||||
if (foreground_[sample_idx]) { | ||||||||||||
int64_t flat_idx = -1; | ||||||||||||
auto &rle_mask = rle_[thread_id]; | ||||||||||||
if (has_value_) { | ||||||||||||
T value = static_cast<T>(value_[sample_idx]); | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have mixed feelings about that one - consider that: if (static_cast<int>(value) != values_[sample_idx])
// behave as if there was no foregound at all There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||||||||||||
rle_mask.Init( | ||||||||||||
mask, [value](const T &x) { return x == value; }); | ||||||||||||
if (rle_mask.count() > 0) { | ||||||||||||
auto dist = std::uniform_int_distribution<int64_t>(0, rle_mask.count() - 1); | ||||||||||||
flat_idx = rle_mask.find(dist(rng)); | ||||||||||||
} | ||||||||||||
} else { | ||||||||||||
T threshold = static_cast<T>(threshold_[sample_idx]); | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't do that. This will cause wrong results for signed integer input and fractional threshold (and also when the value wraps after casting).
Suggested change
|
||||||||||||
rle_mask.Init( | ||||||||||||
mask, [threshold](const T &x) { return x > threshold; }); | ||||||||||||
if (rle_mask.count() > 0) { | ||||||||||||
auto dist = std::uniform_int_distribution<int64_t>(0, rle_mask.count() - 1); | ||||||||||||
flat_idx = rle_mask.find(dist(rng)); | ||||||||||||
} | ||||||||||||
} | ||||||||||||
if (flat_idx >= 0) { | ||||||||||||
// Convert from flat_idx to per-dim indices | ||||||||||||
auto mask_strides = kernels::GetStrides(mask_sh); | ||||||||||||
for (int d = 0; d < ndim - 1; d++) { | ||||||||||||
pixel_pos.data[d] = flat_idx / mask_strides[d]; | ||||||||||||
flat_idx = flat_idx % mask_strides[d]; | ||||||||||||
} | ||||||||||||
pixel_pos.data[ndim - 1] = flat_idx; | ||||||||||||
return; | ||||||||||||
} | ||||||||||||
} | ||||||||||||
// Either foreground == 0 or no foreground pixels found. Get a random center | ||||||||||||
for (int d = 0; d < ndim; d++) { | ||||||||||||
pixel_pos.data[d] = std::uniform_int_distribution<int64_t>(0, mask_sh[d] - 1)(rng); | ||||||||||||
} | ||||||||||||
}, in_masks_shape.tensor_size(sample_idx)); | ||||||||||||
} | ||||||||||||
thread_pool.RunAll(); | ||||||||||||
} | ||||||||||||
|
||||||||||||
/**
 * @brief Dispatches to RunImplTyped<T> according to the element type of the input masks.
 *
 * Fails (DALI_FAIL) when the type is not one of MASK_SUPPORTED_TYPES.
 */
void RandomMaskPixelCPU::RunImpl(workspace_t<CPUBackend> &ws) {
  const auto mask_type_id = ws.template InputRef<CPUBackend>(0).type().id();
  TYPE_SWITCH(mask_type_id, type2id, T, MASK_SUPPORTED_TYPES, (
    RunImplTyped<T>(ws);
  ), (  // NOLINT
    DALI_FAIL(make_string("Unexpected data type: ", mask_type_id));
  ));  // NOLINT
}
|
||||||||||||
DALI_REGISTER_OPERATOR(segmentation__RandomMaskPixel, RandomMaskPixelCPU, CPU); | ||||||||||||
|
||||||||||||
} // namespace dali |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,7 +45,9 @@ class SearchableRLEMask { | |
* determine the mask values that are considered foreground | ||
*/ | ||
template <typename T, typename Predicate = is_positive> | ||
explicit SearchableRLEMask(span<const T> mask_view, Predicate &&is_foreground = {}) { | ||
void Init(span<const T> mask_view, Predicate &&is_foreground = {}) { | ||
groups_.clear(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would export this as a clear() method. |
||
count_ = 0; | ||
int64_t idx = 0; | ||
int64_t sz = mask_view.size(); | ||
while (idx < sz) { | ||
|
@@ -63,9 +65,10 @@ class SearchableRLEMask { | |
} | ||
|
||
template <typename T, typename Predicate = is_positive> | ||
explicit SearchableRLEMask(TensorView<StorageCPU, T> mask_view, Predicate &&is_foreground = {}) | ||
: SearchableRLEMask(span<const T>{mask_view.data, volume(mask_view.shape)}, | ||
std::forward<Predicate>(is_foreground)) {} | ||
void Init(TensorView<StorageCPU, T> mask_view, Predicate &&is_foreground = {}){ | ||
Init(span<const T>{mask_view.data, volume(mask_view.shape)}, | ||
std::forward<Predicate>(is_foreground)); | ||
} | ||
|
||
/** | ||
* @brief Returns the position of the i-th foreground pixel. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import numpy as np | ||
import nvidia.dali as dali | ||
import nvidia.dali.fn as fn | ||
import nvidia.dali.types as types | ||
from test_utils import check_batch, dali_type | ||
import random | ||
from segmentation_test_utils import make_batch_select_masks | ||
from nose.tools import assert_raises | ||
|
||
np.random.seed(4321) | ||
|
||
def check_random_mask_pixel(ndim=2, batch_size=3,
                            min_extent=20, max_extent=50):
    """Builds and runs a pipeline exercising segmentation.random_mask_pixel and checks its outputs.

    Random integer masks with `ndim` dimensions and extents in [min_extent, max_extent] are
    generated inside the pipeline. Pixels are selected with the default threshold, an explicit
    ``threshold``, an explicit ``value``, with ``foreground=0`` and with a per-sample random
    ``foreground`` flag. One of the selected pixels is additionally used as the center of a crop
    passed to ``fn.slice``.

    Three iterations are run; for every sample the selected pixels must satisfy the respective
    foreground condition and the crop window must lie inside the mask.
    """
    # Local import so that the element-wise min/max used below are available.
    import nvidia.dali.math as dali_math

    pipe = dali.pipeline.Pipeline(batch_size=batch_size, num_threads=4, device_id=0, seed=1234)
    with pipe:
        # Input mask
        in_shape_dims = [fn.cast(fn.uniform(range=(min_extent, max_extent + 1), shape=(1,), device='cpu'),
                                 dtype=types.INT32) for _ in range(ndim)]
        in_shape = fn.cat(*in_shape_dims, axis=0)
        in_mask = fn.cast(fn.uniform(range=(0, 2), device='cpu', shape=in_shape), dtype=types.INT32)

        fg_pixel1 = fn.segmentation.random_mask_pixel(in_mask, foreground=1)  # > 0
        fg_pixel2 = fn.segmentation.random_mask_pixel(in_mask, foreground=1, threshold=0.99)  # > 0.99
        fg_pixel3 = fn.segmentation.random_mask_pixel(in_mask, foreground=1, value=2)  # == 2
        rnd_pixel = fn.segmentation.random_mask_pixel(in_mask, foreground=0)
        coin_flip = fn.coin_flip(probability=0.7)
        fg_biased = fn.segmentation.random_mask_pixel(in_mask, foreground=coin_flip)

        # Demo purposes: Taking a random pixel and produce a valid anchor to feed slice
        # We want to force the center adjustment, therefore the large crop shape
        crop_shape = in_shape - 2
        # The operator only selects a pixel; keeping a window centered on it inside the mask is
        # up to the user, hence the clamping of the anchor below.
        # The pixel position is cast to INT32 so that the anchor has the same type as crop_shape.
        anchor = fn.cast(fg_pixel1, dtype=types.INT32) - crop_shape // 2
        # Use DALI's element-wise min/max: Python's builtin min()/max() would pick one of the
        # operands as a whole instead of clamping each coordinate.
        anchor = dali_math.min(dali_math.max(0, anchor), in_shape - crop_shape)
        out_mask = fn.slice(in_mask, anchor, crop_shape, axes=tuple(range(ndim)))

    pipe.set_outputs(in_mask, fg_pixel1, fg_pixel2, fg_pixel3, rnd_pixel, coin_flip, fg_biased,
                     anchor, crop_shape, out_mask)
    pipe.build()
    for _ in range(3):
        outputs = pipe.run()
        for idx in range(batch_size):
            in_mask = outputs[0].at(idx)
            fg_pixel1 = outputs[1].at(idx).tolist()
            fg_pixel2 = outputs[2].at(idx).tolist()
            fg_pixel3 = outputs[3].at(idx).tolist()
            rnd_pixel = outputs[4].at(idx).tolist()
            coin_flip = outputs[5].at(idx).tolist()
            fg_biased = outputs[6].at(idx).tolist()
            anchor = outputs[7].at(idx).tolist()
            crop_shape = outputs[8].at(idx).tolist()
            out_mask = outputs[9].at(idx)

            assert in_mask[tuple(fg_pixel1)] > 0
            assert in_mask[tuple(fg_pixel2)] > 0.99
            assert in_mask[tuple(fg_pixel3)] == 2
            assert in_mask[tuple(fg_biased)] > 0 or not coin_flip

            for d in range(ndim):
                assert 0 <= anchor[d] and anchor[d] + crop_shape[d] <= in_mask.shape[d]
            assert out_mask.shape == tuple(crop_shape)
|
||
def test_random_mask_pixel():
    """Test generator: yields one (check function, ndim) case for 2 and for 3 dimensions."""
    tested_ndims = (2, 3)
    yield from ((check_random_mask_pixel, rank) for rank in tested_ndims)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.