Skip to content

Commit

Permalink
Coverity fixes: fix fn.normalize handling of batch of empty samples, fix broken assertion in copy_with_stride (#5223)
Browse files Browse the repository at this point in the history

* Fix GPU fn.normalize crash on batch of empty samples
* Fix broken assertion in copy with stride (fn.python_function strided tensor copy kernel)
---------

Signed-off-by: Kamil Tokarski <ktokarski@nvidia.com>
  • Loading branch information
stiepan committed Dec 7, 2023
1 parent 40759f4 commit 3b53d18
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 7 deletions.
17 changes: 11 additions & 6 deletions dali/kernels/normalize/normalize_gpu_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ class NormalizeImplGPU {

template <typename Desc>
std::pair<dim3, dim3> GetLaunchParams(const TensorListShape<> &data_shape, int max_block) const {
assert(max_block > 0);
int optimum_block = std::is_same<Desc, Op_Scalar>::value ? 1024 : 256;
int64_t block = std::min(max_block, optimum_block);
int64_t max_size = 0;
Expand All @@ -432,7 +433,7 @@ class NormalizeImplGPU {
}
if (max_size < block)
block = max_size;
int max_blocks_per_sample = div_ceil(max_size, block);
int max_blocks_per_sample = max_size == 0 ? 0 : div_ceil(max_size, block);
dim3 grid(std::min(max_blocks_per_sample, std::max(32, 2048 / num_samples_)), num_samples_);
return { grid, dim3(block) };
}
Expand All @@ -448,8 +449,10 @@ class NormalizeImplGPU {
dim3 grid, block;
int max_block = MaxThreadsPerBlockStatic(NormalizeKernel<Desc>);
std::tie(grid, block) = GetLaunchParams<Desc>(in.shape, max_block);
NormalizeKernel<<<grid, block, 0, ctx.gpu.stream>>>(gpu_descs, global_scale, shift);
CUDA_CALL(cudaGetLastError());
if (grid.x > 0) {
NormalizeKernel<<<grid, block, 0, ctx.gpu.stream>>>(gpu_descs, global_scale, shift);
CUDA_CALL(cudaGetLastError());
}
}

template <typename Desc, typename BaseParam, typename ScaleParam>
Expand All @@ -463,9 +466,11 @@ class NormalizeImplGPU {
dim3 grid, block;
int max_block = MaxThreadsPerBlockStatic(NormalizeInvStdDevKernel<Desc>);
std::tie(grid, block) = GetLaunchParams<Desc>(in.shape, max_block);
NormalizeInvStdDevKernel<<<grid, block, 0, ctx.gpu.stream>>>(
gpu_descs, epsilon, global_scale, shift);
CUDA_CALL(cudaGetLastError());
if (grid.x > 0) {
NormalizeInvStdDevKernel<<<grid, block, 0, ctx.gpu.stream>>>(gpu_descs, epsilon, global_scale,
shift);
CUDA_CALL(cudaGetLastError());
}
}

std::string axes_str() const {
Expand Down
2 changes: 1 addition & 1 deletion dali/pipeline/util/copy_with_stride.cu
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ void FillSampleAlignmentInfo(StridedCopyDesc &sample) {
assert(0 <= sample.aligned.skip_left && sample.aligned.skip_left < vec_len);
sample.aligned.skip_left = std::min<int64_t>(sample.size, sample.aligned.skip_left);
int64_t remaining_size = sample.size - sample.aligned.skip_left;
assert(0 <= remaining_size && remaining_size < sample.size);
assert(0 <= remaining_size && remaining_size <= sample.size);
sample.aligned.size = align_down(remaining_size, vec_len);
sample.aligned.skip_right = remaining_size - sample.aligned.size;
assert(0 <= sample.aligned.skip_right && sample.aligned.skip_right < vec_len);
Expand Down
16 changes: 16 additions & 0 deletions dali/test/python/operator_1/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import nvidia.dali.ops as ops
import numpy as np
from test_utils import dali_type
from nvidia.dali import fn, pipeline_def, types
from nose2.tools import params


def normalize(x, axes=None, mean=None, stddev=None, ddof=0, eps=0):
Expand Down Expand Up @@ -501,3 +503,17 @@ def test_types():
shift,
scale,
)


@params("cpu", "gpu")
def test_batch_of_empty_samples(device):
    """Regression test: fn.normalize must not crash when every sample in the batch is empty.

    Runs the normalize operator on a batch of 4 zero-length constant samples,
    on either the CPU or GPU backend (parameterized by ``device``).
    """

    @pipeline_def
    def make_pipe():
        # A zero-element constant sample; move it to the GPU when testing that backend.
        sample = types.Constant([])
        sample = sample.gpu() if device == "gpu" else sample
        return fn.normalize(sample, mean=5, stddev=1)

    pipe = make_pipe(batch_size=4, device_id=0, num_threads=4)
    pipe.build()
    # The test passes if this does not raise / crash on the empty batch.
    pipe.run()

0 comments on commit 3b53d18

Please sign in to comment.