
Commit 466d38d

Rename AddWork to AddTask.

Signed-off-by: Michal Zientkiewicz <michalz@nvidia.com>
mzient committed Jun 13, 2023
1 parent 200eb97 commit 466d38d
Showing 77 changed files with 202 additions and 195 deletions.
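The change is a mechanical rename: every call site that queued work through AddWork now uses AddTask, and the task callable still receives the worker's thread index. Below is a minimal sketch of the call pattern after this commit, assuming the legacy DALI ThreadPool interface visible in the hunks (AddTask(task, priority, start_immediately) followed by RunAll()); the header path, function, and variable names are illustrative assumptions, not part of the commit.

#include <atomic>
#include <cstdint>
#include "dali/pipeline/util/thread_pool.h"  // assumed header for the legacy dali::ThreadPool

// Queue one task per sample, then run the whole batch and wait for completion.
void RunBatch(dali::ThreadPool &tp, int batch_size) {
  std::atomic<int64_t> processed{0};
  for (int i = 0; i < batch_size; i++) {
    tp.AddTask(                        // formerly tp.AddWork(...)
        [&processed](int thread_id) {  // the task still receives the executing thread's index
          ++processed;                 // stand-in for per-sample work
        },
        /*priority=*/0, /*start_immediately=*/false);
  }
  tp.RunAll();  // execute the queued tasks and block until they finish
}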
14 changes: 7 additions & 7 deletions dali/benchmark/thread_pool_bench.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -27,7 +27,7 @@ static void ThreadPoolArgs(benchmark::internal::Benchmark *b) {
b->Args({batch_size, work_size_min, work_size_max, nthreads});
}

-BENCHMARK_DEFINE_F(ThreadPoolBench, AddWork)(benchmark::State& st) {
+BENCHMARK_DEFINE_F(ThreadPoolBench, AddTask)(benchmark::State& st) {
int batch_size = st.range(0);
int work_size_min = st.range(1);
int work_size_max = st.range(2);
@@ -40,7 +40,7 @@ BENCHMARK_DEFINE_F(ThreadPoolBench, AddWork)(benchmark::State& st) {
while (st.KeepRunning()) {
for (int i = 0; i < batch_size; i++) {
auto size = this->RandInt(work_size_min, work_size_max);
-thread_pool.AddWork(
+thread_pool.AddTask(
[&data, size, &total_count](int thread_id){
std::vector<uint8_t> other_data;
for (int i = 0; i < size; i++) {
@@ -59,13 +59,13 @@ BENCHMARK_DEFINE_F(ThreadPoolBench, AddWork)(benchmark::State& st) {
std::cout << total_count << std::endl;
}

-BENCHMARK_REGISTER_F(ThreadPoolBench, AddWork)->Iterations(1000)
+BENCHMARK_REGISTER_F(ThreadPoolBench, AddTask)->Iterations(1000)
->Unit(benchmark::kMicrosecond)
->UseRealTime()
->Apply(ThreadPoolArgs);


-BENCHMARK_DEFINE_F(ThreadPoolBench, AddWorkDeferred)(benchmark::State& st) {
+BENCHMARK_DEFINE_F(ThreadPoolBench, AddTaskDeferred)(benchmark::State& st) {
int batch_size = st.range(0);
int work_size_min = st.range(1);
int work_size_max = st.range(2);
@@ -78,7 +78,7 @@ BENCHMARK_DEFINE_F(ThreadPoolBench, AddWorkDeferred)(benchmark::State& st) {
while (st.KeepRunning()) {
for (int i = 0; i < batch_size; i++) {
auto size = this->RandInt(work_size_min, work_size_max);
-thread_pool.AddWork(
+thread_pool.AddTask(
[&data, size, &total_count](int thread_id){
std::vector<uint8_t> other_data;
for (int i = 0; i < size; i++) {
@@ -98,7 +98,7 @@ BENCHMARK_DEFINE_F(ThreadPoolBench, AddWorkDeferred)(benchmark::State& st) {
}


-BENCHMARK_REGISTER_F(ThreadPoolBench, AddWorkDeferred)->Iterations(1000)
+BENCHMARK_REGISTER_F(ThreadPoolBench, AddTaskDeferred)->Iterations(1000)
->Unit(benchmark::kMicrosecond)
->UseRealTime()
->Apply(ThreadPoolArgs);
2 changes: 1 addition & 1 deletion dali/core/exec/thread_pool_base.cc
@@ -125,7 +125,7 @@ void ThreadPoolBase::PopAndRunTask(std::unique_lock<std::mutex> &lock) {
TaskFunc t = std::move(tasks_.front());
tasks_.pop();
lock.unlock();
-t();
+t(this_thread_idx());
lock.lock();
}

5 changes: 3 additions & 2 deletions dali/core/exec/thread_pool_base_test.cc
@@ -20,9 +20,10 @@ namespace dali {

struct SerialExecutor {
template <typename Runnable>
-std::enable_if_t<std::is_convertible_v<Runnable, std::function<void()>>>
+std::enable_if_t<std::is_convertible_v<Runnable, std::function<void(int)>>>
AddTask(Runnable &&runnable) {
-runnable();
+const int idx = 0;
+runnable(idx);
}
};
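After this change both executors hand the task a thread index: SerialExecutor above invokes the runnable with index 0, while ThreadPoolBase::PopAndRunTask (previous hunk) passes this_thread_idx(). A minimal sketch of code written against that shared contract, assuming the only requirement is an AddTask that accepts a callable convertible to std::function<void(int)>; the template, function, and variable names are illustrative only.

#include <cstddef>
#include <functional>
#include <vector>

// Usable with SerialExecutor above or with ThreadPoolBase alike: the executor
// is assumed to invoke the callable with the index of the thread that runs it.
template <typename Executor>
void FillWithThreadIndex(Executor &exec, std::vector<int> &out) {
  for (std::size_t i = 0; i < out.size(); i++) {
    exec.AddTask([&out, i](int thread_idx) {
      out[i] = thread_idx;  // record which worker ran this element
    });
  }
}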

6 changes: 3 additions & 3 deletions dali/imgcodec/decoders/decoder_parallel_impl.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -104,7 +104,7 @@ class DLL_PUBLIC BatchParallelDecoderImpl : public ImageDecoderImpl {
ROI no_roi;
for (int i = 0; i < in.size(); i++) {
auto roi = rois.empty() ? no_roi : rois[i];
-ctx.tp->AddWork([=, out = out[i], in = in[i]](int tid) mutable {
+ctx.tp->AddTask([=, out = out[i], in = in[i]](int tid) mutable {
try {
promise.set(i, DecodeImplTask(tid, out, in, opts, roi));
} catch (...) {
@@ -128,7 +128,7 @@ class DLL_PUBLIC BatchParallelDecoderImpl : public ImageDecoderImpl {
ROI no_roi;
for (int i = 0; i < in.size(); i++) {
auto roi = rois.empty() ? no_roi : rois[i];
-ctx.tp->AddWork([=, out = out[i], in = in[i]](int tid) mutable {
+ctx.tp->AddTask([=, out = out[i], in = in[i]](int tid) mutable {
try {
promise.set(i, DecodeImplTask(tid, ctx.stream, out, in, opts, roi));
} catch (...) {
2 changes: 1 addition & 1 deletion dali/imgcodec/decoders/nvjpeg_lossless/nvjpeg_lossless.cc
@@ -99,7 +99,7 @@ void NvJpegLosslessDecoderInstance::Parse(DecodeResultsPromise &promise,
DecodeResultsPromise parse_promise(nsamples);
for (int i = 0; i < nsamples; i++) {
int tid = 0;
-ctx.tp->AddWork(
+ctx.tp->AddTask(
[&, i](int tid) {
auto &jpeg_stream = per_thread_resources_[tid].jpeg_stream;
auto *sample = in[i];
6 changes: 3 additions & 3 deletions dali/imgcodec/future_decode_result_test.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -29,7 +29,7 @@ TEST(FutureDecodeResultsTest, WaitNew) {

DecodeResultsPromise pro(3);
auto fut = pro.get_future();
-tp.AddWork([pro](int tidx) mutable {
+tp.AddTask([pro](int tidx) mutable {
std::this_thread::sleep_for(std::chrono::milliseconds(10));
pro.set(1, DecodeResult::Success());
pro.set(0, DecodeResult::Success());
@@ -59,7 +59,7 @@ TEST(FutureDecodeResultsTest, Benchmark) {
auto start = std::chrono::high_resolution_clock::now();
for (int iter = 0; iter < num_iter; iter++) {
DecodeResultsPromise res(100);
-tp.AddWork([&](int tidx) {
+tp.AddTask([&](int tidx) {
for (int i = 0; i < res.num_samples(); i++)
res.set(i, DecodeResult::Success());
}, 0, true);
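The imgcodec call sites pair the renamed AddTask with DecodeResultsPromise, so each task publishes one sample's result independently. A condensed sketch of that pattern, limited to the calls visible in the tests above (set, get_future, and the three-argument AddTask); nsamples and tp are assumed to be in scope, and consuming the returned future is omitted here.

DecodeResultsPromise promise(nsamples);       // one result slot per sample
auto future = promise.get_future();           // consumer side, read elsewhere
for (int i = 0; i < nsamples; i++) {
  tp.AddTask([&promise, i](int tid) {
    promise.set(i, DecodeResult::Success());  // each task fills exactly one slot
  }, 0, true);                                // priority 0, start immediately
}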
4 changes: 2 additions & 2 deletions dali/imgcodec/tools/imagemagick_test.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -205,7 +205,7 @@ void run(Env &env) {

while (directory_it != fs::end(directory_it)) {
auto batch = get_batch(env, directory_it);
-pool.AddWork([=, &env](int tid){
+pool.AddTask([=, &env](int tid){
process(env, batch);
});
}
4 changes: 2 additions & 2 deletions dali/kernels/common/scatter_gather.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -170,7 +170,7 @@ class DLL_PUBLIC ScatterGatherCPU : public ScatterGatherBase {
} else {
MakeBlocks(exec_engine.NumThreads() * kTasksMultiplier);
for (auto &r : blocks_) {
-exec_engine.AddWork([=](int thread_id) { std::memcpy(r.dst, r.src, r.size); }, r.size);
+exec_engine.AddTask([=](int thread_id) { std::memcpy(r.dst, r.src, r.size); }, r.size);
}
exec_engine.RunAll();
}
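In the kernel code, AddTask goes through an execution-engine abstraction and carries a per-task cost hint (here the number of bytes to copy), with RunAll() executing everything that was queued. A small sketch modeled on the ScatterGatherCPU loop above; the block struct, engine template, and function names are stand-ins rather than DALI types.

#include <cstddef>
#include <cstring>
#include <vector>

struct CopyBlock { char *dst; const char *src; std::size_t size; };

// Submit one memcpy task per block, weighted by its size, then run them all.
template <typename ExecutionEngine>
void CopyBlocks(ExecutionEngine &engine, const std::vector<CopyBlock> &blocks) {
  for (const auto &b : blocks) {
    engine.AddTask([b](int thread_id) { std::memcpy(b.dst, b.src, b.size); }, b.size);
  }
  engine.RunAll();  // execute the queued copies and wait
}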
10 changes: 5 additions & 5 deletions dali/kernels/imgproc/structure/connected_components.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -208,7 +208,7 @@ void LabelSlice(OutLabel *label_base,
for (int64_t i = 0; i < n; i++) {
auto out_slice = out.slice(i);
auto in_slice = in.slice(i);
-engine.AddWork([=, &seq_engn](int){
+engine.AddTask([=, &seq_engn](int){
LabelSlice(label_base, out_slice, in_slice, background, seq_engn);
});
}
@@ -220,7 +220,7 @@
auto in_slice = in.slice(i);
auto prev_out = out.slice(i-1);
auto prev_in = in.slice(i-1);
-engine.AddWork([=](int){
+engine.AddTask([=](int){
MergeSlices(label_base, prev_out, out_slice, prev_in, in_slice);
});
}
@@ -313,7 +313,7 @@ int64_t CompactLabels(OutLabel *labels,
int64_t chunk_start = volume * chunk / num_chunks;
int64_t chunk_end = volume * (chunk + 1) / num_chunks;

-engine.AddWork([=, &tmp_sets, &lock](int thread) {
+engine.AddTask([=, &tmp_sets, &lock](int thread) {
OutLabel prev = old_bg_label;
OutLabel remapped = old_bg_label;
for (int64_t i = chunk_start; i < chunk_end; i++) {
@@ -354,7 +354,7 @@ int64_t CompactLabels(OutLabel *labels,
for (int chunk = 0; chunk < num_chunks; chunk++) {
int64_t chunk_start = volume * chunk / num_chunks;
int64_t chunk_end = volume * (chunk + 1) / num_chunks;
-engine.AddWork([=, &label_map](int) {
+engine.AddTask([=, &label_map](int) {
RemapChunk(make_span(labels + chunk_start, chunk_end - chunk_start), label_map);
});
}
4 changes: 2 additions & 2 deletions dali/kernels/imgproc/structure/label_bbox.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -245,7 +245,7 @@ void GetLabelBoundingBoxes(span<Box<ndim, Coord>> boxes,
part_boxes = make_span(&tmp_boxes[(i - 1)*boxes.size()], boxes.size());
part.size[max_d] = end - start;
part.data = in.data + start * stride;
-engine.AddWork([=](int) {
+engine.AddTask([=](int) {
i64vec<ndim> origin = {};
origin[dim_mapping[max_d]] = start;
GetLabelBoundingBoxes(part_boxes, part, dim_mapping, background, origin);
8 changes: 4 additions & 4 deletions dali/kernels/slice/slice_cpu.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -313,7 +313,7 @@ void SliceKernel(ExecutionEngine &exec_engine,
int64_t b_start = prev_b_end;
int64_t b_end = prev_b_end = total_sz * (b + 1) / nblocks;
int64_t b_nbytes = (b_end - b_start) * sizeof(OutputType);
-exec_engine.AddWork([=](int tid) {
+exec_engine.AddTask([=](int tid) {
std::memcpy(out_data + b_start, in_data + b_start, b_nbytes);
}, b_nbytes, false); // do not start work immediately
}
@@ -325,7 +325,7 @@ void SliceKernel(ExecutionEngine &exec_engine,
int nblocks = split_shape(split_factor, out_shape, req_nblocks, min_blk_sz, skip_dim_mask);

if (nblocks == 1) {
-exec_engine.AddWork([=](int) {
+exec_engine.AddTask([=](int) {
SliceKernel(out_data, in_data, out_strides, in_strides, out_shape, in_shape,
args.anchor, GetPtr<OutputType>(args.fill_values), args.channel_dim);
}, kSliceCost * volume(out_shape), false); // do not start work immediately
@@ -345,7 +345,7 @@ void SliceKernel(ExecutionEngine &exec_engine,
blk_shape[d] = blk_end[d] - blk_start[d];
blk_anchor[d] = args.anchor[d] + blk_start[d];
}
-exec_engine.AddWork([=](int) {
+exec_engine.AddTask([=](int) {
SliceKernel(output_ptr, in_data, out_strides, in_strides, blk_shape, in_shape,
blk_anchor, GetPtr<OutputType>(args.fill_values), args.channel_dim);
}, kSliceCost * volume(blk_shape), false); // do not start work immediately
6 changes: 3 additions & 3 deletions dali/kernels/slice/slice_flip_normalize_permute_pad_cpu.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -260,7 +260,7 @@ void SliceFlipNormalizePermutePadKernel(
req_nblocks > 0 ? req_nblocks : exec_engine.NumThreads() * 8,
min_blk_sz, skip_dim_mask);
if (nblocks == 1) {
-exec_engine.AddWork([=](int) {
+exec_engine.AddTask([=](int) {
SliceFlipNormalizePermutePadKernel(output, input, args.in_strides, args.out_strides,
args.anchor, args.in_shape, args.out_shape,
GetPtr<OutputType>(fill_values),
@@ -288,7 +288,7 @@ void SliceFlipNormalizePermutePadKernel(
blk_anchor[d] = args.anchor[d] + blk_start[d];
}

-exec_engine.AddWork([=](int) {
+exec_engine.AddTask([=](int) {
SliceFlipNormalizePermutePadKernel(output_ptr, input_ptr, args.in_strides, args.out_strides,
blk_anchor, args.in_shape, blk_shape,
GetPtr<OutputType>(fill_values),
4 changes: 2 additions & 2 deletions dali/operators/audio/mel_scale/mel_filter_bank.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -101,7 +101,7 @@ void MelFilterBank<CPUBackend>::RunImpl(Workspace &ws) {
TYPE_SWITCH(input.type(), type2id, T, MEL_FBANK_SUPPORTED_TYPES, (
using MelFilterBankKernel = kernels::audio::MelFilterBankCpu<T>;
for (int i = 0; i < input.shape().num_samples(); i++) {
-thread_pool.AddWork(
+thread_pool.AddTask(
[this, &input, &output, i](int thread_id) {
auto in_view = view<const T>(input[i]);
auto out_view = view<T>(output[i]);
4 changes: 2 additions & 2 deletions dali/operators/audio/mfcc/mfcc.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -160,7 +160,7 @@ void MFCC<CPUBackend>::RunImpl(Workspace &ws) {
VALUE_SWITCH(in_shape.sample_dim(), Dims, MFCC_SUPPORTED_NDIMS, (
using DctKernel = kernels::signal::dct::Dct1DCpu<T, T, Dims>;
for (int i = 0; i < input.shape().num_samples(); i++) {
-thread_pool.AddWork(
+thread_pool.AddTask(
[this, &input, &output, i](int thread_id) {
kernels::KernelContext ctx;
auto in_view = view<const T, Dims>(input[i]);
4 changes: 2 additions & 2 deletions dali/operators/audio/nonsilence_op.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -102,7 +102,7 @@ class NonsilenceOperatorCpu : public NonsilenceOperator<CPUBackend> {
auto &tp = ws.GetThreadPool();
auto in_shape = input.shape();
for (int sample_id = 0; sample_id < curr_batch_size; sample_id++) {
-tp.AddWork(
+tp.AddTask(
[&, sample_id](int thread_id) {
detail::Args<InputType> args;
args.input = view<const InputType, 1>(input[sample_id]);
4 changes: 2 additions & 2 deletions dali/operators/audio/preemphasis_filter_op.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -63,7 +63,7 @@ void PreemphasisFilterCPU::RunImplTyped(Workspace &ws) {
auto nsamples = shape.num_samples();

for (int sample_id = 0; sample_id < nsamples; sample_id++) {
-tp.AddWork(
+tp.AddTask(
[this, &output, &input, sample_id](int thread_id) {
const auto *in_ptr = input.tensor<InputType>(sample_id);
auto *out_ptr = output.mutable_tensor<OutputType>(sample_id);
4 changes: 2 additions & 2 deletions dali/operators/audio/resample.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -136,7 +136,7 @@ class ResampleCPU : public ResampleBase<CPUBackend> {
auto &tp = ws.GetThreadPool();
in_fp32.resize(tp.NumThreads());
for (int s = 0; s < N; s++) {
-tp.AddWork([&, this, s](int thread_idx) {
+tp.AddTask([&, this, s](int thread_idx) {
InTensorCPU<float> in_view;
TYPE_SWITCH(in.type(), type2id, T, (AUDIO_RESAMPLE_TYPES),
(in_view = ConvertInput(in_fp32[thread_idx], view<const T>(in[s]));),
4 changes: 2 additions & 2 deletions dali/operators/bbox/bb_flip.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -50,7 +50,7 @@ void BbFlipCPU::RunImpl(Workspace &ws) {
TensorLayout layout = ltrb_ ? "xyXY" : "xyWH";

for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) {
-tp.AddWork(
+tp.AddTask(
[&, sample_idx](int thread_id) {
bool vertical = vert_[sample_idx].data[0];
bool horizontal = horz_[sample_idx].data[0];
4 changes: 2 additions & 2 deletions dali/operators/decoder/audio/audio_decoder_op.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -148,7 +148,7 @@ void AudioDecoderCpu::DecodeBatch(Workspace &ws) {
scratch_resampler_.resize(tp.NumThreads());

for (int i = 0; i < batch_size; i++) {
-tp.AddWork([&, i](int thread_id) {
+tp.AddTask([&, i](int thread_id) {
try {
DecodeSample<OutputType>(decoded_output[i], thread_id, i);
sample_rate_output[i].data[0] = use_resampling_