CUB operators (#158)

NVIDIA · Mar 24, 2022 · 9e9c5c2
1 parent d1d5feb
commit 9e9c5c2
Show file tree

Hide file tree

Showing 18 changed files with 733 additions and 474 deletions.
diff --git a/bench/00_operators/reduction.cu b/bench/00_operators/reduction.cu
@@ -55,11 +55,11 @@ void reduce_0d_cub(nvbench::state &state, nvbench::type_list<ValueType>)
   auto xv2 = make_tensor<ValueType>();
   xv.PrefetchDevice(0);
 
-  cub_reduce<decltype(xv2), decltype(xv), CustomSum>(xv2, xv, 0.0f, 0);
+  sum(xv2, xv, 0);
 
   state.exec( 
     [&xv, &xv2](nvbench::launch &launch) {
-      cub_reduce<decltype(xv2), decltype(xv), CustomSum>(xv2, xv, 0.0f, (cudaStream_t)launch.get_stream());
+      sum(xv2, xv, (cudaStream_t)launch.get_stream());
     });
 
 }

diff --git a/examples/convolution.cu b/examples/convolution.cu
@@ -66,11 +66,11 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   using FilterType = float;
 
   // Create data objects
-  auto inView = make_static_tensor<InType, batches, numSamples>();
-  auto outView = make_static_tensor<OutType, batches, numSamples + filterLen - 1>();
-  auto solView = make_static_tensor<InType, batches, numSamples + filterLen - 1>();
+  auto inView = make_tensor<InType>({batches, numSamples});
+  auto outView = make_tensor<OutType>({batches, numSamples + filterLen - 1});
+  auto solView = make_tensor<InType>({batches, numSamples + filterLen - 1});
+  auto filterView = make_tensor<FilterType>({filterLen});
 
-  auto filterView = make_static_tensor<FilterType, filterLen >();
 
   // initialize input data
   for (index_t b = 0; b < batches; b++) {

diff --git a/examples/spectrogram.cu b/examples/spectrogram.cu
@@ -80,15 +80,15 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   std::array<index_t, 1> half_win{nfft / 2 + 1};
   std::array<index_t, 1> s_time_shape{(N - noverlap) / nstep};
 
-  auto time = make_static_tensor<float, N>();
-  auto modulation = make_static_tensor<float, N>();
-  auto carrier = make_static_tensor<float, N>();
-  auto noise = make_static_tensor<float, N>();
-  auto x = make_static_tensor<float, N>();
+  auto time = make_tensor<float>({N});
+  auto modulation = make_tensor<float>({N});
+  auto carrier = make_tensor<float>({N});
+  auto noise = make_tensor<float>({N});
+  auto x = make_tensor<float>({N});
 
-  auto freqs = make_static_tensor<float, nfft / 2 + 1>();
+  auto freqs = make_tensor<float>({nfft / 2 + 1});
   auto fftStackedMatrix = make_tensor<complex>({(N - noverlap) / nstep, nfft / 2 + 1});
-  auto s_time = make_static_tensor<float,(N - noverlap) / nstep>();
+  auto s_time = make_tensor<float>({(N - noverlap) / nstep});
 
   randomGenerator_t<float> randData({N}, 0);
   auto randDataView = randData.GetTensorView<1>(num_samps, NORMAL);