Resampling decoder (#1582)

* Add unit tests for resampling functions. * Fix window function generator. * Fix multi-channel resampling. * Add chunked resampling. * Add output conversion to resampling. * Essentially rewrite the operator: * Add per-sample sampling rate * Add explicit downmixing option * Add multi-channel resampling * Rework the operator * Add configurable resampling quality. * Check sampling rates when resampling. * Add python test. Signed-off-by: Michal Zientkiewicz <michalz@nvidia.com> Co-authored-by: Michał Szołucha <mszolucha@nvidia.com>
NVIDIA · Dec 19, 2019 · ae6e0ee · ae6e0ee
1 parent 89c6bed
commit ae6e0ee
Show file tree

Hide file tree

Showing 12 changed files with 873 additions and 70 deletions.
diff --git a/dali/kernels/imgproc/resample/resampling_windows.h b/dali/kernels/imgproc/resample/resampling_windows.h
@@ -18,6 +18,7 @@
 #include <cuda_runtime.h>
 #include <functional>
 #include "dali/kernels/kernel.h"
+#include "dali/core/math_util.h"
 
 namespace dali {
 namespace kernels {
@@ -40,10 +41,6 @@ inline __host__ __device__ float RectangularWindow(float x) {
   return -0.5f <= x && x < 0.5f ? 1 : 0;
 }
 
-inline __host__ __device__ float sinc(float x) {
-  return x ? sinf(x * M_PI) / (x * M_PI) : 1;
-}
-
 inline __host__ __device__ float LanczosWindow(float x, float a) {
   if (fabsf(x) >= a)
     return 0.0f;

diff --git a/dali/kernels/signal/downmixing.h b/dali/kernels/signal/downmixing.h
@@ -0,0 +1,135 @@
+// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef DALI_KERNELS_SIGNAL_DOWNMIXING_H_
+#define DALI_KERNELS_SIGNAL_DOWNMIXING_H_
+
+#include <cassert>
+#include <vector>
+#include "dali/core/convert.h"
+#include "dali/core/small_vector.h"
+#include "dali/core/span.h"
+#include "dali/core/static_switch.h"
+
+namespace dali {
+namespace kernels {
+namespace signal {
+
+/**
+ * @brief Downmix interleaved signals to a single channel.
+ *
+ * @param out               output buffer (single channel)
+ * @param in                input buffer (interleaved multiple channels)
+ * @param num_samples       number of samples in each channel
+ * @param channels          number of input channels
+ * @param weights           weights used for downmixing
+ * @param normalize_weights if true, the weights are normalized so their sum is 1
+ * @tparam Out output sample type - if integral, the intermediate floating point representation
+ *         is stretched so that 0..1 or -1..1 range occupies the whole Out range.
+ * @tparam In input sample type - if integral, it's normalized to 0..1 or -1..1 range
+ * @tparam static_channels compile-time number of channels
+ *
+ * Downmix interleaved signals to a single channel, using the weights provided.
+ * If `normalize_weights` is true, the weights are copied into intermediate buffer
+ * and divided by their sum.
+ *
+ * @remarks The operation can be done in place if output and input are of the same type.
+ */
+template <int static_channels = -1, typename Out, typename In>
+void DownmixChannels(
+    Out *out, const In *in, int64_t samples, int channels,
+    const float *weights, bool normalize_weights = false) {
+  SmallVector<float, 8> normalized_weights;  // 8 channels should be enough for 7.1 audio
+  static_assert(static_channels != 0, "Number of channels cannot be zero."
+                                      "Use negative values to use run-time value");
+  int actual_channels = static_channels < 0 ? channels : static_channels;
+  assert(actual_channels == channels);
+  assert(actual_channels > 0);
+  if (normalize_weights) {
+    double sum = 0;
+    for (int i = 0; i < channels; i++)
+      sum += weights[i];
+    normalized_weights.resize(channels);
+    for (int i = 0; i < channels; i++) {
+      normalized_weights[i] = weights[i] / sum;
+    }
+    weights = normalized_weights.data();  // use this pointer now
+  }
+  for (int64_t o = 0, i = 0; o < samples; o++, i += channels) {
+    float sum = ConvertNorm<float>(in[i]) * weights[0];
+    for (int c = 1; c < channels; c++) {
+      sum += ConvertNorm<float>(in[i + c]) * weights[c];
+    }
+    out[o] = ConvertSatNorm<Out>(sum);
+  }
+}
+
+/**
+ * @brief Downmix data to a single channel.
+ *
+ * @param out               output buffer (single channel)
+ * @param in                input buffer (interleaved multiple channels)
+ * @param num_samples       number of samples in each channel
+ * @param channels          number of input channels
+ * @param weights           weights used for downmixing
+ * @param normalize_weights if true, the weights are normalized so their sum is 1
+ * @tparam Out output sample type - if integral, the intermediate floating point representation
+ *         is stretched so that 0..1 or -1..1 range occupies the whole Out range.
+ * @tparam In input sample type - if integral, it's normalized to 0..1 or -1..1 range
+ *
+ * Downmix interleaved signals to a single channel, using the weights provided.
+ * If `normalize_weights` is true, the weights are copied into intermediate buffer
+ * and divided by their sum.
+ *
+ * @remarks The operation can be done in place if output and input are of the same type.
+ */
+template <typename Out, typename In>
+void Downmix(
+    Out *out, const In *in, int64_t samples, int channels,
+    const float *weights, bool normalize_weights = false) {
+  VALUE_SWITCH(channels, static_channels, (1, 2, 3, 4, 5, 6, 7, 8),
+    (DownmixChannels<static_channels>(out, in, samples, static_channels,
+                                      weights, normalize_weights);),
+    (DownmixChannels(out, in, samples, channels, weights, normalize_weights);)
+  );  // NOLINT
+}
+
+template <typename Out, typename In>
+void Downmix(Out *out, const In *in, int64_t num_samples, int num_channels) {
+  SmallVector<float, 8> weights;
+  weights.resize(num_channels, 1.0f / num_channels);
+  Downmix(out, in, num_samples, num_channels, weights.data());
+}
+
+
+template <typename Out, typename In>
+void Downmix(span<Out> out, span<const In> in,
+             const std::vector<float> &weights, bool normalize_weights = false) {
+  int num_channels = weights.size();
+  assert(in.size() % num_channels == 0);
+  Downmix(out.data(), in.data(), in.size() / num_channels, weights, normalize_weights);
+}
+
+
+template <typename Out, typename In>
+void Downmix(span<Out> out, span<const In> in, int num_channels) {
+  assert(in.size() % num_channels == 0);
+  Downmix(out.data(), in.data(), in.size() / num_channels, num_channels);
+}
+
+}  // namespace signal
+}  // namespace kernels
+}  // namespace dali
+
+#endif  // DALI_KERNELS_SIGNAL_DOWNMIXING_H_
diff --git a/dali/kernels/signal/downmixing_test.cc b/dali/kernels/signal/downmixing_test.cc
@@ -0,0 +1,59 @@
+// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include <vector>
+#include <numeric>
+#include "dali/kernels/signal/downmixing.h"
+
+namespace dali {
+namespace kernels {
+namespace signal {
+
+TEST(SignalDownmixingTest, RawPointer_Weighted) {
+  std::vector<float> in = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  int nchannels = 3;
+  std::vector<float> weights = {3, 2, 1};
+  float sum = std::accumulate(weights.begin(), weights.end(), 0);
+  std::vector<float> ref = {
+     (1 * 3 +  2 * 2 +  3) / sum,
+     (4 * 3 +  5 * 2 +  6) / sum,
+     (7 * 3 +  8 * 2 +  9) / sum,
+    (10 * 3 + 11 * 2 + 12) / sum
+  };
+  std::vector<float> out;
+  out.resize(ref.size());
+
+  Downmix(out.data(), in.data(), in.size() / nchannels, nchannels, weights.data(), true);
+
+  for (size_t i = 0; i < ref.size(); i++) {
+    EXPECT_FLOAT_EQ(out[i], ref[i]);
+  }
+}
+
+TEST(SignalDownmixingTest, Span_DefaultWeights) {
+  std::vector<float> in = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  int nchannels = 3;
+  std::vector<float> ref = {2, 5, 8, 11};
+
+  Downmix(make_span(in), make_cspan(in), nchannels);
+
+  for (size_t i = 0; i < ref.size(); i++) {
+    EXPECT_FLOAT_EQ(in[i], ref[i]);
+  }
+}
+
+}  // namespace signal
+}  // namespace kernels
+}  // namespace dali