Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PowerSpectrum CPU operator #1460

Merged
merged 33 commits into from
Nov 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
85b89a3
[WIP] FFT CPU kernel impl
jantonguirao Oct 31, 2019
354c463
WIP
jantonguirao Oct 31, 2019
dfb9a5d
FFT CPU kernel impl (ongoing)
jantonguirao Nov 4, 2019
c0632dc
Complete FFT tests against naive DFT implementation. Lift constraint …
jantonguirao Nov 5, 2019
cecc7d4
Support different data layouts for FFT (transformation axis can now b…
jantonguirao Nov 6, 2019
5260ce3
Move FFT1D to kernels::signal
jantonguirao Nov 7, 2019
a04c990
Rework Get1DSlices
jantonguirao Nov 7, 2019
4074f0e
Add pimpl based implementation for FFT 1D CPU
jantonguirao Nov 7, 2019
b18eeda
Code review fixes
jantonguirao Nov 8, 2019
aa6ef1c
Using std::complex as OutputType in FFT kernel (when calculating comp…
jantonguirao Nov 8, 2019
f24ed28
Code review fixes
jantonguirao Nov 12, 2019
fe50a5f
Removing log power spectrum case
jantonguirao Nov 12, 2019
6d59748
Fix typo
jantonguirao Nov 12, 2019
b40ace7
Fix typo
jantonguirao Nov 13, 2019
4bbbf82
Code review fixes
jantonguirao Nov 14, 2019
f09f8db
Introduce ForAxis
jantonguirao Nov 14, 2019
2bb2839
Moving utils
jantonguirao Nov 14, 2019
7ff9bd5
Review fixes
jantonguirao Nov 14, 2019
b33ebf7
Fix lint
jantonguirao Nov 14, 2019
718dd94
Avoid leaking ffts symbols
jantonguirao Nov 15, 2019
6cef17a
[WIP] FFT operator
jantonguirao Nov 6, 2019
c805211
WIP FFT CPU operator implementation
jantonguirao Nov 7, 2019
9c7852b
Adjust to kernels::signal namespace
jantonguirao Nov 7, 2019
6e418e7
WIP
jantonguirao Nov 8, 2019
0ab471b
Add PowerSpectrum (with tests)
jantonguirao Nov 12, 2019
bbfa604
lint
jantonguirao Nov 15, 2019
45d5bb6
Fix previous rebase
jantonguirao Nov 20, 2019
d18d3f5
Move audio/fft to signal/fft
jantonguirao Nov 20, 2019
023f414
Add power as integer, add support for 1D input
jantonguirao Nov 20, 2019
eea5e0d
Minor fixes
jantonguirao Nov 20, 2019
2348acf
Code review fixes
jantonguirao Nov 25, 2019
1994b38
Merge remote-tracking branch 'upstream/master' into fft_op_impl
jantonguirao Nov 26, 2019
a21a95a
Apply fixes from code review
jantonguirao Nov 26, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions dali/kernels/signal/fft/fft_cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ void Fft1DCpu<OutputType, InputType, Dims>::Run(
impl_->Run(context, out, in, args);
}

// Explicit instantiations of Fft1DCpu. The member functions are defined in this
// translation unit, so each <OutputType, InputType, Dims> combination that callers
// use has to be instantiated here.

// 1 Dim, typically input (time), producing output (frequency)
template class Fft1DCpu<std::complex<float>, float, 1>; // complex fft
template class Fft1DCpu<float, float, 1>; // magnitude

// 2 Dims, typically input (channels, time), producing output (channels, frequency)
template class Fft1DCpu<std::complex<float>, float, 2>; // complex fft
template class Fft1DCpu<float, float, 2>; // magnitude
Expand Down
5 changes: 5 additions & 0 deletions dali/kernels/signal/fft/fft_cpu_impl_ffts.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,14 @@ void Fft1DImplFfts<OutputType, InputType, Dims>::Run(
});
}

// Explicit instantiations of Fft1DImplFfts, one pair per supported number of dimensions.
// Template arguments are <OutputType, InputType, Dims>.

// 1 Dim, typically input (time), producing output (frequency)
template class Fft1DImplFfts<std::complex<float>, float, 1>; // complex fft
template class Fft1DImplFfts<float, float, 1>; // magnitude

// 2 Dims, typically input (channels, time), producing output (channels, frequency)
template class Fft1DImplFfts<std::complex<float>, float, 2>;  // complex fft
template class Fft1DImplFfts<float, float, 2>;  // magnitude

// 3 Dims, typically input (channels, frames, time), producing output (channels, frames, frequency)
template class Fft1DImplFfts<std::complex<float>, float, 3>;  // complex fft
template class Fft1DImplFfts<float, float, 3>;  // magnitude
Expand Down
1 change: 1 addition & 0 deletions dali/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ add_subdirectory(paste)
add_subdirectory(reader)
add_subdirectory(resize)
add_subdirectory(sequence)
add_subdirectory(signal)
add_subdirectory(support)
add_subdirectory(transpose)
add_subdirectory(util)
Expand Down
21 changes: 21 additions & 0 deletions dali/operators/signal/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The fft operators are only added to the build when BUILD_FFTS is set.
# NOTE(review): presumably because they rely on the FFTS-backed kernels - confirm
# against the top-level build options.
if (BUILD_FFTS)
add_subdirectory(fft)
endif()
szalpal marked this conversation as resolved.
Show resolved Hide resolved

# Register this directory's headers, sources and test sources with the parent scope.
# NOTE(review): the collect_* helpers are defined elsewhere in the project; presumably
# they append the matching files to the named list variable - confirm against their definition.
collect_headers(DALI_INST_HDRS PARENT_SCOPE)
collect_sources(DALI_OPERATOR_SRCS PARENT_SCOPE)
collect_test_sources(DALI_OPERATOR_TEST_SRCS PARENT_SCOPE)
17 changes: 17 additions & 0 deletions dali/operators/signal/fft/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Register the fft operator headers, sources and test sources with the parent scope.
# NOTE(review): the collect_* helpers are defined elsewhere in the project; presumably
# they append the matching files to the named list variable - confirm against their definition.
collect_headers(DALI_INST_HDRS PARENT_SCOPE)
collect_sources(DALI_OPERATOR_SRCS PARENT_SCOPE)
collect_test_sources(DALI_OPERATOR_TEST_SRCS PARENT_SCOPE)
104 changes: 104 additions & 0 deletions dali/operators/signal/fft/power_spectrum.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "dali/operators/signal/fft/power_spectrum.h"
#include "dali/core/static_switch.h"
#include "dali/kernels/signal/fft/fft_cpu.h"
#include "dali/pipeline/data/views.h"

// Sample dimensionalities the operator dispatches on; used as the case list of the
// VALUE_SWITCH statements in SetupImpl and RunImpl. Any other number of dimensions
// is rejected with DALI_FAIL.
#define FFT_SUPPORTED_NDIMS (1, 2, 3)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are only these particular dims supported? Is it a restriction of the kernel?

Copy link
Contributor Author

@jantonguirao jantonguirao Nov 26, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it is restricted on the kernel side. Also, this covers what makes sense for PowerSpectrum (and a little beyond). You are calculating the power spectrum of either a 1D signal or a 2D one, with the second dimension being the number of channels. 3 dims is actually quite unlikely, but one might have the windows already extracted (or provided externally) and use the PowerSpectrum operator to generate the spectrogram.


// Number of inputs and outputs declared in the PowerSpectrum schema; kNumOutputs is
// also used to size the output descriptors in SetupImpl.
static constexpr int kNumInputs = 1;
static constexpr int kNumOutputs = 1;

namespace dali {

// Operator schema for PowerSpectrum: declares the number of inputs/outputs and the
// optional `nfft`, `axis` and `power` arguments together with their default values.
// The raw-string continuation lines start at column 0 on purpose, so that no
// indentation leaks into the generated documentation.
DALI_SCHEMA(PowerSpectrum)
  .DocStr(R"code(Power spectrum of signal.)code")
  .NumInput(kNumInputs)
  .NumOutput(kNumOutputs)
  .AddOptionalArg("nfft",
    R"code(Size of the FFT. By default nfft is selected to match the length of the data in the
transformation axis. The number of bins created in the output is `nfft // 2 + 1` (positive part
of the spectrum only).)code",
    -1)
  .AddOptionalArg("axis",
    R"code(Index of the dimension to be transformed to the frequency domain. By default, the
last dimension is selected.)code",
    -1)
  .AddOptionalArg("power",
    R"code(Exponent of the fft magnitude: Supported values are `2` for power spectrum
(`real*real + imag*imag`) and `1` for complex magnitude (`sqrt(real*real + imag*imag)`).)code",
    2);

/**
 * @brief Sets up the FFT kernel for every sample and fills in the output description.
 *
 * The kernel type is selected from the number of dimensions of the input samples
 * (one of FFT_SUPPORTED_NDIMS). For each sample the kernel's Setup is called and the
 * output shape it reports is stored in `output_desc[0]`; the output type is float.
 *
 * @param output_desc resized to kNumOutputs and filled with the output type and shapes
 * @param ws          workspace providing the input batch and the thread pool
 * @return always true
 *
 * Fails through DALI_FAIL when the number of sample dimensions is not supported.
 */
template <>
bool PowerSpectrum<CPUBackend>::SetupImpl(std::vector<OutputDesc> &output_desc,
                                          const workspace_t<CPUBackend> &ws) {
  output_desc.resize(kNumOutputs);
  const auto &input = ws.InputRef<CPUBackend>(0);
  kernels::KernelContext ctx;
  auto in_shape = input.shape();
  int nsamples = input.size();
  auto nthreads = ws.GetThreadPool().size();

  // Other types not supported for now
  using InputType = float;
  using OutputType = float;
  VALUE_SWITCH(in_shape.sample_dim(), Dims, FFT_SUPPORTED_NDIMS, (
    using FftKernel = kernels::signal::fft::Fft1DCpu<OutputType, InputType, Dims>;
    kmgr_.Initialize<FftKernel>();
    kmgr_.Resize<FftKernel>(nthreads, nsamples);
    output_desc[0].type = TypeInfo::Create<OutputType>();
    output_desc[0].shape.resize(nsamples, Dims);
    for (int i = 0; i < nsamples; i++) {
      const auto in_view = view<const InputType, Dims>(input[i]);
      auto &req = kmgr_.Setup<FftKernel>(i, ctx, in_view, fft_args_);
      output_desc[0].shape.set_tensor_shape(i, req.output_shapes[0][0].shape);
    }
  // Report the value that was actually dispatched on (the sample dimensionality).
  ), DALI_FAIL(make_string("Unsupported number of dimensions ", in_shape.sample_dim())));  // NOLINT

  return true;
}

/**
 * @brief Runs the FFT kernel on every sample of the batch using the workspace thread pool.
 *
 * One task per sample is submitted to the thread pool; each task creates views of its
 * input and output sample and runs the kernel instance that was prepared in SetupImpl.
 * The function returns after all submitted tasks have finished.
 *
 * @param ws workspace providing the input batch, the output batch and the thread pool
 *
 * Fails through DALI_FAIL when the number of sample dimensions is not supported.
 */
template <>
void PowerSpectrum<CPUBackend>::RunImpl(workspace_t<CPUBackend> &ws) {
  const auto &input = ws.InputRef<CPUBackend>(0);
  auto &output = ws.OutputRef<CPUBackend>(0);
  auto in_shape = input.shape();
  // Computed once: the shape object does not need to be rebuilt for every loop iteration.
  const int nsamples = in_shape.num_samples();
  auto& thread_pool = ws.GetThreadPool();
  // Other types not supported for now
  using InputType = float;
  using OutputType = float;
  VALUE_SWITCH(in_shape.sample_dim(), Dims, FFT_SUPPORTED_NDIMS, (
    using FftKernel = kernels::signal::fft::Fft1DCpu<OutputType, InputType, Dims>;

    for (int i = 0; i < nsamples; i++) {
      // `input` and `output` are captured by reference; they stay alive because
      // WaitForWork() below is reached before this function returns.
      thread_pool.DoWorkWithID(
        [this, &input, &output, i](int thread_id) {
          kernels::KernelContext ctx;
          auto in_view = view<const InputType, Dims>(input[i]);
          auto out_view = view<OutputType, Dims>(output[i]);
          kmgr_.Run<FftKernel>(thread_id, i, ctx, out_view, in_view, fft_args_);
        });
    }
  // Report the value that was actually dispatched on (the sample dimensionality).
  ), DALI_FAIL(make_string("Not supported number of dimensions: ", in_shape.sample_dim())));  // NOLINT

  thread_pool.WaitForWork();
}

// Registers PowerSpectrum<CPUBackend> as the CPU implementation of the PowerSpectrum operator.
DALI_REGISTER_OPERATOR(PowerSpectrum, PowerSpectrum<CPUBackend>, CPU);

} // namespace dali
63 changes: 63 additions & 0 deletions dali/operators/signal/fft/power_spectrum.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef DALI_OPERATORS_SIGNAL_FFT_POWER_SPECTRUM_H_
#define DALI_OPERATORS_SIGNAL_FFT_POWER_SPECTRUM_H_

#include <string>
#include <vector>
#include "dali/core/common.h"
#include "dali/kernels/kernel_manager.h"
#include "dali/kernels/signal/fft/fft_cpu.h"
#include "dali/pipeline/operator/common.h"
#include "dali/pipeline/operator/operator.h"

namespace dali {

template <typename Backend>
class PowerSpectrum : public Operator<Backend> {
public:
/**
 * @brief Builds the FFT arguments from the operator spec.
 *
 * Reads the `nfft`, `axis` and `power` arguments. `power` selects the spectrum type:
 * 1 -> FFT_SPECTRUM_MAGNITUDE, 2 -> FFT_SPECTRUM_POWER. Any other value is rejected
 * with DALI_FAIL.
 */
explicit PowerSpectrum(const OpSpec &spec)
: Operator<Backend>(spec) {
fft_args_.nfft = spec.GetArgument<int>("nfft");
fft_args_.transform_axis = spec.GetArgument<int>("axis");
int power = spec.GetArgument<int>("power");
szalpal marked this conversation as resolved.
Show resolved Hide resolved
// Translate the integer exponent into the kernel's spectrum type.
switch (power) {
case 1:
fft_args_.spectrum_type = kernels::signal::fft::FFT_SPECTRUM_MAGNITUDE;
break;
case 2:
fft_args_.spectrum_type = kernels::signal::fft::FFT_SPECTRUM_POWER;
break;
default:
DALI_FAIL(make_string("Power argument should be either `2` for power spectrum or `1` "
"for complex magnitude. Received: ", power));
}
}

protected:
bool CanInferOutputs() const override { return true; }
bool SetupImpl(std::vector<OutputDesc> &output_desc, const workspace_t<Backend> &ws) override;
void RunImpl(workspace_t<CPUBackend> &ws) override;

USE_OPERATOR_MEMBERS();
using Operator<Backend>::RunImpl;

// Owns the FFT kernel instances; used for Initialize/Resize/Setup in SetupImpl and Run in RunImpl.
kernels::KernelManager kmgr_;
// FFT arguments (nfft, transform axis, spectrum type) filled in by the constructor and
// passed to every kernel Setup and Run call.
kernels::signal::fft::FftArgs fft_args_;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
kernels::signal::fft::FftArgs fft_args_;
const kernels::signal::fft::FftArgs fft_args_;

This would provide a good sanity check that the args stay consistent across all the calls.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As you can see in the constructor, I need to set the members individually with a switch. I'd have to create temporaries instead if I marked this const. It is doable, but I think it would decrease readability.

};

} // namespace dali

#endif // DALI_OPERATORS_SIGNAL_FFT_POWER_SPECTRUM_H_
106 changes: 106 additions & 0 deletions dali/test/python/test_operator_power_spectrum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division
from nvidia.dali.pipeline import Pipeline
import nvidia.dali.ops as ops
import nvidia.dali.types as types
import nvidia.dali as dali
import numpy as np
from numpy.testing import assert_array_equal, assert_allclose
from functools import partial
from test_utils import check_batch
from test_utils import compare_pipelines
from test_utils import RandomDataIterator

class PowerSpectrumPipeline(Pipeline):
    """Pipeline that passes externally fed batches through ``ops.PowerSpectrum``.

    Batches are pulled from ``iterator`` on every iteration and fed to an
    ``ExternalSource``; when ``device`` is ``'gpu'`` the data is moved to the GPU
    before the operator is applied.
    """

    def __init__(self, device, batch_size, iterator, axis, nfft, num_threads=1, device_id=0):
        super(PowerSpectrumPipeline, self).__init__(batch_size, num_threads, device_id)
        self.device = device
        self.iterator = iterator
        self.inputs = ops.ExternalSource()
        self.fft = ops.PowerSpectrum(device=self.device, axis=axis, nfft=nfft)

    def define_graph(self):
        self.data = self.inputs()
        if self.device == 'gpu':
            fft_input = self.data.gpu()
        else:
            fft_input = self.data
        return self.fft(fft_input)

    def iter_setup(self):
        batch = self.iterator.next()
        self.feed_input(self.data, batch)

def power_spectrum_numpy(nfft, axis, waveform):
    """Reference power spectrum computed with NumPy.

    Args:
        nfft: FFT size passed to ``np.fft.fft`` as ``n``.
        axis: index of the axis that is transformed to the frequency domain.
        waveform: input array of any rank >= 1.

    Returns:
        Array holding ``real**2 + imag**2`` of the FFT, with the transformed axis
        cropped to its first ``nfft // 2 + 1`` bins; all other axes are unchanged.
    """
    fft_out = np.fft.fft(waveform, axis=axis, n=nfft)
    power_spectrum = fft_out.real ** 2 + fft_out.imag ** 2

    # Keep every axis whole except the transformed one. Building the index as a
    # tuple of slices works for any number of dimensions.
    index = [slice(None)] * power_spectrum.ndim
    index[axis] = slice(0, nfft // 2 + 1)
    return power_spectrum[tuple(index)]

class PowerSpectrumNumpyPipeline(Pipeline):
    """Reference pipeline that applies ``power_spectrum_numpy`` through ``ops.PythonFunction``.

    The pipeline is created with a fixed seed and with asynchronous and pipelined
    execution turned off.
    """

    def __init__(self, device, batch_size, iterator, axis, nfft,
                 num_threads=1, device_id=0):
        super(PowerSpectrumNumpyPipeline, self).__init__(
            batch_size, num_threads, device_id,
            seed=12345, exec_async=False, exec_pipelined=False)
        # The `device` argument is not used: the stored device is always "cpu".
        self.device = "cpu"
        self.iterator = iterator
        self.inputs = ops.ExternalSource()

        # Bind nfft and axis so the operator only has to supply the waveform.
        reference_fn = partial(power_spectrum_numpy, nfft, axis)
        self.power_spectrum = ops.PythonFunction(function=reference_fn)

    def define_graph(self):
        self.data = self.inputs()
        return self.power_spectrum(self.data)

    def iter_setup(self):
        batch = self.iterator.next()
        self.feed_input(self.data, batch)

def check_operator_power_spectrum(device, batch_size, input_shape, nfft, axis):
    """Compare the PowerSpectrum operator against the NumPy reference pipeline.

    Two float32 random-data iterators with the same shape feed the two pipelines,
    which are then compared for 5 iterations with a tolerance of 1e-04.
    """
    operator_data = RandomDataIterator(batch_size, shape=input_shape, dtype=np.float32)
    reference_data = RandomDataIterator(batch_size, shape=input_shape, dtype=np.float32)

    operator_pipe = PowerSpectrumPipeline(
        device, batch_size, iter(operator_data), axis=axis, nfft=nfft)
    reference_pipe = PowerSpectrumNumpyPipeline(
        device, batch_size, iter(reference_data), axis=axis, nfft=nfft)

    compare_pipelines(operator_pipe, reference_pipe,
                      batch_size=batch_size, N_iterations=5, eps=1e-04)

def test_operator_power_spectrum():
    """Yield one ``check_operator_power_spectrum`` case per (nfft, axis, shape) configuration."""
    devices = ['cpu']
    batch_sizes = [3]
    # (nfft, axis, input shape)
    configurations = [
        (16, 1, (2, 16)),
        (1024, 1, (1, 1024)),
        (1024, 0, (1024,)),
        (128, 1, (1, 100)),
        (128, 0, (100,)),
        (16, 0, (16, 2)),
        (8, 1, (2, 8, 2)),
    ]
    for device in devices:
        for batch_size in batch_sizes:
            for nfft, axis, shape in configurations:
                yield check_operator_power_spectrum, device, batch_size, shape, nfft, axis

if __name__ == "__main__":
    # Running the file directly checks a single CPU configuration.
    single_case = dict(device='cpu', batch_size=3, input_shape=(2, 1024), nfft=1024, axis=1)
    check_operator_power_spectrum(**single_case)
7 changes: 4 additions & 3 deletions dali/test/python/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,14 @@ def compare_pipelines(pipe1, pipe2, batch_size, N_iterations, eps = 1e-07):

class RandomDataIterator(object):
import_numpy()
def __init__(self, batch_size, shape=(10, 600, 800, 3)):
def __init__(self, batch_size, shape=(10, 600, 800, 3), dtype=np.uint8):
self.batch_size = batch_size
self.test_data = []
for _ in range(self.batch_size):
np.random.seed(0)
self.test_data.append(np.array(np.random.rand(*shape) * 255,
dtype = np.uint8 ) )
self.test_data.append(
np.array(np.random.rand(*shape) * (1.0 if dtype == np.float32 else 255),
dtype=dtype ) )

def __iter__(self):
self.i = 0
Expand Down