<a href="https://colab.research.google.com/github/NeuroLife77/short_code_optimization_experiments/blob/main/envelope_band_pass_runtime.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Testing the runtime of computing the envelope of a band-passed signal using scipy vs pytorch

If we have N signals of length M then:

* Conclusion for band-pass operation:

 Scipy is a better choice for N=1 and M < 1e4, it might still be a better option (marginally) for N < 3 and M < 1e3 if we compare it with pytorch on CPU but not GPU. This is likely due to scipy convolve not allowing vectorized execution (forcing a python loop).

* Conclusion for hilbert transform operation:

 Scipy is a better choice for N=1 and M < 8e5. It is still a better choice for N < 2 and M < 1e4. The signal size at which it remains faster with increasing N decreases quickly with it being faster at M < 3e2 with N < 250, but after that pytorch is a better option, even the CPU version.


 * Conclusion for both operations together:

 Scipy is a better choice for N = 1 and M < 6e2 and  N < 1 and M < 3e2 otherwise pytorch is a better choice. As N increases even the pytorch CPU version gets much faster.






In [6]:
from scipy.signal import firwin
from math import ceil
from numpy import pi as npi
import numpy as np
import torch
def get_fir_window(band, ripple, width,  fs, scale=True, **kwargs):
    """
    Shamelessly stolen from
    https://github.com/scipy/scipy/blob/v0.14.0/scipy/signal/fir_filter_design.py#L85
    and
    https://github.com/scipy/scipy/blob/v0.14.0/scipy/signal/fir_filter_design.py#L29
    """
    width_norm = width/(0.5*fs)
    a = abs(ripple)
    if a < 8:
        # Formula for N is not valid in this range.
        raise ValueError("Requested maximum ripple attentuation %f is too "
                            "small for the Kaiser formula." % a)
    if a > 50:
        beta = 0.1102 * (a - 8.7)
    elif a > 21:
        beta = 0.5842 * (a - 21) ** 0.4 + 0.07886 * (a - 21)
    else:
        beta = 0.0
    beta = round(beta,4)
    numtaps = (a - 7.95) / 2.285 / (npi * width_norm) + 1
    ntaps = int(ceil(numtaps))
    ntaps = ntaps + (1-ntaps%2)
    centered_band = [1e-5,fs//2 - 1]
    if band[0] is not None:
        centered_band[0] = band[0]-width/2
    if band[1] is not None:
        centered_band[1] = band[1]+width/2
    return firwin(ntaps, centered_band, window=("kaiser", beta), scale = scale, pass_zero=False,fs=fs,**kwargs)

def ensure_torch(x, type_float = False):
    try:
        x = torch.as_tensor(x)
        if type_float:
            x = x.float()
    except:
        try:
            x = torch.from_numpy(x)
        except:
            pass
    if type_float:
        try:
            x = x.float()
        except:
            pass
    return x

def ensure_numpy(x,**kwargs):

    try:
        x = x.detach()
    except:
        pass

    try:
        x = x.to('cpu')
    except:
        pass

    try:
        x = x.numpy()
    except:
        pass

    return x


In [38]:
from scipy.signal import convolve as spconvolve
from torchaudio.functional import convolve as tconvolve
from torchaudio.functional import fftconvolve as tfftconvolve
def band_pass_torchaudio(signal,band,ripple,width,fs, return_pad = 0.2, convolve_type = "auto", device="cpu", verbose = 1, return_signal_mask = True):

    win = ensure_torch(get_fir_window(band,ripple,width=width, fs = fs))
    if convolve_type == "fft" or (convolve_type == "auto" and len(win)*signal.shape[-1]>1e5 and abs(len(win)-signal.shape[-1])>1e2):
        convolve = tfftconvolve
    else:
        convolve = tconvolve

    win = win.to(device)

    E = win[win.shape[-1]//2:] ** 2
    E = E.cumsum(dim=-1)
    E = E / E.amax()
    iE99 = ((E-0.99).abs().argmin() / fs).item()
    edge_percent = 2*iE99 / (signal.shape[-1]/fs)
    if edge_percent>0.1:
        if verbose > 0:
            print(f"Start up and end transients represent {round(edge_percent*100,2)}% of your data.")

    pad_size = win.shape[0]//2
    mean_centered_signal = signal
    if len(mean_centered_signal.shape)<2:
        mean_centered_signal = mean_centered_signal.unsqueeze(0)
    signal_size = mean_centered_signal.shape[-1]
    mean_signal = mean_centered_signal.mean(-1, keepdims=True)
    mean_centered_signal = mean_centered_signal - mean_signal
    padded_signal = torch.cat([torch.zeros(*mean_centered_signal.shape[:-1],pad_size, device=device),mean_centered_signal,torch.zeros(*mean_centered_signal.shape[:-1],pad_size, device=device)], dim = -1).to(device)
    #print(padded_signal.shape)
    if len(padded_signal.shape)<2:
        padded_signal = padded_signal.unsqueeze(0)
    if return_pad is not None:
        if return_pad > 1:
            return_pad = 1/return_pad
        signal_start = 2*int(pad_size)
        remainin_pad_size = int(pad_size*return_pad)
        remaining_margin_start = signal_start-remainin_pad_size
        remaining_margin_end = signal_start + signal_size + remainin_pad_size
    else:
        signal_start = 2*int(pad_size)
        remainin_pad_size = 2*int(pad_size)
        remaining_margin_start = None
        remaining_margin_end = None
    win = win.view(*[1 for _ in range(len(padded_signal.shape[:-1]))],-1)
    filtered_signal = convolve(padded_signal,win, mode = "full")[...,remaining_margin_start:remaining_margin_end]
    signal_end = signal_size+remainin_pad_size
    signal_mask = torch.arange(remainin_pad_size,signal_end, dtype=int, device=device)
    if band[-1] is None:
        filtered_signal[...,signal_mask] = filtered_signal[...,signal_mask] + mean_signal
    if len(signal.shape)<2:
        filtered_signal = filtered_signal
    return filtered_signal.cpu(), signal_mask.cpu()

from torch.fft import fft, ifft, fftfreq
def get_hilbert_torch(signal, fs, device="cpu", use_pow2 = False, signal_mask = None):

    fft_fn = fft
    ifft_fn = ifft
    fftfreq_fn = fftfreq
    signal_size = signal.shape[-1]
    if use_pow2:
        dist_from_pow = (torch.log2(torch.tensor(signal_size)).ceil().exp2() - signal_size).detach().item()
        pad_start = int(dist_from_pow//2)
        pad_start_odd = int((dist_from_pow/2)>0)
        signal = torch.cat([torch.zeros(*signal.shape[:-1],pad_start, device=device),signal,torch.zeros(*signal.shape[:-1],pad_start+pad_start_odd, device=device)], dim = -1).to(device)
    else:
        pad_start = 0

    freqs = fftfreq_fn(signal.shape[-1],d=1/fs)
    signal_fft = fft_fn(signal)
    signal_fft[...,freqs<0] = 0
    signal_fft[...,freqs>0] = signal_fft[...,freqs>0]*2

    analytical_signal = ifft_fn(signal_fft)[...,pad_start:signal_size]
    if signal_mask is not None:
        analytical_signal = analytical_signal[...,signal_mask]

    return analytical_signal.cpu()


In [23]:
from scipy.fft import fft as spfft
from scipy.fft import ifft as spifft
from scipy.fft import fftfreq as spfftfreq

def band_pass_scipy(signal,band,ripple,width,fs, return_pad = 0.2, verbose = 0, axis = -1):
    if len(signal.shape) == 1:
        return band_pass_single(signal,band,ripple,width,fs,return_pad = return_pad, verbose = verbose)
    band_passed_signals = []
    band_passed_signals_masks = []
    temp_signal = signal
    if axis != -1:
        temp_signal = np.swapaxes(signal,axis, -1)
    for signal_index in range(temp_signal.shape[0]):
        signal_element = temp_signal[signal_index]
        if len(signal_element.shape)>1:
            signal_bandpassed, signal_bandpass_mask = band_pass_scipy(signal_element,band,ripple,width,fs, return_pad = return_pad, verbose = verbose, axis = -1)
        else:
            signal_bandpassed, signal_bandpass_mask = band_pass_single(signal_element,band,ripple,width,fs,return_pad = return_pad, verbose = verbose)
        signal_bandpassed = signal_bandpassed[None,...]
        signal_bandpass_mask = signal_bandpass_mask[None,...]
        band_passed_signals.append(signal_bandpassed)
        band_passed_signals_masks.append(signal_bandpass_mask)
    band_passed_signals = np.concatenate(band_passed_signals, axis = 0)
    band_passed_signals_masks = np.concatenate(band_passed_signals_masks, axis = 0)
    if axis != -1:
        band_passed_signals = np.swapaxes(band_passed_signals,axis, -1)
        band_passed_signals_masks = np.swapaxes(band_passed_signals_masks,axis, -1)
    return band_passed_signals, band_passed_signals_masks


def band_pass_single(signal,band,ripple,width,fs, return_pad = 0.2, verbose = 1):
    win = get_fir_window(band,ripple,width=width, fs = fs)

    E = win[win.shape[-1]//2:] ** 2
    E = np.cumsum(E, axis=-1)
    E = E / E.max()
    iE99 = (np.abs(E-0.99).argmin() / fs).item()
    edge_percent = 2*iE99 / (signal.shape[-1]/fs)
    if edge_percent>0.1:
        if verbose > 0:
            print(f"Start up and end transients represent {round(edge_percent*100,2)}% of your data.")

    pad_size = win.shape[0]//2
    mean_centered_signal = signal
    if len(mean_centered_signal.shape)<2:
        mean_centered_signal = mean_centered_signal[None,...]
    signal_size = mean_centered_signal.shape[-1]
    mean_signal = mean_centered_signal.mean(-1, keepdims=True)
    mean_centered_signal = mean_centered_signal - mean_signal
    padded_signal = np.concatenate([np.zeros((*mean_centered_signal.shape[:-1],pad_size)),mean_centered_signal,np.zeros((*mean_centered_signal.shape[:-1],pad_size))], axis = -1)
    #print(padded_signal.shape)
    if len(padded_signal.shape)<2:
        padded_signal = padded_signal[None,...]
    if return_pad is not None:
        if return_pad > 1:
            return_pad = 1/return_pad
        signal_start = 2*int(pad_size)
        remainin_pad_size = int(pad_size*return_pad)
        remaining_margin_start = signal_start-remainin_pad_size
        remaining_margin_end = signal_start + signal_size + remainin_pad_size
    else:
        signal_start = 2*int(pad_size)
        remainin_pad_size = 2*int(pad_size)
        remaining_margin_start = None
        remaining_margin_end = None
    win = win.reshape((*[1 for _ in range(len(padded_signal.shape[:-1]))],-1))
    filtered_signal = spconvolve(padded_signal,win, mode = "full")[...,remaining_margin_start:remaining_margin_end]
    signal_end = signal_size+remainin_pad_size
    signal_mask = np.arange(remainin_pad_size,signal_end, dtype=int)
    if band[-1] is None:
        filtered_signal[...,signal_mask] = filtered_signal[...,signal_mask] + mean_signal
    if len(signal.shape)<2:
        filtered_signal = np.squeeze(filtered_signal)
    return filtered_signal, signal_mask


def get_hilbert_scipy(signal, fs, use_pow2 = False, signal_mask = None, return_envelope = False):
    signal_size = signal.shape[-1]
    if use_pow2:
        dist_from_pow = (np.exp2(np.ceil(np.log2(signal.shape[-1]))) - signal.shape[-1]).item()
        pad_start = int(dist_from_pow//2)
        pad_start_odd = int((dist_from_pow/2)>0)
        signal = np.concatenate([np.zeros((*signal.shape[:-1],pad_start)),signal,np.zeros((*signal.shape[:-1],pad_start+pad_start_odd))], axis = -1)
    else:
        pad_start = 0

    freqs = spfftfreq(signal.shape[-1],d=1/fs)
    signal_fft = spfft(signal)
    signal_fft[...,freqs<0] = 0
    signal_fft[...,freqs>0] = signal_fft[...,freqs>0]*2

    analytical_signal = spifft(signal_fft)[...,pad_start:signal_size]
    if signal_mask is not None:
        analytical_signal = analytical_signal[...,signal_mask]

    if return_envelope:
        return np.abs(analytical_signal)
    return analytical_signal

In [27]:
signal_size = int(1e4)
signal = np.random.normal(size = (signal_size,))
%timeit _ = band_pass_scipy(signal,[10,20],60,1,600, return_pad = 0.2, verbose = 0)
signal = ensure_torch(signal)
%timeit _ = band_pass_torchaudio(signal,[10,20],60,1,600,return_pad = 0.2, convolve_type = "auto", device="cpu", verbose = 0, return_signal_mask = False)
signal = signal.to("cuda")
%timeit _ = band_pass_torchaudio(signal,[10,20],60,1,600,return_pad = 0.2, convolve_type = "auto", device="cuda", verbose = 0, return_signal_mask = False)

1.42 ms ± 316 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.84 ms ± 603 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.54 ms ± 90.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [26]:
signal_size = int(1e4)
signal = np.random.normal(size = (2,signal_size))
%timeit _ = band_pass_scipy(signal,[10,20],60,1,600, return_pad = 0.2, verbose = 0)
signal = ensure_torch(signal)
%timeit _ = band_pass_torchaudio(signal,[10,20],60,1,600,return_pad = 0.2, convolve_type = "auto", device="cpu", verbose = 0, return_signal_mask = False)
signal = signal.to("cuda")
%timeit _ = band_pass_torchaudio(signal,[10,20],60,1,600,return_pad = 0.2, convolve_type = "auto", device="cuda", verbose = 0, return_signal_mask = False)

2.74 ms ± 444 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.84 ms ± 494 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.61 ms ± 151 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [29]:
signal_size = int(3e2)
signal = np.random.normal(size = (2,signal_size))
%timeit _ = band_pass_scipy(signal,[10,20],60,1,600, return_pad = 0.2, verbose = 0)
signal = ensure_torch(signal)
%timeit _ = band_pass_torchaudio(signal,[10,20],60,1,600,return_pad = 0.2, convolve_type = "auto", device="cpu", verbose = 0, return_signal_mask = False)
signal = signal.to("cuda")
%timeit _ = band_pass_torchaudio(signal,[10,20],60,1,600,return_pad = 0.2, convolve_type = "auto", device="cuda", verbose = 0, return_signal_mask = False)

1.84 ms ± 170 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.05 ms ± 273 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.82 ms ± 145 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [31]:
signal_size = int(1e2)
signal = np.random.normal(size = (5,signal_size))
%timeit _ = band_pass_scipy(signal,[10,20],60,1,600, return_pad = 0.2, verbose = 0)
signal = ensure_torch(signal)
%timeit _ = band_pass_torchaudio(signal,[10,20],60,1,600,return_pad = 0.2, convolve_type = "auto", device="cpu", verbose = 0, return_signal_mask = False)
signal = signal.to("cuda")
%timeit _ = band_pass_torchaudio(signal,[10,20],60,1,600,return_pad = 0.2, convolve_type = "auto", device="cuda", verbose = 0, return_signal_mask = False)

4.4 ms ± 52.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.76 ms ± 178 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.78 ms ± 194 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [32]:
signal_size = int(1e4)
signal = np.random.normal(size = (25,signal_size))
%timeit _ = band_pass_scipy(signal,[10,20],60,1,600, return_pad = 0.2, verbose = 0)
signal = ensure_torch(signal)
%timeit _ = band_pass_torchaudio(signal,[10,20],60,1,600,return_pad = 0.2, convolve_type = "auto", device="cpu", verbose = 0, return_signal_mask = False)
signal = signal.to("cuda")
%timeit _ = band_pass_torchaudio(signal,[10,20],60,1,600,return_pad = 0.2, convolve_type = "auto", device="cuda", verbose = 0, return_signal_mask = False)

31.8 ms ± 1.16 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
8.35 ms ± 1.28 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.63 ms ± 369 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [34]:
def get_band_envelope_scipy(signal, fs = 600):
    signal_bandpassed, signal_bandpass_mask = band_pass_scipy(signal,[10,20],60,1,fs, return_pad = 0.2, verbose = 0)
    return get_hilbert_scipy(signal_bandpassed,fs, signal_mask=signal_bandpass_mask, return_envelope=True)

def get_band_envelope_torch(signal, fs = 600, device="cpu", use_pow2 = False):
    signal_bandpassed, signal_bandpass_mask = band_pass_torchaudio(signal,[10,20],60,1,fs, return_pad = 0.2, device=device,  convolve_type = "auto", verbose = 0, return_signal_mask = True)
    return get_hilbert_torch(signal_bandpassed, fs, device=device, use_pow2 = use_pow2, signal_mask = signal_bandpass_mask).abs()



In [55]:
signal_size = int(3e2)
signal = np.random.normal(size = (signal_size,))
%timeit _ = get_hilbert_scipy(signal, 600,  return_envelope=True)
signal = ensure_torch(signal)
%timeit _ = get_hilbert_torch(signal, 600).abs()
%timeit _ = get_hilbert_torch(signal, 600, use_pow2 = True).abs()
signal = signal.to("cuda")
%timeit _ = get_hilbert_torch(signal,600, device="cuda").abs().cpu()
%timeit _ = get_hilbert_torch(signal,600, device="cuda", use_pow2 = True).abs().cpu()


51.3 µs ± 1.24 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
197 µs ± 39.3 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
344 µs ± 73.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
388 µs ± 14.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
643 µs ± 35.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [62]:
signal_size = int(8e4)
signal = np.random.normal(size = (signal_size,))
%timeit _ = get_hilbert_scipy(signal, 600,  return_envelope=True)
signal = ensure_torch(signal)
%timeit _ = get_hilbert_torch(signal, 600).abs()
signal = signal.to("cuda")
%timeit _ = get_hilbert_torch(signal,600, device="cuda").abs().cpu()


4.18 ms ± 35.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
8.13 ms ± 1.23 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.73 ms ± 96.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [68]:
signal_size = int(5e4)
signal = np.random.normal(size = (2,signal_size))
%timeit _ = get_hilbert_scipy(signal, 600,  return_envelope=True)
signal = ensure_torch(signal)
%timeit _ = get_hilbert_torch(signal, 600).abs()
signal = signal.to("cuda")
%timeit _ = get_hilbert_torch(signal,600, device="cuda").abs().cpu()


4.72 ms ± 143 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
5.51 ms ± 996 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.9 ms ± 370 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [70]:
signal_size = int(1e4)
signal = np.random.normal(size = (2,signal_size))
%timeit _ = get_hilbert_scipy(signal, 600,  return_envelope=True)
signal = ensure_torch(signal)
%timeit _ = get_hilbert_torch(signal, 600).abs()
signal = signal.to("cuda")
%timeit _ = get_hilbert_torch(signal,600, device="cuda").abs().cpu()


1.06 ms ± 199 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.18 ms ± 110 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.25 ms ± 14.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [59]:
signal_size = int(3e2)
signal = np.random.normal(size = (250,signal_size))
%timeit _ = get_hilbert_scipy(signal, 600,  return_envelope=True)
signal = ensure_torch(signal)
%timeit _ = get_hilbert_torch(signal, 600).abs()
signal = signal.to("cuda")
%timeit _ = get_hilbert_torch(signal,600, device="cuda").abs().cpu()


2.06 ms ± 846 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.42 ms ± 120 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.51 ms ± 222 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [71]:
signal_size = int(6e2)
signal = np.random.normal(size = (250,signal_size))
%timeit _ = get_hilbert_scipy(signal, 600,  return_envelope=True)
signal = ensure_torch(signal)
%timeit _ = get_hilbert_torch(signal, 600).abs()
signal = signal.to("cuda")
%timeit _ = get_hilbert_torch(signal,600, device="cuda").abs().cpu()


4.49 ms ± 957 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
4.66 ms ± 100 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.46 ms ± 55.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [72]:
signal_size = int(3e2)
signal = np.random.normal(size = (signal_size,))
%timeit _ = get_band_envelope_scipy(signal)
signal = ensure_torch(signal)
%timeit _ = get_band_envelope_torch(signal)
signal = signal.to("cuda")
%timeit _ = get_band_envelope_torch(signal, device="cuda")


1.08 ms ± 118 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.62 ms ± 140 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.55 ms ± 218 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [82]:
signal_size = int(5e5)
signal = np.random.normal(size = (signal_size,))
%timeit _ = get_band_envelope_scipy(signal)
signal = ensure_torch(signal)
%timeit _ = get_band_envelope_torch(signal)
signal = signal.to("cuda")
%timeit _ = get_band_envelope_torch(signal, device="cuda")


132 ms ± 20.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
305 ms ± 18.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
104 ms ± 13.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [81]:
signal_size = int(3e2)
signal = np.random.normal(size = (2,signal_size))
%timeit _ = get_band_envelope_scipy(signal)
signal = ensure_torch(signal)
%timeit _ = get_band_envelope_torch(signal)
signal = signal.to("cuda")
%timeit _ = get_band_envelope_torch(signal, device="cuda")


2.03 ms ± 106 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.23 ms ± 255 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.4 ms ± 84.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [83]:
signal_size = int(3e2)
signal = np.random.normal(size = (3,signal_size))
%timeit _ = get_band_envelope_scipy(signal)
signal = ensure_torch(signal)
%timeit _ = get_band_envelope_torch(signal)
signal = signal.to("cuda")
%timeit _ = get_band_envelope_torch(signal, device="cuda")


3.01 ms ± 99.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.49 ms ± 119 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.47 ms ± 90.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [84]:
signal_size = int(3e3)
signal = np.random.normal(size = (100,signal_size))
%timeit _ = get_band_envelope_scipy(signal)
signal = ensure_torch(signal)
%timeit _ = get_band_envelope_torch(signal)
signal = signal.to("cuda")
%timeit _ = get_band_envelope_torch(signal, device="cuda")


421 ms ± 58 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
47.6 ms ± 2.8 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
33.3 ms ± 690 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
