### Numba vs NumPy

*Notes:*
- Cells need to be run twice to avoid JIT compilation overhead (or call the functions once in a cell above).

In [3]:
# Make the local rolling_ta checkout importable from this notebook.
import os
import sys

# rolling_ta is assumed to sit at the same level as tests/, i.e. two
# directories above the notebook's current working directory.
module_path = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir)
)

# Register the directory only when it is missing, so re-running this cell
# never piles up duplicate entries in sys.path.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [6]:
from rolling_ta.env import NUMBA_DISK_CACHING
# NOTE(review): `_empty` and `_shift` are called by benchmark cells further down
# but were never imported, so a fresh-kernel run raised NameError. They are
# assumed to live next to `_prefix_sum` / `_mean` — confirm the module path.
from rolling_ta.extras.numba import _empty, _mean, _prefix_sum, _shift
from rolling_ta.logging import logger
import numpy as np

from time import time

In [7]:
# Display the NUMBA_DISK_CACHING setting imported from rolling_ta.env.
NUMBA_DISK_CACHING

True

In [11]:
# Benchmark input: one billion uniform random samples in [0, 1).
# np.random.rand returns float64, so this single array needs roughly 8 GB of RAM.
sample_data = np.random.rand(1_000_000_000)
# Tiny float64 fixture holding 1.0 through 14.0; cheap enough for warm-up calls.
dummy_data = np.arange(1, 15, dtype=np.float64)

In [9]:

# Precompile: call each helper once on the tiny fixture so the one-off JIT
# compilation cost is paid here instead of inside the timed cells below.
# The last expression's value is what the cell displays.
_prefix_sum(dummy_data)
_mean(dummy_data)

2.0

In [14]:
%%timeit
# Time np.roll on the 14-element fixture: shift by one position with wrap-around.
# np.roll returns a new array; `dummy_data` itself is not modified.
window = np.roll(dummy_data, 1)

6.91 μs ± 224 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [6]:
%%timeit
# NumPy baseline: allocate an uninitialized one-million-element array (NumPy's default dtype).
np.empty(1_000_000)

10.1 μs ± 123 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [7]:
%%timeit
# Request the same number of float64 elements through the `_empty` helper,
# for comparison with the np.empty timing.
_empty(1_000_000, dtype=np.float64)

66.1 μs ± 8.9 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [9]:
%%timeit
# NumPy baseline: allocate a zero-filled float64 array of one million elements.
np.zeros(1_000_000, dtype=np.float64)

8.35 μs ± 245 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [10]:
%%timeit
# NumPy baseline: cumulative sum over the full sample array (returns a new array).
sample_data.cumsum()

4.11 s ± 394 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
%%timeit
# `_prefix_sum` from rolling_ta.extras.numba on the same input, for comparison
# with ndarray.cumsum() (presumably computes the same running total — confirm).
_prefix_sum(sample_data)

3.02 s ± 52 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
%%timeit
# NumPy baseline: arithmetic mean of the full sample array.
sample_data.mean()

1.36 s ± 27.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
%%timeit
# `_mean` from rolling_ta.extras.numba on the same input, for comparison with ndarray.mean().
_mean(sample_data)

323 ms ± 12.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
%%timeit
# NumPy baseline: shift the full array left by one position with wrap-around.
# np.roll builds and returns a new array on every call.
np.roll(sample_data, -1)

1.68 s ± 39.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
%%timeit
# `_shift` helper on the same input, for comparison with the np.roll timing.
# NOTE(review): its shift direction, fill value and whether it mutates its
# argument are not visible in this notebook — confirm before comparing results.
_shift(sample_data)

1.75 s ± 76.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%%timeit
sample_data[1:] = sample_data[:-1]
sample_data[-1] = 0

958 ms ± 22.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
