### Numba vs NumPy

*Notes:*
- Cells need to be run twice so that the timings exclude JIT compilation overhead.

In [1]:
# Make the rolling_ta package importable from this notebook.
import sys
import os

# The package is assumed to live two directories above the current working
# directory; abspath resolves the relative path against os.getcwd().
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))

# Register the directory only once so re-running the cell adds no duplicates.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
from rolling_ta.env import NUMBA_DISK_CACHING
from rolling_ta.extras.numba import _empty, _shift, _prefix_sum, _mean
from rolling_ta.logging import logger
import numpy as np

from time import time

In [3]:
# Echo the NUMBA_DISK_CACHING setting imported from rolling_ta.env so the
# conditions of this benchmark run are recorded next to the timings.
NUMBA_DISK_CACHING

False

In [4]:
# Large benchmark input: one billion random samples. np.random.rand returns
# float64, so this array alone needs roughly 8 GB of RAM.
# NOTE(review): no seed is set, so the values differ from run to run.
sample_data = np.random.rand(1_000_000_000)
# Three-element float64 array used for the warm-up calls in the next cell.
dummy_data = np.array([1,2,3], dtype=np.float64)

In [5]:

# Precompile: call every helper once so that JIT compilation happens here
# rather than inside the timed cells below.
# _empty is exercised both without and with its second positional argument.
_empty(1_000_000, dtype=np.float64)
_empty(1_000_000, 1_000_000, dtype=np.float64)
_prefix_sum(dummy_data)
_shift(dummy_data)
# Last expression of the cell, so its return value is what the cell displays.
_mean(dummy_data)

DEBUG - bytecode dump:
>          0	NOP(arg=None, lineno=101)
           2	RESUME(arg=0, lineno=101)
           4	LOAD_GLOBAL(arg=1, lineno=112)
          14	LOAD_ATTR(arg=2, lineno=112)
          34	LOAD_FAST(arg=0, lineno=112)
          36	LOAD_FAST(arg=2, lineno=112)
          38	KW_NAMES(arg=1, lineno=112)
          40	CALL(arg=2, lineno=112)
          48	STORE_FAST(arg=3, lineno=112)
          50	LOAD_GLOBAL(arg=5, lineno=113)
          60	LOAD_ATTR(arg=6, lineno=113)
          80	LOAD_FAST(arg=1, lineno=113)
          82	CALL(arg=1, lineno=113)
          90	GET_ITER(arg=None, lineno=113)
>         92	FOR_ITER(arg=7, lineno=113)
          96	STORE_FAST(arg=4, lineno=113)
          98	LOAD_CONST(arg=2, lineno=114)
         100	LOAD_FAST(arg=3, lineno=114)
         102	LOAD_FAST(arg=4, lineno=114)
         104	STORE_SUBSCR(arg=None, lineno=114)
         108	JUMP_BACKWARD(arg=9, lineno=114)
>        110	END_FOR(arg=None, lineno=113)
         112	LOAD_FAST(arg=3, lineno=115)
         

2.0

In [6]:
%%timeit
# NumPy baseline: allocate an uninitialised array of 1,000,000 elements (default dtype).
np.empty(1_000_000)

10.1 μs ± 123 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [7]:
%%timeit
# rolling_ta helper timed against the np.empty call in the previous cell.
_empty(1_000_000, dtype=np.float64)

66.1 μs ± 8.9 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [9]:
%%timeit
# NumPy reference: zero-filled float64 allocation of the same length.
np.zeros(1_000_000, dtype=np.float64)

8.35 μs ± 245 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [10]:
%%timeit
# NumPy baseline: cumulative sum over the full sample_data array.
sample_data.cumsum()

4.11 s ± 394 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
%%timeit
# rolling_ta helper timed against sample_data.cumsum() in the previous cell.
# NOTE(review): presumably returns the same cumulative sum — confirm the outputs match.
_prefix_sum(sample_data)

3.02 s ± 52 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
%%timeit
# NumPy baseline: arithmetic mean of sample_data.
sample_data.mean()

1.36 s ± 27.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
%%timeit
# rolling_ta helper timed against sample_data.mean() in the previous cell.
_mean(sample_data)

323 ms ± 12.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
%%timeit
# NumPy baseline: circular shift of sample_data by one position; np.roll
# returns a new array with the elements wrapped around.
np.roll(sample_data, -1)

1.68 s ± 39.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
%%timeit
# rolling_ta helper timed against the np.roll call in the previous cell.
# NOTE(review): np.roll wraps elements around; confirm that _shift's default
# direction and edge handling are comparable before treating this as like-for-like.
_shift(sample_data)

1.75 s ± 76.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
