In [1]:
from rolling_ta.data import CSVLoader


import numpy as np
import numba as nb
from numba.types import Array, i4, f8

import pandas as pd



In [2]:
csv_loader = CSVLoader()
btc_data = csv_loader.read_resource()

DEBUG - CSVLoader: Loading from resources/btc_ohlcv.csv


In [3]:
btc_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   timestamp  10000 non-null  float64
 1   open       10000 non-null  float64
 2   high       10000 non-null  float64
 3   low        10000 non-null  float64
 4   close      10000 non-null  float64
 5   volume     10000 non-null  float64
dtypes: float64(6)
memory usage: 468.9 KB


## Native Python Test
*With pandas / numpy*

In [4]:
# Define function
def sma_native(data: pd.Series, period=14):
    n = data.shape[0]
    sma = list(0 for _ in range(n))
    
    for i in range(n):
        sma[i] = sum(data[i:i + period]) / period
        
    return sma
    

In [5]:
%%timeit
sma_native(btc_data["close"])

126 ms ± 2.75 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


# Pandas Vectorized Test

In [6]:
def sma_vectorized(data: pd.Series, period=14):
    return data.rolling(period, min_periods=period).sum()

In [7]:
%%timeit
sma_vectorized(btc_data["close"])

239 μs ± 6.46 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## Numba Test
*Jesus take the wheel*

In [8]:
@nb.njit
def sma_numba(data: np.ndarray[f8], period: i4 = 14) -> np.ndarray[f8]:
    sma = np.zeros_like(data, dtype=np.float64)
    
    for i in range(sma.shape[0] - period):
        sma[i + period] = np.sum(data[i:i+period]) / period
    
    return sma

In [9]:
%%timeit
sma_numba(btc_data["close"].values)

DEBUG - bytecode dump:
>          0	NOP(arg=None, lineno=1)
           2	RESUME(arg=0, lineno=1)
           4	LOAD_GLOBAL(arg=0, lineno=3)
          14	LOAD_ATTR(arg=3, lineno=3)
          34	LOAD_FAST(arg=0, lineno=3)
          36	LOAD_GLOBAL(arg=0, lineno=3)
          46	LOAD_ATTR(arg=4, lineno=3)
          66	KW_NAMES(arg=1, lineno=3)
          68	CALL(arg=2, lineno=3)
          76	STORE_FAST(arg=2, lineno=3)
          78	LOAD_GLOBAL(arg=7, lineno=5)
          88	LOAD_FAST(arg=2, lineno=5)
          90	LOAD_ATTR(arg=8, lineno=5)
         110	LOAD_CONST(arg=2, lineno=5)
         112	BINARY_SUBSCR(arg=None, lineno=5)
         116	LOAD_FAST(arg=1, lineno=5)
         118	BINARY_OP(arg=10, lineno=5)
         122	CALL(arg=1, lineno=5)
         130	GET_ITER(arg=None, lineno=5)
>        132	FOR_ITER(arg=38, lineno=5)
         136	STORE_FAST(arg=3, lineno=5)
         138	LOAD_GLOBAL(arg=0, lineno=6)
         148	LOAD_ATTR(arg=11, lineno=6)
         168	LOAD_FAST(arg=0, lineno=6)
         170

71.2 μs ± 27.1 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)
