[Reference](https://towardsdatascience.com/this-decorator-will-make-python-30-times-faster-715ca5a66d5f)

In [1]:
pip install numba

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import random

# import njit from numba
from numba import njit


def monte_carlo_pi_without_numba(nsamples):
    """Estimate pi by random sampling, in plain interpreted Python.

    For each of ``nsamples`` iterations two values are drawn with
    ``random.random()`` and treated as a point (x, y); the point counts as a
    hit when x**2 + y**2 is strictly below 1.0.

    Returns:
        ``4.0 * hits / nsamples`` as a float.

    Raises:
        ZeroDivisionError: if ``nsamples`` is 0.
    """
    hits = 0
    for _ in range(nsamples):
        # Draw order matters for reproducibility under a fixed seed: x, then y.
        px = random.random()
        py = random.random()
        squared_radius = px ** 2 + py ** 2
        if squared_radius < 1.0:
            hits += 1
    return (4.0 * hits) / nsamples

# Add numba's decorator to make the function faster
@njit
def monte_carlo_pi_with_numba(nsamples):
    """Estimate pi by random sampling; same algorithm as the plain-Python
    version, but decorated with numba's ``njit``.

    For each of ``nsamples`` iterations two values are drawn with
    ``random.random()``; the pair counts as a hit when x**2 + y**2 is
    strictly below 1.0. Returns ``4.0 * hits / nsamples``.
    """
    hits = 0
    for _ in range(nsamples):
        px = random.random()
        py = random.random()
        squared_radius = px ** 2 + py ** 2
        if squared_radius < 1.0:
            hits += 1
    return (4.0 * hits) / nsamples

In [3]:
%timeit monte_carlo_pi_with_numba(100_000)
# 1.24 ms ± 10.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

%timeit monte_carlo_pi_without_numba(100_000)
# 40.6 ms ± 814 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

1.24 ms ± 14.8 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
44.2 ms ± 5.51 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [4]:
import numpy as np
import pandas as pd

n = 1_000_000

# Seeded generator so the synthetic data set is identical on every run.
rng = np.random.default_rng(42)

# Each column is drawn uniformly from a fixed range:
#   height in [1, 2.3), weight in [40, 300), hip_circumference in [94, 108)
df = pd.DataFrame({
    'height': 1 + 1.3 * rng.random(n),
    'weight': 40 + 260 * rng.random(n),
    'hip_circumference': 94 + 14 * rng.random(n)
})

In [5]:
from numba import vectorize

def get_squared_height_without_numba(height):
  """Return ``height`` raised to the power of two (plain Python, no numba)."""
  squared = pow(height, 2)
  return squared

@vectorize
def get_squared_height_with_numba(height):
  """Return ``height`` squared; wrapped by numba's ``vectorize`` decorator."""
  squared = height ** 2
  return squared


%timeit df['height'].apply(get_squared_height_without_numba)
# 279 ms ± 7.31 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


%timeit df['height'] ** 2
# 2.04 ms ± 229 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

# We convert the column to a numpy array first
# since numba is compatible with numpy ( not pandas )
%timeit get_squared_height_with_numba(df['height'].to_numpy())
# 1.6 ms ± 51.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

521 ms ± 203 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.44 ms ± 626 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.44 ms ± 325 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
from numba import njit

@njit
def get_bmi(weight_col, height_col):
  """Compute ``weight / height ** 2`` element-wise for two equally long arrays.

  Args:
    weight_col: 1-D numpy array of weights.
    height_col: 1-D numpy array of heights, same length as ``weight_col``.

  Returns:
    A new float64 numpy array holding one result per input element.

  Raises:
    ValueError: if the two inputs differ in length.
  """
  n = len(weight_col)

  # The output buffer comes from np.empty, so a shorter height_col would leave
  # its tail uninitialized, and a longer one would be silently truncated.
  if len(height_col) != n:
    raise ValueError("weight_col and height_col must have the same length")

  result = np.empty(n, dtype=np.float64)

  # numba's loops are very fast compared to python loops
  for i in range(n):
    result[i] = weight_col[i] / (height_col[i] ** 2)

  return result


# don't forget to convert columns to numpy 
%timeit df['bmi'] = get_bmi(df['weight'].to_numpy(), df['height'].to_numpy())
# 6.77 ms ± 230 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

%timeit df['bmi'] = df['weight']  / (df['height'] ** 2)
# 8.63 ms ± 316 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

The slowest run took 5.26 times longer than the fastest. This could mean that an intermediate result is being cached.
14.7 ms ± 9.93 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
9.24 ms ± 1.72 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
