# comparing different approaches to `Distributions.py` with `numba`

In [62]:
import distcan as dc

from numba import jitclass, jit, vectorize
from numba import float32
from numba import float64

import numpy as np
import timeit

import _rmath_ffi
from numba import cffi_support

cffi_support.register_module(_rmath_ffi)

## baseline `distcan` package

In [63]:
# Evaluation points shared by every benchmark below.
x = 2  # single scalar point
y = np.linspace(start=-2, stop=2, num=20)  # 20 evenly spaced points on [-2, 2]

In [64]:
# Baseline: a Normal distribution with parameters (0, 1) from the
# `distcan` package (imported as `dc`).
N_dc = dc.Normal(0,1)

# Time the pdf on the scalar `x`.
print("distcan -- pdf evaluated at one point:")
%timeit N_dc.pdf(x)

# Time the pdf on the array `y`.
print("distcan -- pdf evaluated in a vector:")
%timeit N_dc.pdf(y)

distcan -- pdf evaluated at one point:
1000 loops, best of 3: 262 µs per loop
distcan -- pdf evaluated in a vector:
10000 loops, best of 3: 250 µs per loop


## top level `@jit` and `@vectorize` with `rmath` functions

In [65]:
# Short global alias for `dnorm` from the cffi module registered with numba
# above; the compiled functions and classes below call it through this name.
dnorm = _rmath_ffi.lib.dnorm

In [66]:
@jit(nopython=True)
def pdf_jit(mu, sigma, x):
    """Return the Normal(mu, sigma) density at the point ``x``.

    Thin nopython wrapper around Rmath's ``dnorm``. The trailing ``0`` is
    the ``give_log`` flag, so the plain (non-log) density is returned.
    """
    give_log = 0
    density = dnorm(x, mu, sigma, give_log)
    return density

# Time the module-level @jit function on the scalar `x`.
# NOTE(review): the first call presumably includes JIT compilation, which
# would explain the large slowest/fastest ratio %timeit reports.
print("pure @jitted pdf evaluated at one point:")
%timeit pdf_jit(0, 1, x)

pure @jitted pdf evaluated at one point:
The slowest run took 89706.90 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 497 ns per loop


In [43]:
@vectorize(nopython=True)
def pdf_vect(mu, sigma, x):
    """Return the Normal(mu, sigma) density for one element ``x``.

    The body is written for scalars; ``@vectorize`` turns it into a ufunc
    that is applied element-wise. The trailing ``0`` passed to Rmath's
    ``dnorm`` is the ``give_log`` flag (plain, non-log density).
    """
    give_log = 0
    density = dnorm(x, mu, sigma, give_log)
    return density

print("pure @vectorized pdf evaluated at a vector:")
%timeit pdf_vect(0, 1, x)

pure jitted pdf evaluated at a vector:
The slowest run took 22637.88 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 3: 12.2 µs per loop




## `@jitclass`

In [67]:
# Store the parameters in double precision: Rmath's ``dnorm`` works on C
# doubles, so float32 fields would silently round ``mu`` and ``sigma``
# (e.g. 0.1 would be stored as 0.100000001490116...).
spec = [
    ('mu', float64),
    ('sigma', float64),
]

@jitclass(spec)
class Normal_jitclass():
    """
    The Normal distribution with mean mu and standard deviation sigma.

    The pdf is computed by calling Rmath's ``dnorm`` directly from the
    compiled methods.

    Parameters
    ----------
    mu : scalar(float)
        Mean of the normal distribution
    sigma : scalar(float)
        Standard deviation of the normal distribution
    """

    def __init__(self, mu, sigma):
        self.mu = mu
        self.sigma = sigma

    def pdf(self, x):
        """Return the pdf value at the scalar point x."""
        # Final argument 0 is dnorm's give_log flag: plain density.
        return dnorm(x, self.mu, self.sigma, 0)

    def pdf_v(self, x):
        """Return an array of pdf values, one per element of the 1-d array x.

        The result is always float64, so integer-typed input does not
        truncate the densities.
        """
        n = x.shape[0]
        # Allocate a float output explicitly rather than with empty_like(x),
        # which would inherit an integer dtype from integer input.
        y = np.empty(n, np.float64)
        for k in range(n):
            y[k] = dnorm(x[k], self.mu, self.sigma, 0)

        return y

# Instantiate the jitclass with mu=0, sigma=1 and time both methods:
# `pdf` on the scalar `x`, `pdf_v` on the array `y`.
N_jc = Normal_jitclass(0,1)
print("@jitclass pdf evaluated at one point:")
%timeit N_jc.pdf(x)
print("@jitclass pdf evaluated in a vector:")
%timeit N_jc.pdf_v(y)

@jitclass pdf evaluated at one point:
The slowest run took 37356.11 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 2.41 µs per loop
@jitclass pdf evaluated in a vector:
The slowest run took 39021.84 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 3: 6.37 µs per loop


## `@jitclass` calling top level `rmath`

In [68]:
# Store the parameters in double precision: Rmath's ``dnorm`` works on C
# doubles, so float32 fields would silently round ``mu`` and ``sigma``.
spec = [
    ('mu', float64),
    ('sigma', float64),
]

@jitclass(spec)
class Normal_jitclass_top():
    """
    The Normal distribution with mean mu and standard deviation sigma.

    Unlike a class that calls ``dnorm`` itself, this variant delegates to
    the module-level compiled functions ``pdf_jit`` and ``pdf_vect``.

    Parameters
    ----------
    mu : scalar(float)
        Mean of the normal distribution
    sigma : scalar(float)
        Standard deviation of the normal distribution
    """

    def __init__(self, mu, sigma):
        self.mu = mu
        self.sigma = sigma

    def pdf(self, x):
        """Return the pdf value at the scalar point x (via ``pdf_jit``)."""
        return pdf_jit(self.mu, self.sigma, x)

    def pdf_v(self, x):
        """Return the pdf values for the array x (via the ``pdf_vect`` ufunc)."""
        return pdf_vect(self.mu, self.sigma, x)

# Instantiate the delegating jitclass with mu=0, sigma=1 and time both
# methods: `pdf` on the scalar `x`, `pdf_v` on the array `y`.
N_jc_top = Normal_jitclass_top(0,1)
print("@jitclass calling top level -- pdf evaluated at one point:")
%timeit N_jc_top.pdf(x)
print("@jitclass calling top level -- pdf evaluated in a vector:")
%timeit N_jc_top.pdf_v(y)

@jitclass calling top level -- pdf evaluated at one point:
The slowest run took 51972.50 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 2.48 µs per loop
@jitclass calling top level -- pdf evaluated in a vector:
The slowest run took 32453.04 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 6.08 µs per loop


## comparison in table

In [73]:
import warnings
warnings.filterwarnings("ignore")

print("=================================== distcan ===================================")
print("distcan -- pdf evaluated at one point:")
%timeit N_dc.pdf(x)

print("\ndistcan -- pdf evaluated in a vector:")
%timeit N_dc.pdf(y)

print("\n============================== @jit, @vectorize ===============================")
print("pure @jitted pdf evaluated at one point:")
%timeit pdf_jit(0, 1, x)
print("\npure @vectorized pdf evaluated at a vector:")
%timeit pdf_vect(0, 1, x)

print("\n================================== @jitclass ==================================")
print("@jitclass pdf evaluated at one point:")
%timeit N_jc.pdf(x)
print("\n@jitclass pdf evaluated in a vector:")
%timeit N_jc.pdf_v(y)

print("\n============================ @jitclass top level ==============================")
print("@jitclass calling top level -- pdf evaluated at one point:")
%timeit N_jc_top.pdf(x)
print("\n@jitclass calling top level -- pdf evaluated in a vector:")
%timeit N_jc_top.pdf_v(y)

distcan -- pdf evaluated at one point:
1000 loops, best of 3: 317 µs per loop

distcan -- pdf evaluated in a vector:
1000 loops, best of 3: 296 µs per loop

pure @jitted pdf evaluated at one point:
The slowest run took 20.66 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 517 ns per loop

pure @vectorized pdf evaluated at a vector:
The slowest run took 7.19 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 10.6 µs per loop

@jitclass pdf evaluated at one point:
The slowest run took 5.97 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 2.52 µs per loop

@jitclass pdf evaluated in a vector:
The slowest run took 4.31 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 5.86 µs per loop

@jitclass calling top level -- pdf eva