In [None]:
import numba
import numpy as np
from numba import jit, prange, njit
import random
from itertools import combinations
import statsmodels.api as sm
import pandas as pd
import timeit
from sklearn.neural_network import MLPClassifier
import warnings
from multiprocessing.pool import Pool
import multiprocessing as mp
warnings.filterwarnings("ignore")

# Parallel Computing in Python
First, get the number of CPU cores available on your machine.

In [None]:
# Number of CPU cores reported by the operating system
c = mp.cpu_count()
print(f'We are working on {c} cores')

We are working on 2 cores


## Matrix multiplication
The `@jit` decorator works best on numerical functions that use `numpy`. It has two modes: nopython mode and object mode. Setting `nopython=True` tells Numba to compile the entire decorated function to machine code so that it runs without involving the Python interpreter; if that is not possible, compilation fails with an error.

In [None]:
# a function that computes the matrix-vector product A @ x one row at a time
@jit(nopython=True)
def matrix_multiplication(A, x):
    """Multiply matrix ``A`` by column vector ``x`` row by row.

    Parameters
    ----------
    A : 2-D array of shape (m, n)
    x : 2-D array of shape (n, 1)

    Returns
    -------
    b : float64 array of shape (m, 1) where ``b[i] = np.dot(A[i, :], x)``.
    """
    # The result has one entry per row of A (not per row of x), so that
    # non-square matrices are handled correctly as well.
    n_rows = A.shape[0]
    b = np.empty(shape=(n_rows, 1), dtype=np.float64)
    for i in range(n_rows):
        b[i] = np.dot(A[i, :], x)
    return b

Generate some random matrices and time the function `matrix_multiplication`


In [None]:
# 10x10 matrix and 10x1 column vector with entries drawn uniformly from [0, 1)
A = np.random.rand(10, 10)
x = np.random.rand(10, 1)
# Time the Numba-decorated function
%timeit matrix_multiplication(A,x)

3.99 µs ± 66.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


Numba has a special `.py_func` attribute that effectively allows the decorated function to run as the original uncompiled Python function.

In [None]:
# Same benchmark, but calling the original uncompiled Python function
%timeit matrix_multiplication.py_func(A,x)

20 µs ± 101 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Normal distribution
The `@jit` decorator can also be used to enable automatic parallelization by setting `parallel=True`. Parallelization in `numba` is done via multi-threading: the work is split into chunks that are executed by threads running on the available cores.

In [None]:
# simulate mus and stds: 10**8 means drawn uniformly from [-1, 1) and
# 10**8 standard deviations drawn uniformly from [0.1, 0.2)
means = np.random.uniform(-1,1, size=10**8)
sds = np.random.uniform(0.1, 0.2, size=10**8)

@jit(nopython=True, parallel=True)
def normal_parallel(x, means, sds):
    """Evaluate the normal density at ``x`` for every (mean, sd) pair.

    Parameters
    ----------
    x : point at which the densities are evaluated
    means : array of means
    sds : array of standard deviations, matching ``means`` element-wise

    Returns
    -------
    Array with ``exp(-0.5 * ((x - mean) / sd)**2) / (sd * sqrt(2 * pi))``
    computed element-wise.
    """
    # Exponent of the Gaussian kernel, evaluated element-wise
    exponent = -0.5 * ((x - means) / sds) ** 2
    # Scale by the normalizing constant 1 / (sd * sqrt(2 * pi))
    return (1 / (sds * np.sqrt(2 * np.pi))) * np.exp(exponent)

In [None]:
# Compare the decorated (parallel) version with the original Python function:
# 10 runs of 5 loops each
%timeit -r 10 -n 5 normal_parallel(0.6, means, sds)
%timeit -r 10 -n 5 normal_parallel.py_func(0.6, means, sds)

1.86 s ± 177 ms per loop (mean ± std. dev. of 10 runs, 5 loops each)
2.75 s ± 12 ms per loop (mean ± std. dev. of 10 runs, 5 loops each)


## Fibonacci sequence

In [None]:
@njit
def fibonacci(n):
    """Return the value reached after ``n`` steps of the Fibonacci recurrence.

    Starting from ``a = b = 1``, each step replaces ``(a, b)`` with
    ``(a + b, a)``; hence ``fibonacci(0) == 1``, ``fibonacci(1) == 2``,
    ``fibonacci(2) == 3``, ``fibonacci(3) == 5`` and so on.

    Every step depends on the result of the previous one, so the loop is
    inherently sequential: ``parallel=True`` would have nothing to
    parallelize here, which is why plain ``@njit`` is used.

    NOTE(review): the compiled version presumably works with 64-bit integers
    and therefore wraps around once the value exceeds 2**63 - 1 (roughly
    n > 90), whereas ``fibonacci.py_func`` uses Python's arbitrary-precision
    integers -- confirm before relying on results for large ``n``.
    """
    a, b = 1, 1
    for _ in range(n):
        a, b = a + b, a
    return a

In [None]:
# Compare the decorated version with the original Python function (1000 loops per run)
%timeit -n 1000 fibonacci(500)
%timeit -n 1000 fibonacci.py_func(500)

910 ns ± 120 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
40.9 µs ± 1.56 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


## Approximating $\pi$
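Let's approximate $\pi$ with a Monte Carlo simulation using `numba`: draw $N$ points uniformly in the unit square and count the $M$ points that fall inside the quarter circle $x^2 + y^2 \le 1$. Since that region has area $\pi/4$, we get $\pi \approx 4M/N$.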

In [None]:
@njit(parallel=True)
def approx_pi(N):
    """Print a Monte Carlo estimate of pi based on ``N`` random points.

    Points are drawn uniformly in the unit square; ``M`` counts those that
    fall inside the unit circle, and the estimate printed is ``4 * M / N``.
    Nothing is returned.
    """
    M = 0
    # prange (instead of range) is what allows parallel=True to split the
    # iterations across threads; M is accumulated as a reduction variable.
    for i in prange(N):
        # Simulate impact coordinates
        x = random.uniform(0, 1)
        y = random.uniform(0, 1)

        # True if impact happens inside the circle
        if x**2 + y**2 <= 1:
            M += 1
    print('pi', 4*M/N)

In [None]:
# Number of simulated points
sim = 10**7
# One timed run each (-r 1 -n 1) of the decorated version and of the original Python function
%timeit -r 1 -n 1 approx_pi(sim)
%timeit -r 1 -n 1 approx_pi.py_func(sim)

pi 3.1427248
303 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
pi 3.14128
8.13 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


Now let's use `multiprocessing`

In [None]:
def approx_pi(N):
    """Count how many of ``N`` random points land inside the unit circle.

    Each point has both coordinates drawn uniformly from [0, 1]. The number
    of hits is returned (not the estimate of pi itself), so that counts
    from several calls can be added up by the caller.
    """
    # Each term draws x first and then y, and is True for a hit inside the
    # circle; summing the booleans gives the hit count.
    return sum(
        random.uniform(0, 1) ** 2 + random.uniform(0, 1) ** 2 <= 1
        for _ in range(N)
    )

In [None]:
def f():
    """Estimate pi with a pool of ``c`` worker processes and report the timing.

    Reads the globals ``sim`` (total number of simulated points) and ``c``
    (number of worker processes). Each worker runs ``approx_pi`` on its share
    of the points; the hit counts are then summed. Prints the elapsed time
    and the estimate; nothing is returned.
    """
    # Split sim as evenly as possible among the workers: the first `extra`
    # workers take one additional point so that the shares add up to exactly
    # sim. (Truncating sim/c would drop the remainder while the estimate
    # below still divides by sim.)
    base, extra = divmod(sim, c)
    chunks = [base + 1 if i < extra else base for i in range(c)]
    start = timeit.default_timer()
    with Pool(c) as pool:
        count = pool.map(approx_pi, chunks)
    elapsed = timeit.default_timer() - start
    print(f'elapsed time {round(elapsed, 2)} secs')
    print('pi', 4*sum(count)/sim)

In [None]:
# Run the multiprocessing version of the simulation
f()

elapsed time 9.75 secs
pi 3.141016
