# High-performance computing in Python


### Missing

* Numba

In [9]:
from functools import partial
import multiprocessing as mp
import itertools
import sys
import os

sys.path.insert(0, "../02-sensitivity-analysis/python")
sys.path.insert(0, "python")

import matplotlib.pyplot as plt
import numpy as np

from ishigami import compute_simulation_total_effect
from ishigami import compute_simulation_main_effect
from ishigami import evaluate_ishigami_readable
from ishigami import evaluate_ishigami

from auxiliary import task_mp_no_communication
from auxiliary import evaluate_ishigami_loop
from auxiliary import task_mp_management
from auxiliary import task_mp_queue
from functools import partial

## Vectorization

We can compare the implementation using a for loop with its vectorized counterpart for a set of random input parameters.

In [10]:
num_draws = 10

# Draw from a locally seeded generator so that a fresh "Restart & Run All"
# reproduces exactly the same inputs without touching NumPy's global RNG state.
rng = np.random.default_rng(seed=123)
inputs = rng.uniform(low=-np.pi, high=np.pi, size=(num_draws, 3))

We can use some built-in functionality for some basic benchmarking.

In [11]:
# Time both implementations on the same `inputs` array: first the explicit
# loop version, then the vectorized one.
%timeit evaluate_ishigami_loop(inputs)
%timeit evaluate_ishigami(inputs)

69.1 µs ± 1.44 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
8.87 µs ± 302 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


We need to set up tests wherever possible.

In [12]:
# The loop and the vectorized implementation may order their floating-point
# operations differently, so compare within a tolerance instead of demanding
# bit-for-bit equality.
np.testing.assert_allclose(evaluate_ishigami_loop(inputs), evaluate_ishigami(inputs))

## Multiprocessing
We first want to get a sense of how many CPUs we have available.

In [13]:
# Ask the multiprocessing module how many CPUs it reports for this machine.
num_cpus = mp.cpu_count()
print(f"Number of cpu : {num_cpus}")

Number of cpu : 4


In [14]:
# TODO: Show groups of processes working on total and main effects. The MPI worker setup is nicely done, and the tasks differ.

## mp.Process

* without communication

In [15]:
# Both leading task arguments take the same value.
num_outer = 100
num_inner = 100

# Bind the arguments shared by all workers; only the per-worker argument
# remains to be supplied when the task is launched.
task_partial = partial(task_mp_no_communication, num_outer, num_inner)

In [16]:
# We can prepare our work: one process per value of `which`; all other
# task arguments are already bound in `task_partial`.
processes = []
for which in range(3):
    p = mp.Process(target=task_partial, args=(which,))
    processes.append(p)

# We can execute our work. All processes are started first so that they run
# concurrently; plain loops are used because start() and join() are called
# purely for their side effects.
for p in processes:
    p.start()

# Block until every process has terminated.
for p in processes:
    p.join()

... started on input parameter 0
... started on input parameter 1
... started on input parameter 2
... finished input parameter 0
... finished input parameter 2
... finished input parameter 1


* with communication

In [17]:
# The workers need a container to communicate their results back to the
# parent process. A queue is used here; a shared variable would be an
# alternative.
qout = mp.Queue()

# Collect every argument that is identical across workers (the queue
# included) and bind them in one go.
shared_args = (num_outer, num_inner, qout)
task_partial = partial(task_mp_queue, *shared_args)

In [18]:
# We can prepare our work.
processes = []
for which in range(3):
    p = mp.Process(target=task_partial, args=(which,))
    processes.append(p)

# We can execute our work.
for p in processes:
    p.start()

# Collect one result per process BEFORE joining. A process that has put
# items on a queue waits for them to be flushed to the underlying pipe before
# it terminates, so joining first can deadlock if the items are not consumed.
unsorted_result = [qout.get() for _ in processes]

# Now that the queue is drained, waiting for the processes is safe.
for p in processes:
    p.join()

# The items arrive in completion order, so we sort them to ensure a unique
# order of the results and then keep the second entry of each item.
result = [t[1] for t in sorted(unsorted_result)]
print(result)

[0.2791831519566242, 0.4691635591850568, 0.009084340128113134]


## mp.Pool

In [19]:
task_partial = partial(task_mp_no_communication, num_outer, num_inner)

# The context manager guarantees that the worker processes are cleaned up
# even if a task raises. On the success path, close() + join() still perform
# the graceful shutdown before the pool is torn down.
with mp.Pool(processes=3) as pool:
    # Return values are not needed here, so the result of map() is discarded.
    pool.map(task_partial, range(3))

    pool.close()
    pool.join()

... started on input parameter 0
... started on input parameter 1
... started on input parameter 2
... finished input parameter 0
... finished input parameter 2
... finished input parameter 1


In [20]:
task_partial = partial(task_mp_management, num_outer, num_inner)

# Every combination of effect type and input index becomes one task.
tasks = list(itertools.product(["main", "total"], range(3)))

# The context manager guarantees that the worker processes are cleaned up
# even if a task raises. On the success path, close() + join() still perform
# the graceful shutdown before the pool is torn down.
with mp.Pool(processes=3) as pool:
    # map() returns the results in the order of `tasks`.
    rslt = pool.map(task_partial, tasks)

    pool.close()
    pool.join()

working on main effect for input 1
working on main effect for input 2
working on main effect for input 0
working on total effect for input 1
working on total effect for input 0
working on total effect for input 2
