In [1]:
from threading import Thread, current_thread
from multiprocessing.pool import ThreadPool
from functools import partial
from time import sleep
import numpy as np
import numba as nb
import os

# Manual Start

In [13]:
def compute(duration=1):
    """Announce the current process and thread, then simulate work by sleeping.

    Parameters
    ----------
    duration : float, optional
        Number of seconds to sleep between the "Starting to work!" and
        "Done!" messages. Defaults to 1, the value that used to be
        hard-coded, so ``Thread(target=compute)`` behaves as before.

    Notes
    -----
    Every message is written with its own ``print`` call, so when several
    threads run this function at once their lines may interleave.
    """
    # The process id is shared by all threads; the thread ident is not.
    print(f"Hello! I am process number {os.getpid()}")
    print(f"Hello! I am thread number {current_thread().ident}")
    print("Starting to work!")
    # Stand-in for real work.
    sleep(duration)
    print("Done!")
    

def run_threads(n=3):
    """Start ``n`` threads that each run ``compute`` and wait for all of them.

    Parameters
    ----------
    n : int, optional
        Number of threads to create (default 3).

    Each thread is started immediately after it is created, so earlier
    threads are already running while later ones are still being set up.
    """
    started = []

    for index in range(n):
        print("Creating Thread", index)
        worker = Thread(target=compute)
        worker.start()
        started.append(worker)

    # Block until every thread has finished.
    for worker in started:
        worker.join()

    print("All the processes are done!")
        
# Time three concurrent threads: each one sleeps for 1 s inside compute(), and
# the recorded wall time of about 1 s (rather than 3 s) shows the sleeps overlap.
%time run_threads(3)

Creating Thread 0
Hello! I am process number 3792884
Hello! I am thread number 140540756874944
Creating Thread 1
Hello! I am process number 3792884
Hello! I am thread number 140540765267648
Starting to work!
Creating Thread 2
Hello! I am process number 3792884
Hello! I am thread number 140540774708928
Starting to work!
Starting to work!
Done!Done!

Done!
All the processes are done!
CPU times: user 968 µs, sys: 3.25 ms, total: 4.21 ms
Wall time: 1 s


# Thread Pool

In [7]:
def work(i, matrix):
    """Return the norm of the ``i``-th slice of ``matrix`` along its first axis.

    Parameters
    ----------
    i : int
        Index into the first axis of ``matrix``.
    matrix : numpy.ndarray
        Array whose slice ``matrix[i]`` is passed to ``np.linalg.norm`` with
        its default settings.

    Returns
    -------
    The value returned by ``np.linalg.norm(matrix[i])``.

    The index comes first so that ``matrix`` can be bound by keyword with
    ``functools.partial`` and the result mapped over a range of indices.
    """
    selected = matrix[i]
    return np.linalg.norm(selected)

# Dimensions of the benchmark array; the first axis (10000) is the one the
# workers iterate over, giving one (500, 50) slice per task.
shape = (10000, 500, 50)
dtype = np.float32

# 10000 * 500 * 50 float32 values = 1e9 bytes (about 1 GB), all set to one.
matrix = np.ones(shape, dtype=dtype)

# Thread pool created with the library's default worker count. It is reused by
# the timing cell below and is not closed anywhere in the visible cells.
workers = ThreadPool()

In [11]:
# Benchmark the thread pool: bind `matrix` with partial() and map work() over
# every index of the first axis, one task per slice.
%timeit result = workers.map(partial(work, matrix=matrix), range(shape[0]))

183 ms ± 43.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Within Numba

In [19]:
@nb.njit(parallel=True)
def work_numba(matrix):
    """Compute the norm of every slice along the first axis of ``matrix``.

    Parameters
    ----------
    matrix : numpy.ndarray
        Array whose slices ``matrix[k]`` are each passed to
        ``np.linalg.norm``.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``matrix.shape[0]`` created with ``np.empty``
        (default dtype), where entry ``k`` holds the norm of ``matrix[k]``.
    """
    n_slices = matrix.shape[0]
    norms = np.empty(n_slices)

    # prange marks the loop as parallelisable; each iteration writes only to
    # its own slot of `norms`, so iterations do not depend on one another.
    for idx in nb.prange(n_slices):
        norms[idx] = np.linalg.norm(matrix[idx])

    return norms

In [20]:
# Benchmark the Numba prange version on the same `matrix`, for comparison with
# the ThreadPool timing.
%timeit work_numba(matrix)

59.6 ms ± 9.33 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
