# numba

In [114]:
import numpy as np
from numba import config, njit, prange
import time
import numba

def isArraySorted(arr):
    n = len(arr)
    if (n > 0):
        prev = arr[0]
        for i in range(n):
            if (arr[i] < prev):
                return False
    return True;

def createRandomArr(n):
    return np.random.randint(0, 100, n)

N = 10000
ITERS = 100

@njit
def quickSortSeq(arr, low, high):
    if (low < high):
        # partitionHoar
        i = low
        j = high
        pivot = arr[(i + j) // 2]
        while (1):
            while (arr[i] < pivot):
                i = i + 1
            while (arr[j] > pivot):
                j = j - 1

            if (i >= j):
                break

            arr[i], arr[j] = arr[j], arr[i]
            i = i + 1
            j = j - 1
        pi = j

        # recurcive call
        quickSortSeq(arr, low, pi)
        quickSortSeq(arr, pi + 1, high)

@njit(parallel=True)
def quickSortPar(arr, low, high):
    if low < high:
        i = low
        j = high
        pivot = arr[(i + j) // 2]
        while True:
            while arr[i] < pivot:
                i = i + 1
            while arr[j] > pivot:
                j = j - 1

            if i >= j:
                break

            arr[i], arr[j] = arr[j], arr[i]
            i = i + 1
            j = j - 1
        pi = j

        for id in prange(2):
            if (id == 0):
                quickSortSeq(arr, low, pi)
            else:
                quickSortSeq(arr, pi + 1, high)

In [115]:
arr = createRandomArr(10)
print(arr)

[18 78 16 68 65  4  1 94 69 88]


In [116]:
quickSortPar(arr, 0, len(arr)-1)
print(arr)

[ 1  4 16 18 65 68 69 78 88 94]


In [117]:
def calcParTime1():
    arr = createRandomArr(10)
    quickSortPar(arr, 0, len(arr) - 1)
    threads_n = 4
    while (threads_n > 0):
        numba.set_num_threads(threads_n)
        full_time = 0
        for it in range(ITERS):
            arr = createRandomArr(N)
            # print(arr)
    
            start_time = time.time()
    
            quickSortPar(arr, 0, len(arr) - 1)
    
            end_time = time.time()
            elapsed_time = end_time - start_time;
            # print(elapsed_time)
            full_time += elapsed_time
    
            # print(arr)
            if (not isArraySorted(arr)):
                print("Massive is not sorted\n")
            
        full_time /= ITERS
        print(f"Time taken ({threads_n} threads):", full_time, "seconds")
        threads_n //= 2

In [118]:
N = 100000
ITERS = 100

config.THREADING_LAYER = 'omp'
config.NUMBA_DEFAULT_NUM_THREADS = 4

for i in range(3):
    calcParTime1()

Time taken (4 threads): 0.0039040803909301757 seconds
Time taken (2 threads): 0.003230011463165283 seconds
Time taken (1 threads): 0.0037974953651428224 seconds
Time taken (4 threads): 0.00449885368347168 seconds
Time taken (2 threads): 0.003260934352874756 seconds
Time taken (1 threads): 0.003692772388458252 seconds
Time taken (4 threads): 0.0036490726470947265 seconds
Time taken (2 threads): 0.0030921816825866697 seconds
Time taken (1 threads): 0.003779768943786621 seconds


# numba.openmp

In [15]:
from numba import njit
from numba.openmp import openmp_context as openmp
from numba.openmp import omp_get_thread_num, omp_get_num_threads, omp_set_num_threads, omp_get_max_threads, omp_get_wtime
import numpy as np
from numba import njit
from numba.openmp import openmp_context as openmp
import random

def isArraySorted(arr):
    n = len(arr)
    if (n > 0):
        prev = arr[0]
        for i in range(n):
            if (arr[i] < prev):
                return False
    return True;

def createRandomArr(n):
    return np.random.randint(0, 100, n)
    
@njit
def getOmpTime():
    return omp_get_wtime()

@njit        
def printNumThreads():
    with openmp('parallel'):
        ID = omp_get_thread_num()
        if (ID == 0):
            print("omp_get_max_threads() =", omp_get_max_threads())
            print("omp_get_num_threads() =", omp_get_num_threads())
        print("omp_get_thread_num() =", ID)

@njit
def printThreadNum():
    print("thread_num = ", omp_get_thread_num());

@njit
def quickSortPar(arr, low, high, max_d, d = 0):
    if (low < high):
        # partitionHoar
        i = low
        j = high
        pivot = arr[(i + j) // 2]
        while (1):
            while (arr[i] < pivot):
                i = i + 1
            while (arr[j] > pivot):
                j = j - 1
        
            if (i >= j):
                break
        
            arr[i], arr[j] = arr[j], arr[i]
            i = i + 1
            j = j - 1
        pi = j

        #print("omp_get_thread_num() = ", omp_get_thread_num());
        #print(arr)
        # recurcive call
        if (d < max_d):
            #arr1 = None
            #arr2 = None
            #with openmp("parallel sections"):
            with openmp("task shared(arr)"):
                #print("omp_get_thread_num() = ", omp_get_thread_num());
                #print(arr)
                quickSortPar(arr, low, pi, max_d, d + 1)
                #quickSortSeq(arr, low, pi)
            with openmp("task shared(arr)"):   
                #print("omp_get_thread_num() = ", omp_get_thread_num());
                #print(arr)
                quickSortPar(arr, pi + 1, high, max_d, d + 1)  
                #quickSortSeq(arr, pi + 1, high)
            with openmp("taskwait"):
                return
                #res_arr = arr1 + arr2
        else:
            quickSortPar(arr, low, pi, max_d, d + 1)
            quickSortPar(arr, pi + 1, high, max_d, d + 1)
        #quickSortPar(arr, low, pi)
        #quickSortPar(arr, pi + 1, high)

@njit
def quickSortParHelp(arr, max_d):
    with openmp("parallel shared(arr)"):
        #ID = omp_get_thread_num()
        #print("omp_get_thread_num() =", ID)
        with openmp("single"):
            #print("num_threads = ", omp_get_num_threads())
            #ID2 = omp_get_thread_num()
            #print("single omp_get_thread_num() =", ID2)
            quickSortPar(arr, 0, len(arr)-1, max_d)
            #print("arr after quickSortPar=", arr)
            #with openmp("taskwait"):
                #print("arr after quickSortPar=", arr)
                #return arr

printNumThreads()

omp_get_thread_num() = 3
omp_get_max_threads() = 4
omp_get_num_threads() = 4
omp_get_thread_num() = 0
omp_get_thread_num() = 2
omp_get_thread_num() = 1


In [18]:
arr = createRandomArr(6)
print(arr)
quickSortParHelp(arr, 1)
print(arr)
if (isArraySorted(arr)):
    print("Massive", arr, "IS sorted\n")
if (not isArraySorted(arr)):
    print("Massive is not sorted\n")

[37  8 90 54 70 15]
[ 8 15 37 54 70 90]
Massive [ 8 15 37 54 70 90] IS sorted



In [11]:
arr = createRandomArr(10)
print(arr)
with openmp("parallel"):
    with openmp("single"):
        quickSortPar(arr, 0, len(arr)-1, 2)
print(arr)

[40 39 96 23 51  6 51 39 14 42]
[40 39 42 23 14  6 39 51 51 96]


In [3]:
@njit
def test3(n):
    ID = omp_get_thread_num()
    print("id =", ID, "n =", n)
    if (n > 0):
        with openmp("task private(n)"):
            test3(n-1)
        with openmp("task private(n)"):
            test3(n-2)

@njit
def test2():
    with openmp('parallel'):
        ID = omp_get_thread_num()
        print("omp_get_thread_num() =", ID)
        with openmp("single"):
            print("num_threads = ", omp_get_num_threads())
            ID2 = omp_get_thread_num()
            print("single omp_get_thread_num() =", ID2)
            test3(2)
            


test2()

omp_get_thread_num() = 3
num_threads =  4
single omp_get_thread_num() = 3
id = 3 n = 2
omp_get_thread_num() = 2
id = 2 n = 1
omp_get_thread_num() = 1
id = 1 n = -1
id = 3 n = 0
omp_get_thread_num() = 0
id = 2 n = -2


In [5]:
#setNumThreads(4)

arr = createRandomArr(10)
print(arr)
with openmp("parallel"):
    printThreadNum()
    with openmp("single"):
        quickSortPar(arr, 0, len(arr)-1, 2)
print(arr)

[30 66  6 61 81 11  2 95 77 90]
thread_num =  0
omp_get_thread_num() =  0
omp_get_thread_num() =  0
omp_get_thread_num() =  0
omp_get_thread_num() =  0
omp_get_thread_num() =  0
omp_get_thread_num() =  0
[30 66  6 61 77 11  2 95 81 90]


In [19]:
@njit
def setNumThreads(n):
    omp_set_num_threads(n)


def calcParTime2():
    arr = createRandomArr(10)
    #with openmp("parallel"):
    #    with openmp("single"):
    #        quickSortPar(arr, 0, len(arr)-1, 2)
    quickSortParHelp(arr, 2)
    threads_n = 4
    while (threads_n > 0):
        setNumThreads(threads_n)
        full_time = 0
        for it in range(ITERS):
            arr = createRandomArr(N)
            # print(arr)

            max_d = 0
            sum_th_n = 1
            cur_th_n = 1
            while (sum_th_n < threads_n):
                cur_th_n *= 2
                sum_th_n += cur_th_n
                max_d += 1
    
            start_time = getOmpTime()
    
            #quickSortPar(arr, 0, len(arr) - 1)
            #with openmp("parallel"):
            #    with openmp("single"):
            #        quickSortPar(arr, 0, len(arr)-1, max_d)
            quickSortParHelp(arr, max_d)
    
            end_time = getOmpTime()
            elapsed_time = end_time - start_time;
            # print(elapsed_time)
            full_time += elapsed_time
    
            # print(arr)
            #if (not isArraySorted(arr)):
            #    print("Massive is not sorted\n")
            
        full_time /= ITERS
        print(f"Time taken ({threads_n} threads):", full_time, "seconds")
        threads_n //= 2

In [29]:
N = 100000
ITERS = 200

for i in range(3):
    calcParTime2()

Time taken (4 threads): 0.003495000600814819 seconds
Time taken (2 threads): 0.003810269832611084 seconds
Time taken (1 threads): 0.00428169846534729 seconds
Time taken (4 threads): 0.003446553945541382 seconds
Time taken (2 threads): 0.0036129355430603027 seconds
Time taken (1 threads): 0.0045724356174469 seconds
Time taken (4 threads): 0.0034904921054840087 seconds
Time taken (2 threads): 0.004006369113922119 seconds
Time taken (1 threads): 0.004559621810913086 seconds


# Testing

In [15]:
@njit        
def printNumThreads():
    with openmp('parallel'):
        ID = omp_get_thread_num()
        if (ID == 0):
            print("omp_get_max_threads() =", omp_get_max_threads())
            print("omp_get_num_threads() =", omp_get_num_threads())
        print("omp_get_thread_num() =", ID)

printNumThreads()

omp_get_thread_num() =omp_get_thread_num() = 3
omp_get_thread_num() = 1
 2
omp_get_max_threads() = 4
omp_get_num_threads() = 4
omp_get_thread_num() = 0


In [17]:
@njit
def test1():
    arr = createRandomArr(10)
    print(arr)
    with openmp("parallel"):
        ID = omp_get_thread_num()
        print("omp_get_thread_num() = ", ID);
        with openmp("single"):
            quickSortPar(arr, 0, len(arr)-1, 2)
    print(arr)

test1()

TypingError: Failed in nopython mode pipeline (step: Handle with contexts)
Failed in nopython mode pipeline (step: Handle with contexts)
Failed in nopython mode pipeline (step: nopython frontend)
Untyped global name 'createRandomArr': Cannot determine Numba type of <class 'function'>

File "../../../tmp/ipykernel_489/4166193997.py", line 3:
<source missing, REPL/exec in use?>


In [196]:
@njit
def getFreeThreads(threads):
    id1 = -1;
    id2 = -1;
    for i in range(len(threads)):
        if (threads[i]):
            if (id1 == -1):
                id1 = i
            else:
                id2 = i
        if (not id1 == -1 and not id2 == -1):
            break;
    return id1, id2
        

@njit
def quickSortPar(arr, low, high, threads):
    if (low < high):
        # partitionHoar
        i = low
        j = high
        pivot = arr[(i + j) // 2]
        while (1):
            while (arr[i] < pivot):
                i = i + 1
            while (arr[j] > pivot):
                j = j - 1
        
            if (i >= j):
                break
        
            arr[i], arr[j] = arr[j], arr[i]
            i = i + 1
            j = j - 1
        pi = j

        threadID = omp_get_thread_num()
        #print("start:", threadID)
        threads[threadID] = True;
        #print("HERE2", threads)
        id1, id2 = getFreeThreads(threads)
        #print(id1, id2)
        # recurcive call
        if (not id1 == -1 and not id2 == -1):
            threads[id1] = False
            threads[id2] = False
            #print("HERE", threads)
            with openmp("parallel shared(arr, low, pi, high, threads)"):
                threadID = omp_get_thread_num()
                #print(threadID)
                if (threadID == id1):
                    quickSortPar(arr, low, pi, threads)
                elif (threadID == id2):
                    quickSortPar(arr, low, pi, threads)
        else:
            quickSortPar(arr, low, pi, threads)
            quickSortPar(arr, pi + 1, high, threads)
        '''
        if (d < max_d):
            #with openmp("parallel"):
            with openmp("task shared(arr, low, pi)"):
                quickSortPar(arr, low, pi, max_d, d + 1)
                #quickSortSeq(arr, low, pi)
            with openmp("task shared(arr, pi, high)"):    
                quickSortPar(arr, pi + 1, high, max_d, d + 1)  
                #quickSortSeq(arr, pi + 1, high)
        else:
            quickSortPar(arr, low, pi, max_d, d + 1)
            quickSortPar(arr, pi + 1, high, max_d, d + 1)
        '''
        #quickSortPar(arr, low, pi)
        #quickSortPar(arr, pi + 1, high)