In [51]:
from ctypes import CDLL
randlib = CDLL("libc.so.6")
import time, sys
import multiprocessing as mp

# Look here for spoilers of what I used to speed up code
import numpy as np
from numba import jit 
from numba.experimental import jitclass
from numba import float32, int32
from numba import types



def genrand3dpt(MIN, MAX):
    """Return one pseudo-random 3-D point as an ``(X, Y, Z)`` tuple.

    Each coordinate is produced by one call to ``randlib.rand()``; the raw
    value is divided by 2147483647 and mapped linearly onto ``[MIN, MAX]``.
    The three draws are made in X, Y, Z order.
    """
    span = MAX - MIN
    coords = []
    for _ in range(3):
        # Same scaling for every axis: normalise the draw, stretch, then shift.
        unit = float(randlib.rand()) / 2147483647
        coords.append(span * unit + MIN)
    return tuple(coords)

class point:
    """Minimal container holding a 3-D coordinate as ``x``, ``y`` and ``z``."""

    def __init__(self, x: float, y: float, z: float):
        self.x = x
        self.y = y
        self.z = z
    
def setCase(case):
    """Return the benchmark configuration for a named test case.

    Parameters
    ----------
    case : str
        Name of the test case, one of ``'T1'`` .. ``'T6'``.

    Returns
    -------
    tuple
        ``(seed, pts, cores, expected)`` where ``seed`` is the random seed,
        ``pts`` the number of points to generate, ``cores`` the number of
        worker processes, and ``expected`` the banner line describing the
        expected minimum and maximum distance.

    Raises
    ------
    ValueError
        If ``case`` is not a known test-case name. (The previous if/elif
        chain fell through and failed with ``UnboundLocalError`` instead.)
    """
    # NOTE(review): T2 uses only 20 points yet carries the same expected banner
    # as T1 (40000 points) -- presumably shrunk so the pure-Python run finishes
    # quickly; confirm before trusting the banner for T2.
    cases = {
        'T1': (7, 40000, 1, "Expected Min, Max: 8.705  16830.027\n"),
        'T2': (7, 20, 4, "Expected Min, Max: 8.705  16830.027\n"),
        'T3': (7, 80000, 1, "Expected Min, Max: 0.260  16989.490\n"),
        'T4': (7, 80000, 2, "Expected Min, Max: 0.260  16989.490\n"),
        'T5': (7, 80000, 4, "Expected Min, Max: 0.260  16989.490\n"),
        'T6': (111, 111000, 3, "Expected Min, Max: 1.555  16965.943\n"),
    }
    try:
        seed, pts, cores, expected = cases[case]
    except KeyError:
        known = ", ".join(sorted(cases))
        raise ValueError(f"unknown test case {case!r}; expected one of: {known}") from None
    return seed, pts, cores, expected

# Parallelization, Calling C and Other Methods: Part 2 of the Hit Series "Speeding Up My Code"

I had never done parallel coding in Python before this project, but I had done it in C in Dr. Butler's class. I needed to do some reading on how to spawn processes. Luckily, most of the work was straightforward because the approach I knew from C translated almost directly. Below is my initial implementation. I started with Numba because it was already blindingly fast.

In [40]:
# List of (field name, numba type) pairs, the shape numba's jitclass takes as a spec.
# NOTE(review): nothing visible in this notebook passes `spec` to jitclass, and four
# of the five entries reuse the field name 'value' -- presumably a leftover draft;
# confirm before relying on it.
spec = [
    ('array', float32[:,:]),
    ('value', float32),
    ('value', float32),
    ('value', int32),
    ('value', int32),    
]


class args:
    """Work description handed to one worker process.

    Attributes set by the constructor:
      pts   -- the point collection passed in as ``points``
      start -- first outer-loop index for this worker (``strt``)
      ptcnt -- total number of points (``cnt``)
      MAX   -- initial running maximum, 0
      MIN   -- initial running minimum, 1.7976931348623157e+308
    """

    def __init__(self, points, strt, cnt):
        # What the worker operates on.
        self.pts, self.start, self.ptcnt = points, strt, cnt
        # Seed values for the running extremes.
        self.MAX, self.MIN = 0, 1.7976931348623157e+308
        
@jit(types.Tuple((float32, float32))(float32[:,:],float32,float32,int32,int32,int32))
def shortest_numba(pts, max, min, start, ptcnt, step=1):
    """Scan point pairs and return ``(max_distance, min_distance)``.

    Parameters
    ----------
    pts : float32[:, :]
        Point coordinates; columns 0, 1 and 2 are used.
    max, min : float32
        Initial running extremes of the *squared* distance.
    start : int32
        First outer-loop index handled by this call.
    ptcnt : int32
        Total number of points (exclusive upper bound of both loops).
    step : int32, optional
        Stride of the outer loop, i.e. the number of workers sharing the job.
        This used to be read from a global ``corecnt`` that no cell defines
        at module level; it is now an explicit argument so each worker gets
        the stride it was started with.

    Returns
    -------
    (float32, float32)
        Square roots of the largest and smallest squared distance seen,
        in that order (maximum first).
    """
    for i in range(start, ptcnt, step):
        for j in range(i+1, ptcnt):
            distance = (pts[i][0] - pts[j][0])**2 + (pts[i][1] - pts[j][1])**2 + (pts[i][2]- pts[j][2])**2
            if max < distance:
                max = distance
            # Independent check (was `elif`): a pair that raises the maximum
            # may also be the closest pair seen so far, e.g. the very first one.
            if min > distance:
                min = distance
    min, max = min **0.5, max**0.5
    return max, min

def numbaworker(q, passer, step=1):
    """Run ``shortest_numba`` on ``passer`` and put its result tuple on ``q``.

    ``step`` is the outer-loop stride forwarded to ``shortest_numba``; all
    arguments are cast to the 32-bit types named in its signature.
    """
    q.put(shortest_numba(
        np.float32(passer.pts),
        np.float32(passer.MAX),
        np.float32(passer.MIN),
        np.int32(passer.start),
        np.int32(passer.ptcnt),
        np.int32(step),
    ))

def parallel(case='T6'):
    """Run the Numba benchmark for ``case`` across worker processes.

    Prints the expected banner, then the measured minimum and maximum
    distance, then the elapsed wall-clock seconds.
    """
    seed, pts, corecnt, expected = setCase(case)
    randlib.srand(seed)
    print(expected)

    Q = mp.Queue()

    points = np.empty((pts,3), dtype=np.double)
    for i in range(pts):
        points[i]  = genrand3dpt(0, 10000) # call random points macro
    procs = []

    t1= time.time()    # to maintain consistency, I am timing like butler does.
    for i in range(corecnt):
        passer = args(points=points, strt=i, cnt=len(points))
        # The stride travels with the worker instead of through a global.
        proc = mp.Process(target=numbaworker, args=(Q, passer, corecnt))
        proc.start()
        procs.append(proc)

    # Drain the queue before joining: a child that has put data on a Queue may
    # not exit until that data has been consumed.
    results = [Q.get() for _ in procs]
    for proc in procs:
        proc.join()

    # Keep maxima and minima apart so a worker that saw no pairs (it reports
    # max 0 and min inf) cannot leak its sentinels into the wrong extreme.
    maxima = [mx for mx, _ in results]
    minima = [mn for _, mn in results]
    print(min(minima),max(maxima))
    t2=time.time()
    print(t2-t1)
# Run the benchmark; it prints the expected banner, the measured min/max and the elapsed seconds.
parallel()

Expected Min, Max: 1.555  16965.943

1.5551254749298096 16965.943359375
4.6719701290130615


I noticed something strange. No matter what core count I used, the Numba version was fast, and sometimes it was even faster with one core. It ran at the same speed as the C version. My hunch is that, under the hood, Numba is actually doing some parallelization on its own. I decided to test this theory by writing a vanilla Python solution, just to see if I was going crazy.

## Vanilla Python with Parallel

In [None]:
class args:
    """Per-worker job record: point set, starting index, point count and initial extremes."""

    def __init__(self, points, strt, cnt):
        # Initial running extremes: maximum starts at 0, minimum at
        # 1.7976931348623157e+308.
        self.MIN = 1.7976931348623157e+308
        self.MAX = 0
        # The data and the portion of it this worker starts from.
        self.ptcnt = cnt
        self.start = strt
        self.pts = points

def shortest(pts, max, min, start, ptcnt, step=1):
    """Scan point pairs and return ``(max_distance, min_distance)``.

    Parameters
    ----------
    pts : indexable of 3-element points
        Point coordinates; elements 0, 1 and 2 of each point are used.
    max, min : number
        Initial running extremes of the *squared* distance.
    start : int
        First outer-loop index handled by this call.
    ptcnt : int
        Total number of points (exclusive upper bound of both loops).
    step : int, optional
        Stride of the outer loop, i.e. the number of workers sharing the job.
        This used to be read from a global ``corecnt`` that no cell defines
        at module level (``NameError`` on a fresh kernel); it is now an
        explicit argument.

    Returns
    -------
    tuple
        Square roots of the largest and smallest squared distance seen,
        in that order (maximum first).
    """
    for i in range(start, ptcnt, step):
        for j in range(i+1, ptcnt):
            distance = (pts[i][0] - pts[j][0])**2 + (pts[i][1] - pts[j][1])**2 + (pts[i][2]- pts[j][2])**2
            if max < distance:
                max = distance
            # Independent check (was `elif`): a pair that raises the maximum
            # may also be the closest pair seen so far, e.g. the very first one.
            if min > distance:
                min = distance
    min, max = min **0.5, max**0.5
    return max, min

def worker(q, passer, step=1):
    """Run ``shortest`` on ``passer`` and put its result tuple on ``q``.

    ``step`` is the outer-loop stride forwarded to ``shortest``.
    """
    q.put(shortest(passer.pts, passer.MAX, passer.MIN, passer.start, passer.ptcnt, step))

def parallel(case='T2'):
    """Run the pure-Python benchmark for ``case`` across worker processes.

    Prints the expected banner, then the measured minimum and maximum
    distance, then the elapsed wall-clock seconds.
    """
    seed, pts, corecnt, expected = setCase(case)
    randlib.srand(seed)
    print(expected)

    Q = mp.Queue()

    points = np.empty((pts,3), dtype=np.double)
    for i in range(pts):
        points[i]  = genrand3dpt(0, 10000) # call random points macro
    procs = []

    t1= time.time()    # to maintain consistency, I am timing like butler does.
    for i in range(corecnt):
        passer = args(points=points, strt=i, cnt=len(points))
        # The stride travels with the worker instead of through a global.
        proc = mp.Process(target=worker, args=(Q, passer, corecnt))
        proc.start()
        procs.append(proc)

    # Drain the queue before joining: a child that has put data on a Queue may
    # not exit until that data has been consumed.
    results = [Q.get() for _ in procs]
    for proc in procs:
        proc.join()

    # Keep maxima and minima apart so a worker that saw no pairs (it reports
    # max 0 and min inf) cannot leak its sentinels into the wrong extreme.
    maxima = [mx for mx, _ in results]
    minima = [mn for _, mn in results]
    print(min(minima),max(maxima))
    t2=time.time()
    print(t2-t1)
# Run the benchmark; it prints the expected banner, the measured min/max and the elapsed seconds.
parallel()