In [1]:
load_ext cython

In [2]:
import cv2
import skimage.io
import matplotlib.pyplot as plt
from helpers import *

# Read the benchmark image from the working directory.
img = skimage.io.imread('dock.jpg')
# Collapse the colour image to a single channel; the RGB2GRAY flag assumes
# `img` is in RGB channel order (presumably what imread returns for this
# file -- TODO confirm, an RGBA or already-grayscale file would not fit).
img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

In [3]:
# Unpack the six precomputed retina tables stored in "retina.pkl".
# `loadPickle` is not defined in this notebook (presumably it comes from the
# `helpers` star import -- verify).
# NOTE(review): if it unpickles the file, only load files from a trusted
# source -- unpickling untrusted data can execute arbitrary code.
size,scalingFactor,coeff_array,index_array,size_array,kernel_map = loadPickle("retina.pkl")

In [4]:
import time
import os
from threading import Thread

def format_result(times):
    """Summarise a list of per-run durations as a human-readable string.

    Parameters
    ----------
    times : sequence of numbers
        One duration per run, expressed in nanoseconds (the smallest unit in
        the suffix table below).

    Returns
    -------
    str
        ``'<mean><unit> ± <std><unit> per loop (<n> runs)'`` where the mean
        and the standard deviation are each scaled to their own unit
        (nS, µS, mS or S).

    Raises
    ------
    ValueError
        If ``times`` is empty (there is nothing to summarise).
    """
    if len(times) == 0:
        raise ValueError("format_result() needs at least one timing")

    suffix = ['nS', 'µS','mS','S']

    def unit_index(value):
        # Pick the largest unit that does not exceed |value|, using plain
        # comparisons instead of log(): log(0) is -inf (std is 0 whenever all
        # runs are identical or there is a single run), values below 1 give a
        # negative index, and log ratios can be off by one ulp at exact powers
        # of 1000. NaN compares False everywhere and therefore stays in nS.
        idx = 0
        while idx < len(suffix) - 1 and abs(value) >= 1000 ** (idx + 1):
            idx += 1
        return idx

    mean = np.mean(times)
    std = np.std(times)
    s1 = unit_index(mean)
    s2 = unit_index(std)
    mean = mean / 1000**s1
    std = std / 1000**s2
    return ('%.2f%s ± %.1f%s per loop (%i runs)'%(mean, suffix[s1], std, suffix[s2], len(times)))

def bench(fn,nLoops,timeout=10,fmt=True):
    """Time repeated calls of ``fn`` and summarise the per-call durations.

    ``fn`` is called with no arguments until either ``nLoops`` calls have
    completed or ``timeout`` seconds have elapsed, whichever comes first
    (the timeout is only checked between calls, never during one).

    Parameters
    ----------
    fn : callable
        Zero-argument callable to benchmark.
    nLoops : int
        Maximum number of calls to time.
    timeout : float, optional
        Overall time budget in seconds.
    fmt : bool, optional
        If true, return the string produced by ``format_result``; otherwise
        return the raw statistics.

    Returns
    -------
    str or tuple
        Formatted summary, or ``(mean_ns, std_ns, n_runs)`` when ``fmt`` is
        false.
    """
    times = []
    # Use the monotonic clock for the budget and the high-resolution
    # performance counter for the samples: the wall clock (time.time) can be
    # adjusted while the benchmark runs and may have coarse resolution.
    deadline = time.monotonic() + timeout
    while len(times) < nLoops and time.monotonic() < deadline:
        start = time.perf_counter_ns()
        fn()
        times.append(time.perf_counter_ns() - start)
    if fmt:
        return format_result(times)
    return (np.mean(times),np.std(times),len(times))

In [5]:
%%cython -a

# cython: language_level=3

cimport cython
from libc.math cimport round

@cython.wraparound(False)
@cython.boundscheck(False)
cpdef sample(unsigned char[::1] img_flat, double[::1] coeffs, unsigned char[::1] result_flat,
             unsigned short[::1] sizes, unsigned int[::1] idx, unsigned long long offset=0):
    """Compute one weighted sum of image pixels per entry of ``sizes``.

    Entry ``k`` of ``sizes`` says how many consecutive elements of
    ``coeffs``/``idx`` belong to output ``k``. For each of them the pixel
    ``img_flat[idx[j]]`` is multiplied by ``coeffs[j]``; the rounded sum is
    stored, cast to ``unsigned char``, in ``result_flat[k + offset]``.

    Bounds checking and negative-index wraparound are disabled, so the
    caller must pass consistent arrays. The loops run with the GIL released.
    """
    cdef unsigned int out_pos
    cdef unsigned int k
    cdef unsigned int start = 0
    cdef unsigned int stop
    cdef double total
    with nogil:
        for out_pos in range(sizes.shape[0]):
            # [start, stop) is the run of coefficients owned by this output.
            stop = start + sizes[out_pos]
            total = 0
            for k in range(start, stop):
                total += img_flat[idx[k]]*coeffs[k]
            result_flat[out_pos+offset] = <unsigned char>round(total)
            start = stop

In [6]:
# Single-threaded reference run: one uint8 output per entry of size_array.
result = np.zeros(size_array.shape, dtype=np.uint8)
# `sample` writes its output into `result` in place.
sample(img_gray.ravel(), coeff_array, result, size_array, index_array)
# Keep an independent copy as the baseline the threaded variants are compared to.
validated_result = result.copy()

In [7]:
%%timeit
# Timed body: allocate a fresh output buffer, then run the single-threaded
# sampler over the whole image (allocation is included in the measurement).
result = np.zeros(size_array.shape, dtype=np.uint8)
sample(img_gray.ravel(), coeff_array, result, size_array, index_array)

9.18 ms ± 781 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [8]:
def sliceArrays(n):
    """Split the flat retina tables into at most ``n`` contiguous slices.

    Reads the notebook-level ``size_array``, ``coeff_array`` and
    ``index_array``. Slice boundaries always fall between two outputs (an
    output's run of coefficients is never cut), and each of the first
    ``n - 1`` boundaries is placed at the first output whose cumulative
    coefficient count reaches the next multiple of ``len(coeff_array)//n``.
    The last slice always extends to the final output, so every output
    belongs to exactly one slice even when the coefficient count is not
    divisible by ``n``.

    Parameters
    ----------
    n : int
        Requested number of slices (must be at least 1). Fewer slices are
        returned when there are not enough outputs to split.

    Returns
    -------
    tuple
        ``(split_coeffs, split_sizes, split_indices, sizes)`` where the first
        three are lists with one array per slice and ``sizes`` lists the
        boundaries as indices into ``size_array``: ``sizes[k]`` is the first
        output of slice ``k`` and the final entry is the total output count.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    # valid_offsets[i] is the end (exclusive) of output i's coefficient run.
    valid_offsets = np.cumsum(size_array)
    n_outputs = len(valid_offsets)
    coeffs_per_slice = len(coeff_array)//n
    j = 1
    offsets = [0]
    sizes = [0]
    for i in range(n_outputs):
        if j >= n:
            # Only n-1 interior boundaries are needed; the last slice is
            # closed below so that it absorbs any remainder.
            break
        if valid_offsets[i] >= j*coeffs_per_slice:
            sizes.append(i+1)
            offsets.append(int(valid_offsets[i]))
            j += 1
    # Close the final slice at the very end unless a boundary already sits
    # there; otherwise the trailing outputs would be left out of every slice.
    if sizes[-1] != n_outputs:
        sizes.append(n_outputs)
        offsets.append(int(valid_offsets[-1]))

    n_slices = len(sizes) - 1
    split_coeffs = [coeff_array[offsets[k]:offsets[k+1]] for k in range(n_slices)]
    split_indices = [index_array[offsets[k]:offsets[k+1]] for k in range(n_slices)]
    split_sizes = [size_array[sizes[k]:sizes[k+1]] for k in range(n_slices)]
    return (split_coeffs, split_sizes, split_indices, sizes)

def multithreaded(img_flat,coeffs, sizes, indices, offsets):
    """Run ``sample`` once per slice, each call in its own thread.

    ``coeffs``, ``sizes`` and ``indices`` hold one array per slice and
    ``offsets[k]`` is the position in the output where slice ``k`` starts
    writing. All threads share one output buffer shaped like the
    notebook-level ``size_array``; it is returned after every thread has
    been joined.
    """
    output = np.zeros(size_array.shape, dtype=np.uint8)

    workers = []
    for slice_no, slice_coeffs in enumerate(coeffs):
        call_args = (img_flat, slice_coeffs, output, sizes[slice_no],
                     indices[slice_no], offsets[slice_no])
        workers.append(Thread(target=sample, args=call_args))

    # Launch every worker before waiting on any of them.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    return output
        

# Benchmark the threaded sampler with 1..N slices, N being the CPU count
# (os.cpu_count() may return None, in which case a single thread is used).
img_flat = img_gray.ravel()
for i in range(os.cpu_count() or 1):
    n_threads = i + 1
    coeffs, sizes, indices , offsets= sliceArrays(n_threads)
    # Check the output actually produced with this slicing against the
    # single-threaded baseline before spending time on the benchmark.
    threaded_result = multithreaded(img_flat, coeffs, sizes, indices, offsets)
    if np.array_equal(validated_result, threaded_result):
        print("%i Threads: %s"%(n_threads,bench(lambda:multithreaded(img_flat, coeffs, sizes, indices, offsets),1000)))
    else:
        print("Sampling result is incorrect")

1 Threads: 9.48mS ± 1.1mS per loop (1000 runs)
2 Threads: 5.11mS ± 738.0µS per loop (1000 runs)
3 Threads: 3.72mS ± 661.5µS per loop (1000 runs)
4 Threads: 4.22mS ± 593.8µS per loop (1000 runs)
5 Threads: 3.88mS ± 834.6µS per loop (1000 runs)
6 Threads: 3.61mS ± 711.1µS per loop (1000 runs)
7 Threads: 3.57mS ± 541.3µS per loop (1000 runs)
8 Threads: 4.81mS ± 3.8mS per loop (1000 runs)
