In [1]:
import os
import cv2
import time
import pickle
import skimage.io
import numpy as np
import tkinter as tk
from functions import *
from tkinter import *
from matplotlib import pyplot as plt
from tkinter import filedialog as fd
from tkinter import messagebox as mb
from concurrent.futures import ThreadPoolExecutor

def format_result(times):
    suffix = ['nS', 'µS','mS','S']
    mean=np.mean(times)
    std=np.std(times)
    s1=int(np.log(mean) / np.log(1000))
    s2=int(np.log(std) / np.log(1000))
    if s1>3:
        s1=3
    if s2>3:
        s2=3
    mean /= 1000**s1
    std /= 1000**s2
    return ('%.2f%s ± %.1f%s per loop (%i runs)'%(mean, suffix[s1], std, suffix[s2], len(times)))

def bench(fn,nLoops,timeout=10,fmt=True):
    times = []
    initial = time.time()
    while len(times)<nLoops and time.time()-initial < timeout:
        start = time.time_ns()
        fn()
        end = time.time_ns()
        times.append(end-start)
    if fmt:
        return format_result(times)
    else:
        return (np.mean(times),np.std(times),len(times))

def packPixels():
    overlapCounter = np.zeros((h,w),dtype='uint8')
    for idx in range(len(locs)):
        size = int(locs[idx][6])
        offset = size/2.0
        x , y = np.round(locs[idx][:2])
        x1,y1,x2,y2 = int(x-offset),int(y-offset),int(x+offset),int(y+offset)
        overlapCounter[y1:y2,x1:x2] += 1
    nLayers = np.max(overlapCounter)
    coeff_layers = []
    [coeff_layers.append(np.zeros((h,w))) for i in range(nLayers)]
    index_layers = []
    [index_layers.append(np.zeros((h,w))) for i in range(nLayers)]
    for idx in range(len(locs)):
        values = coeffs[idx]
        size = int(locs[idx][6])
        offset = size/2.0
        x , y = np.round(locs[idx][:2])
        pixelCoords = [(int(x+i-offset), int(y+j-offset), values[j,i]) for j in range(size) for i in range(size)]
        for coord in pixelCoords:
            a , b , c = coord
            for i in range(nLayers):
                if coeff_layers[i][b,a] == 0:
                    coeff_layers[i][b,a] = c
                    index_layers[i][b,a] = idx+1
                    break
    scaling_factor = ((2**coeff_bits)-1) / np.max(coeff_layers)
    for i in range(len(coeff_layers)):
        coeff_layers[i] = (coeff_layers[i]*scaling_factor).astype(numpy_type[types['COEFFICIENTS']])#.flatten()
        index_layers[i] = index_layers[i].astype(numpy_type[types['INDEX']])#.flatten()
    return coeff_layers,index_layers

def packKernels():
    pass

numpy_type = {
    'unsigned char': 'uint8',
    'unsigned short': 'uint16',
    'unsigned long': 'uint32',
    'unsigned long long': 'uint64'
}

types = pickle.load(open("types.pkl", "rb"))
coeffs = np.squeeze(pickle.load(open("50k_coeff.pkl", "rb" ), encoding='latin1'))
locs = pickle.load(open("50k_loc.pkl", "rb" ), encoding='latin1')
w = 2*int(np.max(np.abs(locs[:,0])) + np.max(locs[:,6]//2)+1)
h = 2*int(np.max(np.abs(locs[:,1])) + np.max(locs[:,6]//2)+1)
coeff_bits = 16

img = skimage.io.imread('dock.jpg')
img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

generateLayers = True
method = packPixels

if generateLayers:
    coeff_layers,index_layers = method()
    idx = len(locs)+1
    pickle.dump((coeff_layers,index_layers,idx), open("layers.pkl", "wb"))
else:
    coeff_layers,index_layers, idx = pickle.load(open("layers.pkl", "rb"))
    
result = np.zeros(idx, dtype=numpy_type[types['RESULTS']])

In [2]:
def sample_manualThread(img,Ncores=1):
    img_flat = img[y1:y2, x1:x2].reshape(h*w)
    zeros_int32(result)
    for i in range(len(coeff_layers)):
        with ThreadPoolExecutor(max_workers=Ncores) as executor:
            img_array = np.array_split(img_flat,Ncores)
            coeff_array = np.array_split(coeff_layers[i],Ncores)
            index_array = np.array_split(index_layers[i],Ncores)
            jobs = [executor.submit(sample, img_array[x], coeff_array[x], index_array[x], result) for x in range(Ncores)]
            
def sample_prangeThread(img,Ncores=1):
    img_flat = img[y1:y2, x1:x2].reshape(h*w)
    zeros_int32(result)
    for i in range(len(coeff_layers)):
        sample_parallel(img_flat, coeff_layers[i], index_layers[i], result, Ncores)

def noThreads(img):
    img_flat = img[y1:y2, x1:x2].reshape(h*w)
    zeros_int32(result)
    for i in range(len(coeff_layers)):
        sample(img_flat, coeff_layers[i], index_layers[i], result)

In [3]:
y, x = 1080//2, 1920//2
x1, y1, x2, y2 = int(x-(w//2)), int(y-(h//2)), int(x+(w//2)), int(y+(h//2))

print("Manual threads:")
for i in range(os.cpu_count()):
    print("%i Thread(s): %s"%(i+1,bench(lambda:sample_manualThread(img_gray,i+1),1000)))
print('#'*100)
print("Automatic threads:")
for i in range(os.cpu_count()):
    print("%i Thread(s): %s"%(i+1,bench(lambda:sample_manualThread(img_gray,i+1),1000)))
print('#'*100)
print("No threading:")
print(bench(lambda:noThreads(img_gray),1000))

Manual threads:
1 Thread(s): 22.80mS ± 1.2mS per loop (439 runs)
2 Thread(s): 17.87mS ± 1.6mS per loop (560 runs)
3 Thread(s): 19.69mS ± 1.6mS per loop (508 runs)
4 Thread(s): 20.99mS ± 1.7mS per loop (477 runs)
5 Thread(s): 22.56mS ± 1.6mS per loop (444 runs)
6 Thread(s): 26.17mS ± 2.4mS per loop (383 runs)
7 Thread(s): 29.23mS ± 1.4mS per loop (343 runs)
8 Thread(s): 33.81mS ± 2.9mS per loop (296 runs)
####################################################################################################
Automatic threads:
1 Thread(s): 23.62mS ± 2.3mS per loop (424 runs)
2 Thread(s): 18.09mS ± 1.6mS per loop (553 runs)
3 Thread(s): 19.93mS ± 1.9mS per loop (502 runs)
4 Thread(s): 20.58mS ± 1.8mS per loop (486 runs)
5 Thread(s): 23.06mS ± 2.2mS per loop (434 runs)
6 Thread(s): 25.71mS ± 1.5mS per loop (389 runs)
7 Thread(s): 29.12mS ± 1.6mS per loop (344 runs)
8 Thread(s): 33.70mS ± 2.6mS per loop (297 runs)
################################################################################

In [5]:
resolution = np.array([1080,1920], dtype='int32')
retina_size = np.array([928,928], dtype='int32')
fixation = np.array([1920/2,1080/2], dtype='int32')
img_x1, img_y1, img_x2, img_y2, ret_x1, ret_y1, ret_x2, ret_y2 = get_bounds(resolution, retina_size, fixation)
print(ret_x1, ret_y1, ret_x2, ret_y2)

0 0 928 928


In [3]:
%%timeit
img_x1, img_y1, img_x2, img_y2, ret_x1, ret_y1, ret_x2, ret_y2 = get_bounds(resolution, retina_size, fixation)

2.8 µs ± 105 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [10]:
%%timeit
img_gray[img_y1:img_y2, img_x1:img_x2].flatten()

61.2 µs ± 2.2 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [4]:
def dynamic_size(img):
    img_flat = img[img_y1:img_y2, img_x1:img_x2].flatten()
    zeros_int32(result)
    for i in range(len(coeff_layers)):
        coeffs = coeff_layers[i][ret_y1:ret_y2, ret_x1:ret_x2].flatten()
        indices = index_layers[i][ret_y1:ret_y2, ret_x1:ret_x2].flatten()
        sample(img_flat, coeffs, indices, result)
        
print(bench(lambda:dynamic_size(img_gray), 1000))

70.09mS ± 3.3mS per loop (143 runs)
