In [None]:
import numpy as np
import tifffile as tiff
from nanopyx.core.transform import Convolution2D as Conv2D 
from matplotlib import pyplot as plt

In [None]:
# Timing records per backend, kept as
# (image side length, kernel side length, list of run times) tuples.

# Cython implementations and the OpenCL implementation
(
    task_run_times_unthreaded,
    task_run_times_threaded,
    task_run_times_threaded_static,
    task_run_times_threaded_guided,
    task_run_times_threaded_dynamic,
    task_run_times_opencl,
) = ([] for _ in range(6))

# Remaining backends
(
    task_run_times_numba,
    task_run_times_cupy,
    task_run_times_transonic,
    task_run_times_dask,
) = ([] for _ in range(4))

In [None]:
# Convolution object reused for every benchmark run.
conv2d = Conv2D()

# Square image shapes to benchmark.
dim_sizes = [(side,) * 2 for side in (100, 500, 1000, 2500, 5000, 7500, 10000, 15000, 20000)]

# Square kernel shapes to benchmark: side lengths 1, 5, 9, 13, 17 and 21.
kernel_sizes = [(side,) * 2 for side in range(1, 25, 4)]

In [None]:
def conv_func(conv, image, knl, run_type, n_runs=3):
    """Run a convolution repeatedly and collect the time recorded for each run.

    Parameters
    ----------
    conv : object
        Object exposing ``run(image, knl, run_type=...)`` and a ``_last_time``
        attribute (presumably the duration of its most recent run - confirm
        against the ``Convolution2D`` implementation).
    image, knl
        Passed unchanged to ``conv.run``.
    run_type : str
        Forwarded to ``conv.run`` as the ``run_type`` keyword.
    n_runs : int, optional
        How many times the convolution is run. Defaults to 3.

    Returns
    -------
    list
        The value of ``conv._last_time`` read after each run, in run order.
    """
    timings = []
    for _ in range(n_runs):
        conv.run(image, knl, run_type=run_type)
        # Read the attribute right after each run, before the next run
        # overwrites it.
        timings.append(conv._last_time)

    return timings

In [None]:
# Benchmark every backend on every (image size, kernel size) combination.

# Run type of the OpenCL device to benchmark; change to the appropriate device
# name (e.g. "OpenCL_Apple M1 Pro" or "OpenCL_NVIDIA GeForce RTX 4090").
opencl_run_type = "OpenCL_NVIDIA GeForce RTX 4090"

# Start from fresh lists so that re-running this cell replaces earlier results
# instead of appending duplicates to them (or failing on `.append` once the
# names have been rebound to NumPy arrays by the save/load cells).
task_run_times_threaded = []
task_run_times_threaded_static = []
task_run_times_threaded_guided = []
task_run_times_threaded_dynamic = []
task_run_times_unthreaded = []
task_run_times_opencl = []
task_run_times_numba = []
task_run_times_cupy = []
task_run_times_transonic = []
task_run_times_dask = []

# (run_type handed to the convolution, list collecting that backend's records).
# The order below is the order in which the backends are timed for each
# configuration.
benchmark_targets = [
    ("Threaded", task_run_times_threaded),
    ("Threaded_static", task_run_times_threaded_static),
    ("Threaded_guided", task_run_times_threaded_guided),
    ("Threaded_dynamic", task_run_times_threaded_dynamic),
    ("Unthreaded", task_run_times_unthreaded),
    (opencl_run_type, task_run_times_opencl),
    ("Numba", task_run_times_numba),
    ("Cuda", task_run_times_cupy),
    ("Transonic", task_run_times_transonic),
    ("Dask", task_run_times_dask),
]

for dim in dim_sizes:
    # One random float32 image per image size, shared by all kernels/backends.
    img = np.random.random(dim).astype(np.float32)
    for ks in kernel_sizes:
        kernel = np.ones(ks).astype(np.float32)

        for run_type, results in benchmark_targets:
            run_times = conv_func(conv2d, img, kernel, run_type=run_type)
            # Record: (image side length, kernel side length, list of run times)
            results.append((dim[0], ks[0], run_times))

In [None]:
# Run this cell to store benchmark values for future usage

# Rebind every result container to an object array; the plotting cells index
# these names as 2-D arrays.
task_run_times_opencl = np.array(task_run_times_opencl, dtype=object)
task_run_times_unthreaded = np.array(task_run_times_unthreaded, dtype=object)
task_run_times_threaded = np.array(task_run_times_threaded, dtype=object)
task_run_times_threaded_static = np.array(task_run_times_threaded_static, dtype=object)
task_run_times_threaded_guided = np.array(task_run_times_threaded_guided, dtype=object)
task_run_times_threaded_dynamic = np.array(task_run_times_threaded_dynamic, dtype=object)
task_run_times_numba = np.array(task_run_times_numba, dtype=object)
task_run_times_cupy = np.array(task_run_times_cupy, dtype=object)
task_run_times_transonic = np.array(task_run_times_transonic, dtype=object)
task_run_times_dask = np.array(task_run_times_dask, dtype=object)

# Write one .npy file per backend into the current working directory.
for file_name, records in (
    ("task_run_times_opencl.npy", task_run_times_opencl),
    ("task_run_times_unthreaded.npy", task_run_times_unthreaded),
    ("task_run_times_threaded.npy", task_run_times_threaded),
    ("task_run_times_threaded_static.npy", task_run_times_threaded_static),
    ("task_run_times_threaded_guided.npy", task_run_times_threaded_guided),
    ("task_run_times_threaded_dynamic.npy", task_run_times_threaded_dynamic),
    ("task_run_times_numba.npy", task_run_times_numba),
    ("task_run_times_cupy.npy", task_run_times_cupy),
    ("task_run_times_transonic.npy", task_run_times_transonic),
    ("task_run_times_dask.npy", task_run_times_dask),
):
    np.save(file_name, records)

In [None]:
# Run this cell if you're importing previous benchmarks
#
# NOTE: the plotting cells also use `dim_sizes` and `kernel_sizes`, which are
# defined in the benchmark configuration cell, so that cell must have been run
# as well.

import os
import numpy as np

# Label inserted into the file names of the figures saved by the plotting cells.
fld = "Workstation"

# SECURITY: allow_pickle=True unpickles the file contents, which can execute
# arbitrary code. Only load benchmark files that you created yourself.
task_run_times_opencl = np.load("task_run_times_opencl.npy", allow_pickle=True)
task_run_times_unthreaded = np.load("task_run_times_unthreaded.npy", allow_pickle=True)
task_run_times_threaded = np.load("task_run_times_threaded.npy", allow_pickle=True)
task_run_times_threaded_static = np.load("task_run_times_threaded_static.npy", allow_pickle=True)
task_run_times_threaded_guided = np.load("task_run_times_threaded_guided.npy", allow_pickle=True)
task_run_times_threaded_dynamic = np.load("task_run_times_threaded_dynamic.npy", allow_pickle=True)

# The remaining backends are written by the "store benchmark values" cell and
# are drawn by the line plots. A benchmark folder may not contain them, so each
# one is loaded only when its file is present; a missing file leaves the
# corresponding name untouched.
if os.path.exists("task_run_times_numba.npy"):
    task_run_times_numba = np.load("task_run_times_numba.npy", allow_pickle=True)
if os.path.exists("task_run_times_cupy.npy"):
    task_run_times_cupy = np.load("task_run_times_cupy.npy", allow_pickle=True)
if os.path.exists("task_run_times_transonic.npy"):
    task_run_times_transonic = np.load("task_run_times_transonic.npy", allow_pickle=True)
if os.path.exists("task_run_times_dask.npy"):
    task_run_times_dask = np.load("task_run_times_dask.npy", allow_pickle=True)

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.colors import LogNorm

# Heatmap of the OpenCL / threaded-CPU run-time ratio for every configuration.
# Requires `fld`, which is defined in the "importing previous benchmarks" cell.

# Mean run time of each (image size, kernel size) configuration.
mean_opencl = np.array([np.average(i) for i in task_run_times_opencl[:, 2]])
mean_threaded = np.array([np.average(i) for i in task_run_times_threaded[:, 2]])

# Records are stored image-size-major (every kernel size for the first image
# size, then the next image size, ...), so after the reshape rows are image
# sizes and columns are kernel sizes. The shape is taken from the configuration
# rather than hard-coded so it stays valid when the size lists change.
ratio_threaded_opencl = (mean_opencl / mean_threaded).reshape(len(dim_sizes), len(kernel_sizes))

ax = sns.heatmap(ratio_threaded_opencl, annot=True, cmap="Spectral",
                 xticklabels=[k[0] for k in kernel_sizes],
                 # float32 images use 4 bytes per pixel -> size in MB
                 yticklabels=[4*i[0]**2 / 1000000 for i in dim_sizes],
                 norm=LogNorm(),
                 fmt=".1f")
plt.title("Ratio between OpenCL and Threaded CPU run times")
plt.ylabel("Input image size (MB)")
plt.xlabel("Kernel size (Size x Size)")
plt.savefig("ratio_threaded_opencl_" + fld + ".pdf", dpi=300)
plt.show()

In [None]:
# plot per data shape: mean run time against image size for one fixed kernel size
index = 5                 # position in kernel_sizes of the kernel to plot
step = len(kernel_sizes)  # records per image size (one per kernel size)
kernel_side = kernel_sizes[index][0]
# float32 images use 4 bytes per pixel -> image size in MB
image_sizes = [4*i[0]**2 / 1000000 for i in dim_sizes]

# (records, legend label, line colour); None leaves the colour to matplotlib's
# default colour cycle.
series = [
    (task_run_times_unthreaded, 'Cython unthreaded', 'orange'),
    (task_run_times_threaded, 'Cython threaded', 'blue'),
    (task_run_times_threaded_static, 'Cython threaded static', 'lightpink'),
    (task_run_times_threaded_guided, 'Cython threaded guided', 'green'),
    (task_run_times_threaded_dynamic, 'Cython threaded dynamic', 'black'),
    (task_run_times_opencl, 'pyOpenCL', 'magenta'),
    (task_run_times_numba, 'Numba', None),
    (task_run_times_cupy, 'Cupy', None),
    (task_run_times_transonic, 'Transonic', None),
    (task_run_times_dask, 'Dask', None),
]

plt.figure(figsize=(8, 5))
for records, label, color in series:
    # Every `step`-th record starting at `index` is the chosen kernel size at
    # successive image sizes.
    mean_times = [np.average(i) for i in records[index::step, 2]]
    style_args = (color,) if color else ()
    plt.plot(image_sizes, mean_times, *style_args, label=label)

plt.legend()
plt.xlabel("Input Image Size (MB)")
plt.ylabel("Run times (s)")
plt.title("Run times of a 2D convolution with kernel size {0}x{0}".format(kernel_side))
# Zoomed view: only the lower-left region of the data is shown.
plt.xlim((0,100))
plt.ylim((0,4))
# Presumably the file name for the un-zoomed variant of this figure:
#plt.savefig("sizevstime_" + fld + ".pdf", dpi=300)
plt.savefig("sizevstime_zoomed_"+fld+".pdf",dpi=300)

In [None]:
# plot per kernel shape: mean run time against kernel size for one fixed image size
index = 3                 # position in dim_sizes of the image size to plot
step = len(kernel_sizes)  # records per image size (one per kernel size)
image_side = dim_sizes[index][0]
# The `step` consecutive records belonging to the chosen image size.
rows = slice(index*step, (index*step)+step)

# (records, legend label, line colour); None leaves the colour to matplotlib's
# default colour cycle.
series = [
    (task_run_times_unthreaded, 'Cython unthreaded', 'orange'),
    (task_run_times_threaded, 'Cython threaded', 'blue'),
    (task_run_times_threaded_static, 'Cython threaded static', 'lightpink'),
    (task_run_times_threaded_guided, 'Cython threaded guided', 'green'),
    (task_run_times_threaded_dynamic, 'Cython threaded dynamic', 'black'),
    (task_run_times_opencl, 'pyOpenCL', 'magenta'),
    (task_run_times_numba, 'Numba', None),
    (task_run_times_cupy, 'Cupy', None),
    (task_run_times_transonic, 'Transonic', None),
    (task_run_times_dask, 'Dask', None),
]

plt.figure(figsize=(8, 5))
for records, label, color in series:
    # Column 1 holds the kernel side length, column 2 the list of run times.
    mean_times = [np.average(i) for i in records[rows, 2]]
    style_args = (color,) if color else ()
    plt.plot(records[rows, 1], mean_times, *style_args, label=label)

plt.legend()
plt.xlabel("Kernel Size (Size x Size)")
plt.ylabel("Run times (s)")
plt.title("Run times of a 2D convolution with input image size of {0}x{0}".format(image_side))
# Zoomed view: only part of the kernel-size and run-time range is shown.
plt.xlim((10,20))
plt.ylim((0,0.5))
# Presumably the file name for the un-zoomed variant of this figure:
#plt.savefig("kernelvstime_" + fld + ".pdf", dpi=300)
plt.savefig("kernelvstime_zoomed_" + fld + ".pdf", dpi=300)