In [1]:
from timeit import default_timer    
import numpy as np

from nanopyx.core.transform import NLMDenoising, eSRRF_ST

  cupy._util.experimental('cupyx.jit.rawkernel')


## Unthreaded faster than OpenCL

In [2]:
# Scenario 1 input: 5 frames of 1000x1000 random float32 noise and a large NLM patch.
img = np.random.random((5,1000,1000)).astype(np.float32)
ps = 50      # NLM patch_size
pd = 50      # NLM patch_distance
h = 0.1
sigma = 1.0

nlm = NLMDenoising(clear_benchmarks=True)

# Drop the run types we do not want to compare (the recorded output shows only
# the NVIDIA OpenCL device, 'Unthreaded' and 'Python' remaining).
# pop(..., None) keeps this cell runnable on machines where one of these
# entries does not exist (e.g. no Intel iGPU), instead of raising KeyError.
# NOTE(review): relies on the private `_run_types` mapping of the nanopyx
# Liquid engine - confirm it is still dict-like when upgrading nanopyx.
for excluded_run_type in (
    'OpenCL_Intel(R) UHD Graphics 770',
    'Threaded',
    'Threaded_dynamic',
    'Threaded_guided',
    'Threaded_static',
):
    nlm._run_types.pop(excluded_run_type, None)

# Benchmark every remaining run type three times with these parameters.
for _ in range(3):
    nlm.benchmark(img,patch_size=ps,patch_distance=pd,h=h,sigma=sigma)

esrrf = eSRRF_ST(clear_benchmarks=True)

Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 257.70569350000005 seconds
Agent: NLMDenoising using Unthreaded ran in 140.31870280008297 seconds
Agent: NLMDenoising using Python ran in 166.33503409998957 seconds
Fastest run type: Unthreaded
Slowest run type: OpenCL_NVIDIA GeForce RTX 4090
Unthreaded is 1.19x faster than Python
Unthreaded is 1.84x faster than OpenCL_NVIDIA GeForce RTX 4090
Python is 1.55x faster than OpenCL_NVIDIA GeForce RTX 4090
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 258.53339290001895 seconds
Agent: NLMDenoising using Unthreaded ran in 140.29249749996234 seconds
Agent: NLMDenoising using Python ran in 163.82784679997712 seconds
Fastest run type: Unthreaded
Slowest run type: OpenCL_NVIDIA GeForce RTX 4090
Unthreaded is 1.17x faster than Python
Unthreaded is 1.84x faster than OpenCL_NVIDIA GeForce RTX 4090
Python is 1.58x faster than OpenCL_NVIDIA GeForce RTX 4090
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 

In [3]:
# Scenario 1, "liquid" pipeline: no run_type is passed to NLM (the recorded
# output shows "Querying the Agent..."), eSRRF is pinned to the NVIDIA OpenCL device.
times_liquid_1 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
    )
    esrrf.run(img, run_type="OpenCL_NVIDIA GeForce RTX 4090")
    elapsed = default_timer() - started
    times_liquid_1.append(elapsed)

# Mean wall-clock time of the whole NLM + eSRRF pipeline over the three repeats.
print("FINAL TIME:", np.average(times_liquid_1))

Querying the Agent...
Agent: NLMDenoising using Unthreaded ran in 137.62035179999657 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 1.0275708999251947 seconds
Querying the Agent...
Agent: NLMDenoising using Unthreaded ran in 137.52744199999142 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.5362481999909505 seconds
Querying the Agent...
Agent: NLMDenoising using Unthreaded ran in 138.31837200000882 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.5136264000320807 seconds
FINAL TIME: 138.53959663336477


In [4]:
# Scenario 1, NLM forced to the 'Python' run type; eSRRF pinned to the NVIDIA OpenCL device.
times_pythonnlm_1 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
        run_type='Python',
    )
    esrrf.run(img, run_type="OpenCL_NVIDIA GeForce RTX 4090")
    elapsed = default_timer() - started
    times_pythonnlm_1.append(elapsed)

# Mean wall-clock time of the whole NLM + eSRRF pipeline over the three repeats.
print("FINAL TIME:", np.average(times_pythonnlm_1))

Agent: NLMDenoising using Python ran in 163.67569679999724 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.5025924999499694 seconds
Agent: NLMDenoising using Python ran in 165.35446070006583 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.5090494999894872 seconds
Agent: NLMDenoising using Python ran in 163.8204972000094 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.4754666000371799 seconds
FINAL TIME: 164.80727100003665


In [5]:
# Scenario 1, both NLM and eSRRF forced onto the NVIDIA OpenCL device.
times_allgpu_1 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
        run_type='OpenCL_NVIDIA GeForce RTX 4090',
    )
    esrrf.run(img, run_type="OpenCL_NVIDIA GeForce RTX 4090")
    elapsed = default_timer() - started
    times_allgpu_1.append(elapsed)

# Mean wall-clock time of the whole NLM + eSRRF pipeline over the three repeats.
print("FINAL TIME:", np.average(times_allgpu_1))

Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 259.19837340002414 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.4494493999518454 seconds
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 257.83627550001256 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.5347164999693632 seconds
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 258.3585788999917 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.43966219993308187 seconds
FINAL TIME: 258.95869426665985


In [6]:
# Scenario 1, NLM forced to the 'Unthreaded' run type; eSRRF pinned to the NVIDIA OpenCL device.
times_untnlm_1 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
        run_type='Unthreaded',
    )
    esrrf.run(img, run_type="OpenCL_NVIDIA GeForce RTX 4090")
    elapsed = default_timer() - started
    times_untnlm_1.append(elapsed)

# Mean wall-clock time of the whole NLM + eSRRF pipeline over the three repeats.
print("FINAL TIME:", np.average(times_untnlm_1))

Agent: NLMDenoising using Unthreaded ran in 139.6733835999621 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.501745899906382 seconds
Agent: NLMDenoising using Unthreaded ran in 138.91377670003567 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.48830460000317544 seconds
Agent: NLMDenoising using Unthreaded ran in 139.09618759993464 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.49474909994751215 seconds
FINAL TIME: 139.74116063331408


In [14]:
# Scenario 1 summary: experiment description followed by mean and standard
# deviation of the pipeline time for each NLM run-type strategy.
for header_line in (
    "Image 5x1000x1000 (frame,row,col)",
    "NLM patch size 50, patch distance 50, h 0.1, sigma 1",
    "eSRRF default values ALWAYS on gpu",
):
    print(header_line)

for row_label, samples in (
    ("Liquid chooses (unthreaded) for nlm:", times_liquid_1),
    ("Always python for nlm:", times_pythonnlm_1),
    ("Always GPU for nlm", times_allgpu_1),
    ("Always unthreaded for nlm", times_untnlm_1),
):
    print(row_label, np.average(samples), np.std(samples))

Image 5x1000x1000 (frame,row,col)
NLM patch size 50, patch distance 50, h 0.1, sigma 1
eSRRF default values ALWAYS on gpu
Liquid chooses (unthreaded) for nlm: 138.53959663336477 0.3201652414742892
Always python for nlm: 164.80727100003665 0.7623515248157177
Always GPU for nlm 258.95869426665985 0.5332983505148261
Always unthreaded for nlm 139.74116063331408 0.32755787064753833


## OpenCL faster than Unthreaded

In [7]:
# Scenario 2 input: 5 frames of 500x500 random float32 noise, with a small
# patch (5) and a large patch distance (100).
img = np.random.random(size=(5, 500, 500)).astype(np.float32)
ps, pd = 5, 100          # NLM patch_size, patch_distance
h, sigma = 0.1, 1.0

# Benchmark the run types left on `nlm` three times with the new parameters.
for repeat in range(3):
    _ = nlm.benchmark(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
    )

Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 2.1125362999737263 seconds
Agent: NLMDenoising using Unthreaded ran in 193.49460520001594 seconds
Agent: NLMDenoising using Python ran in 216.2504562000977 seconds
Fastest run type: OpenCL_NVIDIA GeForce RTX 4090
Slowest run type: Python
OpenCL_NVIDIA GeForce RTX 4090 is 91.59x faster than Unthreaded
OpenCL_NVIDIA GeForce RTX 4090 is 102.37x faster than Python
Unthreaded is 1.12x faster than Python
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 2.1090008000610396 seconds
Agent: NLMDenoising using Unthreaded ran in 195.32358660001773 seconds
Agent: NLMDenoising using Python ran in 215.9644315999467 seconds
Fastest run type: OpenCL_NVIDIA GeForce RTX 4090
Slowest run type: Python
OpenCL_NVIDIA GeForce RTX 4090 is 92.61x faster than Unthreaded
OpenCL_NVIDIA GeForce RTX 4090 is 102.40x faster than Python
Unthreaded is 1.11x faster than Python
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 2.08

In [8]:
# Scenario 2, "liquid" pipeline: no run_type is passed to NLM (the recorded
# output shows "Querying the Agent..."), eSRRF is pinned to the NVIDIA OpenCL device.
times_liquid_2 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
    )
    esrrf.run(img, run_type="OpenCL_NVIDIA GeForce RTX 4090")
    elapsed = default_timer() - started
    times_liquid_2.append(elapsed)

# Mean wall-clock time of the whole NLM + eSRRF pipeline over the three repeats.
print("FINAL TIME:", np.average(times_liquid_2))

Querying the Agent...
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 2.0850866999244317 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.18761959997937083 seconds
Querying the Agent...
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 2.0860612000105903 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.1820156000321731 seconds
Querying the Agent...
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 2.0676665999926627 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.21367800002917647 seconds
FINAL TIME: 2.2918500333325937


In [9]:
# Scenario 2, NLM forced to the 'Python' run type; eSRRF pinned to the NVIDIA OpenCL device.
times_pythonnlm_2 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
        run_type='Python',
    )
    esrrf.run(img, run_type="OpenCL_NVIDIA GeForce RTX 4090")
    elapsed = default_timer() - started
    times_pythonnlm_2.append(elapsed)

# Mean wall-clock time of the whole NLM + eSRRF pipeline over the three repeats.
print("FINAL TIME:", np.average(times_pythonnlm_2))

Agent: NLMDenoising using Python ran in 214.94981749996077 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.1963244000216946 seconds
Agent: NLMDenoising using Python ran in 214.65648640005384 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.2030254000565037 seconds
Agent: NLMDenoising using Python ran in 215.9210472999839 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.19534779991954565 seconds
FINAL TIME: 215.3844068000326


In [10]:
# Scenario 2, both NLM and eSRRF forced onto the NVIDIA OpenCL device.
times_allgpu_2 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
        run_type='OpenCL_NVIDIA GeForce RTX 4090',
    )
    esrrf.run(img, run_type="OpenCL_NVIDIA GeForce RTX 4090")
    elapsed = default_timer() - started
    times_allgpu_2.append(elapsed)

# Mean wall-clock time of the whole NLM + eSRRF pipeline over the three repeats.
print("FINAL TIME:", np.average(times_allgpu_2))

Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 2.091298299957998 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.1972550000064075 seconds
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 2.0781228000996634 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.18130279995966703 seconds
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 2.057868600008078 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.22035229997709394 seconds
FINAL TIME: 2.2884540000231937


In [11]:
# Scenario 2, NLM forced to the 'Unthreaded' run type; eSRRF pinned to the NVIDIA OpenCL device.
times_untnlm_2 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
        run_type='Unthreaded',
    )
    esrrf.run(img, run_type="OpenCL_NVIDIA GeForce RTX 4090")
    elapsed = default_timer() - started
    times_untnlm_2.append(elapsed)

# Mean wall-clock time of the whole NLM + eSRRF pipeline over the three repeats.
print("FINAL TIME:", np.average(times_untnlm_2))

Agent: NLMDenoising using Unthreaded ran in 190.99014130001888 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.18062180001288652 seconds
Agent: NLMDenoising using Unthreaded ran in 191.37247830000706 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.17891250003594905 seconds
Agent: NLMDenoising using Unthreaded ran in 191.5184929999523 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.17274909990374 seconds
FINAL TIME: 191.48296093331496


In [15]:
# Scenario 2 summary: experiment description followed by mean and standard
# deviation of the pipeline time for each NLM run-type strategy.
print("Image 5x500x500 (frame,row,col)")
print("NLM patch size 5, patch distance 100, h 0.1, sigma 1")
print("eSRRF default values ALWAYS on gpu")

# In this scenario the agent selected the NVIDIA OpenCL run type for NLM (see
# the recorded output of the liquid timing cell), so the label says "OpenCL"
# rather than the "unthreaded" wording used for the first scenario.
print("Liquid chooses (OpenCL) for nlm:", np.average(times_liquid_2),np.std(times_liquid_2))
print("Always python for nlm:", np.average(times_pythonnlm_2),np.std(times_pythonnlm_2))
print("Always GPU for nlm", np.average(times_allgpu_2),np.std(times_allgpu_2))
print("Always unthreaded for nlm", np.average(times_untnlm_2),np.std(times_untnlm_2))

Image 5x500x500 (frame,row,col)
NLM patch size 5, patch distance 100, h 0.1, sigma 1
eSRRF default values ALWAYS on gpu
Liquid chooses (unthreaded) for nlm: 2.2918500333325937 0.008745049325195925
Always python for nlm: 215.3844068000326 0.5361846983394957
Always GPU for nlm 2.2884540000231937 0.015851810845416675
Always unthreaded for nlm 191.48296093331496 0.2219043828949791
