In [1]:
from timeit import default_timer    
import numpy as np

from nanopyx.core.transform import NLMDenoising, eSRRF_ST

  cupy._util.experimental('cupyx.jit.rawkernel')


## Unthreaded faster than OpenCL

In [2]:
# Scenario 1 workload: a single 1000x1000 float32 frame of uniform random
# values, denoised with a large NLM patch size / patch distance.
img = np.random.random((1,1000,1000)).astype(np.float32)
ps = 50
pd = 50
h = 0.1
sigma = 1.0

# Run types removed before benchmarking. The names are specific to the machine
# this notebook was recorded on, so each one is dropped only if it is actually
# registered: pop(key, None) avoids a KeyError on hosts that lack the device
# or variant.
# NOTE(review): relies on the private `_run_types` mapping supporting
# pop(key, default), as a dict does — confirm against the nanopyx version in use.
nlm_excluded_run_types = (
    'OpenCL_Intel(R) UHD Graphics 770',
    'Python',
    'Threaded_dynamic',
    'Threaded_guided',
    'Threaded_static',
)
esrrf_excluded_run_types = (
    'OpenCL_Intel(R) UHD Graphics 770',
    'Threaded_dynamic',
    'Threaded_guided',
    'Threaded_static',
)

# clear_benchmarks=True presumably discards previously stored timings — TODO confirm.
nlm = NLMDenoising(clear_benchmarks=True)
for run_type_name in nlm_excluded_run_types:
    nlm._run_types.pop(run_type_name, None)

# Benchmark three times; the recorded output shows every remaining run type
# being timed on each call.
for _repeat in range(3):
    _ = nlm.benchmark(img,patch_size=ps,patch_distance=pd,h=h,sigma=sigma)



esrrf = eSRRF_ST(clear_benchmarks=True)
for run_type_name in esrrf_excluded_run_types:
    esrrf._run_types.pop(run_type_name, None)

for _repeat in range(3):
    _ = esrrf.benchmark(img)

Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 51.52080970001407 seconds
Agent: NLMDenoising using Threaded ran in 691.5242190000135 seconds
Agent: NLMDenoising using Unthreaded ran in 27.745663000037894 seconds
Fastest run type: Unthreaded
Slowest run type: Threaded
Unthreaded is 1.86x faster than OpenCL_NVIDIA GeForce RTX 4090
Unthreaded is 24.92x faster than Threaded
OpenCL_NVIDIA GeForce RTX 4090 is 13.42x faster than Threaded
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 51.49116460001096 seconds
Agent: NLMDenoising using Threaded ran in 695.4065490998328 seconds
Agent: NLMDenoising using Unthreaded ran in 27.880847699940205 seconds
Fastest run type: Unthreaded
Slowest run type: Threaded
Unthreaded is 1.85x faster than OpenCL_NVIDIA GeForce RTX 4090
Unthreaded is 24.94x faster than Threaded
OpenCL_NVIDIA GeForce RTX 4090 is 13.51x faster than Threaded
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 51.50801769993268 seconds
Agent:

In [3]:
# Baseline: pin both pipeline steps to the "Unthreaded" run type and record
# the wall-clock duration of each full NLM + eSRRF pass (three passes).
times_allunt_1 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
        run_type="Unthreaded",
    )
    esrrf.run(img, run_type="Unthreaded")
    elapsed = default_timer() - started
    times_allunt_1.append(elapsed)

print("FINAL TIME:", np.average(times_allunt_1))

Agent: NLMDenoising using Unthreaded ran in 32.720616000005975 seconds
Agent: ShiftMagnify_catmull_rom using Unthreaded ran in 1.0155423001851887 seconds
Agent: GradientRobertsCross using Unthreaded ran in 0.00662099989131093 seconds
Agent: ShiftMagnify_catmull_rom using Unthreaded ran in 4.316713099833578 seconds
Agent: ShiftMagnify_catmull_rom using Unthreaded ran in 4.147863100050017 seconds
Agent: RadialGradientConvergence using Unthreaded ran in 40.058085300028324 seconds
Agent: eSRRF_ST using Unthreaded ran in 49.66394250001758 seconds
Agent: NLMDenoising using Unthreaded ran in 27.97229290008545 seconds
Agent: ShiftMagnify_catmull_rom using Unthreaded ran in 1.0026229000650346 seconds
Agent: GradientRobertsCross using Unthreaded ran in 0.0022729001939296722 seconds
Agent: ShiftMagnify_catmull_rom using Unthreaded ran in 4.11598729994148 seconds
Agent: ShiftMagnify_catmull_rom using Unthreaded ran in 4.12739079981111 seconds
Agent: RadialGradientConvergence using Unthreaded ran i

In [4]:
# Baseline: pin both pipeline steps to the "Threaded" run type and record
# the wall-clock duration of each full NLM + eSRRF pass (three passes).
times_allthr_1 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
        run_type="Threaded",
    )
    esrrf.run(img, run_type="Threaded")
    elapsed = default_timer() - started
    times_allthr_1.append(elapsed)

print("FINAL TIME:", np.average(times_allthr_1))

Agent: NLMDenoising using Threaded ran in 719.536378499819 seconds
Agent: ShiftMagnify_catmull_rom using Threaded ran in 0.12065930012613535 seconds
Agent: GradientRobertsCross using Threaded ran in 0.003327899845317006 seconds
Agent: ShiftMagnify_catmull_rom using Threaded ran in 0.3577672999817878 seconds
Agent: ShiftMagnify_catmull_rom using Threaded ran in 0.35534730018116534 seconds
Agent: RadialGradientConvergence using Threaded ran in 2.6899519998114556 seconds
Agent: eSRRF_ST using Threaded ran in 3.6726496000774205 seconds
Agent: NLMDenoising using Threaded ran in 744.5416928001214 seconds
Agent: ShiftMagnify_catmull_rom using Threaded ran in 0.12480150000192225 seconds
Agent: GradientRobertsCross using Threaded ran in 0.0037072000559419394 seconds
Agent: ShiftMagnify_catmull_rom using Threaded ran in 0.4025940999854356 seconds
Agent: ShiftMagnify_catmull_rom using Threaded ran in 0.39131440012715757 seconds
Agent: RadialGradientConvergence using Threaded ran in 2.788020800100

In [5]:
# Baseline: pin both pipeline steps to the OpenCL run type of the NVIDIA GPU
# and record the wall-clock duration of each full NLM + eSRRF pass.
# The run type name embeds the device name of the recording machine.
times_allgpu_1 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
        run_type="OpenCL_NVIDIA GeForce RTX 4090",
    )
    esrrf.run(img, run_type="OpenCL_NVIDIA GeForce RTX 4090")
    elapsed = default_timer() - started
    times_allgpu_1.append(elapsed)

print("FINAL TIME:", np.average(times_allgpu_1))

Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 51.65759970014915 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.25608200021088123 seconds
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 52.33022120012902 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.18321229983121157 seconds
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 51.824568999931216 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.18816679995507002 seconds
FINAL TIME: 52.15994170005433


In [6]:
# Adaptive case: no run_type is passed, so the library picks the
# implementation itself (the recorded output shows "Querying the Agent..."
# before every run). Each full NLM + eSRRF pass is timed, three passes total.
times_liquid_1 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
    )
    esrrf.run(img)
    elapsed = default_timer() - started
    times_liquid_1.append(elapsed)

print("FINAL TIME:", np.average(times_liquid_1))

Querying the Agent...
Agent: NLMDenoising using Unthreaded ran in 27.919292999897152 seconds
Querying the Agent...
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.19796929997392 seconds
Querying the Agent...
Agent: NLMDenoising using Unthreaded ran in 28.110868399962783 seconds
Querying the Agent...
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.19906010013073683 seconds
Querying the Agent...
Agent: NLMDenoising using Unthreaded ran in 27.863231600029394 seconds
Querying the Agent...
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.20921640004962683 seconds
FINAL TIME: 28.18454696664897


In [7]:
# Summary of the first scenario: mean and standard deviation of the three
# timed passes per strategy. The image label states 1x1000x1000 because the
# array timed in this scenario is created with shape (1, 1000, 1000).
# np.std uses its default ddof=0 (population standard deviation).
print("Image 1x1000x1000 (frame,row,col)")
print("NLM patch size 50, patch distance 50, h 0.1, sigma 1")


print("Liquid chooses:", np.average(times_liquid_1),np.std(times_liquid_1))
print("Always unth:", np.average(times_allunt_1),np.std(times_allunt_1))
print("Always GPU for nlm", np.average(times_allgpu_1),np.std(times_allgpu_1))
print("Always thr", np.average(times_allthr_1),np.std(times_allthr_1))

Image 5x1000x1000 (frame,row,col)
NLM patch size 50, patch distance 50, h 0.1, sigma 1
Liquid chooses: 28.18454696664897 0.10311728945420798
Always unth: 79.11862650001422 2.3137894900869123
Always GPU for nlm 52.15994170005433 0.2696895934339224
Always thr 729.7135787665999 13.427899808435084


## OpenCL faster than Unthreaded

In [8]:
# Scenario 2 workload: a single 500x500 float32 frame of uniform random values,
# denoised with a small patch size and a large patch distance.
# These names replace the scenario 1 values for every cell that runs after this one.
img = np.random.random((1, 500, 500)).astype(np.float32)
ps, pd = 5, 100
h, sigma = 0.1, 1.0

# Re-benchmark both methods on the new workload, three rounds each
# (all NLM rounds first, then all eSRRF rounds).
for _round in range(3):
    _ = nlm.benchmark(img, patch_size=ps, patch_distance=pd, h=h, sigma=sigma)

for _round in range(3):
    _ = esrrf.benchmark(img)

Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.5070013001095504 seconds
Agent: NLMDenoising using Threaded ran in 12.163010700140148 seconds
Agent: NLMDenoising using Unthreaded ran in 39.0914115998894 seconds
Fastest run type: OpenCL_NVIDIA GeForce RTX 4090
Slowest run type: Unthreaded
OpenCL_NVIDIA GeForce RTX 4090 is 23.99x faster than Threaded
OpenCL_NVIDIA GeForce RTX 4090 is 77.10x faster than Unthreaded
Threaded is 3.21x faster than Unthreaded
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.6040031001903117 seconds
Agent: NLMDenoising using Threaded ran in 12.03970819991082 seconds
Agent: NLMDenoising using Unthreaded ran in 38.622796799987555 seconds
Fastest run type: OpenCL_NVIDIA GeForce RTX 4090
Slowest run type: Unthreaded
OpenCL_NVIDIA GeForce RTX 4090 is 19.93x faster than Threaded
OpenCL_NVIDIA GeForce RTX 4090 is 63.94x faster than Unthreaded
Threaded is 3.21x faster than Unthreaded
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX

In [9]:
# Baseline: pin both pipeline steps to the "Unthreaded" run type and record
# the wall-clock duration of each full NLM + eSRRF pass (three passes).
times_allunt_2 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
        run_type="Unthreaded",
    )
    esrrf.run(img, run_type="Unthreaded")
    elapsed = default_timer() - started
    times_allunt_2.append(elapsed)

print("FINAL TIME:", np.average(times_allunt_2))

Agent: NLMDenoising using Unthreaded ran in 38.36395670007914 seconds
Agent: ShiftMagnify_catmull_rom using Unthreaded ran in 0.2466012998484075 seconds
Agent: GradientRobertsCross using Unthreaded ran in 0.0007382000330835581 seconds
Agent: ShiftMagnify_catmull_rom using Unthreaded ran in 1.0054710998665541 seconds
Agent: ShiftMagnify_catmull_rom using Unthreaded ran in 1.003644100157544 seconds
Agent: RadialGradientConvergence using Unthreaded ran in 9.728363499976695 seconds
Agent: eSRRF_ST using Unthreaded ran in 12.07044030004181 seconds
Agent: NLMDenoising using Unthreaded ran in 38.55065259989351 seconds
Agent: ShiftMagnify_catmull_rom using Unthreaded ran in 0.24844160000793636 seconds
Agent: GradientRobertsCross using Unthreaded ran in 0.0008862998802214861 seconds
Agent: ShiftMagnify_catmull_rom using Unthreaded ran in 1.0012670001015067 seconds
Agent: ShiftMagnify_catmull_rom using Unthreaded ran in 1.0023387998808175 seconds
Agent: RadialGradientConvergence using Unthreaded

In [10]:
# Baseline: pin both pipeline steps to the "Threaded" run type and record
# the wall-clock duration of each full NLM + eSRRF pass (three passes).
times_allthr_2 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
        run_type="Threaded",
    )
    esrrf.run(img, run_type="Threaded")
    elapsed = default_timer() - started
    times_allthr_2.append(elapsed)

print("FINAL TIME:", np.average(times_allthr_2))

Agent: NLMDenoising using Threaded ran in 12.213367700111121 seconds
Agent: ShiftMagnify_catmull_rom using Threaded ran in 0.04546610009856522 seconds
Agent: GradientRobertsCross using Threaded ran in 0.001340400194749236 seconds
Agent: ShiftMagnify_catmull_rom using Threaded ran in 0.11502430005930364 seconds
Agent: ShiftMagnify_catmull_rom using Threaded ran in 0.12193899997510016 seconds
Agent: RadialGradientConvergence using Threaded ran in 0.7014878999907523 seconds
Agent: eSRRF_ST using Threaded ran in 1.0839123001787812 seconds
Agent: NLMDenoising using Threaded ran in 12.380548099987209 seconds
Agent: ShiftMagnify_catmull_rom using Threaded ran in 0.03945759986527264 seconds
Agent: GradientRobertsCross using Threaded ran in 0.001790100010111928 seconds
Agent: ShiftMagnify_catmull_rom using Threaded ran in 0.11945969983935356 seconds
Agent: ShiftMagnify_catmull_rom using Threaded ran in 0.12271070014685392 seconds
Agent: RadialGradientConvergence using Threaded ran in 0.65378669

In [11]:
# Baseline: pin both pipeline steps to the OpenCL run type of the NVIDIA GPU
# and record the wall-clock duration of each full NLM + eSRRF pass.
# The run type name embeds the device name of the recording machine.
times_allgpu_2 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
        run_type="OpenCL_NVIDIA GeForce RTX 4090",
    )
    esrrf.run(img, run_type="OpenCL_NVIDIA GeForce RTX 4090")
    elapsed = default_timer() - started
    times_allgpu_2.append(elapsed)

print("FINAL TIME:", np.average(times_allgpu_2))

Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.5269965999759734 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.1299916000571102 seconds
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.49035510001704097 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.12603700021281838 seconds
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.4812241999898106 seconds
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.11636889982037246 seconds
FINAL TIME: 0.6397889334087571


In [12]:
# Adaptive case: no run_type is passed, so the library picks the
# implementation itself (the recorded output shows "Querying the Agent..."
# before every run). Each full NLM + eSRRF pass is timed, three passes total.
times_liquid_2 = []
for _ in range(3):
    started = default_timer()
    nlm.run(
        img,
        patch_size=ps,
        patch_distance=pd,
        h=h,
        sigma=sigma,
    )
    esrrf.run(img)
    elapsed = default_timer() - started
    times_liquid_2.append(elapsed)

print("FINAL TIME:", np.average(times_liquid_2))

Querying the Agent...
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.5113912001252174 seconds
Querying the Agent...
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.1354211000725627 seconds
Querying the Agent...
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.5252174001652747 seconds
Querying the Agent...
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.1293897998984903 seconds
Querying the Agent...
Agent: NLMDenoising using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.4848659001290798 seconds
Querying the Agent...
Agent: eSRRF_ST using OpenCL_NVIDIA GeForce RTX 4090 ran in 0.11458220006898046 seconds
FINAL TIME: 0.6599667666790386


In [13]:
# Summary of the second scenario: mean and standard deviation of the three
# timed passes per strategy. np.std uses its default ddof=0.
print("Image 1x500x500 (frame,row,col)")
print("NLM patch size 5, patch distance 100, h 0.1, sigma 1")

# One output line per strategy, in the same order as the original report.
for label, samples in (
    ("Liquid chooses:", times_liquid_2),
    ("Always unth:", times_allunt_2),
    ("Always GPU for nlm", times_allgpu_2),
    ("Always thr", times_allthr_2),
):
    print(label, np.average(samples), np.std(samples))

Image 1x500x500 (frame,row,col)
NLM patch size 5, patch distance 100, h 0.1, sigma 1
Liquid chooses: 0.6599667666790386 0.022342916413642114
Always unth: 50.56203823342609 0.08900858575940253
Always GPU for nlm 0.6397889334087571 0.01732282451199119
Always thr 13.412811300018802 0.0829010514229891
