In [1]:
from nanopyx.liquid._le_interpolation_catmull_rom import ShiftAndMagnify as CRShiftAndMagnify
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

## Load Data

In [2]:
from nanopyx.core.generate.noise_add_simplex import get_simplex_noise

image_vv_big = get_simplex_noise(300, 300, amplitude = 1000)
vv_large_data = np.tile(image_vv_big, (500, 1, 1))

image_v_big = get_simplex_noise(300, 300, amplitude = 1000)
v_large_data = np.tile(image_v_big, (100, 1, 1))

image_big = get_simplex_noise(300, 300, amplitude = 1000)
large_data = np.tile(image_big, (10, 1, 1))

image_medium = get_simplex_noise(100, 100, amplitude = 1000)
medium_data = np.tile(image_medium, (10, 1, 1))

image_small = get_simplex_noise(10, 10, amplitude = 1000)
small_data = np.tile(image_small, (10, 1, 1))

image_v_small = get_simplex_noise(10, 10, amplitude = 1000)
v_small_data = np.tile(image_v_small, (1, 1, 1))

reps = 10

In [3]:
crsm = CRShiftAndMagnify(testing=True)
# njit trigger early compilation
crsm.run(v_small_data,0,0,1,1, run_type="Numba")

Consider adding default arguments to the njit implementation to trigger early compilation


OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


array([[[2214.6787, 4605.5874, 4156.2437, 4039.9307, 3984.6626,
         3948.8308, 3853.249 , 3634.205 , 3285.155 , 3015.2454],
        [3964.1003, 8241.088 , 7498.191 , 7452.3823, 7542.5317,
         7619.9653, 7498.283 , 7064.6445, 6356.009 , 5853.9946],
        [2466.4246, 5132.51  , 4801.351 , 5079.236 , 5477.8887,
         5776.529 , 5780.7603, 5430.5806, 4853.1606, 4571.2725],
        [1726.2405, 3611.4475, 3504.8176, 3931.6724, 4434.4424,
         4778.146 , 4786.045 , 4447.463 , 3962.6726, 3896.5344],
        [1503.0298, 3163.897 , 3146.1677, 3609.8596, 4068.1016,
         4296.4253, 4175.932 , 3761.3872, 3311.362 , 3382.6626],
        [1695.3105, 3578.3855, 3566.5615, 4050.064 , 4436.0283,
         4488.2676, 4162.534 , 3603.6897, 3109.1743, 3181.4912],
        [2037.2219, 4308.376 , 4304.1035, 4880.881 , 5310.3936,
         5302.211 , 4858.837 , 4200.178 , 3613.1885, 3555.706 ],
        [2233.5208, 4742.4727, 4821.199 , 5619.1304, 6298.542 ,
         6486.4795, 6167.139 , 55

In [4]:
df = pd.DataFrame(index=['Unthreaded','Threaded_dynamic','Threaded_static','Threaded_guided','Threaded','OpenCL_Apple M1 Pro','Numba','Python'])

In [5]:
magnification = 5
print("Dataset shape:", v_small_data.shape)

average_results = {}
for i in range(reps):
    interpolated = crsm.benchmark(v_small_data,0,0,magnification,magnification)
    for result in interpolated:
        try:
            average_results[result[1]] = result[0] + average_results[result[1]]/reps
        except KeyError:
            average_results[result[1]] = result[0]/reps

print(average_results)
df[v_small_data.shape] = average_results

Dataset shape: (1, 10, 10)
Fastest run type: Unthreaded
Slowest run type: Python
Unthreaded is 2.93x faster than Threaded_static
Unthreaded and Threaded_static have similar outputs!
Unthreaded is 5.79x faster than Threaded
Unthreaded and Threaded have similar outputs!
Unthreaded is 6.41x faster than Numba
Unthreaded and Numba have similar outputs!
Unthreaded is 17.51x faster than Threaded_dynamic
Unthreaded and Threaded_dynamic have similar outputs!
Unthreaded is 21.97x faster than Threaded_guided
Unthreaded and Threaded_guided have similar outputs!
Unthreaded is 209.93x faster than OpenCL_Apple M1 Pro
Unthreaded and OpenCL_Apple M1 Pro have similar outputs!
Unthreaded is 516.79x faster than Python
Unthreaded and Python have similar outputs!
Threaded_static is 1.98x faster than Threaded
Threaded_static and Threaded have similar outputs!
Threaded_static is 2.19x faster than Numba
Threaded_static and Numba have similar outputs!
Threaded_static is 5.98x faster than Threaded_dynamic
Thread

In [6]:
magnification = 5
print("Dataset shape:", small_data.shape)
average_results = {}
for i in range(reps):
    interpolated = crsm.benchmark(small_data,0,0,magnification,magnification)
    for result in interpolated:
        try:
            average_results[result[1]] = result[0] + average_results[result[1]]/reps
        except KeyError:
            average_results[result[1]] = result[0]/reps

print(average_results)
df[small_data.shape] = average_results

Dataset shape: (10, 10, 10)
Fastest run type: Unthreaded
Slowest run type: Python
Unthreaded is 2.02x faster than Numba
Unthreaded and Numba have similar outputs!
Unthreaded is 2.27x faster than Threaded_dynamic
Unthreaded and Threaded_dynamic have similar outputs!
Unthreaded is 2.43x faster than Threaded_guided
Unthreaded and Threaded_guided have similar outputs!
Unthreaded is 2.46x faster than Threaded_static
Unthreaded and Threaded_static have similar outputs!
Unthreaded is 2.47x faster than Threaded
Unthreaded and Threaded have similar outputs!
Unthreaded is 28.64x faster than OpenCL_Apple M1 Pro
Unthreaded and OpenCL_Apple M1 Pro have similar outputs!
Unthreaded is 508.38x faster than Python
Unthreaded and Python have similar outputs!
Numba is 1.12x faster than Threaded_dynamic
Numba and Threaded_dynamic have similar outputs!
Numba is 1.20x faster than Threaded_guided
Numba and Threaded_guided have similar outputs!
Numba is 1.22x faster than Threaded_static
Numba and Threaded_stat

In [7]:
crsm._run_types.pop('Python')

<bound method ShiftAndMagnify._run_python of <nanopyx.liquid._le_interpolation_catmull_rom.ShiftAndMagnify object at 0x1695339d0>>

In [8]:
magnification = 5
print("Dataset shape:", medium_data.shape)
average_results = {'Python':None}
for i in range(reps):
    interpolated = crsm.benchmark(medium_data,0,0,magnification,magnification)
    for result in interpolated:
        try:
            average_results[result[1]] = result[0] + average_results[result[1]]/reps
        except KeyError:
            average_results[result[1]] = result[0]/reps

print(average_results)
df[medium_data.shape] = average_results

Dataset shape: (10, 100, 100)
Fastest run type: OpenCL_Apple M1 Pro
Slowest run type: Unthreaded
OpenCL_Apple M1 Pro is 1.52x faster than Threaded_dynamic
OpenCL_Apple M1 Pro and Threaded_dynamic have similar outputs!
OpenCL_Apple M1 Pro is 1.64x faster than Threaded_guided
OpenCL_Apple M1 Pro and Threaded_guided have similar outputs!
OpenCL_Apple M1 Pro is 1.98x faster than Threaded_static
OpenCL_Apple M1 Pro and Threaded_static have similar outputs!
OpenCL_Apple M1 Pro is 2.17x faster than Threaded
OpenCL_Apple M1 Pro and Threaded have similar outputs!
OpenCL_Apple M1 Pro is 2.18x faster than Numba
OpenCL_Apple M1 Pro and Numba have similar outputs!
OpenCL_Apple M1 Pro is 6.02x faster than Unthreaded
OpenCL_Apple M1 Pro and Unthreaded have similar outputs!
Threaded_dynamic is 1.08x faster than Threaded_guided
Threaded_dynamic and Threaded_guided have similar outputs!
Threaded_dynamic is 1.31x faster than Threaded_static
Threaded_dynamic and Threaded_static have similar outputs!
Threa

In [9]:
magnification = 5
print("Dataset shape:", large_data.shape)
average_results = {'Python':None}
for i in range(reps):
    interpolated = crsm.benchmark(large_data,0,0,magnification,magnification)
    for result in interpolated:
        try:
            average_results[result[1]] = result[0] + average_results[result[1]]/reps
        except KeyError:
            average_results[result[1]] = result[0]/reps

print(average_results)
df[large_data.shape] = average_results

Dataset shape: (10, 300, 300)
Fastest run type: OpenCL_Apple M1 Pro
Slowest run type: Unthreaded
OpenCL_Apple M1 Pro is 3.15x faster than Threaded_dynamic
OpenCL_Apple M1 Pro and Threaded_dynamic have similar outputs!
OpenCL_Apple M1 Pro is 3.38x faster than Threaded_guided
OpenCL_Apple M1 Pro and Threaded_guided have similar outputs!
OpenCL_Apple M1 Pro is 4.16x faster than Threaded
OpenCL_Apple M1 Pro and Threaded have similar outputs!
OpenCL_Apple M1 Pro is 4.41x faster than Numba
OpenCL_Apple M1 Pro and Numba have similar outputs!
OpenCL_Apple M1 Pro is 4.55x faster than Threaded_static
OpenCL_Apple M1 Pro and Threaded_static have similar outputs!
OpenCL_Apple M1 Pro is 12.83x faster than Unthreaded
OpenCL_Apple M1 Pro and Unthreaded have similar outputs!
Threaded_dynamic is 1.07x faster than Threaded_guided
Threaded_dynamic and Threaded_guided have similar outputs!
Threaded_dynamic is 1.32x faster than Threaded
Threaded_dynamic and Threaded have similar outputs!
Threaded_dynamic i

In [10]:
magnification = 5
print("Dataset shape:", v_large_data.shape)
average_results = {'Python':None}
for i in range(reps):
    interpolated = crsm.benchmark(v_large_data,0,0,magnification,magnification)
    for result in interpolated:
        try:
            average_results[result[1]] = result[0] + average_results[result[1]]/reps
        except KeyError:
            average_results[result[1]] = result[0]/reps

print(average_results)
df[v_large_data.shape] = average_results

Dataset shape: (100, 300, 300)
Fastest run type: OpenCL_Apple M1 Pro
Slowest run type: Unthreaded
OpenCL_Apple M1 Pro is 2.91x faster than Threaded_dynamic
OpenCL_Apple M1 Pro and Threaded_dynamic have similar outputs!
OpenCL_Apple M1 Pro is 2.91x faster than Threaded_guided
OpenCL_Apple M1 Pro and Threaded_guided have similar outputs!
OpenCL_Apple M1 Pro is 3.47x faster than Threaded
OpenCL_Apple M1 Pro and Threaded have similar outputs!
OpenCL_Apple M1 Pro is 3.53x faster than Threaded_static
OpenCL_Apple M1 Pro and Threaded_static have similar outputs!
OpenCL_Apple M1 Pro is 4.23x faster than Numba
OpenCL_Apple M1 Pro and Numba have similar outputs!
OpenCL_Apple M1 Pro is 10.90x faster than Unthreaded
OpenCL_Apple M1 Pro and Unthreaded have similar outputs!
Threaded_dynamic is 1.00x faster than Threaded_guided
Threaded_dynamic and Threaded_guided have similar outputs!
Threaded_dynamic is 1.19x faster than Threaded
Threaded_dynamic and Threaded have similar outputs!
Threaded_dynamic 

In [11]:
magnification = 5
print("Dataset shape:", vv_large_data.shape)
average_results = {'Python':None}
for i in range(reps):
    interpolated = crsm.benchmark(vv_large_data,0,0,magnification,magnification)
    for result in interpolated:
        try:
            average_results[result[1]] = result[0] + average_results[result[1]]/reps
        except KeyError:
            average_results[result[1]] = result[0]/reps

print(average_results)
df[vv_large_data.shape] = average_results

Dataset shape: (500, 300, 300)
Fastest run type: OpenCL_Apple M1 Pro
Slowest run type: Unthreaded
OpenCL_Apple M1 Pro is 1.54x faster than Threaded_dynamic
OpenCL_Apple M1 Pro and Threaded_dynamic have similar outputs!
OpenCL_Apple M1 Pro is 1.57x faster than Threaded_guided
OpenCL_Apple M1 Pro and Threaded_guided have similar outputs!
OpenCL_Apple M1 Pro is 1.92x faster than Threaded_static
OpenCL_Apple M1 Pro and Threaded_static have similar outputs!
OpenCL_Apple M1 Pro is 1.95x faster than Threaded


In [None]:
df.to_csv('Benchmarks_datashape.csv')