In [1]:
import pyclesperanto as cle
import numpy as np

# Custom kernel operation

Many algorithms are already available in the library, but you may want to perform a more specific task or develop your own kernel operation. clEsperanto provides the function `native_execute` to run OpenCL C code directly.

In [2]:
# Display the signature and docstring of `native_execute` (IPython `?` help syntax).
cle.native_execute?

[31mSignature:[39m
cle.native_execute(
    anchor=[38;5;28;01mNone[39;00m,
    kernel_source: str = [33m''[39m,
    kernel_name: str = [33m''[39m,
    global_size: tuple = ([32m1[39m, [32m1[39m, [32m1[39m),
    local_size: tuple = ([32m1[39m, [32m1[39m, [32m1[39m),
    parameters: dict = {},
    device: pyclesperanto._pyclesperanto._Device = [38;5;28;01mNone[39;00m,
)
[31mDocstring:[39m
Execute an OpenCL kernel from a file or a string

Call, build, and execute a kernel compatible with OpenCL language.
The kernel can be called from a file or a string.

The parameters must still be passed as a dictionary with the correct types and order.
Buffer parameters must be passed as Array objects. Scalars must be passed as Python native float or int.


Parameters
----------
anchor : str, default = '__file__'
    Enter __file__ when calling this method and the corresponding open.cl
    file lies in the same folder as the python file calling it.
    Ignored if kernel_source i

## Native OpenCL Kernel

```c
__kernel void add_arrays(__global float* a, __global float* b, __global float* output, int size) {
    int x = get_global_id(0); // Global ID in the 1st dimension

    if (x < size) {
        output[x] = a[x] + b[x];
    }
}
```

In [3]:
# Two input arrays (ones and twos) and an output array of the same shape.
a = cle.push(np.ones(10))
b = cle.push(np.ones(10) * 2)
output = cle.create(a.shape)

# Same kernel as the listing shown above this cell: the `size` argument lets
# each work item check that its index is inside the arrays before writing.
kernel_source = """
__kernel void add_arrays(__global float* a, __global float* b, __global float* output, int size) {
    int x = get_global_id(0); // Global ID in the 1st dimension

    if (x < size) {
        output[x] = a[x] + b[x];
    }
}"""

kernel_name = "add_arrays"  # Must match the kernel name in the source !

parameters = { # keys must match the kernel arguments name, type, and order in the source !
    "a": a,
    "b": b,
    "output": output,
    "size": int(a.size),  # scalars must be passed as native Python int or float
}

cle.native_execute(
    kernel_source=kernel_source,
    kernel_name=kernel_name,
    parameters=parameters,
    global_size=a.size          # should correspond to the number of work items (e.g. pixels)
)

print(f"{a} + \n{b} = \n{output}")

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + 
[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.] = 
[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]


## CLIJ-OpenCL Kernel

```c
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
__kernel void add_arrays(IMAGE_a_TYPE a, IMAGE_b_TYPE b, IMAGE_output_TYPE output) {
    int x = get_global_id(0);
    int y = get_global_id(1);
    int z = get_global_id(2);
    
    IMAGE_a_PIXEL_TYPE value_a = READ_IMAGE(a, sampler, POS_a_INSTANCE(x,y,z,0)).x;
    IMAGE_b_PIXEL_TYPE value_b = READ_IMAGE(b, sampler, POS_b_INSTANCE(x,y,z,0)).x;
    WRITE_IMAGE(output, POS_output_INSTANCE(x,y,z,0), CONVERT_output_PIXEL_TYPE(value_a + value_b));
}
```

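This dialect introduces placeholders (such as `IMAGE_a_TYPE`, `READ_IMAGE`, and `POS_a_INSTANCE`) that adapt the kernel to the data type and dimensionality of its arguments, which makes it easier to write generic algorithms. Kernels written this way are run with `cle.execute` instead of `cle.native_execute`.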

In [6]:
# Inputs: ten ones and ten twos; `output` is created with the same shape as `a`.
a = cle.push(np.ones(10))
b = cle.push(np.full(10, 2.0))
output = cle.create(a.shape)

# Entry point to run; it has to be the function name used in the kernel source below.
kernel_name = "add_arrays"

# CLIJ-style OpenCL source: the IMAGE_*/POS_*/READ_IMAGE/WRITE_IMAGE placeholders
# stand in for the concrete types and accessors of each argument.
kernel_source = """
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
__kernel void add_arrays(IMAGE_a_TYPE a, IMAGE_b_TYPE b, IMAGE_output_TYPE output) {
    int x = get_global_id(0);
    int y = get_global_id(1);
    int z = get_global_id(2);
    
    IMAGE_a_PIXEL_TYPE value_a = READ_IMAGE(a, sampler, POS_a_INSTANCE(x,y,z,0)).x;
    IMAGE_b_PIXEL_TYPE value_b = READ_IMAGE(b, sampler, POS_b_INSTANCE(x,y,z,0)).x;
    WRITE_IMAGE(output, POS_output_INSTANCE(x,y,z,0), CONVERT_output_PIXEL_TYPE(value_a + value_b));
}"""

# One entry per kernel argument: keys follow the argument names, types, and order of the source.
parameters = dict(a=a, b=b, output=output)

cle.execute(
    parameters=parameters,
    kernel_name=kernel_name,
    kernel_source=kernel_source,
    global_size=a.shape,  # one work item per element (e.g. pixel)
)

print(f"{a} + \n{b} = \n{output}")

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + 
[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.] = 
[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]
