In [None]:
import pyclesperanto as cle
import numpy as np

# Custom kernel operation

Many algorithms are already available in the library, but you may want to perform a more specific task or develop your own kernel operations. clEsperanto provides the function `native_execute` to run OpenCL C code directly.

In [None]:
# Print the docstring of `native_execute` into the cell output so that it is
# kept in the saved/exported notebook (the IPython `?` pager is not).
help(cle.native_execute)

## Native OpenCL Kernel

```c
__kernel void add_arrays(__global float* a, __global float* b, __global float* output, int size) {
    int x = get_global_id(0); // Global ID in the 1st dimension

    if (x < size) {
        output[x] = a[x] + b[x];
    }
}
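```

The cell below runs a shorter variant of this kernel without the `size` argument and the bounds check: this is only safe because `global_size` is set to exactly the number of elements, so every work item maps to a valid index.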

In [None]:
# Element-wise addition of two 1D arrays with a plain OpenCL C kernel.
#
# The kernel declares every argument as `__global float*`, so the host data
# and the output buffer are created explicitly as float32 (np.ones defaults
# to float64) instead of relying on an implicit conversion during the push.
a = cle.push(np.ones(10, dtype=np.float32))
b = cle.push(np.full(10, 2, dtype=np.float32))
output = cle.create(a.shape, dtype=np.float32)

# NOTE: the kernel has no bounds check, so `global_size` below must be exactly
# the number of elements; a larger value would read/write out of bounds.
kernel_source = """
__kernel void add_arrays(__global float* a, __global float* b, __global float* output) {
    int x = get_global_id(0);
    output[x] = a[x] + b[x];
}"""

kernel_name = "add_arrays"  # Must match the kernel name in the source !

parameters = { # keys must match the kernel arguments name, type, and order in the source !
    "a": a,
    "b": b,
    "output": output
}

cle.native_execute(
    kernel_source=kernel_source,
    kernel_name=kernel_name,
    parameters=parameters,
    global_size=a.size          # one work item per element (see NOTE above)
)

print(f"{a} + \n{b} = \n{output}")

## CLIJ-OpenCL Kernel

```c
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
__kernel void add_arrays(IMAGE_a_TYPE a, IMAGE_b_TYPE b, IMAGE_output_TYPE output) {
    int x = get_global_id(0);
    int y = get_global_id(1);
    int z = get_global_id(2);
    
    IMAGE_a_PIXEL_TYPE value_a = READ_IMAGE(a, sampler, POS_a_INSTANCE(x,y,z,0)).x;
    IMAGE_b_PIXEL_TYPE value_b = READ_IMAGE(b, sampler, POS_b_INSTANCE(x,y,z,0)).x;
    WRITE_IMAGE(output, POS_output_INSTANCE(x,y,z,0), CONVERT_output_PIXEL_TYPE(value_a + value_b));
}
```

This introduces placeholders that adapt the kernel to the data type and dimensionality of its arguments, which makes it easier to write generic algorithms.

In [None]:
# Same addition as a CLIJ-flavoured kernel: the IMAGE_*/POS_*/CONVERT_*
# placeholders are used in place of concrete pointer and pixel types.
kernel_name = "add_arrays"  # has to be the name of the __kernel function below
kernel_source = """
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
__kernel void add_arrays(IMAGE_a_TYPE a, IMAGE_b_TYPE b, IMAGE_output_TYPE output) {
    int x = get_global_id(0);
    int y = get_global_id(1);
    int z = get_global_id(2);
    
    IMAGE_a_PIXEL_TYPE value_a = READ_IMAGE(a, sampler, POS_a_INSTANCE(x,y,z,0)).x;
    IMAGE_b_PIXEL_TYPE value_b = READ_IMAGE(b, sampler, POS_b_INSTANCE(x,y,z,0)).x;
    WRITE_IMAGE(output, POS_output_INSTANCE(x,y,z,0), CONVERT_output_PIXEL_TYPE(value_a + value_b));
}"""

# Two input arrays (all ones / all twos) and an output array of the same shape.
a = cle.push(np.ones(10))
b = cle.push(np.full(10, 2.0))
output = cle.create(a.shape)

# Keyword names (and their order) mirror the kernel signature: a, b, output.
parameters = dict(a=a, b=b, output=output)

cle.execute(
    kernel_source=kernel_source,
    kernel_name=kernel_name,
    parameters=parameters,
    global_size=a.shape,  # one work item per pixel of the input
)

print(f"{a} + \n{b} = \n{output}")

* `IMAGE_xxx_TYPE`: placeholder for the type of a global array
* `IMAGE_xxx_PIXEL_TYPE`: placeholder for the pixel type of an array
* `POS_xxx_INSTANCE`: placeholder for the coordinate system of an array
* `READ_IMAGE( <image>, <sampler>, <position> )` & `WRITE_IMAGE( <image>, <position>, <value> )`: access functions to read from / write to an array
* `CONVERT_xxx_PIXEL_TYPE()`: value converter to the pixel type of an array