<a href="https://colab.research.google.com/github/Arul1996/Arul1996/blob/main/cupy_ElementwiseKernel_Examples.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. To run the notebook, open it in playground mode.
2. Don't forget to turn on the GPU from *Runtime -> Change runtime type*.

In [1]:
import cupy as cp

# Non-generic kernel

In [2]:
# Fixed-type kernel: both inputs and the output are declared as int64
# directly in the parameter strings.
squared_diff = cp.ElementwiseKernel(
    in_params='int64 x, int64 y',
    out_params='int64 z',
    operation='z = (x - y) * (x - y)',  # loop body code
    name='squared_diff',
)

x = cp.array([1, 2, 3, 4])
y = cp.array([5, 4, 7, 8])

# Last expression of the cell, so the returned array is what gets displayed.
squared_diff(x, y)

array([16,  4, 16, 16])

# Generic kernel

In [3]:
# Type-generic kernel: S and T stand for the data types of x and y.
# The output z is declared with S, so it has the same data type as x.
# NOTE(review): "elemetwise" looks like a typo for "elementwise"; the name
# is kept unchanged so existing references keep working.
elemetwise_addition = cp.ElementwiseKernel(
    in_params='S x, T y',
    out_params='S z',
    operation='z = (x + (S) y)',  # y is cast to S (the type of x) before adding
    name='elemetwise_addition',
)

x = cp.array([1.5, 2.5, 3.5, 4.5])
y = cp.array([5, 4, 7, 8])

# Displayed as the cell output.
elemetwise_addition(x, y)

array([ 6.5,  6.5, 10.5, 12.5])

# Raw arguments (indexing inside the kernel)

In [4]:
# Gather kernel: x is looped over elementwise, while y is marked 'raw' so
# the loop body can index it with the corresponding element of x.
#
# The output is declared with T (the type of y) because the values written
# to z are elements of y. Declaring it with S (the type of the indices)
# would convert the gathered values to the index type, e.g. truncating a
# float y when x holds integers.
indexwise_rearange = cp.ElementwiseKernel(
    in_params='S x, raw T y',   # y has been specified as 'raw'
    out_params='T z',           # z holds elements of y, so it shares y's type
    operation='z = y[x]',
    name='indexwise_rearange',
)

# For this particular example every x[i] must lie in [0, len(y) - 1],
# since the loop body uses it directly as an index into y.
x = cp.array([2, 3, 1, 0])
y = cp.array([5, 4, 7, 8])

# Elements of y are rearranged according to the indices specified in x.
indexwise_rearange(x, y)

array([7, 8, 4, 5])

# Scalar arguments

In [8]:
# Kernel whose second input is supplied as a plain Python scalar at call time.
vector_scalar_mult = cp.ElementwiseKernel(
    in_params='int64 x, float64 y',
    out_params='float64 z',
    operation='z = x * y',
    name='vector_scalar_mult',
)

x = cp.array([2, 3, 1, 0])

# y is passed as the scalar 2.0.
# NOTE(review): the returned array is not captured or displayed here; the
# cell goes on to display x instead.
vector_scalar_mult(x, 2.0)

# Cell output: x itself, which was only passed as an input argument.
x

array([2, 3, 1, 0])

# Passing output arguments by reference

In [None]:
# Variant in which x is declared as the kernel's output parameter and is
# both read and written by the loop body.
# Note: this rebinds vector_scalar_mult, replacing the kernel defined in the
# "Scalar arguments" section.
vector_scalar_mult = cp.ElementwiseKernel(
    in_params='float64 y',
    out_params='S x',           # x will be updated by the kernel
    operation='x = x * y',
    name='vector_scalar_mult',
)

x = cp.array([2, 3, 1, 0])

# Inputs come first (the scalar y), then x is passed as the output argument.
vector_scalar_mult(2.0, x)

# Cell output: the updated x.
x

array([4, 6, 2, 0])