[Reference](https://python.plainenglish.io/using-numba-for-blazing-performance-in-python-656e8e32f8c)

In [1]:
pip install numba



In [2]:
import numpy as np
size = int(5e7)
array = np.arange(size).astype(np.float32)

In [3]:
def function_with_normal_loop(array):
    out = []
    for i in array:
        out.append(sqrt(i))
    return out

In [4]:
def function_with_list_comprehension(array):
    return [sqrt(x) for x in array]

In [5]:
def function_with_map(array):
    return list(map(sqrt, array))

In [6]:
from numba import jit

In [7]:
@jit(nopython=True)
def function_with_normal_loop(array):
    out = []
    for i in array:
        out.append(sqrt(i))
    return out

In [8]:
@jit(nopython=True)
def function_with_list_comprehension(array):
    return [sqrt(x) for x in array]

In [9]:
@jit(nopython=True)
def function_with_map(array):
    return list(map(sqrt, array))

In [10]:
from numba import vectorize

In [11]:
@vectorize
def function_with_vectorize(elem):
    return sqrt(elem)

In [13]:
@vectorize(['float32(float32)'])
def function_with_vectorize(elem):
    return sqrt(elem)

Compilation is falling back to object mode WITHOUT looplifting enabled because Function "function_with_vectorize" failed type inference due to: NameError: name 'sqrt' is not defined
  @vectorize(['float32(float32)'])

File "<ipython-input-13-dbc7301b15b1>", line 2:
@vectorize(['float32(float32)'])
def function_with_vectorize(elem):
^

  state.func_ir.loc))
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit https://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "<ipython-input-13-dbc7301b15b1>", line 2:
@vectorize(['float32(float32)'])
def function_with_vectorize(elem):
^

  state.func_ir.loc))


In [14]:
from numba import cuda

In [15]:
@cuda.jit
def normal_function(array, out):
    idx = cuda.grid(1)
    out[idx] = sqrt(array[idx])d_a = cuda.to_device(array)
d_out = cuda.device_array_like(d_a)
blocks_per_grid = 32
threads_per_block = 128
normal_function[blocks_per_grid, threads_per_block](d_a, d_out)
print(d_out.copy_to_host())

In [17]:
@vectorize(['float32(float32)'], target='cuda')
def vectorize_with_cuda(elem, ):
    return sqrt(elem)