# First steps with Numba

In [1]:
import numba as nb
import numpy as np

In [2]:
@nb.jit(nopython=True)
def sum_sq(a):
    N = len(a)
    
    result = 0
    for i in range(N):
        result += a[i] ** 2
    
    return result

In [3]:
x = np.random.rand(10000)

In [4]:
%timeit sum_sq.py_func(x)

4.3 ms ± 81.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [5]:
%timeit sum_sq(x)

12.8 µs ± 5.41 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
%timeit (x**2).sum()

9.44 µs ± 93.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [7]:
x_list = x.tolist()

## Type specializations

In [8]:
@nb.jit(nopython=True)
def sum_sq(a):
    N = len(a)
    
    result = 0
    for i in range(N):
        result += a[i] ** 2
    
    return result

In [9]:
sum_sq.signatures

[]

In [10]:
x = np.random.rand(1000).astype('float64')
sum_sq(x)
sum_sq.signatures

[(array(float64, 1d, C),)]

In [12]:
x = np.random.rand(10000).astype('float32')
sum_sq(x)
sum_sq.signatures

[(array(float64, 1d, C),), (array(float32, 1d, C),)]

In [13]:
@nb.jit((nb.float64[:],))
def sum_sq(a):
    N = len(a)
    
    result = 0
    for i in range(N):
        result += a[i] ** 2
    
    return result

In [14]:
sum_sq(x.astype('float32'))

TypeError: No matching definition for argument type(s) array(float32, 1d, C)

In [19]:
@nb.jit(['float64(float64[:])',
         'float32(float32[:])'])
def sum_sq(a):
    N = len(a)
    
    result = 0
    for i in range(N):
        result += a[i] ** 2
    
    return result

In [20]:
sum_sq.signatures

[(array(float64, 1d, A),), (array(float32, 1d, A),)]

In [21]:
sum_sq.inspect_types()

sum_sq (array(float64, 1d, A),)
--------------------------------------------------------------------------------
# File: <ipython-input-19-4687c4bff0ac>
# --- LINE 1 --- 

@nb.jit(['float64(float64[:])',

         # --- LINE 2 --- 

         'float32(float32[:])'])

# --- LINE 3 --- 

def sum_sq(a):

    # --- LINE 4 --- 
    # label 0
    #   a = arg(0, name=a)  :: array(float64, 1d, A)
    #   $2load_global.0 = global(len: <built-in function len>)  :: Function(<built-in function len>)
    #   N = call $2load_global.0(a, func=$2load_global.0, args=[Var(a, <ipython-input-19-4687c4bff0ac>:4)], kws=(), vararg=None)  :: (array(float64, 1d, A),) -> int64
    #   del $2load_global.0

    N = len(a)

# --- LINE 5 --- 



    # --- LINE 6 --- 
    #   result = const(int, 0)  :: Literal[int](0)

    result = 0

    # --- LINE 7 --- 
    #   $14load_global.4 = global(range: <class 'range'>)  :: Function(<class 'range'>)
    #   $18call_function.6 = call $14load_global.4(N, func=$14load_global.4

In [22]:
@nb.jit
def concatenate(strings):
    result = ''
    for s in strings:
        result += s
    return result

In [23]:
concatenate(['hello', 'world'])
concatenate.signatures
concatenate.inspect_types()

Encountered the use of a type that is scheduled for deprecation: type 'reflected list' found for argument 'strings' of function 'concatenate'.

For more information visit https://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-reflection-for-list-and-set-types

File "<ipython-input-22-52e1d864c8e7>", line 2:
@nb.jit
def concatenate(strings):
^



concatenate (reflected list(unicode_type)<iv=None>,)
--------------------------------------------------------------------------------
# File: <ipython-input-22-52e1d864c8e7>
# --- LINE 1 --- 

@nb.jit

# --- LINE 2 --- 

def concatenate(strings):

    # --- LINE 3 --- 
    # label 0
    #   strings = arg(0, name=strings)  :: reflected list(unicode_type)<iv=None>
    #   result = const(str, )  :: Literal[str]()

    result = ''

    # --- LINE 4 --- 
    #   $8get_iter.2 = getiter(value=strings)  :: iter(reflected list(unicode_type)<iv=None>)
    #   del strings
    #   $phi10.0 = $8get_iter.2  :: iter(reflected list(unicode_type)<iv=None>)
    #   del $8get_iter.2
    #   jump 10
    # label 10
    #   result.2 = phi(incoming_values=[Var(result, <ipython-input-22-52e1d864c8e7>:3), Var(result.1, <ipython-input-22-52e1d864c8e7>:5)], incoming_blocks=[0, 12])  :: unicode_type
    #   del result.1
    #   $10for_iter.1 = iternext(value=$phi10.0)  :: pair<unicode_type, bool>
    #   $10for_i

In [24]:
x = ['hello'] * 1000
%timeit concatenate.py_func(x)

81.9 µs ± 1.25 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [25]:
%timeit concatenate(x)

1.27 ms ± 23.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


# Ufuncs and Gufuncs

In [21]:
# Numpy vectorization
@np.vectorize
def cantor_py(a, b):
    return  int(0.5 * (a + b)*(a + b + 1) + b)

In [22]:
cantor_py(np.array([1, 2]), 2)

array([ 8, 12])

In [23]:
# Numba vectorization
@nb.vectorize
def cantor(a, b):
    return  int(0.5 * (a + b)*(a + b + 1) + b)

In [24]:
cantor(np.array([1, 2]), 2)

array([ 8, 12])

In [25]:
x1 = np.random.rand(10000)
x2 = np.random.rand(10000)

In [26]:
%timeit cantor_py(x1, x2)

2.4 ms ± 23.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [27]:
%timeit cantor(x1, x2)

9.1 µs ± 204 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [28]:
%timeit (0.5 * (x1 + x2)*(x1 + x2 + 1) + x2).astype(int)

33.2 µs ± 1.12 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


# Matrix multiplication

In [29]:
a = np.random.rand(3, 3)
b = np.random.rand(3, 3)

c = np.matmul(a, b)
c.shape

(3, 3)

In [30]:
a = np.random.rand(10, 3, 3)
b = np.random.rand(10, 3, 3)

c = np.matmul(a, b)
c.shape

np.allclose(np.matmul(a[0], b[0]), c[0])

True

In [31]:
a = np.random.rand(10, 3, 3)
b = np.random.rand(3, 3)
c = np.matmul(a, b)
c.shape

(10, 3, 3)

In [32]:
@nb.guvectorize(['float64[:], float64[:], float64[:]'], '(n), (n) -> ()')
def euclidean(a, b, out):
    N = a.shape[0]
    out[0] = 0
    for i in range(N):
        out[0] += (a[i] - b[i])**2    

In [33]:
a = np.random.rand(2)
b = np.random.rand(2)
c = euclidean(a, b)

a = np.random.rand(10, 2)
b = np.random.rand(10, 2)
c = euclidean(a, b)

a = np.random.rand(10, 2)
b = np.random.rand(2)
c = euclidean(a, b)

In [34]:
a = np.random.rand(10000, 2)
b = np.random.rand(10000, 2)

In [35]:
%timeit ((a - b)**2).sum(axis=1)

153 µs ± 13.2 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [36]:
%timeit euclidean(a, b)

47.1 µs ± 3.19 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [37]:
np.allclose(((a - b)**2).sum(axis=1), euclidean(a, b))

True

# JIT classes

In [38]:
class Node:
    def __init__(self, value):
        self.next = None
        self.value = value


class LinkedList:
    def __init__(self):
        self.head = None
    
    def push_front(self, value):
        if self.head == None:
            self.head = Node(value)
        else:
            # We replace the head
            new_head = Node(value)
            new_head.next = self.head
            self.head = new_head
    
    def show(self):
        node = self.head
        while node is not None:
            print(node.value)
            node = node.next
@nb.jit   
def sum_list(lst):
    result = 0
    node = lst.head
    while node is not None:
        result += node.value
        node = node.next
    return result
        
lst = LinkedList()
lst.push_front(1)
lst.push_front(2)
lst.push_front(3)
lst.show()

sum_list(lst)

3
2
1


Compilation is falling back to object mode WITH looplifting enabled because Function "sum_list" failed type inference due to: non-precise type pyobject
During: typing of argument at <ipython-input-38-caf8ec740e10> (27)

File "<ipython-input-38-caf8ec740e10>", line 27:
def sum_list(lst):
    result = 0
    ^

  @nb.jit

File "<ipython-input-38-caf8ec740e10>", line 26:
@nb.jit   
def sum_list(lst):
^

Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit https://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "<ipython-input-38-caf8ec740e10>", line 26:
@nb.jit   
def sum_list(lst):
^



6

In [39]:
lst = LinkedList()
[lst.push_front(i) for i in range(10000)]

%timeit sum_list(lst)
%timeit sum_list.py_func(lst)

1.73 ms ± 159 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.01 ms ± 175 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [41]:
from numba.experimental import jitclass

In [42]:
node_type = nb.deferred_type()

node_spec = [
    ('next', nb.optional(node_type)),
    ('value', nb.int64)
]

@jitclass(node_spec)
class Node:
    def __init__(self, value):
        self.next = None
        self.value = value

node_type.define(Node.class_type.instance_type)


ll_spec = [
    ('head', nb.optional(Node.class_type.instance_type))
]

@jitclass(ll_spec)
class LinkedList:
    def __init__(self):
        self.head = None
    
    def push_front(self, value):
        if self.head is None:
            self.head = Node(value)
        else:
            # We replace the head
            new_head = Node(value)
            new_head.next = self.head
            self.head = new_head
    
    def show(self):
        node = self.head
        while node is not None:
            print(node.value)
            node = node.next


lst = LinkedList()
lst.push_front(1)
lst.push_front(2)
lst.push_front(3)
lst.show()

3
2
1


In [43]:
lst = LinkedList()
[lst.push_front(i) for i in range(10000)]

%timeit sum_list(lst)
%timeit sum_list.py_func(lst)

106 µs ± 2.64 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
2.42 ms ± 51.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# Tips and Tricks

In [44]:
a = [[0, 1, 2], 
      [3, 4], 
      [5, 6, 7, 8]]

@nb.jit
def sum_sublists(a):
    result = [0]

    for sublist in a:
        result.append(sum(sublist))
    
    return result[1:]

sum_sublists(a)

Compilation is falling back to object mode WITH looplifting enabled because Function "sum_sublists" failed type inference due to: Untyped global name 'sum': Cannot determine Numba type of <class 'builtin_function_or_method'>

File "<ipython-input-44-804026218b99>", line 10:
def sum_sublists(a):
    <source elided>
    for sublist in a:
        result.append(sum(sublist))
        ^

  @nb.jit
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "sum_sublists" failed type inference due to: Cannot determine Numba type of <class 'numba.core.dispatcher.LiftedLoop'>

File "<ipython-input-44-804026218b99>", line 9:
def sum_sublists(a):
    <source elided>

    for sublist in a:
    ^

  @nb.jit

File "<ipython-input-44-804026218b99>", line 7:
def sum_sublists(a):
    result = [0]
    ^

Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit https://numba.py

[3, 7, 26]