In [3]:
def iterate(n):
    """Count from zero by incrementing once per loop iteration.

    The loop runs ``range(n)`` times, so the returned count equals ``n`` for
    non-negative ``n`` and stays at 0 when ``range(n)`` is empty.
    """
    count = 0
    for _ in range(n):
        count = count + 1
    return count

## Ballpark estimate

In [1]:
# Back-of-the-envelope cost of the loop: 1,000,000 iterations scaled by 3 / 2 and by
# 0.3e-9 s, then converted from seconds to milliseconds (* 1e3).
# NOTE(review): presumably 3 = instructions per iteration, 2 = instructions retired per
# cycle, and 0.3 ns = one clock cycle (~3.3 GHz) — confirm these assumptions.
print(f"{1_000_000 * 3 / 2 * 0.3e-9 * 1e3} ms")

0.45 ms


## Measurement

In [4]:
# Measure the pure-Python loop for the same one million iterations used in the estimate.
%timeit iterate(1_000_000)

28.3 ms ± 428 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
# Rough cost per loop iteration in seconds: about 30 ms (rounded from the measurement
# above) divided by 1,000,000 iterations, i.e. roughly 3e-08 s = 30 ns.
30e-3 / 1000000

## NumPy

In [5]:
import numpy as np

In [6]:
# One million ones (np.ones defaults to floating point); reused by the sum benchmarks below.
array = np.ones(1000000)

In [7]:
# Sanity check: summing one million ones should give 1000000.0.
array.sum()

1000000.0

In [8]:
# Times array creation and the reduction together; the next two cells time each part
# on its own.
%timeit np.ones(1000000).sum()

511 µs ± 34 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [None]:
# Array creation only (no reduction).
%timeit np.ones(1000000)

In [None]:
# Reduction only, on the array built earlier (no allocation inside the timed expression).
%timeit array.sum()

## Numba

In [10]:
import numba as nb

In [11]:
# Wrap the pure-Python iterate in Numba's nopython-mode JIT under a new name, so the
# original iterate stays available for comparison.
iterate_numba = nb.njit(iterate)

In [13]:
# Note the argument is 1e9 here, a thousand times more iterations than the pure-Python run.
# A result in the ~100 ns range is far too fast for a real loop: the assembly dumped in
# the next cell contains no loop, only a few bit operations that compute max(n, 0).
%timeit iterate_numba(1_000_000_000)

158 ns ± 2.2 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [14]:
# inspect_asm() returns a dict keyed by argument-type signature; the tuple (nb.int64,)
# selects the specialization compiled for a single int64 argument.
print(iterate_numba.inspect_asm()[nb.int64,])

	.text
	.file	"<string>"
	.globl	_ZN8__main__11iterate$241Ex
	.p2align	4, 0x90
	.type	_ZN8__main__11iterate$241Ex,@function
_ZN8__main__11iterate$241Ex:
	movq	%rdx, %rax
	sarq	$63, %rax
	andnq	%rdx, %rax, %rax
	movq	%rax, (%rdi)
	xorl	%eax, %eax
	retq
.Lfunc_end0:
	.size	_ZN8__main__11iterate$241Ex, .Lfunc_end0-_ZN8__main__11iterate$241Ex

	.globl	_ZN7cpython8__main__11iterate$241Ex
	.p2align	4, 0x90
	.type	_ZN7cpython8__main__11iterate$241Ex,@function
_ZN7cpython8__main__11iterate$241Ex:
	.cfi_startproc
	pushq	%r14
	.cfi_def_cfa_offset 16
	pushq	%rbx
	.cfi_def_cfa_offset 24
	subq	$24, %rsp
	.cfi_def_cfa_offset 48
	.cfi_offset %rbx, -24
	.cfi_offset %r14, -16
	movq	%rsi, %rdi
	movabsq	$.const.iterate, %rsi
	movabsq	$PyArg_UnpackTuple, %rbx
	leaq	16(%rsp), %r8
	movl	$1, %edx
	movl	$1, %ecx
	xorl	%eax, %eax
	callq	*%rbx
	movq	$0, 8(%rsp)
	testl	%eax, %eax
	je	.LBB1_1
	movabsq	$_ZN08NumbaEnv8__main__11iterate$241Ex, %rax
	cmpq	$0, (%rax)
	je	.LBB1_4
	movq	16(%rsp), %rdi
	movabsq	$PyNumber

In [15]:
@nb.njit()
def mysum(array):
    """Add up the items of ``array`` with an explicit loop.

    Accumulation starts from the integer 0 and proceeds in iteration order,
    so an empty input yields 0.
    """
    total = 0
    for value in array:
        total = total + value
    return total

In [16]:
# Time the Numba-compiled explicit loop on the same one-million-element array.
%timeit mysum(array)

1.03 ms ± 6.14 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [17]:
@nb.njit()
def mysum_2(array):
    """Sum ``array`` using two independent accumulators.

    The first accumulator walks the first half of the array and the second
    walks the second half in lock-step; the two partial sums are combined at
    the end. Keeping two separate running sums means the additions in one
    iteration do not depend on each other (presumably the reason this variant
    times faster than a single-accumulator loop -- confirm on your hardware).

    Parameters
    ----------
    array : indexable sequence supporting ``len()`` and integer indexing.

    Returns
    -------
    The sum of all elements; 0 for an empty input.
    """
    n = len(array)
    half = n // 2  # hoisted: the loop bound does not change between iterations
    s0 = 0
    s1 = 0
    for i in range(half):
        s0 += array[i]
        s1 += array[half + i]

    # For odd lengths the two halves cover only 2 * half == n - 1 elements;
    # add the final element so it is not silently dropped.
    if n % 2:
        s1 += array[n - 1]

    return s1 + s0

In [18]:
# Time the two-accumulator variant on the same array, for comparison with mysum.
%timeit mysum_2(array)

548 µs ± 16.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
