# Chapter 19: Code optimization

Robert Johansson

Source code listings for [Numerical Python - A Practical Techniques Approach for Industry](http://www.apress.com/9781484205549) (ISBN 978-1-484205-54-9).

The source code listings can be downloaded from http://www.apress.com/9781484205549

In [None]:
import numba

In [None]:
import pyximport

In [None]:
import cython

In [None]:
import numpy as np

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Numba

In [None]:
np.random.seed(0)

In [None]:
data = np.random.randn(50000)

In [None]:
def py_sum(data):
    """Add up the items of ``data`` with an explicit Python-level loop.

    Accumulation starts from the integer ``0`` and uses ``+=`` on every item,
    so an empty iterable yields ``0``.
    """
    total = 0
    for value in data:
        total += value
    return total

In [None]:
def py_cumsum(data):
    """Return the running totals of ``data`` as a float64 NumPy array.

    Element ``k`` of the result holds the sum of the first ``k + 1`` items
    of ``data``; the result has the same length as ``data``.
    """
    running = 0
    result = np.zeros(len(data), dtype=np.float64)
    for idx, value in enumerate(data):
        running += value
        result[idx] = running
    return result

In [None]:
%timeit py_sum(data)

In [None]:
assert abs(py_sum(data) - np.sum(data)) < 1e-10

In [None]:
%timeit np.sum(data)

In [None]:
%timeit py_cumsum(data)

In [None]:
assert np.allclose(np.cumsum(data), py_cumsum(data))

In [None]:
%timeit np.cumsum(data)

In [None]:
@numba.jit
def jit_sum(data):
    """Numba-compiled loop that adds up every item of ``data``.

    The accumulator starts at the integer ``0`` and each item is added
    with ``+=``.
    """
    total = 0
    for value in data:
        total += value
    return total

In [None]:
assert abs(jit_sum(data) - np.sum(data)) < 1e-10

In [None]:
%timeit jit_sum(data)

In [None]:
jit_cumsum = numba.jit()(py_cumsum)

In [None]:
assert np.allclose(np.cumsum(data), jit_cumsum(data))

In [None]:
%timeit jit_cumsum(data)

## Julia fractal

In [None]:
def py_julia_fractal(z_re, z_im, j):
    """Fill ``j`` in place with Julia-set escape iteration counts.

    For every pair ``(z_re[m], z_im[n])`` the point ``z = z_re[m] + 1j * z_im[n]``
    is iterated as ``z -> z**2 - 0.05 + 0.68j`` up to 256 times. The index of the
    first iteration after which ``|z| > 2`` is stored in ``j[m, n]``; entries
    whose point never exceeds that bound are left untouched. Returns ``None``.
    """
    for row, re_part in enumerate(z_re):
        for col, im_part in enumerate(z_im):
            z = re_part + 1j * im_part
            for step in range(256):
                z = z ** 2 - 0.05 + 0.68j
                # Comparing z.real**2 + z.imag**2 against 4.0 would be a bit
                # faster than taking the absolute value.
                if np.abs(z) > 2.0:
                    j[row, col] = step
                    break

In [None]:
jit_julia_fractal = numba.jit(nopython=True)(py_julia_fractal)

In [None]:
N = 1024
j = np.zeros((N, N), np.int64)
z_real = np.linspace(-1.5, 1.5, N)
z_imag = np.linspace(-1.5, 1.5, N)

In [None]:
jit_julia_fractal(z_real, z_imag, j)

In [None]:
# Render the escape-iteration counts stored in j as an image and save it as a PDF.
fig, ax = plt.subplots(figsize=(14, 14))
ax.imshow(j, cmap=plt.cm.RdBu_r,
          extent=[-1.5, 1.5, -1.5, 1.5])
# Raw strings keep the LaTeX backslash literal; "\m" inside a normal string
# literal is an invalid escape sequence that newer Python versions warn about.
ax.set_xlabel(r"$\mathrm{Re}(z)$", fontsize=18)
ax.set_ylabel(r"$\mathrm{Im}(z)$", fontsize=18)
fig.tight_layout()
fig.savefig("ch19-numba-julia-fractal.pdf")

In [None]:
%timeit py_julia_fractal(z_real, z_imag, j)

In [None]:
%timeit jit_julia_fractal(z_real, z_imag, j)

## Vectorize

In [None]:
def py_Heaviside(x):
    """Evaluate the Heaviside step function at the scalar ``x``.

    Returns 0.5 when ``x`` equals zero, 0.0 when ``x`` is negative and
    1.0 in every other case.
    """
    if x == 0.0:
        return 0.5
    return 0.0 if x < 0.0 else 1.0

In [None]:
x = np.linspace(-2, 2, 50001)

In [None]:
%timeit [py_Heaviside(xx) for xx in x]

In [None]:
np_vec_Heaviside = np.vectorize(py_Heaviside)

In [None]:
np_vec_Heaviside(x)

In [None]:
%timeit np_vec_Heaviside(x)

In [None]:
def np_Heaviside(x):
    """Heaviside step function built from elementwise comparisons.

    Combines the ``x > 0`` mask (contributing 1) with half of the ``x == 0``
    mask (contributing 0.5), so it works on whole arrays without a loop.
    """
    positive = x > 0.0
    at_origin = x == 0.0
    return positive + at_origin / 2.0

In [None]:
%timeit np_Heaviside(x)

In [None]:
@numba.vectorize([numba.float32(numba.float32),
                  numba.float64(numba.float64)])
def jit_Heaviside(x):
    """Scalar Heaviside kernel compiled by Numba for float32 and float64.

    Returns 0.5 when ``x`` equals zero, 0.0 when ``x`` is negative and
    1.0 in every other case.
    """
    if x == 0.0:
        return 0.5
    return 0.0 if x < 0 else 1.0

In [None]:
%timeit jit_Heaviside(x)

In [None]:
jit_Heaviside([-1, -0.5, 0.0, 0.5, 1.0])

# Cython

In [None]:
!rm cy_sum.*

In [None]:
%%writefile cy_sum.pyx

def cy_sum(data):
    """Add up the items of ``data``, starting from the float ``0.0``.

    Contains no Cython type declarations; it is the plain loop compiled as-is.
    """
    total = 0.0
    for value in data:
        total += value
    return total

In [None]:
!cython cy_sum.pyx

In [None]:
# 5 lines of python code -> 1470 lines of C code ...
!wc cy_sum.c

In [None]:
%%writefile setup.py

# Build script for the cy_sum extension module.
# Usage: python setup.py build_ext --inplace

# distutils is deprecated (PEP 632) and is no longer part of the standard
# library from Python 3.12 on, so prefer setuptools and fall back to
# distutils only on installations that lack it.
try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup
from Cython.Build import cythonize

import numpy as np

setup(ext_modules=cythonize('cy_sum.pyx'),
      # Make the NumPy C headers visible to the compiler.
      include_dirs=[np.get_include()],
      requires=['Cython', 'numpy'])

In [None]:
!python setup.py build_ext --inplace > /dev/null

In [None]:
from cy_sum import cy_sum

In [None]:
cy_sum(data)

In [None]:
%timeit cy_sum(data)

In [None]:
%timeit py_sum(data)

In [None]:
%%writefile cy_cumsum.pyx

cimport numpy
import numpy

def cy_cumsum(data):
    """Return the running totals of ``data``.

    The result is allocated with ``numpy.zeros_like(data)`` and entry ``k``
    receives the sum of the first ``k + 1`` items of ``data``.
    """
    result = numpy.zeros_like(data)
    running = 0
    for idx, value in enumerate(data):
        running += value
        result[idx] = running
    return result

In [None]:
pyximport.install(setup_args={'include_dirs': np.get_include()});

In [None]:
pyximport.install(setup_args=dict(include_dirs=np.get_include()));

In [None]:
from cy_cumsum import cy_cumsum

In [None]:
%timeit cy_cumsum(data)

In [None]:
%timeit py_cumsum(data)

## Using IPython cython command

In [None]:
%load_ext cython

In [None]:
%%cython -a
def cy_sum(data):
    """Return the sum of all items in ``data`` as accumulated from ``0.0``.

    Untyped version: the loop body is ordinary Python compiled by Cython.
    """
    acc = 0.0
    for item in data:
        acc += item
    return acc

In [None]:
%timeit cy_sum(data)

In [None]:
%timeit py_sum(data)

In [None]:
assert np.allclose(np.sum(data), cy_sum(data))

In [None]:
%%cython -a
cimport numpy
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
def cy_sum(numpy.ndarray[numpy.float64_t, ndim=1] data):
    """Sum a one-dimensional float64 NumPy array with a typed C loop.

    Bounds checking and negative-index wraparound are switched off for the
    element access, so the loop index must stay within ``0 .. len(data) - 1``.
    Returns the total as a float64 value (``0.0`` for an empty array).
    """
    cdef numpy.float64_t s = 0.0
    # Py_ssize_t is the C type used for Python container lengths and indices;
    # a plain C int would silently truncate the length of arrays that hold
    # more than INT_MAX elements.
    cdef Py_ssize_t n, N = len(data)
    for n in range(N):
        s += data[n]
    return s

In [None]:
%timeit cy_sum(data)

In [None]:
%timeit jit_sum(data)

In [None]:
%timeit np.sum(data)

## Cumulative sum

In [None]:
%%cython -a
cimport numpy
import numpy
cimport cython

ctypedef numpy.float64_t FTYPE_t

@cython.boundscheck(False)
@cython.wraparound(False)
def cy_cumsum(numpy.ndarray[FTYPE_t, ndim=1] data):
    """Return the cumulative sum of a one-dimensional float64 NumPy array.

    A new array with the dtype of ``data`` is allocated and entry ``k`` is set
    to the sum of ``data[0]`` through ``data[k]``. Bounds checking and
    negative-index wraparound are switched off for the element accesses.
    """
    # Py_ssize_t is the C type used for Python container lengths and indices;
    # a plain C int would silently truncate the size of arrays that hold more
    # than INT_MAX elements.
    cdef Py_ssize_t n, N = data.size
    cdef numpy.ndarray[FTYPE_t, ndim=1] out = numpy.zeros(N, dtype=data.dtype)
    # Use the FTYPE_t alias so the accumulator always matches the element type.
    cdef FTYPE_t s = 0.0
    for n in range(N):
        s += data[n]
        out[n] = s
    return out

In [None]:
%timeit py_cumsum(data)

In [None]:
%timeit cy_cumsum(data)

In [None]:
%timeit jit_cumsum(data)

In [None]:
%timeit np.cumsum(data)

In [None]:
assert np.allclose(cy_cumsum(data), np.cumsum(data))

## Fused types

In [None]:
py_sum([1.0, 2.0, 3.0, 4.0, 5.0])

In [None]:
py_sum([1, 2, 3, 4, 5])

In [None]:
cy_sum(np.array([1.0, 2.0, 3.0, 4.0, 5.0]))

In [None]:
cy_sum(np.array([1, 2, 3, 4, 5]))

In [None]:
%%cython -a
cimport numpy
cimport cython

# Fused type: the function below is specialized for each listed element type.
ctypedef fused I_OR_F_t:
    numpy.int64_t
    numpy.float64_t

@cython.boundscheck(False)
@cython.wraparound(False)
def cy_fused_sum(numpy.ndarray[I_OR_F_t, ndim=1] data):
    """Sum a one-dimensional int64 or float64 NumPy array with a typed C loop.

    The accumulator has the same C type as the array elements, so the result
    is an integer for int64 input and a float for float64 input. Bounds
    checking and negative-index wraparound are switched off.
    """
    cdef I_OR_F_t s = 0
    # Py_ssize_t is the C type used for Python container lengths and indices;
    # a plain C int would silently truncate the size of arrays that hold more
    # than INT_MAX elements.
    cdef Py_ssize_t n, N = data.size
    for n in range(N):
        s += data[n]
    return s

In [None]:
cy_fused_sum(np.array([1.0, 2.0, 3.0, 4.0, 5.0]))

In [None]:
cy_fused_sum(np.array([1, 2, 3, 4, 5]))

## Julia fractal

In [None]:
%%cython -a
cimport numpy
cimport cython

ctypedef numpy.int64_t ITYPE_t
ctypedef numpy.float64_t FTYPE_t

cpdef inline double abs2(double complex z):
    """Return the squared magnitude of ``z`` (avoids a square root)."""
    return z.real * z.real + z.imag * z.imag

@cython.boundscheck(False)
@cython.wraparound(False)
def cy_julia_fractal(numpy.ndarray[FTYPE_t, ndim=1] z_re,
                     numpy.ndarray[FTYPE_t, ndim=1] z_im,
                     numpy.ndarray[ITYPE_t, ndim=2] j):
    """Fill ``j`` in place with Julia-set escape iteration counts.

    For every pair ``(z_re[m], z_im[n])`` the point ``z = z_re[m] + 1j * z_im[n]``
    is iterated as ``z -> z**2 - 0.05 + 0.68j`` up to 256 times. The index of the
    first iteration after which ``|z|**2 > 4`` is stored in ``j[m, n]``; entries
    whose point never exceeds that bound are left untouched.

    Raises:
        ValueError: if ``j`` has fewer rows than ``z_re`` has elements or fewer
            columns than ``z_im`` has elements.
    """
    # Py_ssize_t is the C type used for Python container lengths and indices;
    # a plain C int would silently truncate the size of very large arrays.
    cdef Py_ssize_t m, n, M = z_re.size, N = z_im.size
    cdef int t
    cdef double complex z
    # Bounds checking is disabled below, so an undersized output array would be
    # written past its end; reject it up front instead.
    if j.shape[0] < M or j.shape[1] < N:
        raise ValueError("j must have shape of at least (len(z_re), len(z_im))")
    for m in range(M):
        for n in range(N):
            z = z_re[m] + 1.0j * z_im[n]
            for t in range(256):
                z = z ** 2 - 0.05 + 0.68j
                if abs2(z) > 4.0:
                    j[m, n] = t
                    break

In [None]:
N = 1024

In [None]:
j = np.zeros((N, N), dtype=np.int64)

In [None]:
z_real = np.linspace(-1.5, 1.5, N)

In [None]:
z_imag = np.linspace(-1.5, 1.5, N)

In [None]:
%timeit cy_julia_fractal(z_real, z_imag, j)

In [None]:
%timeit jit_julia_fractal(z_real, z_imag, j)

In [None]:
j1 = np.zeros((N, N), dtype=np.int64)

In [None]:
cy_julia_fractal(z_real, z_imag, j1)

In [None]:
j2 = np.zeros((N, N), dtype=np.int64)

In [None]:
jit_julia_fractal(z_real, z_imag, j2)

In [None]:
assert np.allclose(j1, j2)

## Calling C function

In [None]:
%%cython

cdef extern from "math.h":
    # Declaration of the C library arc cosine, taken from the C header.
    double acos(double)

def cy_acos1(double x):
    """Return the arc cosine of ``x`` computed by the C ``acos`` from math.h."""
    cdef double angle = acos(x)
    return angle

In [None]:
%timeit cy_acos1(0.5)

In [None]:
%%cython

# Pull the C arc cosine from the libc declarations that ship with Cython.
from libc.math cimport acos

def cy_acos2(double x):
    """Return the arc cosine of ``x`` computed by the C ``acos`` from libc.math."""
    cdef double angle = acos(x)
    return angle

In [None]:
%timeit cy_acos2(0.5)

In [None]:
from numpy import arccos

In [None]:
%timeit arccos(0.5)

In [None]:
from math import acos

In [None]:
%timeit acos(0.5)

In [None]:
assert cy_acos1(0.5) == acos(0.5)

In [None]:
assert cy_acos2(0.5) == acos(0.5)

# Versions

In [None]:
%reload_ext version_information

In [None]:
%version_information numpy, cython, numba, matplotlib