In [1]:
import numpy as np
import warnings
# Silence every warning for the rest of the session (presumably to keep the
# benchmark output readable).
# NOTE(review): this also hides any warning raised by the functions being timed.
warnings.filterwarnings("ignore")

In [2]:
def gen_sym_matrix(M):
    """Return a random symmetric positive-definite ``M x M`` float64 matrix.

    A matrix ``B`` with uniform entries in ``[0, 1)`` is drawn from NumPy's
    global random state, and ``B @ B.T + I`` is returned. ``B @ B.T`` is
    symmetric positive semi-definite; adding the identity makes the result
    strictly positive definite, so a Cholesky factorisation exists.

    Parameters
    ----------
    M : int
        Number of rows/columns of the generated matrix.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(M, M)`` and dtype ``float64``.
    """
    base = np.random.rand(M, M).astype(np.float64)
    gram = base @ base.T
    return gram + np.eye(M)

In [3]:
# Seed NumPy's global random state so every run benchmarks the same matrices.
np.random.seed(331)
# The generator is consumed left to right, so the matrices are drawn in the
# order 128, 256, 2048.
A128, A256, A2048 = (gen_sym_matrix(size) for size in (128, 256, 2048))

## Scipy - LAPACK

In [4]:
from scipy.linalg import cholesky

In [5]:
# Benchmark scipy.linalg.cholesky (lower-triangular factor) on the three
# test matrices. Every label is printed with end='' so that the %timeit
# report appears on the same line as the matrix size.
print("1. Scipy", end='')

print("\n   128:", end='')
%timeit l = cholesky(A128, lower=True)

print("\n   256:", end='')
%timeit l = cholesky(A256, lower=True)

print("\n   2048:", end='')
%timeit l = cholesky(A2048, lower=True)

1. Scipy
   128:89.4 µs ± 1.21 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

   256:366 µs ± 15.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

   2048:92.2 ms ± 2.91 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Native Python
[Cholesky decomposition - Rosetta Code](https://rosettacode.org/wiki/Cholesky_decomposition#Python3.X_version_using_extra_Python_idioms)

In [6]:
from math import sqrt
def cholesky_native_python(A, **kwarg):
    """Compute the lower-triangular Cholesky factor using plain Python lists.

    Parameters
    ----------
    A : sequence of sequences of float
        Square matrix, indexed as ``A[i][j]``. Only the entries on and below
        the diagonal are read.
    **kwarg
        Accepted and ignored, so the function can be called with the same
        keyword arguments as the other implementations (e.g. ``lower=True``).

    Returns
    -------
    list of list of float
        ``L`` such that ``L[i][j]`` is the factor entry for ``j <= i`` and
        ``0.0`` above the diagonal.

    Raises
    ------
    ValueError
        From ``math.sqrt`` when a diagonal pivot is negative.
    ZeroDivisionError
        When a diagonal entry of the factor is zero.
    """
    size = len(A)
    L = [[0.0] * size for _ in range(size)]
    for row in range(size):
        for col in range(row + 1):
            # Dot product of the already-computed parts of rows `row` and `col`
            # (columns 0 .. col-1); zip stops at the shorter, sliced operand.
            partial = sum(a * b for a, b in zip(L[row], L[col][:col]))
            if row != col:
                L[row][col] = (A[row][col] - partial) / L[col][col]
            else:
                L[row][col] = sqrt(A[row][row] - partial)
    return L

In [7]:
# Benchmark the pure-Python implementation on the 128 and 256 matrices
# (the 2048 matrix is not timed in this cell).
# The arrays are converted to nested lists outside the timed statement, so
# the conversion cost is not part of the measurement.
# lower=True is swallowed by the function's **kwarg and has no effect.
print("2. Native Python", end='')

print("\n   128:", end='')
A128_list = A128.tolist()
%timeit l = cholesky_native_python(A128_list, lower=True)

print("\n   256:", end='')
A256_list = A256.tolist()
%timeit l = cholesky_native_python(A256_list, lower=True)

2. Native Python
   128:44.4 ms ± 1.19 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

   256:327 ms ± 8.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Python - Numpy

In [8]:
def cholesky_numpy(A, **kwarg):
    """Compute the lower-triangular Cholesky factor with NumPy dot products.

    The loop structure is the same row-by-row scheme as the pure-Python
    version; only the inner accumulation is delegated to ``np.dot``.

    Parameters
    ----------
    A : numpy.ndarray
        2-D square matrix. Only entries on and below the diagonal are read.
    **kwarg
        Accepted and ignored (e.g. ``lower=True``).

    Returns
    -------
    numpy.ndarray
        float64 array shaped like ``A`` holding the factor in its lower
        triangle and zeros above the diagonal.
    """
    n_rows = A.shape[0]
    L = np.zeros_like(A, dtype=np.float64)

    for row in range(n_rows):
        for col in range(row + 1):
            # Contribution of the columns already factorised (0 .. col-1).
            overlap = np.dot(L[row, :col], L[col, :col])
            if row == col:
                L[row, col] = np.sqrt(A[row, row] - overlap)
            else:
                L[row, col] = (A[row, col] - overlap) / L[col, col]
    return L

In [9]:
# Benchmark the NumPy implementation on the 128 and 256 matrices
# (the 2048 matrix is not timed in this cell).
# lower=True is swallowed by the function's **kwarg and has no effect.
print("3. Python - Numpy", end='')

print("\n   128:", end='')
%timeit l = cholesky_numpy(A128, lower=True)

print("\n   256:", end='')
%timeit l = cholesky_numpy(A256, lower=True)

3. Python - Numpy
   128:21.8 ms ± 3.94 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)

   256:87 ms ± 10 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Python - Numpy - Numba

In [10]:
from numba import jit

@jit(nopython=True)
def cholesky_numpy_numba(A):
    """Numba-compiled (nopython mode) variant of the NumPy Cholesky loop.

    Parameters
    ----------
    A : numpy.ndarray
        2-D square matrix. Only entries on and below the diagonal are read.
        Unlike the other Python variants, no extra keyword arguments are
        accepted.

    Returns
    -------
    numpy.ndarray
        float64 array shaped like ``A`` holding the factor in its lower
        triangle and zeros above the diagonal.
    """
    n_rows = A.shape[0]
    L = np.zeros_like(A, dtype=np.float64)

    for row in range(n_rows):
        for col in range(row + 1):
            # Contribution of the columns already factorised (0 .. col-1).
            overlap = np.dot(L[row, :col], L[col, :col])
            if row == col:
                L[row, col] = np.sqrt(A[row, row] - overlap)
            else:
                L[row, col] = (A[row, col] - overlap) / L[col, col]
    return L

In [11]:
print("4. Python - Numpy - Numba", end='')

print("\n   128:", end='')
%timeit l = cholesky_numpy_numba(A128)

print("\n   256:", end='')
%timeit l = cholesky_numpy_numba(A256)

print("\n   2048:", end='')
%timeit l = cholesky_numpy_numba(A2048)

4. Python - Numpy - Numba
   128:352 µs ± 17.6 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)

   256:1.79 ms ± 33.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

   2048:691 ms ± 40 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Cython

In [12]:
%load_ext Cython

In [13]:
%%cython
import numpy as np
cimport numpy as np
cimport cython

ctypedef double DTYPE_t

# Cython variant of the NumPy Cholesky loop with C-typed loop indices,
# accumulator and array buffers. Returns the factor as a float64 array
# shaped like A, filled on and below the diagonal.
# The two directives below turn off bounds checking and negative-index
# wraparound for indexing inside this function, so out-of-range indices
# are not detected.
@cython.boundscheck(False)
@cython.wraparound(False)
def cholesky_numpy_cython(np.ndarray[DTYPE_t, ndim=2]  A):
    cdef int i
    cdef int j
    cdef double s
    cdef int M = A.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] L = np.zeros_like(A, dtype=np.float64)

    for i in range(M):
        for j in range(0, i+1):
            # Dot product of the already-computed parts of rows i and j.
            # NOTE(review): the slices and np.dot presumably still go through
            # Python-level NumPy calls on every iteration - confirm with
            # `%%cython -a` if the speed-up matters.
            s = np.dot(L[i,:j],L[j,:j])
            # Diagonal entry: square root of the remaining pivot;
            # off-diagonal entry: remainder divided by the diagonal of row j.
            L[i, j] = (A[i,i]-s)**0.5 if(i==j) else ((A[i,j]-s)/L[j,j])
    return L

In [14]:
print("5. Python - Numpy - Cython", end='')

print("\n   128:", end='')
%timeit l = cholesky_numpy_cython(A128)

print("\n   256:", end='')
%timeit l = cholesky_numpy_cython(A256)

print("\n   2048:", end='')
%timeit l = cholesky_numpy_numba(A2048)

5. Python - Numpy - Cython
   128:11.6 ms ± 268 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

   256:45.8 ms ± 971 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

   2048:688 ms ± 35.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Swig

In [15]:
from _SwigMod import cholesky_swig

In [16]:
# Benchmark cholesky_swig (imported from the _SwigMod extension module)
# on the three test matrices.
print("6. CPP - SWIG", end='')

print("\n   128:", end='')
%timeit l = cholesky_swig(A128)

print("\n   256:", end='')
%timeit l = cholesky_swig(A256)

print("\n   2048:", end='')
%timeit l = cholesky_swig(A2048)

6. CPP - SWIG
   128:247 µs ± 5.87 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

   256:2.27 ms ± 75.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

   2048:1.54 s ± 7.82 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Swig - Eigen

In [17]:
from _swig_eigen_mod import cholesky_swig_eigen

In [18]:
# Benchmark cholesky_swig_eigen (imported from the _swig_eigen_mod extension
# module) on the three test matrices.
print("7. CPP - SWIG - Eigen", end='')

print("\n   128:", end='')
%timeit l = cholesky_swig_eigen(A128)

print("\n   256:", end='')
%timeit l = cholesky_swig_eigen(A256)

print("\n   2048:", end='')
%timeit l = cholesky_swig_eigen(A2048)

7. CPP - SWIG - Eigen
   128:165 µs ± 1.05 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

   256:912 µs ± 4.77 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

   2048:327 ms ± 2.39 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
