In [1]:
import numpy as np
import scipy
import scipy.sparse

In [2]:
# Dense matrix
A_numpy = np.random.uniform(-200000,200000, size = (100,200))
B_numpy = np.random.uniform(-200000,200000, size = (200,300))
A_list = list(A_numpy)
B_list = list(B_numpy)
A_sparse = scipy.sparse.lil_matrix(A_numpy)
B_sparse = scipy.sparse.lil_matrix(B_numpy)

# Sparse matrix (linked list sparse matrix)
C_numpy = np.zeros((300,300))
D_numpy = np.zeros((300,300))

C_numpy[0, :100] = np.random.rand(100)
C_numpy[1, 100:200] = C_numpy[0, :100]
np.fill_diagonal(C_numpy, np.random.rand(1000))

D_numpy[100, 100:300] = np.random.rand(200)
D_numpy[100:200, 200] = np.random.rand(100)
np.fill_diagonal(D_numpy, np.random.rand(1000))

C_sparse = scipy.sparse.lil_matrix(C_numpy)
D_sparse = scipy.sparse.lil_matrix(D_numpy)

C_list = list(C_numpy)
D_list = list(D_numpy)

# Sparse matrix (large)
E_numpy = np.zeros((10000,10000))
F_numpy = np.zeros((10000,10000))

E_numpy[0, :100] = np.random.rand(100)
E_numpy[1, 100:200] = E_numpy[0, :100]
np.fill_diagonal(E_numpy, np.random.rand(1000))

F_numpy[100, 100:300] = np.random.rand(200)
F_numpy[100:200, 200] = np.random.rand(100)
np.fill_diagonal(F_numpy, np.random.rand(1000))

E_sparse = scipy.sparse.lil_matrix(E_numpy)
F_sparse = scipy.sparse.lil_matrix(F_numpy)

In [3]:
def dot_elementwise(matA, matB):
    """
    2DarrayA.dot(2DarrayB)
    """
    result = []

    for i in xrange(len(matA)):
        thisrow = []
        for j in xrange(len(matB[0])):
            element = 0
            for k in xrange(len(matB)):
                element += matA[i][k] * matB[k][j]
            thisrow.append(element)
        result.append(thisrow)

    return result

In [4]:
%load_ext cython

In [5]:
%%cython
cimport cython
import numpy as np
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef dot_cython(double [:, :] A, double [:, :] B):
    cdef int A_r = A.shape[0]
    cdef int A_c = A.shape[1]
    cdef int B_c = B.shape[1]
    cdef int i,j,k
    cdef double [:, :] out = np.zeros((A_r, B_c), dtype = np.float64)
        
    for i in xrange(A_r):
        for j in xrange(B_c):
            for k in xrange(A_c):
                out[i,j] += A[i,k]*B[k,j]
                
    return np.asarray(out)

In [6]:
%%cython
cimport cython
from cython.parallel import prange
import numpy as np
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef dot_cython_parallel(double [:, :] A, double [:, :] B):
    cdef int A_r = A.shape[0]
    cdef int A_c = A.shape[1]
    cdef int B_c = B.shape[1]
    cdef int i,j,k
    cdef double [:, :] out = np.zeros((A_r, B_c), dtype = np.float64)
    
    for k in xrange(A_c):
        for i in prange(A_r, nogil=True):
            for j in prange(B_c):
                out[i,j] += A[i,k]*B[k,j]
                
    return np.asarray(out)

Dense Matrix:

In [11]:
%%timeit
result_dense_elementwise = dot_elementwise(A_list, B_list)

1 loops, best of 3: 2.53 s per loop


In [12]:
%%timeit
result_dense_cython = dot_cython(A_numpy, B_numpy)

100 loops, best of 3: 7.51 ms per loop


In [13]:
%%timeit
result_dense_cython_parallel = dot_cython_parallel(A_numpy, B_numpy)

100 loops, best of 3: 5.27 ms per loop


In [19]:
%%timeit
result_dense_numpy = A_numpy.dot(B_numpy)

1000 loops, best of 3: 213 µs per loop


In [15]:
%%timeit
result_dense_scipysparse = A_sparse.dot(B_sparse)

10 loops, best of 3: 21.7 ms per loop


Sparse Matrix

In [16]:
%%timeit
result_dense_elementwise2 = dot_elementwise(C_list, D_list)

1 loops, best of 3: 11.5 s per loop


In [17]:
%%timeit
result_dense_cython2 = dot_cython(C_numpy, D_numpy)

10 loops, best of 3: 33.9 ms per loop


In [18]:
%%timeit
result_dense_cython_parallel2 = dot_cython_parallel(C_numpy, D_numpy)

10 loops, best of 3: 25 ms per loop


In [20]:
%%timeit
result_dense_numpy2 = C_numpy.dot(D_numpy)

1000 loops, best of 3: 934 µs per loop


In [21]:
%%timeit
result_dense_scipysparse2 = C_sparse.dot(D_sparse)

1000 loops, best of 3: 734 µs per loop


Large Sparse

In [8]:
%%time
result_dense_numpy3 = E_numpy.dot(F_numpy)

CPU times: user 53.5 s, sys: 1.24 s, total: 54.7 s
Wall time: 28.2 s


In [9]:
%%timeit
result_dense_scipysparse3 = E_sparse.dot(F_sparse)

100 loops, best of 3: 10.9 ms per loop


---