In [1]:
%load_ext Cython 


In [3]:
%%cython --compile-args=-O3 
cimport cython
import numpy as np



# Fused (generic) element type: Cython generates one specialization of every
# function that uses ``my_type`` for each C type listed here.
ctypedef fused my_type:
    int
    float
    double

cpdef c_run_eig(my_type [:,::1] matrix):
    """Run ``np.linalg.eig`` on a C-contiguous 2-D buffer.

    Declared ``cpdef`` (not ``cdef``) so that the function is also visible
    from Python; a plain ``cdef`` function cannot be called from another
    notebook cell such as the ``%timeit`` benchmark.

    The division / bounds-check / wraparound compiler directives were removed
    because this body performs no division or indexing of its own: all the
    work happens inside NumPy.

    Parameters
    ----------
    matrix : C-contiguous 2-D buffer of int, float or double

    Returns
    -------
    Whatever ``np.linalg.eig`` returns for the matrix (eigenvalues and
    eigenvectors).
    """
    # Wrap the typed memoryview as an ndarray explicitly (no copy is made)
    # instead of relying on NumPy's implicit buffer conversion.
    return np.linalg.eig(np.asarray(matrix))

# ``np.float`` was only an alias of the builtin ``float`` and was removed in
# NumPy 1.24; ``np.float64`` names the same dtype and matches the C ``double``
# element type of the memoryview.
cdef double [:,::1] mymat =  np.ones((100,100), dtype=np.float64)


In [None]:
%timeit -n 1000 c_run_eig(np.ones((100,100)))

In [None]:
import numpy as np
%timeit -n 1000 np.linalg.eig(np.ones((100,100)))

In [3]:
%%cython --compile-args=-fopenmp  --link-args=-fopenmp -a
# distutils: language=c++

from cython.parallel cimport prange # Parallel range
from libc.math cimport sin 
    
cimport cython
import numpy as np


cdef extern from "math.h" nogil:
    double sqrt(double m)

@cython.cdivision(True)  # C division semantics: no zero-divisor check (author measured ~10% speedup)
@cython.boundscheck(False)  # author's note: made no measurable difference
cdef double c_tsi(double x, int N) nogil:
    # Scale x by 1 / (N + 1); callable without holding the GIL.
    return x / (N + 1)

@cython.boundscheck(False)  # i, j, z stay within [0, N), the exact extents of B2
@cython.wraparound(False)   # no negative indices are used
cpdef double [:,:,:] get_B_two(int N):
    """Build the N x N x N tensor of Euclidean norms of scaled grid indices.

    Entry ``[i, j, z]`` holds
    ``sqrt(c_tsi(i, N)**2 + c_tsi(j, N)**2 + c_tsi(z, N)**2)``.
    The outermost loop is distributed with ``prange`` and runs without the GIL.

    Parameters
    ----------
    N : int
        Extent of every axis of the result.

    Returns
    -------
    double[:, :, :]
        Typed memoryview over a freshly allocated float64 array; wrap it with
        ``np.asarray`` to get an ndarray.
    """
    cdef int i, j, z
    # Plain assignments inside the prange body make these thread-private.
    cdef double sq_i, sq_j
    # ``np.float`` was removed in NumPy 1.24; float64 is the dtype that
    # matches the ``double`` memoryview.
    cdef double [:,:,:] B2 = np.zeros((N, N, N), dtype=np.float64)

    for i in prange(N, nogil=True):
        sq_i = c_tsi(i, N)**2          # invariant over j and z
        for j in range(N):
            sq_j = c_tsi(j, N)**2      # invariant over z
            for z in range(N):
                B2[i,j,z] = sqrt(sq_i + sq_j + c_tsi(z, N)**2)
    return B2

In [5]:
%timeit np.asarray(get_B_two(100)) # author's timing notes: 197 ms with the GIL, 57 ms without; 27.5 ms for the version above, 8 ms with memoryviews (labels partly ambiguous - TODO confirm)

2.04 ms ± 189 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [None]:
import numba as nb
@nb.jit
def tsi(x, N):
    """Scale ``x`` by ``1 / (N + 1)``."""
    denominator = N + 1
    return x / denominator

@nb.jit  # was a bare ``nb.jit`` expression (a no-op), so the function was never compiled
def gen_B_two(N):
    """Reference implementation of the N x N x N tensor of scaled index norms.

    Entry ``[i, j, z]`` is the Euclidean norm of ``tsi([i, j, z], N)``,
    i.e. of the index vector divided by ``N + 1``.

    Parameters
    ----------
    N : int
        Extent of every axis of the result.

    Returns
    -------
    ndarray of shape (N, N, N)
    """
    # NOTE(review): compiling ``np.linalg.norm`` with numba presumably needs
    # SciPy to be installed - confirm in the target environment.
    B2 = np.zeros((N, N, N))
    for i in range(N):
        for j in range(N):
            for z in range(N):
                B2[i, j, z] = np.linalg.norm(
                    tsi(np.array([i, j, z]), N))
    return B2

In [None]:
%timeit gen_B_two(100)# speedup 1000x 6.48s;5.48

In [None]:
%%cython --compile-args=-fopenmp  --link-args=-fopenmp -a
from cython.parallel cimport prange # Parallel range
from libc.math cimport sin 
import tensorly as tl
cimport cython
import numpy as np


def mode_n_multiplication(double[:, :, :]tensor, double[:, :] matrix, int mode=0):
    """Compute the mode-n product between a matrix and a 3-way tensor.

    Parameters
    ----------
    tensor : tl.tensor or ndarray with ndim=3
    matrix : ndarray with ndim=2
        Its second dimension must equal ``tensor.shape[mode]``.
    mode : int
        Axis of ``tensor`` to multiply along; must be 0, 1 or 2.

    Returns
    -------
    The result of ``tl.fold``: the tensor with axis ``mode`` replaced by
    ``matrix.shape[0]``.

    Raises
    ------
    ValueError
        If ``mode`` is not 0, 1 or 2, or if the dimensions do not match.
    """
    # Inside Cython, ``.shape`` of a typed memoryview is a fixed-size C array,
    # so indexing it with an out-of-range ``mode`` is unchecked. Validate first.
    if mode < 0 or mode > 2:
        raise ValueError("mode must be 0, 1 or 2 for a 3-way tensor, got {0}".format(mode))

    if matrix.shape[1] != tensor.shape[mode]:
        raise ValueError("Dimensions for mode multiplication were wrong! Tensor: {0}, Matrix: {1}".format(
            str(tensor.shape[mode]), str(matrix.shape[1])))

    # ``list(tensor.shape)`` would return all 8 slots of the C shape array
    # (zero padded), not the 3 real extents, so pick the three real ones.
    new_shape = [tensor.shape[0], tensor.shape[1], tensor.shape[2]]
    new_shape[mode] = matrix.shape[0]

    # Hand real ndarrays (zero-copy views of the buffers) to NumPy / tensorly.
    out = np.dot(np.asarray(matrix), tl.unfold(np.asarray(tensor), mode))
    return tl.fold(out, mode, new_shape)

In [None]:
import numexpr
import numpy as np

In [None]:
a = np.random.uniform(1,10,(10000,10000))


In [None]:
%timeit numexpr.evaluate("a**2")

In [None]:
%timeit np.power(a,2)

In [13]:
%%cython --compile-args=-fopenmp  --link-args=-fopenmp -a
from cython.parallel cimport prange # Parallel range
from libc.math cimport sin 
cimport cython

cdef extern from "math.h" nogil:
    double sqrt(double m)

@cython.boundscheck(False)  # safe: the loop limits are validated against tensor.shape below
@cython.wraparound(False)   # indices are never negative
cpdef double frobenius_norm(double[:, :, :] tensor, N=None) except? -1:
    """Compute the Frobenius norm of a 3-way tensor.

    Parameters
    ----------
    tensor : tl.tensor or ndarray with ndim=3 (float64)
    N : int, optional
        Number of leading entries along every axis to include, i.e. the
        ``N x N x N`` corner of the tensor. When omitted, the full extent of
        each axis is used, so non-cubic tensors are supported.

    Returns
    -------
    double
        Square root of the sum of squared entries.

    Raises
    ------
    ValueError
        If ``N`` is larger than any dimension of ``tensor``. Bounds checking
        is disabled, so this would otherwise read out of bounds.
    """
    cdef Py_ssize_t i, j, z
    cdef Py_ssize_t n0 = tensor.shape[0]
    cdef Py_ssize_t n1 = tensor.shape[1]
    cdef Py_ssize_t n2 = tensor.shape[2]
    cdef Py_ssize_t limit
    cdef double frob_norm = 0

    if N is not None:
        limit = N
        if limit > n0 or limit > n1 or limit > n2:
            raise ValueError(
                "N={0} exceeds the tensor dimensions ({1}, {2}, {3})".format(
                    limit, n0, n1, n2))
        n0 = limit
        n1 = limit
        n2 = limit

    for i in prange(n0, nogil=True):
        for j in range(n1):
            for z in range(n2):
                # In-place ``+=`` inside prange is compiled as an OpenMP "+" reduction.
                frob_norm += tensor[i, j, z] * tensor[i, j, z]

    return sqrt(frob_norm)

In [10]:
import numpy as np 
tensor = np.random.uniform(1,10,(600,600,600))
N = 600

In [11]:
%timeit frobenius_norm(tensor, N)#77 without, 25 with prange

73.4 ms ± 2.53 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [12]:
%timeit np.sqrt(np.sum(np.square(tensor)))

666 ms ± 2.98 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
np.allclose(frobenius_norm(tensor, N),np.sqrt(np.sum(np.square(tensor))))

In [None]:
np.sqrt(np.sum(np.square(tensor)))

In [None]:
frobenius_norm(tensor, N)