In [1]:
# Load extension
%load_ext Cython 

In [93]:
def f(x):
    return x ** 2 - x

def integrate_f(a, b, N):
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f(a + i * dx)
    return s * dx
%timeit -n 1000 integrate_f(0,5, 10000)

1.98 ms ± 77.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [105]:
%%cython
def f(x):
    return x ** 2 - x

def integrate_f(a, b, N):
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f(a + i * dx)
    return s * dx


In [104]:
%timeit -n 1000 integrate_f(0,5, 10000)

1.24 ms ± 21.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [5]:
%%cython
def f(double x):
    return x ** 2 - x


def integrate_f(double a, double b, int N):
    cdef int i
    cdef double s, dx
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f(a + i * dx)
    return s * dx


In [6]:
%timeit -n 1000 integrate_f(0,5, 10000)

350 µs ± 13.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [97]:
%%cython --compile-args=-O3
cdef double f(double x) except? -2: # Except will check for error at -2 (? no hard error)
    return x ** 2 - x

# reminder: cdef creates a c function. cpdef creates a python wrapper for this function
#           This means, that the function is available in python and C. cdef only in C

cpdef integrate_f(double a, double b, int N):
    cdef int i
    cdef double s, dx
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f(a + i * dx)
    return s * dx


In [98]:
%timeit -n 10000 integrate_f(0,5, 10000) # Without O3 (difference 13 vs 16 mycro sec) . Removing cpdef has a runtime of 1.35 (same). Now with O3

13 µs ± 1.25 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [81]:
#%%cython?

In [108]:
%%cython --compile-args=-fopenmp  --link-args=-fopenmp -a
from cython.parallel import prange # Parallel range
cimport cython
# reminder: -a shows the code annotation. For nogil, we need pure white areas

cdef double f(double x) nogil: # Now without except. Changes amount of python code when f is called 1.26 ms
  #  with nogil:
    return x ** 2 - x

@cython.boundscheck(False)
@cython.cdivision(True)
def integrate_f(double a, double b, int N):
    if (N==0):
        print("N cannot be zero!")
        import sys; sys.exit()
    cdef int i
    cdef double s, dx
    s = 0
    dx = (b - a) / N # Here / checks for zero division in python. So gil is not possible :( Deactivate with cdivision, but be aware what is happening (N != 0)
    for i in prange(N, nogil=True):
        #if i == -1: # How to use GIL and python in nogil loops :-)
        #    with gil:
        #        raise Exception()
        s += f(a + i * dx)
    return s * dx


In [107]:
%timeit -n 10000 integrate_f(0,5, 10000) # removing cpdef has a runtime of 1.35 (same)

12.6 µs ± 1.56 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [96]:
%%cython --compile-args=-fopenmp  --link-args=-fopenmp 
from cython.parallel cimport parallel
cimport openmp

cdef int num_threads

openmp.omp_set_dynamic(1)
with nogil, parallel():
    num_threads = openmp.omp_get_num_threads()
    with gil:
        print(num_threads)


In [100]:
%%cython --compile-args=-fopenmp  --link-args=-fopenmp
cimport openmp
cdef dot_product(int* x, int* y, size_t N):
    cdef int i, result = 0
    for i in openmp.range(N, reduce="+:result"):
        result += x[i]*y[i]
    return result


Error compiling Cython file:
------------------------------------------------------------
...
cimport openmp
cdef dot_product(int* x, int* y, size_t N):
    cdef int i, result = 0
    for i in openmp.range(N, reduce="+:result"):
                  ^
------------------------------------------------------------

/home/pierre/.cache/ipython/cython/_cython_magic_9d6373d8df79f1d9bacced8e8ffe768f.pyx:4:19: cimported module has no attribute 'range'


TypeError: object of type 'NoneType' has no len()