In [1]:
# Load the Cython IPython extension so the %%cython cell magic used by the
# cells below is available.
%load_ext cython

# num_threads=1

In [2]:
%%cython --force -c=/openmp
import cython
from cython.parallel import prange

# boundscheck/wraparound only affect indexing operations; the body below does
# no indexing, so these directives are kept from the original without effect.
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef double compute_pi_parallel_1(int num_steps) except? -1:
    """Approximate pi with the midpoint rule, using a 1-thread prange loop.

    Integrates 4 / (1 + x*x) over [0, 1] by sampling the midpoint of each of
    ``num_steps`` equal-width sub-intervals and summing the samples.

    Parameters
    ----------
    num_steps : int
        Number of sub-intervals; must be positive.

    Returns
    -------
    double
        The approximation ``step * sum(4 / (1 + x_i**2))``.

    Raises
    ------
    ValueError
        If ``num_steps`` is zero or negative.
    """
    cdef double step, x, pi = 0.0
    cdef int i

    # Reject inputs for which the step width is undefined (0) or for which the
    # loop would not run and a meaningless result would be returned (< 0).
    # The `except? -1` clause on the signature makes sure this error reaches
    # C-level callers too instead of being printed and discarded.
    if num_steps <= 0:
        raise ValueError("num_steps must be a positive integer")

    step = 1.0 / num_steps

    # Inside prange, `x` is assigned on every iteration (thread-private) and
    # the in-place `pi +=` is handled by Cython as a sum reduction.
    for i in prange(num_steps, nogil=True, num_threads=1):
        x = (i + 0.5) * step  # midpoint of the i-th sub-interval
        pi += 4.0 / (1.0 + x*x)

    return step * pi

In [3]:
# Benchmark the num_threads=1 variant with num_steps = 100000000.
%timeit compute_pi_parallel_1(100000000)

1.51 s ± 60.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# num_threads=4

In [4]:
%%cython --force -c=/openmp
import cython
from cython.parallel import prange

# boundscheck/wraparound only affect indexing operations; the body below does
# no indexing, so these directives are kept from the original without effect.
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef double compute_pi_parallel_4(int num_steps) except? -1:
    """Approximate pi with the midpoint rule, using a 4-thread prange loop.

    Integrates 4 / (1 + x*x) over [0, 1] by sampling the midpoint of each of
    ``num_steps`` equal-width sub-intervals and summing the samples.

    Parameters
    ----------
    num_steps : int
        Number of sub-intervals; must be positive.

    Returns
    -------
    double
        The approximation ``step * sum(4 / (1 + x_i**2))``.

    Raises
    ------
    ValueError
        If ``num_steps`` is zero or negative.
    """
    cdef double step, x, pi = 0.0
    cdef int i

    # Reject inputs for which the step width is undefined (0) or for which the
    # loop would not run and a meaningless result would be returned (< 0).
    # The `except? -1` clause on the signature makes sure this error reaches
    # C-level callers too instead of being printed and discarded.
    if num_steps <= 0:
        raise ValueError("num_steps must be a positive integer")

    step = 1.0 / num_steps

    # Inside prange, `x` is assigned on every iteration (thread-private) and
    # the in-place `pi +=` is handled by Cython as a sum reduction.
    for i in prange(num_steps, nogil=True, num_threads=4):
        x = (i + 0.5) * step  # midpoint of the i-th sub-interval
        pi += 4.0 / (1.0 + x*x)

    return step * pi

In [5]:
# Benchmark the num_threads=4 variant with num_steps = 100000000.
%timeit compute_pi_parallel_4(100000000)

464 ms ± 15.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# num_threads=6

In [6]:
%%cython --force -c=/openmp
import cython
from cython.parallel import prange

# boundscheck/wraparound only affect indexing operations; the body below does
# no indexing, so these directives are kept from the original without effect.
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef double compute_pi_parallel_6(int num_steps) except? -1:
    """Approximate pi with the midpoint rule, using a 6-thread prange loop.

    Integrates 4 / (1 + x*x) over [0, 1] by sampling the midpoint of each of
    ``num_steps`` equal-width sub-intervals and summing the samples.

    Parameters
    ----------
    num_steps : int
        Number of sub-intervals; must be positive.

    Returns
    -------
    double
        The approximation ``step * sum(4 / (1 + x_i**2))``.

    Raises
    ------
    ValueError
        If ``num_steps`` is zero or negative.
    """
    cdef double step, x, pi = 0.0
    cdef int i

    # Reject inputs for which the step width is undefined (0) or for which the
    # loop would not run and a meaningless result would be returned (< 0).
    # The `except? -1` clause on the signature makes sure this error reaches
    # C-level callers too instead of being printed and discarded.
    if num_steps <= 0:
        raise ValueError("num_steps must be a positive integer")

    step = 1.0 / num_steps

    # Inside prange, `x` is assigned on every iteration (thread-private) and
    # the in-place `pi +=` is handled by Cython as a sum reduction.
    for i in prange(num_steps, nogil=True, num_threads=6):
        x = (i + 0.5) * step  # midpoint of the i-th sub-interval
        pi += 4.0 / (1.0 + x*x)

    return step * pi

In [7]:
# Benchmark the num_threads=6 variant with num_steps = 100000000.
%timeit compute_pi_parallel_6(100000000)

322 ms ± 15.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
