## Step 0: Python: Benchmarking

In [1]:
import numpy as np

def f(x):
    return x ** 2 - x

def integrate_f(a, b, N):
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f(a + i * dx)
    return s * dx

def apply_integrate_f(col_a, col_b, col_N):
    n = len(col_N)
    res = np.empty(n,dtype=np.float64)
    for i in range(n):
        res[i] = integrate_f(col_a[i], col_b[i], col_N[i])
    return res

In [2]:
import pandas as pd

df = pd.DataFrame({"a": np.random.randn(1000),
                  "b": np.random.randn(1000),
                  "N": np.random.randint(100, 1000, (1000))})

df

Unnamed: 0,a,b,N
0,1.385363,0.913368,978
1,-0.652428,-0.608931,632
2,0.625848,-0.037070,736
3,-0.268273,0.363193,491
4,-0.075692,0.704845,208
...,...,...,...
995,0.514227,0.832500,378
996,-2.176724,-0.201893,687
997,0.490345,-1.063400,512
998,0.212604,0.076931,585


In [3]:
%timeit apply_integrate_f(df['a'], df['b'], df['N'])

198 ms ± 2.23 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Step 1: Cython: Benchmarking

In [4]:
# import Cython extension
%load_ext cython

In [5]:
%%cython -a

import numpy as np
import pandas as pd

def f_cython(x):
    return x * (x - 1)

def integrate_f_cython(a, b, N):
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f_cython(a + i * dx)
    return s * dx

def apply_integrate_f_cython(col_a, col_b, col_N):
    n = len(col_N)
    res = np.empty(n,dtype=np.float64)
    for i in range(n):
        res[i] = integrate_f_cython(col_a[i], col_b[i], col_N[i])
    return res

In [6]:
%timeit apply_integrate_f_cython(df['a'], df['b'], df['N'])

137 ms ± 1.66 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Step 2: Cython: Adding data type annotation to input variables

In [7]:
%%cython -a

import numpy as np
import pandas as pd

def f_cython_dtype0(double x):
    return x ** 2 - x

def integrate_f_cython_dtype0(double a, double b, long N):   
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f_cython_dtype0(a + i * dx)
    return s * dx

def apply_integrate_f_cython_dtype0(double[:] col_a, double[:] col_b, long[:] col_N):  
    n = len(col_N)
    res = np.empty(n,dtype=np.float64)
    for i in range(n):
        res[i] = integrate_f_cython_dtype0(col_a[i], col_b[i], col_N[i])
    return res

In [8]:
# this will not work
#%timeit apply_integrate_f_cython_dtype0(df['a'], df['b'], df['N'])

# this command works (see the description below)
%timeit apply_integrate_f_cython_dtype0(df['a'].to_numpy(), df['b'].to_numpy(), df['N'].to_numpy())

63.9 ms ± 477 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Step 3: Cython: Adding data type annotation to functions

In [9]:
%%cython -a

import numpy as np
import pandas as pd

cdef f_cython_dtype1(double x):
    return x ** 2 - x

cpdef integrate_f_cython_dtype1(double a, double b, long N):   
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f_cython_dtype1(a + i * dx)
    return s * dx

cpdef apply_integrate_f_cython_dtype1(double[:] col_a, double[:] col_b, long[:] col_N):
    n = len(col_N)
    res = np.empty(n,dtype=np.float64)
    for i in range(n):
        res[i] = integrate_f_cython_dtype1(col_a[i], col_b[i], col_N[i])
    return res

In [10]:
%timeit apply_integrate_f_cython_dtype1(df['a'].to_numpy(), df['b'].to_numpy(), df['N'].to_numpy())

56.3 ms ± 492 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Step 4: Cython: Adding data type annotation to local variables

In [11]:
%%cython -a

import numpy as np
import pandas as pd

cdef double f_cython_dtype2(double x):
    return x ** 2 - x

cpdef double integrate_f_cython_dtype2(double a, double b, long N):   
    cdef double s, dx
    cdef long i
    
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f_cython_dtype2(a + i * dx)
    return s * dx

cpdef double[:] apply_integrate_f_cython_dtype2(double[:] col_a, double[:] col_b, long[:] col_N):
    cdef long n,i
    cdef double[:] res
    
    n = len(col_N)
    res = np.empty(n,dtype=np.float64)
    for i in range(n):
        res[i] = integrate_f_cython_dtype2(col_a[i], col_b[i], col_N[i])
    return res

In [12]:
%timeit apply_integrate_f_cython_dtype2(df['a'].to_numpy(), df['b'].to_numpy(), df['N'].to_numpy())

14 ms ± 30.5 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
