# Enhancing Performance (Official Documentation)

[Original Material](https://pandas.pydata.org/pandas-docs/stable/enhancingperf.html)

An interesting guide on how to speed up pandas code by writing C extensions with Cython.



In [2]:
import pandas as pd
import numpy as np

%load_ext line_profiler
%load_ext Cython

In [4]:
# Seed a local generator so every "Restart & Run All" benchmarks the same data.
# RandomState keeps the legacy randn/randint API, so the column dtypes are the
# same as with the unseeded np.random.randn / np.random.randint calls.
N_ROWS = 1000
rng = np.random.RandomState(42)
df = pd.DataFrame({'a': rng.randn(N_ROWS),
                   'b': rng.randn(N_ROWS),
                   'N': rng.randint(100, 1000, N_ROWS),  # upper bound is exclusive: 100..999
                   'x': 'x'})

NameError: name 'integrate_f' is not defined

### Target functions of our optimization

In [10]:
def f(x):
    """Integrand: return x * (x - 1)."""
    shifted = x - 1
    return x * shifted

def integrate_f(a, b, N):
    """Approximate the integral of ``f`` over [a, b] with a left Riemann sum.

    The interval is cut into ``N`` equal steps; ``f`` is evaluated at the left
    edge of each step and the accumulated sum is scaled by the step width.
    ``N`` is handed to ``range``, so it must be a single integer.
    """
    step = (b - a) / N
    total = 0
    for k in range(N):
        left_edge = a + k * step
        total += f(left_edge)
    return total * step

In [4]:
%%timeit 

df.apply(lambda x: integrate_f(x['a'], x['b'], x['N']), axis=1)

179 ms ± 3.51 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Pandas Vectorization

In [11]:
%%timeit 
# NOTE: integrate_f is written for scalars. Its range(N) needs a single integer,
# so passing whole Series here raises the TypeError shown below.
integrate_f(df['a'], df['b'], df['N'])

TypeError: only integer scalar arrays can be converted to a scalar index

#### Profiling the function

In [5]:
%lprun -f df.apply(lambda x: integrate_f(x['a'], x['b'], x['N']), axis=1)

UsageError: Could not find function 'df.apply(lambda'.
SyntaxError: unexpected EOF while parsing (<string>, line 1)


### Plain Cython

Simply copy the pure-Python functions unchanged into a `%%cython` cell so that Cython compiles them.

In [9]:
%%cython
def f_plain(x):
    """Integrand: return x * (x - 1). Untyped code, compiled by Cython."""
    shifted = x - 1
    return x * shifted

def integrate_f_plain(a, b, N):
    """Left Riemann sum of ``f_plain`` over [a, b] using ``N`` equal steps.

    No type declarations are added; this is the plain Python code placed in a
    Cython cell. ``N`` is handed to ``range``, so it must be a single integer.
    """
    step = (b - a) / N
    total = 0
    for k in range(N):
        left_edge = a + k * step
        total += f_plain(left_edge)
    return total * step

In [10]:
%timeit df.apply(lambda x: integrate_f_plain(x['a'], x['b'], x['N']), axis=1)

107 ms ± 3.29 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


### Adding Types

In [11]:
%%cython
# Integrand x * (x - 1) as a C-level function. `except? -2` marks -2 as a
# possible error return, so a raised exception can still reach the caller.
cdef double f_typed(double x) except? -2:
    cdef double shifted = x - 1
    return x * shifted

cpdef double integrate_f_typed(double a, double b, int N):
    """Left Riemann sum of ``f_typed`` over [a, b] using ``N`` equal steps.

    The bounds, step width and running total are C doubles and the loop
    counter is a C int.
    """
    cdef int k
    cdef double step = (b - a) / N
    cdef double total = 0
    for k in range(N):
        total += f_typed(a + k * step)
    return total * step

In [12]:
%timeit df.apply(lambda x: integrate_f_typed(x['a'], x['b'], x['N']), axis=1)

32.1 ms ± 1.74 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
%prun -l 4 df.apply(lambda x: integrate_f_typed(x['a'], x['b'], x['N']), axis=1)

 

### Using ND-Arrays

In [15]:
%%cython
cimport numpy as np
import numpy as np

# Integrand x * (x - 1) as a C-level function. `except? -2` marks -2 as a
# possible error return, so a raised exception can still reach the caller.
cdef double f_typed(double x) except? -2:
    cdef double shifted = x - 1
    return x * shifted

cpdef double integrate_f_typed(double a, double b, int N):
    """Left Riemann sum of ``f_typed`` over [a, b] using ``N`` equal steps.

    The bounds, step width and running total are C doubles and the loop
    counter is a C int.
    """
    cdef int k
    cdef double step = (b - a) / N
    cdef double total = 0
    for k in range(N):
        total += f_typed(a + k * step)
    return total * step

cpdef np.ndarray[double] apply_integrate_f(np.ndarray col_a, np.ndarray col_b, np.ndarray col_N):
    """Run ``integrate_f_typed`` over three equally long arrays, element by element.

    Parameters
    ----------
    col_a, col_b : np.ndarray of float64
        Lower and upper integration bounds.
    col_N : np.ndarray of the default integer dtype
        Number of steps for each integration.

    Returns
    -------
    np.ndarray of double with one result per input position.

    Raises
    ------
    AssertionError
        If a dtype is not the expected one or the lengths differ.
    """
    # np.float and np.int (aliases of the builtins) were removed in NumPy 1.24;
    # compare against the dtypes those aliases resolved to instead.
    assert (col_a.dtype == np.float64 and col_b.dtype == np.float64 and col_N.dtype == np.dtype(int))
    cdef Py_ssize_t i, n = len(col_N)
    assert (len(col_a) == len(col_b) == n)
    cdef np.ndarray[double] res = np.empty(n)
    # All three lengths equal n (asserted above), so loop over n directly.
    for i in range(n):
        res[i] = integrate_f_typed(col_a[i], col_b[i], col_N[i])
    return res

In [16]:
# .values passes the columns as the NumPy arrays that apply_integrate_f is declared to take.
%timeit apply_integrate_f(df['a'].values, df['b'].values, df['N'].values)

1.3 ms ± 92.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [17]:
# Profile the array-based version; -l 4 limits the printed report to four lines.
%prun -l 4 apply_integrate_f(df['a'].values, df['b'].values, df['N'].values)

 

### More advanced Techniques

Removing the bounds checks (and negative-index wraparound) on array indexing
- This risks a segmentation fault if an index is ever out of range

In my tests I saw no significant improvement from these techniques.

In [24]:
%%cython
cimport cython
cimport numpy as np
import numpy as np

# Integrand x * (x - 1) as a C-level function. `except? -2` marks -2 as a
# possible error return, so a raised exception can still reach the caller.
cdef double f_typed(double x) except? -2:
    cdef double shifted = x - 1
    return x * shifted

cpdef double integrate_f_typed(double a, double b, int N):
    """Left Riemann sum of ``f_typed`` over [a, b] using ``N`` equal steps.

    The bounds, step width and running total are C doubles and the loop
    counter is a C int.
    """
    cdef int k
    cdef double step = (b - a) / N
    cdef double total = 0
    for k in range(N):
        total += f_typed(a + k * step)
    return total * step

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef np.ndarray[double] apply_integrate_f_wrap(np.ndarray[double] col_a, np.ndarray[double] col_b, np.ndarray[int] col_N):
    """Element-wise ``integrate_f_typed`` over typed array buffers.

    Bounds checking and negative-index wraparound are switched off by the
    decorators, so the length assertion below is the only guard against
    reading past the end of an input array.

    ``col_a`` and ``col_b`` hold the integration bounds as doubles and
    ``col_N`` holds the step counts as C ints. Returns a new array of doubles
    with one result per position.
    """
    cdef int row, size = len(col_N)
    assert len(col_a) == len(col_b) == size
    cdef np.ndarray[double] out = np.empty(size)
    for row in range(size):
        out[row] = integrate_f_typed(col_a[row], col_b[row], col_N[row])
    return out

In [25]:
%timeit apply_integrate_f(df['a'].values, df['b'].values, df['N'].values)

1.25 ms ± 25.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
