From https://stackoverflow.com/questions/52673285/performance-of-pandas-apply-vs-np-vectorize-to-create-new-column-from-existing-c

In [1]:
import pandas as pd
import numpy as np
import time

def divide(a, b):
    """Return ``a / b`` as a float, or ``0.0`` when the denominator is zero.

    Parameters
    ----------
    a : number
        Numerator; converted with ``float()`` before dividing.
    b : number
        Denominator; compared against ``0`` to guard the division.

    Returns
    -------
    float
        ``float(a) / b``, or ``0.0`` if ``b == 0``.
    """
    # Single conditional expression: the zero check is evaluated first, so the
    # division is never attempted for a zero denominator.
    return 0.0 if b == 0 else float(a) / b

# A_list = np.random.randint(1, 100, N)
# B_list = np.random.randint(1, 100, N)
# df = pd.DataFrame({'A': A_list, 'B': B_list})

In [2]:
for N in [1000, 10000, 100000, 1000000]:    

    print ('')

    #create some random data, put in a dataframe
    A_list = np.random.randint(1, 100, N)
    B_list = np.random.randint(1, 100, N)
    df = pd.DataFrame({'A': A_list, 'B': B_list})
    
    %time df['result'] = df.apply(lambda row: divide(row['A'], row['B']), axis=1)
    %time df['result2'] = np.vectorize(divide)(df['A'], df['B'])

    # Make sure results from df.apply and np.vectorize match.
    assert(df['result'].equals(df['result2']))


CPU times: user 20 ms, sys: 32 µs, total: 20 ms
Wall time: 19.3 ms
CPU times: user 895 µs, sys: 230 µs, total: 1.12 ms
Wall time: 945 µs

CPU times: user 201 ms, sys: 0 ns, total: 201 ms
Wall time: 201 ms
CPU times: user 3.79 ms, sys: 0 ns, total: 3.79 ms
Wall time: 3.73 ms

CPU times: user 1.5 s, sys: 8.76 ms, total: 1.51 s
Wall time: 1.51 s
CPU times: user 51.4 ms, sys: 0 ns, total: 51.4 ms
Wall time: 51.3 ms

CPU times: user 14.3 s, sys: 129 ms, total: 14.4 s
Wall time: 14.4 s
CPU times: user 321 ms, sys: 16 ms, total: 337 ms
Wall time: 337 ms


In [6]:
np.random.seed(0)
N = 10**5

%time list(map(divide, df['A'], df['B']))                                   # 43.9 ms
%time np.vectorize(divide)(df['A'], df['B'])                                # 48.1 ms
%time [divide(a, b) for a, b in zip(df['A'], df['B'])];                     # 49.4 ms
%time [divide(a, b) for a, b in df[['A', 'B']].itertuples(index=False)]     # 112 ms

dfab=df[['A','B']]
%time dfab.apply(lambda row: divide(*row), axis=1, raw=True)                  # 760 ms
%time dfab.apply(lambda row: divide(*row), axis=1)              # 4.83 s
%time df.apply(lambda row: divide(row['A'], row['B']), axis=1)              # 4.83 s
%time [divide(row['A'], row['B']) for _, row in df[['A', 'B']].iterrows()]; # 11.6 s

CPU times: user 140 ms, sys: 18.7 ms, total: 159 ms
Wall time: 158 ms
CPU times: user 123 ms, sys: 14.8 ms, total: 138 ms
Wall time: 139 ms
CPU times: user 163 ms, sys: 9.85 ms, total: 173 ms
Wall time: 174 ms
CPU times: user 367 ms, sys: 4.99 ms, total: 372 ms
Wall time: 374 ms
CPU times: user 3.35 s, sys: 12 μs, total: 3.35 s
Wall time: 3.37 s
CPU times: user 2.96 s, sys: 40.8 ms, total: 3 s
Wall time: 3.01 s
CPU times: user 4.71 s, sys: 53.6 ms, total: 4.77 s
Wall time: 4.79 s
CPU times: user 20.9 s, sys: 24 ms, total: 21 s
Wall time: 21.1 s


In [5]:
# Baseline: plain vectorized Series division with no zero-denominator handling.
# Unlike divide(), nothing is mapped to 0 here, so a zero in B would show up as
# np.inf (or NaN for 0/0) in the result.
%time (df['A'] / df['B']);

CPU times: user 11 ms, sys: 3.96 ms, total: 15 ms
Wall time: 9.64 ms


In [4]:
# True vectorization: whole-column operations instead of one Python call per element.
# Variant 1 divides the columns, then replaces +inf / -inf with 0.
# Variant 2 uses np.where to select 0 wherever B == 0 and the quotient elsewhere.
# NOTE(review): the trailing "# .644 ms" figure is identical on both lines and does
# not match the recorded output of this cell -- treat it as a stale reference value.
%time (df['A'] / df['B']).replace([np.inf,-np.inf],0);  # .644 ms
%time np.where(df['B'] == 0, 0, df['A'] / df['B']);     # .644 ms

CPU times: user 37.5 ms, sys: 12.1 ms, total: 49.6 ms
Wall time: 47.2 ms
CPU times: user 17.7 ms, sys: 655 µs, total: 18.4 ms
Wall time: 18.4 ms


In [3]:
# Want more speedup? JIT-compile the loop to native machine code with numba.
from numba import njit

@njit
def divide(a, b):
    """Element-wise ``a / b`` over two arrays, writing 0 where ``b`` is zero.

    NOTE: this reuses the name ``divide`` of the scalar helper defined earlier
    in the notebook, so running this cell replaces that function; re-run the
    scalar definition before re-running cells that call ``divide(a, b)`` on
    single values.

    Parameters
    ----------
    a, b : array
        Indexed position by position for ``range(len(a))``; ``b`` is not
        length-checked against ``a``.

    Returns
    -------
    array
        New array allocated with ``np.empty(a.shape)`` holding the quotients.
    """
    out = np.empty(a.shape)
    for idx in range(len(a)):
        denom = b[idx]
        if denom == 0:
            # Zero denominator: store 0 instead of dividing.
            out[idx] = 0
        else:
            out[idx] = a[idx] / denom
    return out

# Time the jitted divide(); .values hands it the columns' underlying arrays
# rather than the pandas Series objects.
%timeit divide(df['A'].values, df['B'].values);  # 717 µs

3.38 ms ± 353 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
# Using @njit(parallel=True), with numba.prange in place of range for the loop, may provide a further boost for larger arrays.