In [9]:
import functools
import sys
import time

import numpy as np
import pandas as pd

# Test and measure different ways of applying a custom function to a numpy array

In [10]:
def _statit(f):
  """ simple stat decorator

      prints time taken and the returned object size
  """
  def timed(*args, **kw):
    time_start = time.time()
    result = f(*args, **kw)
    time_end = time.time()

    time_taken = (time_end - time_start) * 1000
    result_size = sys.getsizeof(result)

    print('{:35s}:{:2.2f} ms.\n\t result size: {} bytes\n\n'.
      format(f.__name__, time_taken, result_size))

    return result

  return timed


In [11]:
def mapit(i):
  """Return the entry stored under key ``i`` in the module-level ``mappings``.

  This is deliberately a named function rather than a lambda, to keep the
  code readable for people who are not used to lambdas; the equivalent
  ``lambda i: mappings[i]`` is perfectly fine too.

  For a binary mapping no function is needed at all, an inline expression
  such as "'red' if i == 0 else 'green'" does the job.
  """
  label = mappings[i]
  return label

In [17]:
@_statit
def mapping_with_list_comprehension(y):
  """Pass each element of ``y`` through ``mapit`` with a list comprehension."""
  return [mapit(value) for value in y]

@_statit
def mapping_with_map_function(y):
  """Pass each element of ``y`` through ``mapit`` with the built-in ``map``."""
  lazy_labels = map(mapit, y)
  # map() is lazy; materialise it so the work happens inside the timed call.
  return list(lazy_labels)

@_statit
def mapping_with_npvectorize(y):
  """Pass ``y`` through ``mapit`` using ``np.vectorize``.

  The ``np.vectorize`` wrapper is built inside the timed call, so its
  construction time is part of the measurement.
  """
  # Declaring the output type up front (the values in `mappings` are strings)
  # lets this work on an empty `y`, where np.vectorize cannot infer the type,
  # and avoids the extra probing call on the first element.
  vectorized = np.vectorize(mapit, otypes=[str])
  return vectorized(y)

@_statit
def mapping_with_series_map(y):
  """Wrap ``y`` in a pandas Series and translate it with ``Series.map``.

  The module-level ``mappings`` dict is handed to ``map`` directly, so
  ``mapit`` is not involved in this variant.
  """
  return pd.Series(y).map(mappings)

@_statit
def mapping_with_series_apply(y):
  """Wrap ``y`` in a pandas Series and run ``mapit`` on it via ``Series.apply``."""
  return pd.Series(y).apply(mapit)


In [19]:
# Lookup table shared by every mapping variant: integer code -> colour name.
mappings = {
    0: 'red',
    1: 'green',
    2: 'blue'
}

ARRAY_SIZE = 1_000_000
SEED = 42  # fixed so every run benchmarks the same input array

# Draw the codes from the keys of `mappings` so the input always stays in
# sync with the lookup table.
rng = np.random.default_rng(SEED)
y = rng.choice(list(mappings), size=ARRAY_SIZE)

# Each call prints its own timing / size line via the _statit decorator.
mapping_with_list_comprehension(y)
mapping_with_map_function(y)
mapping_with_npvectorize(y)
mapping_with_series_map(y)
mapping_with_series_apply(y)

mapping_with_list_comprehension    :392.78 ms.
	 result size: 8697472 bytes


mapping_with_map_function          :305.02 ms.
	 result size: 8250176 bytes


mapping_with_npvectorize           :174.47 ms.
	 result size: 20000096 bytes


mapping_with_series_map            :18.73 ms.
	 result size: 80997835 bytes


mapping_with_series_apply          :120.22 ms.
	 result size: 80997835 bytes




0           red
1         green
2           red
3          blue
4          blue
          ...  
999995     blue
999996      red
999997     blue
999998    green
999999     blue
Length: 1000000, dtype: object