# This is a test of ReLU function speeds in Python
###### Author: Mike Tamillow

In [1]:
import cProfile as profile
import numpy as np

In [2]:
def max_zero_floor(x):
    if x < 0:
        return 0
    return x

In [3]:
relu1 = lambda y: [x if x>0 else 0 for x in range(-y, y, 1)] # iterating ternary operation
relu2 = lambda y: [max(0, x) for x in range(-y, y, 1)] # iterating max operation
relu3 = lambda y: [max_zero_floor(x) for x in range(-y, y, 1)] # iterating udf
relu4 = lambda y: [np.maximum(0, x) for x in range(-y, y, 1)] # iterating numpy.maximum
relu5 = lambda y: np.maximum(0, range(-y, y, 1)) # Uses broadcasting instead of iterating
relu6 = lambda y: max(0, range(-y, y, 1)) # max can broadcast too!

In [4]:
relu1_pro = profile.runctx('relu1(10**5)', globals(), locals()) # iterating ternary operation

         4 function calls in 0.024 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.017    0.017    0.021    0.021 <ipython-input-3-144de2f1b269>:1(<lambda>)
        1    0.002    0.002    0.024    0.024 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.004    0.004    0.004    0.004 {range}




In [5]:
relu2_pro = profile.runctx('relu2(10**5)', globals(), locals()) # iterating max operation

         200004 function calls in 0.078 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.043    0.043    0.077    0.077 <ipython-input-3-144de2f1b269>:2(<lambda>)
        1    0.001    0.001    0.078    0.078 <string>:1(<module>)
   200000    0.031    0.000    0.031    0.000 {max}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.002    0.002    0.002    0.002 {range}




In [6]:
relu3_pro = profile.runctx('relu3(10**5)', globals(), locals()) # iterating udf

         200004 function calls in 0.075 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
   200000    0.025    0.000    0.025    0.000 <ipython-input-2-7fd001c5d2df>:1(max_zero_floor)
        1    0.047    0.047    0.074    0.074 <ipython-input-3-144de2f1b269>:3(<lambda>)
        1    0.001    0.001    0.075    0.075 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.002    0.002    0.002    0.002 {range}




In [7]:
relu4_pro = profile.runctx('relu4(10**5)', globals(), locals()) # iterating numpy.maximum

         4 function calls in 0.215 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.211    0.211    0.213    0.213 <ipython-input-3-144de2f1b269>:4(<lambda>)
        1    0.002    0.002    0.215    0.215 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.002    0.002    0.002    0.002 {range}




In [8]:
relu5_pro = profile.runctx('relu5(10**5)', globals(), locals()) # Uses broadcasting instead of iterating

         4 function calls in 0.016 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.014    0.014    0.016    0.016 <ipython-input-3-144de2f1b269>:5(<lambda>)
        1    0.000    0.000    0.016    0.016 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.003    0.003    0.003    0.003 {range}




In [9]:
relu6_pro = profile.runctx('relu6(10**5)', globals(), locals()) # max can broadcast too!

         5 function calls in 0.003 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.002    0.002 <ipython-input-3-144de2f1b269>:6(<lambda>)
        1    0.001    0.001    0.003    0.003 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {max}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.002    0.002    0.002    0.002 {range}




### My final assessment is that broadcasting is by far the most efficient means of running an ReLU on a list of numbers (math nerds read vector). 


### Broadcasting on the built-in max function was 5x as fast as broadcasting over numpy's maximum function. However, iterating on the ternary operation inline was 50% slower than numpy's maximum. 


### Iterating is costly, but just as costly was function call overhead, which when combined with the built-in max function, took about 3x as long to process relative to iterating on a ternary expression, and over 20x relative to broadcasting on the same max function. 


### Creating a user defined function and iterating on it was on par with iterating on the built in max. By far the slowest route was iterating over the numpy.maximum function, most likely because of the roundabout path to reach a solution.