# This is a test of ReLU function speeds in Python
###### Author: Mike Tamillow

In [1]:
import cProfile as profile
import numpy as np

In [2]:
def max_zero_floor(x):
    if x < 0:
        return 0
    return x

In [3]:
relu1 = lambda y: [x if x>0 else 0 for x in range(y)] # iterating ternary operation
relu2 = lambda y: [max(0, x) for x in range(y)] # iterating max operation
relu3 = lambda y: [max_zero_floor(x) for x in range(y)] # iterating udf
relu4 = lambda y: [np.maximum(0, x) for x in range(y)] # iterating numpy.maximum
relu5 = lambda y: np.maximum(0, range(y)) # Uses broadcasting instead of iterating
relu6 = lambda y: max(0, range(y)) # max can broadcast too!

In [10]:
relu1_pro = profile.runctx('relu1(10**5)', globals(), locals()) # iterating ternary operation

         4 function calls in 0.010 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.009    0.009    0.009    0.009 <ipython-input-3-272d4ee2a132>:1(<lambda>)
        1    0.001    0.001    0.010    0.010 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.001    0.001    0.001    0.001 {range}




In [11]:
relu2_pro = profile.runctx('relu2(10**5)', globals(), locals()) # iterating max operation

         100004 function calls in 0.044 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.024    0.024    0.041    0.041 <ipython-input-3-272d4ee2a132>:2(<lambda>)
        1    0.003    0.003    0.044    0.044 <string>:1(<module>)
   100000    0.016    0.000    0.016    0.000 {max}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.001    0.001    0.001    0.001 {range}




In [12]:
relu3_pro = profile.runctx('relu3(10**5)', globals(), locals()) # iterating udf

         100004 function calls in 0.042 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
   100000    0.014    0.000    0.014    0.000 <ipython-input-2-7fd001c5d2df>:1(max_zero_floor)
        1    0.026    0.026    0.041    0.041 <ipython-input-3-272d4ee2a132>:3(<lambda>)
        1    0.001    0.001    0.042    0.042 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.001    0.001    0.001    0.001 {range}




In [13]:
relu4_pro = profile.runctx('relu4(10**5)', globals(), locals()) # iterating numpy.maximum

         4 function calls in 0.113 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.111    0.111    0.112    0.112 <ipython-input-3-272d4ee2a132>:4(<lambda>)
        1    0.001    0.001    0.113    0.113 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.001    0.001    0.001    0.001 {range}




In [14]:
relu5_pro = profile.runctx('relu5(10**5)', globals(), locals()) # Uses broadcasting instead of iterating

         4 function calls in 0.008 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.007    0.007    0.008    0.008 <ipython-input-3-272d4ee2a132>:5(<lambda>)
        1    0.000    0.000    0.008    0.008 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.001    0.001    0.001    0.001 {range}




In [15]:
relu6_pro = profile.runctx('relu6(10**5)', globals(), locals()) # max can broadcast too!

         5 function calls in 0.002 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.001    0.001 <ipython-input-3-272d4ee2a132>:6(<lambda>)
        1    0.001    0.001    0.002    0.002 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {max}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.001    0.001    0.001    0.001 {range}




### My final assessment is that broadcasting is by far the most efficient means of running an ReLU on a list of numbers (math nerds read vector). 


### Broadcasting on the built-in max function was 4x as fast as broadcasting over numpy's maximum function. However, iterating on the ternary operation inline was just slightly slower than numpy's maximum. 


### Iterating is costly, but just as costly was function call overhead, which when combined with the built-in max function, took over 4x as long to process relative to iterating on a ternary expression, and over 20x relative to broadcasting on the same max function. 


### Creating a user defined function and iterating on it was on par with iterating on the built in max. By far the slowest route was iterating over the numpy.maximum function, most likely because of the roundabout path to reach a solution.