In [None]:
# So far we have been discussing the basics of NumPy; now we will focus on computation on NumPy arrays
# Computation on NumPy arrays can be fast or slow; the key to making it fast is to use vectorized operations through NumPy's universal functions (ufuncs)
# This section motivates using these universal functions, which make repeated calculations on array elements much more efficient
# We will also go over the most common/useful arithmetic ufuncs available in the NumPy package

In [1]:
# Python's lack of performance usually shows when many small operations are being repeated
# Ex. Looping over arrays to operate on each element
# Ex. We have an array of values and we'd like to compute the reciprocal (1 divided by the value) of each one
# One approach can be like below:
import numpy as np
np.random.seed(0)

def compute_reciprocals(values):
    """Return the reciprocal (1 / v) of every element of ``values``.

    This is intentionally an explicit Python-level loop: it is the slow
    baseline that the vectorized expression ``1.0 / values`` is compared
    against later in the notebook.

    Parameters
    ----------
    values : sequence supporting ``len()`` and integer indexing
        The numbers to invert (e.g. a 1-D NumPy array or a list).

    Returns
    -------
    numpy.ndarray
        1-D array of length ``len(values)`` in NumPy's default float dtype,
        where element ``i`` is ``1.0 / values[i]``.
    """
    count = len(values)
    # np.empty leaves the buffer uninitialized; every slot is filled below.
    reciprocals = np.empty(count)
    for position in range(count):
        reciprocals[position] = 1.0 / values[position]
    return reciprocals

values = np.random.randint(1, 10, size=5)
compute_reciprocals(values)

array([0.16666667, 1.        , 0.25      , 0.25      , 0.125     ])

In [2]:
# Above looks normal for someone in Java or C
# But measuring the execution time of this code for a large input tells us it's pretty slow
big_array = np.random.randint(1, 100, size=1000000)
%timeit compute_reciprocals(big_array)

1 loop, best of 3: 3.45 s per loop


In [None]:
# As we can see, it takes several seconds to compute a million operations and store the result
# The bottleneck is not the operations themselves, but the type-checking and function dispatches that CPython must do at each cycle of the loop
# Each time reciprocal is computed, Python examines object's type and then does a dynamic lookup of the correct function to use for that type

In [3]:
# NumPy provides a convenient interface into just this kind of statically typed, compiled routine
# This is known as a vectorized operation: you simply perform an operation on the array, and it is then applied to each element
# This approach is designed to push the loop into the compiled layer that underlies NumPy
# Let's compare the 2 results below
print(compute_reciprocals(values))
print(1.0 / values)

[0.16666667 1.         0.25       0.25       0.125     ]
[0.16666667 1.         0.25       0.25       0.125     ]


In [4]:
%timeit (1.0 / big_array)

100 loops, best of 3: 6.58 ms per loop


In [5]:
# As we can see, the vectorized operation is MUCH faster than the explicit Python loop we wrote
# Vectorized operations in NumPy are implemented via ufuncs
# ufuncs main purpose is to quickly execute repeated operations on values in NumPy arrays
# We did operations with a scalar and an array, but we can operate between two arrays as well
np.arange(5) / np.arange(1, 6)

array([0.        , 0.5       , 0.66666667, 0.75      , 0.8       ])

In [6]:
# ufunc operations not limited to 1D arrays, can act on multi-D arrays
x = np.arange(9).reshape((3, 3))
2 ** x

array([[  1,   2,   4],
       [  8,  16,  32],
       [ 64, 128, 256]], dtype=int32)

In [None]:
# Computations using vectorization through ufuncs are almost always more efficient than Python loops
# Any time a loop is used, consider if it can be replaced with a vectorized expression

In [8]:
# 2 types of ufuncs: unary ufuncs (which operate on single input) and binary ufuncs (which operate on two inputs)
# NumPy ufuncs feel natural to use because they are invoked through Python's native arithmetic operators
# Addition, subtraction, multiplication, and division can all be used
x = np.arange(4)
print("x     =", x)
print("x + 5 =", x + 5)
print("x - 5 =", x - 5)
print("x * 2 =", x * 2)
print("x / 2 =", x / 2)
print("x // 2 =", x // 2) # floor division

x     = [0 1 2 3]
x + 5 = [5 6 7 8]
x - 5 = [-5 -4 -3 -2]
x * 2 = [0 2 4 6]
x / 2 = [0.  0.5 1.  1.5]
x // 2 = [0 0 1 1]


In [9]:
# Negation is a unary ufunc; exponentiation (**) and modulus (%) are binary ufuncs
print("-x     = ", -x)
print("x ** 2 = ", x ** 2)
print("x % 2  = ", x % 2)

-x     =  [ 0 -1 -2 -3]
x ** 2 =  [0 1 4 9]
x % 2  =  [0 1 0 1]


In [10]:
# Can be combined together in any way, standard order of operations is still respected
-(0.5*x + 1) ** 2

array([-1.  , -2.25, -4.  , -6.25])

In [11]:
# All arithmetic operators are wrappers around specific functions built into NumPy
# Ex. + is wrapper for np.add
# There is also an absolute value function: Python's built-in abs() on a NumPy array dispatches to the np.absolute ufunc
x = np.array([-2, -1, 0, 1, 2])
abs(x)

array([2, 1, 0, 1, 2])

In [13]:
# NumPy also has trigonometric ufuncs
# We will put 3 evenly spaced numbers between 0 and pi (inclusive) into 'theta'
theta = np.linspace(0, np.pi, 3)
print("theta     ", theta) 
print("sin(theta) = ", np.sin(theta)) 
print("cos(theta) = ", np.cos(theta)) 
print("tan(theta) = ", np.tan(theta)) 

theta      [0.         1.57079633 3.14159265]
sin(theta) =  [0.0000000e+00 1.0000000e+00 1.2246468e-16]
cos(theta) =  [ 1.000000e+00  6.123234e-17 -1.000000e+00]
tan(theta) =  [ 0.00000000e+00  1.63312394e+16 -1.22464680e-16]


In [14]:
# Exponential NumPy ufuncs
x = [1, 2, 3]
print("x     = ", x)
print("e^x   = ", np.exp(x))
print("2^x   = ", np.exp2(x))
print("3^x   = ", np.power(3, x))

x     =  [1, 2, 3]
e^x   =  [ 2.71828183  7.3890561  20.08553692]
2^x   =  [2. 4. 8.]
3^x   =  [ 3  9 27]


In [None]:
# There are many more NumPy ufuncs available which can be looked up
# Another good source for more specialized functions is through scipy.special
# It has many specialized functions (gamma function, integral of the Gaussian, etc.)

In [15]:
# There are some specialized FEATURES that come with ufuncs
# For large calculations, it is useful to specify the array where the result of the calculation will be stored
# Instead of using a separate temp array, you can use this to write computation results directly to the memory location where you would like them to be
# This is implemented through the 'out' argument
x = np.arange(5)
y = np.empty(5)
np.multiply(x, 10, out=y)
print(y)

[ 0. 10. 20. 30. 40.]


In [16]:
# This can also be used with array views
# Ex. we can write results of computation to every other element of specified array
y = np.zeros(10)
np.power(2, x, out=y[::2])
print(y)

[ 1.  0.  2.  0.  4.  0.  8.  0. 16.  0.]


In [None]:
# If we wrote y[::2] = 2 ** x, it would result in creation of temp array to hold results of 2 ** x, followed by second operation copying those values into array y
# This might not make much of a difference for small computations, but the memory savings are significant for large arrays

In [17]:
# For binary ufuncs, some interesting aggregates can be computed 
# Ex. if we want to reduce an array with particular operation, we can use reduce method of any ufunc
# reduce repeatedly applies given operation to elements of array until only a single result remains
x = np.arange(1, 6)
np.add.reduce(x)

15

In [18]:
# So above, we created an array of ints from 1 to 5, added up all the elements, and output that single result
# Calling reduce on the multiply ufunc works the same way, giving the product of all the elements
np.multiply.reduce(x)

120

In [19]:
# If we want to store all intermediate results of computations (result after each computation), we use 'accumulate'
np.add.accumulate(x)

array([ 1,  3,  6, 10, 15], dtype=int32)

In [21]:
# Any ufunc can also compute output of all pairs of two different inputs
# This is done using 'outer' method and allows, in one line, to do things like create a multiplication table
x = np.arange(1, 6)
np.multiply.outer(x, x)

array([[ 1,  2,  3,  4,  5],
       [ 2,  4,  6,  8, 10],
       [ 3,  6,  9, 12, 15],
       [ 4,  8, 12, 16, 20],
       [ 5, 10, 15, 20, 25]])