In [1]:
# We previously saw how NumPy's ufuncs can be used to vectorize operations and thereby remove slow Python loops
# Another way to vectorize operations is to use NumPy's broadcasting functionality
# Broadcasting is simply a set of rules for applying binary ufuncs (such as addition) to arrays of different sizes
# Recall that for arrays of the same size, binary operations are performed on an element-by-element basis
import numpy as np
a = np.array([0, 1, 2])
b = np.array([5, 5, 5])
a + b

array([5, 6, 7])

In [2]:
# Broadcasting allows these kinds of binary operations to be performed on arrays of different sizes
# Example: adding a scalar to an array
a + 5

array([5, 6, 7])

In [3]:
# Conceptually, the previous operation stretches/duplicates the value 5 into the array [5, 5, 5] and then adds the two arrays
# The advantage of NumPy's broadcasting is that this duplication of values does not actually take place; it is only a mental model
# We can extend this to arrays of higher dimension as well, such as adding a 1D array to a 2D array
M = np.ones((3, 3))
M

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [4]:
# Add the 1D array a to the 2D array M (shapes (3,) and (3, 3), assuming the cells above were run in order)
M + a

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [5]:
# In the previous result, the 1D array a was stretched (broadcast) along the first axis, i.e. repeated once per row, to match the shape of M
# That case is fairly simple to follow, but things get more involved when BOTH arrays need to be broadcast
a = np.arange(3)
b = np.arange(3)[:, np.newaxis]

# Show the 1D array a (shape (3,)) followed by the column array b (shape (3, 1))
print(a, b, sep="\n")

[0 1 2]
[[0]
 [1]
 [2]]


In [6]:
# a has shape (3,) and b has shape (3, 1), so both operands are broadcast and the sum has shape (3, 3)
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [None]:
# Here, BOTH arrays are broadcast to match a common shape
# The following rules make it easier to understand how this works
# Rule 1: If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is padded with ones on its leading (left) side
# Rule 2: If the shapes of the two arrays do not match in some dimension, the array whose size is 1 in that dimension is stretched to match the other shape
# Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is raised

In [7]:
# Let's make the rules clear with examples
# Let's add a 2D array to a 1D array
M = np.ones((2, 3))
a = np.arange(3)
# Consider an operation on these two arrays; their shapes are:
# M.shape = (2, 3)
# a.shape = (3,)
# By rule 1, array a has fewer dimensions, so we pad its shape on the left with ones
# M.shape = (2, 3)
# a.shape = (1, 3)
# By rule 2, we see that the first dimension disagrees, so we stretch the size-1 dimension of a to match
# M.shape = (2, 3)
# a.shape = (2, 3)
# The shapes now match, so the final shape is (2, 3)
M + a

array([[1., 2., 3.],
       [1., 2., 3.]])

In [8]:
# Let's look at an example where BOTH arrays need to be broadcast
a = np.arange(3).reshape((3, 1))
b = np.arange(3)
# Let's write out the shapes:
# a.shape = (3, 1)
# b.shape = (3,)
# Rule 1 says we pad the shape of b on the left with ones
# a.shape = (3, 1)
# b.shape = (1, 3)
# Rule 2 says we stretch each of these size-1 dimensions to match the corresponding size of the other array
# a.shape = (3, 3)
# b.shape = (3, 3)
# The shapes now match, so the arrays are compatible and the result has shape (3, 3)
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [9]:
# Let's look at an example where the two arrays are NOT compatible
M = np.ones((3, 2))
a = np.arange(3)
# Shapes:
# M.shape = (3, 2)
# a.shape = (3,)
# Rule 1 says we pad the shape of a with ones on the left
# M.shape = (3, 2)
# a.shape = (1, 3)
# Rule 2 says the first dimension of a (size 1) is stretched to match that of M:
# M.shape = (3, 2)
# a.shape = (3, 3)
# Now we hit rule 3: the last dimension still disagrees (2 vs 3) and neither size is 1,
# so these arrays are incompatible and NumPy raises a ValueError
# The error itself is the demonstration, so we catch it and display its message;
# this keeps the notebook runnable from top to bottom instead of halting on this cell
try:
    M + a
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: operands could not be broadcast together with shapes (3,2) (3,) 

In [None]:
# Note that these broadcasting rules apply to ANY binary ufunc

In [None]:
# There are many different applications of broadcasting; let's take a look at 2 examples
# Previously we saw that ufuncs allow a NumPy user to remove the need to explicitly write slow Python loops
# Broadcasting extends this ability