# NumPy broadcasting

In [19]:
import numpy as np

In [20]:
# Two 1-D integer arrays with the same shape, (3,).
a = np.array([0, 1, 2])
b = np.full(a.shape, 5)

In [21]:
# Both operands have shape (3,), so this is plain element-wise addition.
a + b

array([5, 6, 7])

In [22]:
# 3x3 matrix filled with floating-point ones.
M = np.ones((3, 3), dtype=float)

In [23]:
# M has shape (3, 3) and a has shape (3,): a is broadcast across each row of M.
M + a

array([[ 1.,  2.,  3.],
       [ 1.,  2.,  3.],
       [ 1.,  2.,  3.]])

In [24]:
# The values 0..2 twice: once as a 1-D array of shape (3,),
# once as a 2-D column of shape (3, 1).
a = np.arange(3)
b = np.arange(3).reshape(3, 1)

In [25]:
# 1-D array with shape (3,).
a

array([0, 1, 2])

In [26]:
# 2-D column with shape (3, 1).
b

array([[0],
       [1],
       [2]])

In [27]:
# Shapes (3,) and (3, 1) broadcast to (3, 3); entry [i, j] is b[i, 0] + a[j].
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [28]:
# Multiplication broadcasts like any other element-wise operator:
# entry [i, j] is b[i, 0] * a[j].
a * b

array([[0, 0, 0],
       [0, 1, 2],
       [0, 2, 4]])

### Rules of broadcasting

<ul>
<li><b>Rule 1</b>: If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is <i>padded</i> with ones on its leading (left) side.</li>

<li><b>Rule 2</b>: If the shapes of the two arrays do not match in some dimension, the array whose size is 1 in that dimension is stretched to match the size of the other array.</li>

<li><b>Rule 3</b>: If in any dimension the sizes disagree and neither is equal to 1, an error is raised.</li> 
</ul>

### Centering an array with broadcasting

In [29]:
# 10x3 array of floats drawn uniformly from [0, 1).
# NOTE: no seed is set in the visible cells, so these values change on every run.
X = np.random.random_sample(size=(10, 3))

In [30]:
# Display all 10 rows and 3 columns.
X

array([[ 0.76517555,  0.52675915,  0.70453876],
       [ 0.0040741 ,  0.87626947,  0.84579996],
       [ 0.39374281,  0.36818714,  0.27562228],
       [ 0.38715921,  0.34144891,  0.04076278],
       [ 0.67701085,  0.51964104,  0.040641  ],
       [ 0.68829718,  0.81144729,  0.01043936],
       [ 0.6389074 ,  0.55277319,  0.69241166],
       [ 0.88364677,  0.7703855 ,  0.50770049],
       [ 0.85288009,  0.7494011 ,  0.10311742],
       [ 0.46099756,  0.14545464,  0.15305121]])

In [46]:
# axis=0 averages down the rows, leaving one mean per column.
Xmean = X.mean(axis=0)

In [36]:
# One mean per column of X, so the result has shape (3,).
Xmean

array([ 0.57518915,  0.56617674,  0.33740849])

In [40]:
# X has shape (10, 3) and Xmean has shape (3,), so Xmean is broadcast across
# every row: each column has its own mean subtracted.
X_centered = X - Xmean

In [41]:
# Same shape as X; each column has had its mean subtracted.
X_centered

array([[ 0.1899864 , -0.03941759,  0.36713027],
       [-0.57111505,  0.31009273,  0.50839147],
       [-0.18144634, -0.1979896 , -0.06178621],
       [-0.18802994, -0.22472784, -0.29664571],
       [ 0.1018217 , -0.0465357 , -0.29676749],
       [ 0.11310803,  0.24527055, -0.32696914],
       [ 0.06371825, -0.01340355,  0.35500317],
       [ 0.30845762,  0.20420876,  0.170292  ],
       [ 0.27769094,  0.18322436, -0.23429107],
       [-0.11419159, -0.42072211, -0.18435728]])

In [45]:
# Each column mean is now zero, up to floating-point round-off.
X_centered.mean(axis=0)

array([ -5.55111512e-17,   2.22044605e-17,   3.33066907e-17])

### Masking

In [57]:
# RandomState is backed by the Mersenne Twister pseudo-random number generator
# (https://en.wikipedia.org/wiki/Mersenne_Twister). Seeding it with 0 makes the
# values drawn from it repeatable.
rng = np.random.RandomState(seed=0)

In [58]:
# 3x4 array of integers drawn from 0 through 9 (the upper bound, 10, is exclusive).
x = rng.randint(0, 10, size=(3, 4))

In [61]:
# 3x4 integer array.
x

array([[5, 0, 3, 3],
       [7, 9, 3, 5],
       [2, 4, 7, 6]])

In [62]:
# Element-wise comparison: yields a boolean array with the same shape as x.
x > 3

array([[ True, False, False, False],
       [ True,  True, False,  True],
       [False,  True,  True,  True]], dtype=bool)

In [64]:
# Indexing with a boolean mask returns a 1-D array of the entries where the mask is True.
x[x > 3]

array([5, 7, 9, 5, 4, 7, 6])

In [65]:
# Count the entries that are less than 6.
np.count_nonzero(x < 6)

8

In [67]:
# False counts as 0 and True as 1, so summing the mask with np.sum counts the entries less than 6.
np.sum(x < 6)

8

In [68]:
# np.sum accepts the axis keyword: axis=1 counts the entries less than 6 within each row.
np.sum(x < 6, axis=1)

array([4, 2, 2])

In [69]:
# Is at least one entry greater than 8?
np.any(x > 8)

True

In [70]:
# Is every entry greater than 8?
np.all(x > 8)

False

In [73]:
# For each row, is every entry greater than 2?
np.all(x > 2, axis=1)

array([False,  True, False], dtype=bool)