# NumPy

This notebook presents [NumPy](https://numpy.org), the main scientific package of Python (see also [SciPy](https://docs.scipy.org/doc/scipy/reference/index.html)).<br> NumPy allows very fast operations on vectors and arrays.

The outline is the following:
* Arrays
* Copy
* Shaping
* Concatenation
* Indexing & slicing
* Splitting
* Operations
* Broadcasting
* Boolean masks
* Advanced indexing

For further reference, check the [Python Data Science Handbook](https://jakevdp.github.io/PythonDataScienceHandbook/index.html).

In [1]:
import numpy as np

## Arrays

In [23]:
# from a list
a = np.array([0, 4, 2])
b = np.array([1, 2, 5.])
c = np.array(["alberton", "barbara", "carol"])

In [24]:
a

array([0, 4, 2])

In [25]:
len(a)

3

In [26]:
print(a.dtype)

int32


In [27]:
b

array([1., 2., 5.])

In [28]:
print(b.dtype)

float64


In [29]:
c

array(['alberton', 'barbara', 'carol'], dtype='<U8')

In [30]:
print(c.dtype)

<U8


In [31]:
# be careful with arrays of strings!
# c has the fixed-width dtype <U8, so a longer string is silently truncated to 8 characters
c[0] = "abracadabra"

In [32]:
c

array(['abracada', 'barbara', 'carol'], dtype='<U8')

In [33]:
c[0]

'abracada'

In [34]:
# specify type
c = np.array(["albert", "barbara", "carol"], dtype='<U32')

In [35]:
c[0] = "abracadabra"

In [36]:
c

array(['abracadabra', 'barbara', 'carol'], dtype='<U32')

In [37]:
# mixing types
d = np.array([1., 'albert', [2, 3, 4]], dtype=object)

In [38]:
d

array([1.0, 'albert', list([2, 3, 4])], dtype=object)

In [39]:
for element in d:
    print(type(element))

<class 'float'>
<class 'str'>
<class 'list'>


In [40]:
# init arrays
a = np.zeros(10)
b = np.ones(5)
c = -np.ones(3)

In [41]:
a

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [42]:
b

array([1., 1., 1., 1., 1.])

In [43]:
c

array([-1., -1., -1.])

In [44]:
# specify type
a = np.zeros(10, dtype=int)

In [45]:
a

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [46]:
a.dtype

dtype('int32')

In [47]:
# bytes
a = np.zeros(10, dtype=np.uint8)

In [48]:
# 1000 does not fit in a uint8: cast explicitly so that the value wraps modulo 256.
# (Assigning the out-of-range Python int directly raises OverflowError on NumPy >= 2.0.)
a[0] = np.array(1000).astype(np.uint8)

In [49]:
a

array([232,   0,   0,   0,   0,   0,   0,   0,   0,   0], dtype=uint8)

In [52]:
1000 % 256

232

In [53]:
# memory in bytes
a.nbytes

10

In [54]:
b = np.zeros(10, dtype=int)

In [55]:
b.nbytes

40

In [56]:
# 2-D, 3-D, ...
a = np.zeros((3, 4))
b = np.ones((3, 4, 5))
c = np.zeros(tuple(4 * [2]))

In [57]:
print(a)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [58]:
a.shape

(3, 4)

In [59]:
a.ndim

2

In [60]:
a.size

12

In [61]:
print(b)

[[[1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]]

 [[1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]]

 [[1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]]]


In [62]:
b.shape

(3, 4, 5)

In [63]:
b.ndim

3

In [64]:
b.size

60

In [65]:
print(c)

[[[[0. 0.]
   [0. 0.]]

  [[0. 0.]
   [0. 0.]]]


 [[[0. 0.]
   [0. 0.]]

  [[0. 0.]
   [0. 0.]]]]


In [66]:
c.shape

(2, 2, 2, 2)

In [67]:
c.ndim

4

In [68]:
c.size

16

In [69]:
# vectors are 1-D arrays
a = np.zeros(4)
a.shape

(4,)

In [70]:
a.ndim

1

In [71]:
# note the difference!
b = np.zeros((1, 4))

In [72]:
b

array([[0., 0., 0., 0.]])

In [73]:
b.shape

(1, 4)

In [74]:
b = np.zeros((4, 1))

In [75]:
b

array([[0.],
       [0.],
       [0.],
       [0.]])

In [76]:
b.shape

(4, 1)

In [77]:
# empty array
a = np.array([])
a.shape

(0,)

In [78]:
len(a)

0

In [79]:
a.ndim

1

In [80]:
b = np.ones(0)
b.shape

(0,)

In [81]:
len(b)

0

In [82]:
b.ndim

1

In [83]:
b = np.ones((0, 0))
b.shape

(0, 0)

In [84]:
len(b)

0

In [85]:
b.ndim

2

In [86]:
# from a list of lists (each inner list becomes one row)
a = np.array([[3, 1, 4], [1, 5, 9]])

In [87]:
a

array([[3, 1, 4],
       [1, 5, 9]])

In [88]:
len(a)

2

In [89]:
len(np.zeros((4, 5)))

4

In [90]:
# range of integers
a = np.arange(10)
b = np.arange(5, 10)
c = np.arange(5, 15, 2)
d = np.arange(10, 0, -1)

In [91]:
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [92]:
b

array([5, 6, 7, 8, 9])

In [93]:
c

array([ 5,  7,  9, 11, 13])

In [94]:
d

array([10,  9,  8,  7,  6,  5,  4,  3,  2,  1])

In [98]:
# linear spacing
a = np.linspace(0, 1, 11)

In [99]:
a

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [100]:
len(a)

11

In [105]:
# Exercise: Build a matrix of size 4 x 5 whose entry (i, j) is i * (j+1)
M = np.zeros((4, 5))
for i, row in enumerate(M):
    # `row` is a view on the i-th row of M, so writing to it fills M itself
    for j in range(row.size):
        row[j] = i * (j + 1)
M

array([[ 0.,  0.,  0.,  0.,  0.],
       [ 1.,  2.,  3.,  4.,  5.],
       [ 2.,  4.,  6.,  8., 10.],
       [ 3.,  6.,  9., 12., 15.]])

## Copy

In [106]:
# assignment: b is just another name for the same array (no data is copied)
a = np.arange(10)
b = a

In [107]:
b

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [108]:
a[0] = -1

In [109]:
b

array([-1,  1,  2,  3,  4,  5,  6,  7,  8,  9])

In [110]:
# copy
a = np.arange(10)
b = a.copy()

In [111]:
b

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [112]:
a[0] = -1

In [113]:
b

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [117]:
# Exercise: What is a[0]?
a = np.ones(5)
b = a           # b refers to the same array as a (no copy)
c = a.copy()    # c is an independent copy
b[0] += 1       # also changes a[0]: 1 -> 2
c[0] += 1       # changes only c[0]: 1 -> 2
a[0] += c[0]    # 2 + 2
# a[0] = 4
a[0]

4.0

## Shaping

In [118]:
# from 1D to 2D
a = np.arange(1, 10)
b = a.reshape(3, 3)
print(b)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [119]:
shape = (3, 3)
c = a.reshape(shape)
print(c)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [120]:
d = a.reshape(3, -1)
print(d)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [121]:
# from 2D to 1D
print(b)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [125]:
b.reshape(9,)

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [126]:
b

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [127]:
b.reshape(-1,)

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [128]:
b.flatten()

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [129]:
# from 1D to 3D
a = np.arange(24)
b = a.reshape(3, 2, 4)

In [130]:
print(b)

[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]

 [[16 17 18 19]
  [20 21 22 23]]]


In [131]:
a = np.arange(24)
c = a.reshape(3, 2, -1)

In [132]:
print(c)

[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]

 [[16 17 18 19]
  [20 21 22 23]]]


In [133]:
a = np.arange(24)
d = a.reshape(3, -1, 4)

In [134]:
print(d)

[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]

 [[16 17 18 19]
  [20 21 22 23]]]


In [135]:
# from 2D to 2D
a = np.arange(10)
b = a.reshape(2, 5)

In [136]:
print(b)

[[0 1 2 3 4]
 [5 6 7 8 9]]


In [137]:
c = b.reshape(5, 2)

In [138]:
print(c)

[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]


In [139]:
# from 1D to 2D (single row or column)
a = np.ones(4, dtype=int)

In [140]:
b = a.reshape(1, 4)
b.shape

(1, 4)

In [141]:
b = a.reshape(1, -1)
b.shape

(1, 4)

In [142]:
b = a.reshape(-1, 1)
b.shape

(4, 1)

In [143]:
# new axis
a = np.ones(4)
a.shape

(4,)

In [144]:
b = a[:, np.newaxis]
b.shape

(4, 1)

In [145]:
b = a[np.newaxis, :]
b.shape

(1, 4)

In [146]:
# more compact
b = a[np.newaxis]
b.shape

(1, 4)

In [147]:
# adding multiple axes
a = np.ones((4, 3))
a.shape

(4, 3)

In [148]:
b = a[np.newaxis, :, np.newaxis, :]
b.shape

(1, 4, 1, 3)

In [149]:
# from 1D to 1D
a = np.arange(5)
print(a)

[0 1 2 3 4]


In [154]:
np.insert(a, 2, -1)

array([ 0,  1, -1,  2,  3,  4])

In [155]:
np.delete(a, 3)

array([0, 1, 2, 4])

In [156]:
# from 2D to 2D
a = np.arange(10).reshape(2, -1)
print(a)

[[0 1 2 3 4]
 [5 6 7 8 9]]


In [157]:
np.insert(a, 1, -1, axis=0)

array([[ 0,  1,  2,  3,  4],
       [-1, -1, -1, -1, -1],
       [ 5,  6,  7,  8,  9]])

In [158]:
np.insert(a, 3, -1, axis=1)

array([[ 0,  1,  2, -1,  3,  4],
       [ 5,  6,  7, -1,  8,  9]])

In [159]:
np.delete(a, 3, axis=1)

array([[0, 1, 2, 4],
       [5, 6, 7, 9]])

In [162]:
# Exercise: What is a[1, 1, 1]? --> 7
a = np.arange(8).reshape(2, 2, -1)
print(a)
a[1, 1, 1]

[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]


7

## Concatenation

In [163]:
# vectors
a = np.ones(4)
b = np.zeros(5)
c = np.arange(6)

In [164]:
np.concatenate([a, b])

array([1., 1., 1., 1., 0., 0., 0., 0., 0.])

In [165]:
np.concatenate([a, b, c])

array([1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 1., 2., 3., 4., 5.])

In [166]:
# 2D arrays
a = np.ones((3, 5))
b = np.zeros((2, 5))
c = np.ones((3, 5))
d = np.zeros((3, 4))

In [167]:
# concatenation = along first axis
np.concatenate([a, b])

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [168]:
np.concatenate([a, b, c])

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [171]:
# equivalent
np.vstack([a, b, c])

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [172]:
# concatenation along second axis
np.concatenate([a, d], axis=1)

array([[1., 1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 0., 0., 0., 0.]])

In [173]:
np.concatenate([a, d, c], axis=1)

array([[1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 1.]])

In [174]:
# equivalent
np.hstack([a, d, c])

array([[1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 1.]])

In [175]:
# Mix of 1D and 2D
a = np.ones((3, 5))
b = np.arange(5)
c = np.zeros((2, 5))

In [176]:
np.vstack([a, b])

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [0., 1., 2., 3., 4.]])

In [177]:
np.vstack([a, b, c])

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [0., 1., 2., 3., 4.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [180]:
a = np.ones((3, 4))
b = np.arange(3)
c = np.zeros((3, 5))

In [181]:
# need to reshape the vector for horizontal stack!
np.hstack([a, b.reshape(3, 1)])

array([[1., 1., 1., 1., 0.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 2.]])

In [208]:
# Exercise: Build a matrix of size 3 x 5 whose columns are all [2, 5, 8].
# Turn the vector into a single column (shape (3, 1)), then stack 5 copies side by side.
v = np.array([2, 5, 8])[:, np.newaxis]
M = np.hstack(5 * [v])
M

array([[2, 2, 2, 2, 2],
       [5, 5, 5, 5, 5],
       [8, 8, 8, 8, 8]])

## Indexing & slicing

In [209]:
# vector
a = np.arange(10)
print(a)

[0 1 2 3 4 5 6 7 8 9]


In [210]:
# indexing
for i in [1, -1, -2]:
    print(a[i])

1
9
8


In [211]:
# slicing
for i in [0, 1, -1, -2, 10, 20, -20]:
    print(a[i:])

[0 1 2 3 4 5 6 7 8 9]
[1 2 3 4 5 6 7 8 9]
[9]
[8 9]
[]
[]
[0 1 2 3 4 5 6 7 8 9]


In [212]:
for i in [0, 1, -1, -2, 10, 20, -20]:
    print(a[:i])

[]
[0]
[0 1 2 3 4 5 6 7 8]
[0 1 2 3 4 5 6 7]
[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]
[]


In [214]:
for i, j in [(2, 5), (5, 2), (0, -1), (4, None)]:
    print(a[i:j])

[2 3 4]
[]
[0 1 2 3 4 5 6 7 8]
[4 5 6 7 8 9]


In [215]:
a[1:8:2]

array([1, 3, 5, 7])

In [216]:
a[1::2]

array([1, 3, 5, 7, 9])

In [217]:
a[:8:2]

array([0, 2, 4, 6])

In [218]:
a[::2]

array([0, 2, 4, 6, 8])

In [219]:
a[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [220]:
a[::-2]

array([9, 7, 5, 3, 1])

In [221]:
a[5:2:-2]

array([5, 3])

In [222]:
a[:2:-2]

array([9, 7, 5, 3])

In [223]:
# 2D array
a = np.arange(20).reshape(4, -1)

In [224]:
print(a)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]


In [225]:
# indexing
for i, j in [(0, 0), (1, 3), (-1, 0), (-1, -1), (-2, -2)]:
    print(a[i, j])

0
8
15
19
13


In [226]:
# row indexing
a[1]

array([5, 6, 7, 8, 9])

In [227]:
# column indexing
a[:, 2]

array([ 2,  7, 12, 17])

In [228]:
# row slicing
a[1:4]

array([[ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [229]:
# column slicing
a[:, 2:4]

array([[ 2,  3],
       [ 7,  8],
       [12, 13],
       [17, 18]])

In [230]:
# subarray
a[1:3, 2:4]

array([[ 7,  8],
       [12, 13]])

In [231]:
# be careful with copy
# slicing returns a view on a, not a copy: writing to b also modifies a
b = a[1:3, 2:4]
b[0, 0] = -1
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6, -1,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [235]:
# Exercise: what is a[1::2, -2:]? [[6, 7], [14, 15]]
a = np.arange(16).reshape(4, -1)
print(a)
a[1::2, -2:]

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


array([[ 6,  7],
       [14, 15]])

## Splitting

In [None]:
a = np.arange(20).reshape(4, -1)
print(a)

In [None]:
# vertical split 
np.split(a, 2)

In [None]:
np.split(a, [1, 3])

In [None]:
np.split(a, [3, 1])

In [None]:
# horizontal split 
np.split(a, [2], axis=1)

In [None]:
np.split(a, [1, 3], axis=1)

In [None]:
np.array_split(a, 3, axis=1)

## Operations

In [None]:
a = np.arange(1, 10)

In [None]:
b = 1 / a
c = a + 1
d = np.cos(a)
e = a * b

In [None]:
# test performance
a = np.arange(1, 10**6) 

In [None]:
%%time
b = np.zeros(a.shape)
for i in range(len(a)):
    b[i] = 1 / a[i]

In [None]:
%%time
b = 1 / a

In [None]:
# aggregations: each function reduces the whole vector to a single value
a = np.arange(10)
print(a)
aggregators = (np.sum, np.max, np.argmax, np.mean, np.std, np.median)
# one result per line, in the order listed above
print(*(aggregate(a) for aggregate in aggregators), sep="\n")

In [None]:
# max vs maximum
a = np.arange(10)
b = np.flip(a)
np.maximum(a, b)

In [None]:
np.max(a)

In [None]:
# cumulative sum
print(a)
np.cumsum(a)

In [None]:
# equivalent
np.add.accumulate(a)

In [None]:
# same with maximum
# shuffle in place with a seeded generator so that the outputs below are reproducible
np.random.default_rng(0).shuffle(a)
print(a)
np.maximum.accumulate(a)

In [None]:
# specific ranges
print(a)
np.maximum.reduceat(a, [0, 3, 6])

In [None]:
# sliding window
print(a)
# sums over windows of 3 consecutive entries; with the default mode='full' the
# result also contains the partial windows at both ends (len(a) + 2 values)
b = np.convolve(a, [1, 1, 1])
print(b)

In [None]:
# count unique elements
a = [0, 0, 1, 1, 1, 3]
np.unique(a, return_counts=True)

In [None]:
# 2D
a = np.arange(20).reshape(4, 5)

In [None]:
b = a + 1
c = a * b
d = a.mean(axis=1)

In [None]:
# matrix-vector multiplication
u = np.ones(5)
a.dot(u)

In [None]:
# left multiplication
v = np.ones(4)
a.T.dot(v)

In [None]:
# matrix-matrix multiplication
b = np.arange(10).reshape(5, 2)
a.dot(b)

In [None]:
# left multiplication by reshaping
v[np.newaxis].dot(a)

In [None]:
# outer product
a = np.arange(4)
b = np.arange(5)
np.outer(a, b)

In [None]:
# Exercise: Using outer, build a matrix of size 3 x 5 whose columns are all [2, 5, 8].

## Broadcasting

In [None]:
# simple example
a = np.arange(5)
a + 5

In [None]:
# more interesting case
a = np.arange(5)
b = np.arange(3).reshape(-1, 1)
a + b

In [None]:
# centering along axis 0
a = np.arange(20) % 7
a = a.reshape(4, 5)
print(a)

In [None]:
a - a.mean(axis=0)

In [None]:
# centering along axis 1
a - a.mean(axis=1).reshape(-1, 1)

In [None]:
# Exercise: Using broadcasting, build a matrix of size 3 x 5 whose columns are all [2, 5, 8].

## Boolean masks

In [None]:
a = np.arange(5)
print(a)

In [None]:
# mask
b = a > 2
print(b)

In [None]:
# count
np.sum(a > 2)

In [None]:
# indices
np.where(a > 2)[0]

In [None]:
# comparison
np.all(a > 2)

In [None]:
b = 2 * a
np.all(b >= a)

In [None]:
np.all(a == b)

In [None]:
np.any(a > 2)

In [None]:
# extraction
a[a > 2]

In [None]:
b = a**2
b[a > 2]

In [None]:
# multiple conditions (element-wise AND; the parentheses are required around each comparison)
a[(a > 0) & (b < 5)]

In [None]:
# negation
a[~((a > 0) & (b < 5))]

In [None]:
# 2D
a = np.arange(20).reshape(4, 5)
print(a)

In [None]:
a <= 7

In [None]:
a[a <= 7]

In [None]:
np.where(a <= 7)

In [None]:
# Exercise: Count the number of values in each interval [0, 0.5), [0.5, 0.75), [0.75, 1)
# Seeded generator: the 100 samples (uniform in [0, 1)) are identical on every run.
values = np.random.default_rng(0).random(100)
thresholds = np.array([0, 0.5, 0.75, 1])

## Advanced indexing

In [None]:
a = np.arange(1, 20, 2)
print(a)

In [None]:
b = [0, 3, 0, 2]

In [None]:
a[b]

In [None]:
# sequential assignment!
# index 0 appears twice in b, so a[0] is written twice and keeps the last value assigned (2)
a[b] = np.arange(4)

In [None]:
a[b]

In [None]:
# 2D
a = np.arange(12).reshape(3, 4)
print(a)

In [None]:
# element-wise
row = [0, 1, 2]
col = [0, 1, 3]
a[row, col]

In [None]:
# sub-matrix
row = np.array([0, 2])
col = np.array([0, 1, 3])
a[row[:, np.newaxis], col]

In [None]:
# mask
row = a.sum(axis=1) < 20
col = a.sum(axis=0) > 12
# NOTE: two boolean masks are converted to index arrays and paired element-wise
# (this is not a sub-matrix selection). It works here only because `row` has a
# single True, which broadcasts against the three True entries of `col`.
a[row, col]

In [None]:
# mixing index and mask
row = np.array([0, 2])
col = np.sum(a, axis=0) > 12
a[row[:, np.newaxis], col]

In [None]:
# Exercise: Predict the word...
a = np.array(list("abracadabra"))
index = np.argwhere(a == "a").flatten()
word = ''.join(list(a[(index[:-1] + 1)]))