[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SeoulTechPSE/CompThinking/blob/master/01_numpy.ipynb)

# 01. Numpy: Vectors, Matrices and Multidimensional Arrays

Creator: Robert Johansson, Modifier: Kee-Youn Yoo

Updated source code listings for Numerical Python - A Practical Techniques Approach for Industry (ISBN 978-1-484205-54-9)

 [<img src="https://numpy.org/doc/stable/_static/numpylogo.svg" width="500">](https://numpy.org/)

In [1]:
import numpy as np

## The Numpy array object

In [2]:
data = np.array([[1, 2], [3, 4], [5, 6]])

In [3]:
type(data)

numpy.ndarray

In [4]:
data

array([[1, 2],
       [3, 4],
       [5, 6]])

In [5]:
data.ndim

2

In [6]:
data.shape

(3, 2)

In [7]:
data.size

6

In [8]:
data.dtype

dtype('int64')

In [9]:
data.nbytes

48

## Data types

In [10]:
d0 = np.array([1, 2, 3], dtype=int)

In [11]:
d1 = np.array([1, 2, 3], dtype=float)

In [12]:
d1.dtype

dtype('float64')

In [13]:
d2 = np.array([1, 2, 3], dtype=complex)

In [14]:
d2.dtype

dtype('complex128')

### Type casting

In [15]:
data = np.array([1, 2, 3], dtype=float)

In [16]:
data.dtype

dtype('float64')

In [17]:
data

array([1., 2., 3.])

In [18]:
data = np.array(data, dtype=int)

In [19]:
data.dtype

dtype('int64')

In [20]:
data

array([1, 2, 3])

In [23]:
data = np.array([1.6, 2, 3], dtype=float)

In [24]:
data.astype(int)

array([1, 2, 3])

### Type promotion

In [25]:
d1 = np.array([1, 2, 3], dtype=float)

In [26]:
d2 = np.array([1, 2, 3], dtype=complex)

In [27]:
d1 + d2

array([2.+0.j, 4.+0.j, 6.+0.j])

In [28]:
(d1 + d2).dtype

dtype('complex128')

### Type-dependent operations

In [29]:
np.sqrt(np.array([-1, 0, 1]))

  np.sqrt(np.array([-1, 0, 1]))


array([nan,  0.,  1.])

In [30]:
np.sqrt(np.array([-1, 0, 1], dtype=complex))

array([0.+1.j, 0.+0.j, 1.+0.j])

### Real and imaginary parts 

In [31]:
data = np.array([1, 2, 3], dtype=complex); data

array([1.+0.j, 2.+0.j, 3.+0.j])

In [32]:
data.real

array([1., 2., 3.])

In [33]:
data.imag

array([0., 0., 0.])

In [34]:
np.real(data)

array([1., 2., 3.])

In [35]:
np.imag(data)

array([0., 0., 0.])

### Order of array data in memory

* <font color="red">Multidimensional arrays are stored as contiguous data in memory</font>. $~$Consider the case of a two-dimensional array, $~$containing rows and columns: $~$One possible way to store this array as a consecutive sequence of values is to store the rows after each other, and another equally valid approach is to store the columns one after another

* The former is called <font color="green">row-major format</font> and the latter is <font color="green">column-major format</font>. Whether to use row-major or column-major is a matter of conventions, and the **row-major format** is used for example in the **C** programming language, and **Fortran** uses the **column-major format**

* A `numpy` array can be specified to be stored in row-major format, using the keyword argument `order='C'`, and column-major format, using the keyword argument `order='F'`, when the array is created or reshaped. <font color="blue">The default format is *row-major*</font> 

* In general, the `numpy` array attribute `ndarray.strides` defines exactly how this mapping is done. The `strides` attribute is a tuple of the same length as the number of axes (dimensions) of the array. Each value in `strides` is the factor by which the index for the corresponding axis is multiplied when calculating the *memory offset (in bytes)* for a given index expression

In [36]:
data = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)

In [37]:
data.strides

(12, 4)

In [38]:
data = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32, order='F')

In [39]:
data.strides

(4, 8)

## Creating arrays

### Arrays created from lists and other array-like objects

In [40]:
data = np.array([1, 2, 3, 4])

In [41]:
data.ndim

1

In [42]:
data.shape

(4,)

In [43]:
data = np.array([[1, 2], [3, 4]])

In [44]:
data.ndim

2

In [45]:
data.shape

(2, 2)

### Arrays filled with constant values

In [46]:
np.zeros((2, 3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [47]:
np.ones(4)

array([1., 1., 1., 1.])

In [48]:
data = np.ones(4)

In [49]:
data.dtype

dtype('float64')

In [50]:
data = np.ones(4, dtype=np.int64)

In [51]:
data.dtype

dtype('int64')

---

In [52]:
5.4 * np.ones(10)

array([5.4, 5.4, 5.4, 5.4, 5.4, 5.4, 5.4, 5.4, 5.4, 5.4])

In [53]:
np.full(10, 5.4) # slightly more efficient

array([5.4, 5.4, 5.4, 5.4, 5.4, 5.4, 5.4, 5.4, 5.4, 5.4])

---

In [54]:
x1 = np.empty(5); x1.fill(3.0)

In [55]:
x1

array([3., 3., 3., 3., 3.])

### Arrays filled with incremental sequences

In [56]:
np.arange(0, 11, 1)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [57]:
np.linspace(0, 10, 11)  # generally recommended

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

### Arrays filled with logarithmic sequences 

In [58]:
np.logspace(0, 2, 10)  # 10 data points from 10**0=1 to 10**2=100, evenly spaced on a log scale

array([  1.        ,   1.66810054,   2.7825594 ,   4.64158883,
         7.74263683,  12.91549665,  21.5443469 ,  35.93813664,
        59.94842503, 100.        ])

### Mesh grid arrays

In [59]:
x = np.array([-1, 0, 1])
y = np.array([-2, 0, 2])

In [60]:
X, Y = np.meshgrid(x, y)

In [61]:
X

array([[-1,  0,  1],
       [-1,  0,  1],
       [-1,  0,  1]])

In [62]:
Y

array([[-2, -2, -2],
       [ 0,  0,  0],
       [ 2,  2,  2]])

In [63]:
Z = (X + Y)**2; Z

array([[9, 4, 1],
       [1, 0, 1],
       [1, 4, 9]])

### Creating uninitialized arrays

In [64]:
x = np.empty(3, dtype=float); x

array([1., 2., 3.])

### Creating arrays with properties of other arrays

In [65]:
def f(x):
    """Return an array of ones shaped like ``x``.

    Stands in for a real computation: the result array is allocated from the
    properties of the input, so the function never has to be told the shape.
    """
    # A real implementation would compute with x and result here.
    result = np.ones_like(x)
    return result

x = np.array([[1, 2, 3], [4, 5, 6]])
y = f(x); y

array([[1, 1, 1],
       [1, 1, 1]])

### Creating matrix arrays

In [66]:
np.identity(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [67]:
np.eye(4, k=1)

array([[0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.]])

In [68]:
np.eye(4, k=-1)

array([[0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.]])

In [69]:
np.diag(np.arange(0, 20, 5))

array([[ 0,  0,  0,  0],
       [ 0,  5,  0,  0],
       [ 0,  0, 10,  0],
       [ 0,  0,  0, 15]])

## Indexing and slicing

### One-dimensional arrays

In [70]:
a = np.arange(0, 11); a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [71]:
a[0]

0

In [72]:
a[-1]

10

In [73]:
a[4]

4

---

In [74]:
a[1:-1]

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [75]:
a[1:-1:2]

array([1, 3, 5, 7, 9])

---

In [76]:
a[:5]

array([0, 1, 2, 3, 4])

In [77]:
a[-5:]

array([ 6,  7,  8,  9, 10])

In [78]:
a[::-2]

array([10,  8,  6,  4,  2,  0])

### Multidimensional arrays

In [79]:
def f(m, n):
    """Combine a row index ``m`` and a column index ``n`` into ``n + 10*m``."""
    row_offset = 10*m
    return n + row_offset

In [80]:
A = np.fromfunction(f, (6, 6), dtype=int); A  # please search for numpy.fromfunction at google

array([[ 0,  1,  2,  3,  4,  5],
       [10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25],
       [30, 31, 32, 33, 34, 35],
       [40, 41, 42, 43, 44, 45],
       [50, 51, 52, 53, 54, 55]])

---

In [81]:
A[:, 1]  # the second column

array([ 1, 11, 21, 31, 41, 51])

In [82]:
A[1, :]  # the second row

array([10, 11, 12, 13, 14, 15])

In [83]:
A[:3, :3]

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22]])

In [84]:
A[3:, :3]

array([[30, 31, 32],
       [40, 41, 42],
       [50, 51, 52]])

In [85]:
A[::2, ::2]

array([[ 0,  2,  4],
       [20, 22, 24],
       [40, 42, 44]])

In [86]:
A[1::2, 1::3]

array([[11, 14],
       [31, 34],
       [51, 54]])

### Views

* Subarrays that are extracted from arrays using slice operations are **alternative views** of the same underlying array data. That is, $~$they are arrays that refer to the same data in memory as the original array, $~$but with a different `strides` configuration. 

* When elements in a view are assigned new values, $~$the values of the original
array are therefore also updated. For example,

In [87]:
B = A[1:5, 1:5]; B

array([[11, 12, 13, 14],
       [21, 22, 23, 24],
       [31, 32, 33, 34],
       [41, 42, 43, 44]])

In [88]:
B[:, :] = 0; A

array([[ 0,  1,  2,  3,  4,  5],
       [10,  0,  0,  0,  0, 15],
       [20,  0,  0,  0,  0, 25],
       [30,  0,  0,  0,  0, 35],
       [40,  0,  0,  0,  0, 45],
       [50, 51, 52, 53, 54, 55]])

When **a copy rather than a view** is needed, $~$the view can be copied explicitly by using
the `copy` method of the `ndarray` instance

In [89]:
C = B[1:3, 1:3].copy(); C

array([[0, 0],
       [0, 0]])

In [90]:
C[:, :] = 1; C

array([[1, 1],
       [1, 1]])

In [91]:
B

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])

### Fancy indexing and boolean-valued indexing

In [92]:
A = np.linspace(0, 1, 11); A

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [93]:
A[np.array([0, 2, 4])]

array([0. , 0.2, 0.4])

In [94]:
A[[0, 2, 4]]

array([0. , 0.2, 0.4])

---

In [95]:
A > 0.5

array([False, False, False, False, False, False,  True,  True,  True,
        True,  True])

In [96]:
A[A > 0.5]

array([0.6, 0.7, 0.8, 0.9, 1. ])

Unlike arrays created by using slices, $~$**the arrays returned using fancy indexing and Boolean-valued
indexing are not views**, $~$but rather new independent arrays

In [97]:
A = np.arange(10)

In [98]:
indices = [2, 4, 6]

In [99]:
B = A[indices]

In [100]:
B[0] = -1; A

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [101]:
A[indices] = -1; A

array([ 0,  1, -1,  3, -1,  5, -1,  7,  8,  9])

---

In [102]:
A = np.arange(10)

In [103]:
B = A[A > 5]

In [104]:
B[0] = -1; A

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [105]:
A[A > 5] = -1; A

array([ 0,  1,  2,  3,  4,  5, -1, -1, -1, -1])

### Summary

<table>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_indexing_01.png"></td>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_indexing_02.png"></td>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_indexing_03.png"></td>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_indexing_04.png"></td>
</table>

<table>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_indexing_05.png"></td>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_indexing_06.png"></td>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_indexing_07.png"></td>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_indexing_08.png"></td>
</table>

<table>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_indexing_09.png"></td>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_indexing_10.png"></td>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_indexing_11.png"></td>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_indexing_12.png"></td>
</table>

## Reshaping and resizing

Reshaping an array does not require modifying the underlying array data; $~$it only changes how the
data is interpreted, by redefining the array’s `strides` attribute

In [106]:
data = np.array([[1, 2], [3, 4]])

In [107]:
data1 = np.reshape(data, (1, 4)); data1

array([[1, 2, 3, 4]])

In [108]:
data1[0, 1] = -1; data

array([[ 1, -1],
       [ 3,  4]])

In [109]:
data2 = data.reshape(4); data2

array([ 1, -1,  3,  4])

In [110]:
data2[1] = -2; data

array([[ 1, -2],
       [ 3,  4]])

---

In [111]:
data = np.array([[1, 2], [3, 4]])

In [112]:
data1 = np.ravel(data); data1

array([1, 2, 3, 4])

In [113]:
data1[0] = -1; data

array([[-1,  2],
       [ 3,  4]])

The `ndarray` method `flatten` performs the same function, but
returns a **copy** instead of a view

In [None]:
data2 = data.flatten(); data2

In [None]:
data2[0] = -2; data

---

In [None]:
data = np.arange(0, 5); data.shape

In [None]:
column = data[:, np.newaxis]; column

In [None]:
column.shape

In [None]:
row = data[np.newaxis, :]; row

In [None]:
row.shape

In [None]:
row[0, 0] = -1; data 

In [None]:
np.expand_dims(data, axis=1) 

In [None]:
row = np.expand_dims(data, axis=0); row

In [None]:
row[0, 0] = 0; data

---

In [None]:
data = np.arange(5); data

In [None]:
np.vstack((data, data, data))

In [None]:
np.hstack((data, data, data))

In [None]:
data = data[:, np.newaxis]; data.shape

In [None]:
np.hstack((data, data, data))

In [None]:
data1 = np.array([[1, 2], [3, 4]])

In [None]:
data2 = np.array([[5, 6]])

In [None]:
np.concatenate((data1, data2), axis=0)

In [None]:
np.concatenate((data1, data2.T), axis=1)

## Vectorized expressions

### Arithmetic operations

In [None]:
x = np.array([[1, 2], [3, 4]])

In [None]:
y = np.array([[5, 6], [7, 8]])

In [None]:
x + y

In [None]:
y - x

In [None]:
x * y

In [None]:
y / x

---

In [None]:
x * 2

In [None]:
2**x

In [None]:
y / 2

In [None]:
(y / 2).dtype

---

<table>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_broadcasting_1.png"></td>
</table>

<table>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_broadcasting_2.png"></td>
</table>

In [None]:
x = np.array([1, 2, 3, 4]).reshape(2, 2)
y = np.array([1, 2, 3, 4])
x / y

In [None]:
z = np.array([[2, 4]]); z.shape

In [None]:
x / z

In [None]:
zz = np.vstack((z, z)); zz

In [None]:
x / zz

---

In [None]:
z = np.array([[2], [4]]); z.shape

In [None]:
x / z

In [None]:
zz = np.concatenate([z, z], axis=1); zz

In [None]:
x / zz

---

In [None]:
x = z = np.array([1, 2, 3, 4])
y = np.array([5, 6, 7, 8])
x = x + y  # x is reassigned to a new array

In [None]:
x, z

In [None]:
x = z = np.array([1, 2, 3, 4])
y = np.array([5, 6, 7, 8])
x += y  # the values of array x are updated in place

In [None]:
x, z

### Elementwise functions

In [None]:
x = np.linspace(-1, 1, 11); x

In [None]:
y = np.sin(np.pi * x)

In [None]:
np.round(y, decimals=4)

In [None]:
np.add(np.sin(x)**2, np.cos(x)**2)

In [None]:
np.sin(x)**2 + np.cos(x)**2

---

In [None]:
def heaviside(x):
    """Step function for a single number: 1 if ``x > 0``, otherwise 0.

    The comparison result is used directly as an ``if`` condition, so ``x``
    is expected to be a scalar.
    """
    if x > 0:
        return 1
    return 0

In [None]:
heaviside(-1)

In [None]:
heaviside(1.5)

In [None]:
x = np.linspace(-5, 5, 11)
heaviside(x)

In [None]:
heaviside = np.vectorize(heaviside)
heaviside(x)

In [None]:
def heaviside(x):  # much better way
    """Step function built from a comparison: 1 where ``x > 0``, else 0.

    No branching is involved; the boolean result of the comparison is turned
    into integers by multiplying with 1, so array input is handled
    elementwise.
    """
    is_positive = x > 0
    return 1 * is_positive

heaviside(x)

### Aggregate functions

In [None]:
data = np.random.normal(size=(15, 15)) 

In [None]:
np.mean(data)

In [None]:
data.mean()

---

In [None]:
data = np.random.normal(size=(5, 10, 15))

In [None]:
data.sum(axis=0).shape

In [None]:
data.sum(axis=(0, 2)).shape

In [None]:
data.sum()

---

In [None]:
data = np.arange(1, 10).reshape(3, 3); data

In [None]:
data.sum()

In [None]:
data.sum(axis=0)

In [None]:
data.sum(axis=1)

<table>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_aggregation_1.png" width="300"></td>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_aggregation_2.png" width="300"></td>
<td><img src="https://raw.githubusercontent.com/SeoulTechPSE/CompThinking/main/figs/array_aggregation_3.png" width="300"></td>
</table>

### Boolean arrays and conditional expressions

In [None]:
a = np.array([1, 2, 3, 4])
b = np.array([4, 3, 2, 1])

In [None]:
a < b

---

In [None]:
np.all(a < b)

In [None]:
np.any(a < b)

In [None]:
# Report how a compares with b elementwise: all smaller, some smaller, or none.
if np.all(a < b):
    print("All elements in a are smaller than their corresponding elements in b")
elif np.any(a < b):
    print("Some elements in a are smaller than their corresponding elements in b")
else:
    # Reached when no element of a is smaller than its counterpart in b.
    # That includes ties (a == b), so the message must not claim that every
    # element of b is strictly smaller than the one in a.
    print("No elements in a are smaller than their corresponding elements in b")

---

In [None]:
x = np.array([-2, -1, 0, 1, 2])

In [None]:
x > 0

In [None]:
1 * (x > 0)

In [None]:
x * (x > 0)

---

In [None]:
def pulse(x, position, height, width):
    """Rectangular pulse evaluated at ``x``.

    The result is ``height`` where ``position <= x <= position + width``
    (both edges included) and 0 elsewhere. The two boolean comparisons act
    as 0/1 masks that are multiplied onto ``height``.
    """
    after_start = x >= position
    before_end = x <= (position + width)
    return height * after_start * before_end

#   return height *np.logical_and(x >= position, x <= (position + width))                                       

In [None]:
x = np.linspace(-5, 5, 31)

In [None]:
pulse(x, position=-2, height=1, width=5)

In [None]:
pulse(x, position=1, height=2, width=2)

---

In [None]:
x = np.linspace(-4, 4, 9); x

In [None]:
np.where(x < 0, x**2, x**3)

In [None]:
np.select([x < -1, x < 2, x>= 2], [x**2, x**3, x**4])

In [None]:
np.choose([0, 0, 0, 1, 1, 1, 2, 2, 2], [x**2, x**3, x**4])

In [None]:
x[np.abs(x) > 2]

### Set operations

In [None]:
a = np.unique([1, 2, 3, 3]); a

In [None]:
b = np.unique([2, 3, 4, 4, 5, 6, 5]); b

In [None]:
np.isin(a, b)  # elementwise membership test; replaces np.in1d, deprecated since NumPy 2.0

In [None]:
1 in a, 1 in b

In [None]:
np.all(np.isin(a, b))  # to test if a is a subset of b (np.isin replaces the deprecated np.in1d)

---

In [None]:
np.union1d(a, b)

In [None]:
np.intersect1d(a, b)

In [None]:
np.setdiff1d(a, b)

In [None]:
np.setdiff1d(b, a)

### Operations on arrays

In [None]:
data = np.arange(9).reshape(3, 3); data

In [None]:
np.transpose(data)

---

In [None]:
data = np.random.randn(1, 2, 3, 4, 5)

In [None]:
data.shape

In [None]:
data.T.shape

## Matrix and vector operations

In [None]:
A = np.arange(1, 7).reshape(2, 3); A

In [None]:
B = np.arange(1, 7).reshape(3, 2); B

In [None]:
np.dot(A, B)

In [None]:
np.dot(B, A)

In [None]:
A @ B  # the @ matrix-multiplication operator requires Python 3.5 or later

---

In [None]:
A = np.arange(9).reshape(3, 3); A

In [None]:
x = np.arange(3); x

In [None]:
np.dot(A, x)

In [None]:
A.dot(x)

In [None]:
A @ x

---

In [None]:
A = np.random.rand(3, 3)
B = np.random.rand(3, 3)

In [None]:
Ap = np.dot(B, np.dot(A, np.linalg.inv(B)))

In [None]:
Ap = B.dot(A.dot(np.linalg.inv(B)))

In [None]:
B @ A @ np.linalg.inv(B)

---

In [None]:
A1 = np.matrix(A)  # It is no longer recommended to use this class, even for linear algebra. Instead use regular arrays.
B1 = np.matrix(B)

In [None]:
Ap = B1 * A1 * B1.I

           or

In [None]:
A2 = np.asmatrix(A)  # Unlike matrix, asmatrix does not make a copy if the input is already a matrix or an ndarray 
B2 = np.asmatrix(B)

In [None]:
Ap = B2 * A2 * B2.I

In [None]:
Ap = np.asarray(Ap)

---

In [None]:
np.inner(x, x)

In [None]:
np.dot(x, x)

In [None]:
y = x[:, np.newaxis]; y

In [None]:
np.dot(y.T, y)

---

In [None]:
x = np.array([1, 2, 3])

Given two vectors, `a = [a_0, a_1, ..., a_M]` and `b = [b_0, b_1, ..., b_N]`, $~$the outer product is

```
 [[a_0*b_0  a_0*b_1 ... a_0*b_N ]
  [a_1*b_0    .
  [ ...          .
  [a_M*b_0              a_M*b_N ]]
```

In [None]:
np.outer(x, x)

In [None]:
np.kron(x, x)

In [None]:
np.kron(x[:, np.newaxis], x[np.newaxis, :])

In [None]:
np.kron(np.ones((2, 2)), np.identity(2))

In [None]:
np.kron(np.identity(2), np.ones((2, 2)))

---

In [None]:
x = np.array([1, 2, 3, 4])
y = np.array([5, 6, 7, 8])

In [None]:
np.einsum("n,n", x, y)

In [None]:
np.inner(x, y)

In [None]:
A = np.arange(9).reshape(3, 3)
B = A.T

In [None]:
np.einsum("mk,kn", A, B)

In [None]:
np.all(np.einsum("mk,kn", A, B) == np.dot(A, B))  # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling

## Version

In [None]:
print("numpy: ", np.__version__)