In [1]:
import numpy as np

## One-dimensional arrays

### Creating arrays

In [2]:
# np.array() turns a Python list into an ndarray; here a 1-D array of three ints.
arr1 = np.array([1, 2, 3])
arr1

array([1, 2, 3])

In [5]:
# np.ones() accepts a length (1-D) or a shape tuple (N-D); every entry is the float 1.0.
arr_ones1 = np.ones(shape=3)
arr_ones2 = np.ones(shape=(3, 4))
arr_ones3 = np.ones(shape=(3, 4, 2))
# One display() call renders its arguments one after another.
display(arr_ones1, arr_ones2, arr_ones3)

array([1., 1., 1.])

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

array([[[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]]])

In [6]:
# Two more constructors taking a length: all zeros, and random samples.
# No seed is set here, so arr_random1 holds different values on every run.
arr_zeros1 = np.zeros(3)
arr_random1 = np.random.random(size=3)
display(arr_zeros1, arr_random1)

array([0., 0., 0.])

array([0.90452396, 0.99622735, 0.24150439])

### Array arithmetic 

In [7]:
# Operands for the arithmetic examples: an int array and a float array of equal length.
arr_arith1 = np.array([1, 2])
arr_arith2 = np.ones(shape=2)
display(arr_arith1, arr_arith2)

array([1, 2])

array([1., 1.])

In [12]:
# Arithmetic operators act element by element on arrays of the same shape.
display(
    arr_arith1 + arr_arith2,  # int + float -> float result
    arr_arith1 - arr_arith2,
    arr_arith1 * arr_arith1,  # int * int stays int
    arr_arith1 / arr_arith1,  # division yields floats even for int operands
)

array([2., 3.])

array([0., 1.])

array([1, 4])

array([1., 1.])

In [14]:
display(arr_arith1 * 4.5)  # Broadcasting: the scalar is applied to every element

array([4.5, 9. ])

### Indexing 

In [15]:
# Sample 1-D array reused by the indexing and aggregation cells that follow.
data = np.array([1, 2, 3])

In [16]:
# Indexing is zero-based; slices follow Python's start:stop rules (stop excluded).
display(
    data,
    data[0],    # first element
    data[1],    # second element
    data[0:2],  # elements 0 and 1
    data[1:],   # everything from index 1 onward
)

array([1, 2, 3])

1

2

array([1, 2])

array([2, 3])

### Aggregation

In [19]:
# Each aggregation collapses the whole 1-D array to a single number.
display(
    data.max(),
    data.min(),
    data.sum(),
    data.mean(),
    data.std(),
    data.prod(),
)

3

1

6

2.0

0.816496580927726

6

## Multi-dimensional arrays 

### Creating matrices 

In [20]:
# A nested list becomes a 2-D array: each inner list is one row.
mx_1 = np.array([
    [1, 2],
    [3, 4],
])
mx_1

array([[1, 2],
       [3, 4]])

In [21]:
# See above for multidimensional use of np.ones()

### Matrix arithmetic

In [22]:
# 2x2 matrix of float ones, used as the second operand in the cells below.
mx_ones = np.ones(shape=(2, 2))
mx_ones

array([[1., 1.],
       [1., 1.]])

In [23]:
# Same-shape matrices add element by element; the int matrix is promoted to float.
mx_1 + mx_ones

array([[2., 3.],
       [4., 5.]])

**Unintuitive broadcasting:**

In [24]:
# A 3x2 matrix: three rows of two values each.
mx_2 = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
mx_2

array([[1, 2],
       [3, 4],
       [5, 6]])

In [27]:
# A 1-D array of two ones; its length equals the column count of mx_1, mx_2 and mx_ones.
ones_row = np.ones(shape=2)
ones_row

array([1., 1.])

In [29]:
# The length-2 row is broadcast across every row of each matrix,
# whatever the number of rows (3 for mx_2, 2 for mx_1 and mx_ones).
display(
    mx_2 + ones_row,
    mx_1 + ones_row,
    mx_ones + ones_row,
)

array([[2., 3.],
       [4., 5.],
       [6., 7.]])

array([[2., 3.],
       [4., 5.]])

array([[2., 2.],
       [2., 2.]])

### Dot product / matrix multiplication

In [30]:
# Operands for the dot product.
a = np.array([1, 2, 3])  # 1-D, shape (3,); plays the role of a 1x3 row vector
b = np.array([
    [1, 10],
    [100, 1000],
    [10000, 100000],
])  # 3x2
display(a, b)

array([1, 2, 3])

array([[     1,     10],
       [   100,   1000],
       [ 10000, 100000]])

In [31]:
# Apply matrix multiplication:
ab = a.dot(b)
ab  # Shape (2,): the 1-D counterpart of the expected 1x2 result.

array([ 30201, 302010])

In [32]:
# Dimensions must match: the inner sizes of the two operands have to agree.
wrong_dim = np.array([[1], [2], [3]])  # 3x1 column
display(wrong_dim)
try:
    fails = wrong_dim.dot(b)  # (3,1) . (3,2): inner sizes 1 and 3 differ
except ValueError as err:
    # The error is the point of this cell: show it as output instead of
    # letting it abort a "Restart & Run All".
    print(f"{type(err).__name__}: {err}")

array([[1],
       [2],
       [3]])

ValueError: shapes (3,1) and (3,2) not aligned: 1 (dim 1) != 3 (dim 0)

### Matrix indexing

In [36]:
# 2-D indexing reads as matrix[ROW, COLUMN]; the row position may also be a slice.
display(
    mx_2,
    mx_2[0, 1],    # row 0, column 1
    mx_2[2, 0],    # row 2, column 0
    mx_2[1:3],     # rows 1 and 2, all columns
    mx_2[0:2, 0],  # rows 0 and 1, column 0 only
)

array([[1, 2],
       [3, 4],
       [5, 6]])

2

5

array([[3, 4],
       [5, 6]])

array([1, 3])

### Matrix aggregation

In [37]:
# Without an axis argument, every element of the matrix goes into one result.
display(
    mx_2.sum(),
    mx_2.max(),
    mx_2.std(),
)

21

6

1.707825127659933

In [39]:
# With an axis argument, the aggregation runs along that axis only.
display(
    mx_2.sum(axis=0),  # collapse the rows -> one total per column
    mx_2.sum(axis=1),  # collapse the columns -> one total per row
)

array([ 9, 12])

array([ 3,  7, 11])

### Transposing and reshaping

In [42]:
# .T gives the transpose: rows become columns (3x2 -> 2x3).
mx_2_T = mx_2.T
display(mx_2, mx_2_T)

array([[1, 2],
       [3, 4],
       [5, 6]])

array([[1, 3, 5],
       [2, 4, 6]])

In [44]:
# reshape() lays the same six values out in a new shape, filling row by row.
data_as_1d = np.array([1, 2, 3, 4, 5, 6])
data_as_2d_2by3 = data_as_1d.reshape((2, 3))
data_as_2d_3by2 = data_as_1d.reshape((3, 2))
display(data_as_1d, data_as_2d_2by3, data_as_2d_3by2)

array([1, 2, 3, 4, 5, 6])

array([[1, 2, 3],
       [4, 5, 6]])

array([[1, 2],
       [3, 4],
       [5, 6]])

In [47]:
# -1 is a placeholder: NumPy infers that one dimension from the array's size.
data_as_2d_2byX = data_as_1d.reshape((2, -1))  # 6 values / 2 rows -> 3 columns
data_as_2d_Xby2 = data_as_1d.reshape((-1, 2))  # 6 values / 2 columns -> 3 rows
display(data_as_2d_2byX, data_as_2d_Xby2)

array([[1, 2, 3],
       [4, 5, 6]])

array([[1, 2],
       [3, 4],
       [5, 6]])

## More than 2D arrays

In [49]:
# Two 2x2 blocks stacked along the first axis -> shape (2, 2, 2).
arr_3d_1 = np.array([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
])
arr_3d_1

array([[[1, 2],
        [3, 4]],

       [[5, 6],
        [7, 8]]])

![array_3d_1](http://jalammar.github.io/images/numpy/numpy-3d-array.png)

In [53]:
# Shape (4, 3, 2): the repr shows 4 outer blocks, each with 3 rows of 2 values.
arr_3d_ones_4x3x2 = np.ones(shape=(4, 3, 2))
arr_3d_ones_4x3x2

array([[[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]]])

![arr_3d_ones_4x3x2](http://jalammar.github.io/images/numpy/numpy-3d-array-creation.png)

**Note the difference in representation vs print() above!**

The FIRST dimension (as in the argument order) is always the highest-level grouping when using print().

The image above doesn't really show that representation correctly.

## Practical usage

In [54]:
# Toy predictions and ground-truth labels used for the error metric below.
predictions = np.array([1, 4, 8])
labels = np.array([1, 3, 8])
display(predictions, labels)

array([1, 4, 8])

array([1, 3, 8])

In [56]:
# Number of samples: the length of the 1-D predictions array.
n = len(predictions)
n

3

In [59]:
# Mean squared error: the average of the squared prediction errors.
mse = (1 / n) * np.sum(
    np.square(predictions - labels)  # Note: np.square(), not np.sqr()
)
mse

0.3333333333333333