<a href="https://colab.research.google.com/github/PaulToronto/Stanford-Andrew-Ng-Machine-Learning-Specialization/blob/main/2_1_5_Vectorization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 2.1.5 Vectorization

## Imports

In [1]:
import sympy as sym
import numpy as np

## Functions

In [2]:
def sigmoid(z):
    """
    Evaluate the logistic function 1 / (1 + exp(-z)) element-wise.

    Parameters
    ----------
    z : array_like
        A scalar or numpy array of any size.

    Returns
    -------
    array_like
        sigmoid(z), computed after limiting z to the range [-500, 500].
    """
    # Clamp before exponentiating so np.exp is never handed a value
    # large enough to overflow.
    bounded = np.clip(z, -500, 500)

    return 1.0 / (1.0 + np.exp(-bounded))

## 2.1.5.1 How neural networks are implemented efficiently

### Forward propagation code for a single layer

<img src='https://drive.google.com/uc?export=view&id=16Q6UQu5igmo1mProwbUOqlIWsGO7aM9b'>

## 2.1.5.2 Matrix multiplication

### Dot Product

In [3]:
a, b, c, d, e, f, g, h = sym.symbols('a b c d e f g h')

In [4]:
A = sym.Matrix([a, b])
B = sym.Matrix([c, d])

In [5]:
A.dot(B)

a*c + b*d

In [6]:
# alternate: the matrix product of A transposed with B gives the same
# expression as A.dot(B), but wrapped in a 1x1 Matrix instead of a bare scalar
A.T * B

Matrix([[a*c + b*d]])

In [7]:
# alternate: the @ operator yields the same 1x1 Matrix as the * product
A.T @ B

Matrix([[a*c + b*d]])

In [8]:
A = A.subs({a: 1, b: 2})
B = B.subs({c: 3, d: 4})

In [9]:
A.dot(B)

11

In [10]:
A.T * B

Matrix([[11]])

In [11]:
A.T @ B

Matrix([[11]])

### Vector matrix multiplication

In [12]:
A = sym.Matrix([a, b])
A

Matrix([
[a],
[b]])

In [13]:
A.T

Matrix([[a, b]])

In [14]:
M = sym.Matrix([[c, e],
                [d, f]])
M

Matrix([
[c, e],
[d, f]])

In [15]:
A.T * M

Matrix([[a*c + b*d, a*e + b*f]])

In [16]:
sym.Matrix([A.dot(M.col(0)), A.dot(M.col(1))]).T

Matrix([[a*c + b*d, a*e + b*f]])

In [17]:
A = A.subs({a: 1, b: 2})
M = M.subs({c: 3, d: 4, e: 5, f: 6})

In [18]:
A.T * M

Matrix([[11, 17]])

### Matrix matrix multiplication

In [19]:
A = sym.Matrix([[a, b],
                [c, d]])
B = sym.Matrix([[e, f],
                [g, h]])

In [20]:
A

Matrix([
[a, b],
[c, d]])

In [21]:
B

Matrix([
[e, f],
[g, h]])

In [22]:
A * B

Matrix([
[a*e + b*g, a*f + b*h],
[c*e + d*g, c*f + d*h]])

In [23]:
sym.Matrix([[A.row(0).dot(B.col(0)), A.row(0).dot(B.col(1))],
            [A.row(1).dot(B.col(0)), A.row(1).dot(B.col(1))]])

Matrix([
[a*e + b*g, a*f + b*h],
[c*e + d*g, c*f + d*h]])

In [24]:
A = A.subs({a: 1, b: -1, c: 2, d: -2})
A

Matrix([
[1, -1],
[2, -2]])

In [25]:
A.T

Matrix([
[ 1,  2],
[-1, -2]])

In [26]:
M

Matrix([
[3, 5],
[4, 6]])

In [27]:
A.T * M

Matrix([
[ 11,  17],
[-11, -17]])

In [28]:
A.T @ M

Matrix([
[ 11,  17],
[-11, -17]])

### A way to think about matrices used for vectorization

- Think of a matrix as being a collection of column vectors
- Think of the transpose of a matrix as being a collection of row vectors

## 2.1.5.3 Matrix multiplication rules

In [29]:
W = np.array([[3, 5, 7, 9],
              [4, 6, 8, 0]])
W

array([[3, 5, 7, 9],
       [4, 6, 8, 0]])

In [30]:
Wsym = sym.Matrix(W)
Wsym

Matrix([
[3, 5, 7, 9],
[4, 6, 8, 0]])

In [31]:
A = np.array([[1, -1, 0.1],
               [2, -2, 0.2]])
A

array([[ 1. , -1. ,  0.1],
       [ 2. , -2. ,  0.2]])

In [32]:
Asym = sym.Matrix(A)
Asym

Matrix([
[1.0, -1.0, 0.1],
[2.0, -2.0, 0.2]])

Think of `Asym` as 3 column vectors.

In [33]:
a1 = Asym.col(0)
a2 = Asym.col(1)
a3 = Asym.col(2)
display(a1, a2, a3)

Matrix([
[1.0],
[2.0]])

Matrix([
[-1.0],
[-2.0]])

Matrix([
[0.1],
[0.2]])

In [34]:
A.T

array([[ 1. ,  2. ],
       [-1. , -2. ],
       [ 0.1,  0.2]])

In [35]:
Asym.T

Matrix([
[ 1.0,  2.0],
[-1.0, -2.0],
[ 0.1,  0.2]])

Think of `Asym.T` as 3 row vectors.

In [36]:
a1T = Asym.T.row(0)
a2T = Asym.T.row(1)
a3T = Asym.T.row(2)
display(a1T, a2T, a3T)

Matrix([[1.0, 2.0]])

Matrix([[-1.0, -2.0]])

Matrix([[0.1, 0.2]])

Think of `Wsym` as 4 column vectors.

In [37]:
w1 = Wsym.col(0)
w2 = Wsym.col(1)
w3 = Wsym.col(2)
w4 = Wsym.col(3)
display(w1, w2, w3, w4)

Matrix([
[3],
[4]])

Matrix([
[5],
[6]])

Matrix([
[7],
[8]])

Matrix([
[9],
[0]])

$$
Z = A^{T}W
$$

In [38]:
Z = A.T @ W
Z

array([[ 11. ,  17. ,  23. ,   9. ],
       [-11. , -17. , -23. ,  -9. ],
       [  1.1,   1.7,   2.3,   0.9]])

In [39]:
Z = np.matmul(A.T, W)
Z

array([[ 11. ,  17. ,  23. ,   9. ],
       [-11. , -17. , -23. ,  -9. ],
       [  1.1,   1.7,   2.3,   0.9]])

In [40]:
Zsym = Asym.T @ Wsym
Zsym

Matrix([
[ 11.0,  17.0,  23.0,  9.0],
[-11.0, -17.0, -23.0, -9.0],
[  1.1,   1.7,   2.3,  0.9]])

In [41]:
Asym.T.shape, Wsym.shape

((3, 2), (2, 4))

The product will be a $3 \times 4$ matrix.

In [42]:
sym.Matrix([[a1T.dot(w1), a1T.dot(w2), a1T.dot(w3), a1T.dot(w4)],
            [a2T.dot(w1), a2T.dot(w2), a2T.dot(w3), a2T.dot(w4)],
            [a3T.dot(w1), a3T.dot(w2), a3T.dot(w3), a3T.dot(w4)]])

Matrix([
[ 11.0,  17.0,  23.0,  9.0],
[-11.0, -17.0, -23.0, -9.0],
[  1.1,   1.7,   2.3,  0.9]])

In [43]:
A.T @ W

array([[ 11. ,  17. ,  23. ,   9. ],
       [-11. , -17. , -23. ,  -9. ],
       [  1.1,   1.7,   2.3,   0.9]])

In [44]:
np.matmul(A.T, W)

array([[ 11. ,  17. ,  23. ,   9. ],
       [-11. , -17. , -23. ,  -9. ],
       [  1.1,   1.7,   2.3,   0.9]])

## 2.1.5.4 Matrix multiplication code

In [45]:
A = np.array([[200],
              [17]])
A.T.shape, A.T

((1, 2), array([[200,  17]]))

In [46]:
W = np.array([[1, -3, 5],
              [-2, 4, -6]])
W.shape, W

((2, 3),
 array([[ 1, -3,  5],
        [-2,  4, -6]]))

In [47]:
B = np.array([[-1, 1, 2]])
B.shape, B

((1, 3), array([[-1,  1,  2]]))

In [48]:
Z = np.matmul(A.T, W) + B
Z

array([[ 165, -531,  900]])

In [49]:
sigmoid(Z)

array([[1.00000000e+000, 7.12457641e-218, 1.00000000e+000]])

In [50]:
def dense(A, W, B):
    """
    Forward propagation for one dense layer: sigmoid(A.T @ W + B).

    Parameters
    ----------
    A : ndarray
        Layer input; it is transposed inside this function before the
        product. In the example defined earlier in this section it is a
        (2, 1) column.
    W : ndarray
        Weight matrix; (2, 3) in this section's example.
    B : ndarray
        Bias added to the product; (1, 3) in this section's example.

    Returns
    -------
    ndarray
        The sigmoid activation of A.T @ W + B.
    """
    # Transposing here lets the caller pass A in its untransposed layout.
    return sigmoid(np.matmul(A.T, W) + B)

dense(A, W, B) # [1, 0, 1]

array([[1.00000000e+000, 7.12457641e-218, 1.00000000e+000]])

In [51]:
# this is how it is implemented in the video
def dense2(AT, W, B):
    """
    Forward propagation for one dense layer: sigmoid(AT @ W + B).

    Unlike `dense`, no transpose is applied here; the caller passes the
    already-transposed input.

    Parameters
    ----------
    AT : ndarray
        Transposed layer input; (1, 2) in this section's example.
    W : ndarray
        Weight matrix; (2, 3) in this section's example.
    B : ndarray
        Bias added to the product; (1, 3) in this section's example.

    Returns
    -------
    ndarray
        The sigmoid activation of AT @ W + B.
    """
    return sigmoid(np.matmul(AT, W) + B)

dense2(A.T, W, B) # [1, 0, 1]

array([[1.00000000e+000, 7.12457641e-218, 1.00000000e+000]])