# Chapter 2: Linear Algebra

Material: http://www.deeplearningbook.org/contents/linear_algebra.html

Slides: http://www.deeplearningbook.org/slides/02_linear_algebra.pdf

In [1]:
import numpy as np

## 2.1 Scalars, Vectors, Matrices and Tensors

https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html

#### Row Vector

In [2]:
x = np.array([[1, 2, 3]])
x

array([[1, 2, 3]])

In [3]:
type(x)

numpy.ndarray

#### Column Vector (the book uses column vectors)

In [4]:
# Column vector: a 2-D array with three rows and a single column, shape (3, 1).
y = np.array([[3], [4], [5]])
y

array([[3],
       [4],
       [5]])

#### Matrix

In [5]:
# 3x3 integer matrix holding 1..9 in row-major order:
#   [[1, 2, 3],
#    [4, 5, 6],
#    [7, 8, 9]]
A = np.arange(1, 10).reshape(3, 3)
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

#### Transpose: A<sup>T</sup>

In [6]:
A.T

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

#### Column vector defined via row vector and transpose

In [7]:
y = np.array([[3,4,5]]).T
y

array([[3],
       [4],
       [5]])

#### Scalar Multiplication/Addition

In [8]:
3 * A + 7

array([[10, 13, 16],
       [19, 22, 25],
       [28, 31, 34]])

#### Matrix Addition

In [9]:
B = np.ones((3, 3), dtype=int)
B

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [10]:
A + B

array([[ 2,  3,  4],
       [ 5,  6,  7],
       [ 8,  9, 10]])

## 2.2 Multiplying Matrices and Vectors

#### Element-wise or Hadamard Product

In [11]:
# In-place doubling: this overwrites the existing A instead of creating a new
# array, so every re-run of this cell doubles A again. The cells below use
# this doubled A.
A *= 2
A

array([[ 2,  4,  6],
       [ 8, 10, 12],
       [14, 16, 18]])

A * B

In [12]:
A * A

array([[  4,  16,  36],
       [ 64, 100, 144],
       [196, 256, 324]])

#### Dot Product (the usual meaning of matrix multiplication: C = AB)

In [13]:
A.dot(B)

array([[12, 12, 12],
       [30, 30, 30],
       [48, 48, 48]])

#### Matrix multiplication is distributive: A(B + C) = AB + AC

In [14]:
# Three small 2x2 integer matrices used to check the algebraic identities below.
A = np.array([[1, 5], [3, 0]])
B = np.array([[3, 2], [4, 3]])
C = np.array([[4, 0], [2, 1]])

In [15]:
A.dot(B + C)

array([[37, 22],
       [21,  6]])

In [16]:
A.dot(B) + A.dot(C)

array([[37, 22],
       [21,  6]])

#### Matrix multiplication is associative: A(BC) = (AB)C

In [17]:
A.dot(B.dot(C))

array([[126,  17],
       [ 48,   6]])

In [18]:
(A.dot(B)).dot(C)

array([[126,  17],
       [ 48,   6]])

#### Matrix multiplication is NOT commutative: AB = BA is not always true

In [19]:
A.dot(B)

array([[23, 17],
       [ 9,  6]])

In [20]:
B.dot(A)

array([[ 9, 15],
       [13, 20]])

####  Dot product between two vectors IS commutative: x<sup>T</sup>y = y<sup>T</sup>x

In [21]:
# Two column vectors of shape (3, 1), built by reshaping flat sequences.
x = np.array([1, 2, 3]).reshape(-1, 1)
y = np.array([4, 5, 6]).reshape(-1, 1)

In [22]:
x

array([[1],
       [2],
       [3]])

In [23]:
x.T

array([[1, 2, 3]])

In [24]:
y

array([[4],
       [5],
       [6]])

In [25]:
y.T

array([[4, 5, 6]])

In [26]:
x.T.dot(y)

array([[32]])

In [27]:
y.T.dot(x)

array([[32]])

#### Transpose: (AB)<sup>T</sup> = B<sup>T</sup>A<sup>T</sup>

In [28]:
A.dot(B).T

array([[23,  9],
       [17,  6]])

In [29]:
(B.T).dot(A.T)

array([[23,  9],
       [17,  6]])

## 2.3 Identity and Inverse Matrices

#### Identity Matrix: I<sub>n</sub>

https://docs.scipy.org/doc/numpy/reference/generated/numpy.eye.html

In [30]:
np.eye(3, dtype=int)

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]])

#### Inverse Matrix: A<sup>-1</sup>

https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.inv.html

In [31]:
from numpy.linalg import inv

In [32]:
inv(A)

array([[ 0.        ,  0.33333333],
       [ 0.2       , -0.06666667]])

#### A<sup>-1</sup>A = I<sub>n</sub>

In [33]:
inv(A).dot(A)

array([[ 1.,  0.],
       [ 0.,  1.]])

## 2.4 Linear Dependence and Span

In [34]:
x

array([[1],
       [2],
       [3]])

In [35]:
y

array([[4],
       [5],
       [6]])

In [36]:
2 * x + 5 * y

array([[22],
       [29],
       [36]])

#### Linearly Independent Vectors

In [37]:
u1 = np.array([[1, 0]])
u2 = np.array([[0, 1]])

In [38]:
2 * u1 + 4 * u2

array([[2, 4]])

#### Linearly Dependent Vectors

In [39]:
v1 = np.array([[2, 1]])
v2 = np.array([[4, 2]])

In [40]:
2 * v1

array([[4, 2]])

## 2.5 Norms

https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.norm.html

In [41]:
from numpy.linalg import norm

In [42]:
norm(u1)

1.0

In [43]:
norm(u1 + u2)

1.4142135623730951

In [44]:
import math

In [45]:
# Hand-computed L2 norm of u1 + u2, to compare with norm(u1 + u2) above:
# add the vectors component-wise, square each component, sum, take the root.
# (The previous version squared the component *differences*, i.e. it computed
# the norm of u1 - u2, which matches only because u1 and u2 are orthogonal.)
math.sqrt((u1[0][0] + u2[0][0]) ** 2 + (u1[0][1] + u2[0][1]) ** 2)

1.4142135623730951

## 2.6 Special Kinds of Matrices and Vectors

#### Symmetric: A<sup>T</sup> = A

In [46]:
# Symmetric 3x3 matrix: entry (i, j) equals entry (j, i).
A = np.array([[1, 2, 3],
              [2, 1, 2],
              [3, 2, 1]])

In [47]:
A

array([[1, 2, 3],
       [2, 1, 2],
       [3, 2, 1]])

In [48]:
A.T

array([[1, 2, 3],
       [2, 1, 2],
       [3, 2, 1]])

#### Identity

In [49]:
np.eye(3, dtype=int)

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]])

## 2.7 Eigendecomposition

https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.eig.html

In [50]:
from numpy.linalg import eig

In [51]:
A

array([[1, 2, 3],
       [2, 1, 2],
       [3, 2, 1]])

In [52]:
w, V = eig(A)

#### Eigenvalues

In [53]:
w

array([ 5.70156212, -2.        , -0.70156212])

#### Eigenvectors

In [54]:
V

array([[ -6.05912800e-01,  -7.07106781e-01,   3.64512933e-01],
       [ -5.15499134e-01,   5.28883617e-17,  -8.56890100e-01],
       [ -6.05912800e-01,   7.07106781e-01,   3.64512933e-01]])

#### A = V diag(w) V<sup>-1</sup>; for a real symmetric A with orthonormal eigenvectors this becomes A = V diag(w) V<sup>T</sup>

In [55]:
np.diag(w)

array([[ 5.70156212,  0.        ,  0.        ],
       [ 0.        , -2.        ,  0.        ],
       [ 0.        ,  0.        , -0.70156212]])

In [56]:
V.dot(np.diag(w)).dot(V.T)

array([[ 1.,  2.,  3.],
       [ 2.,  1.,  2.],
       [ 3.,  2.,  1.]])

#### A matrix is singular if and only if at least one of its eigenvalues is zero.

In [57]:
# Rank-deficient 3x3 matrix: the last row equals 2 * (first row) + (second row).
A = np.array([[1, 2, 3],
              [4, 5, 6],
              [6, 9, 12]])

In [58]:
# Third row is a linear combination of the first two
2 * A[0] + A[1]

array([ 6,  9, 12])

In [59]:
A[2]

array([ 6,  9, 12])

In [60]:
w, V = eig(A)

In [61]:
# The 3rd eigenvalue is basically 0
w

array([  1.81651514e+01,  -1.65151390e-01,   1.06387725e-15])

In [62]:
V

array([[-0.20452947, -0.5443286 ,  0.40824829],
       [-0.45672051,  0.78003936, -0.81649658],
       [-0.86577946, -0.30861783,  0.40824829]])

## 2.8 SVD

https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.svd.html

In [63]:
from numpy.linalg import svd

In [64]:
# Same symmetric matrix as in the eigendecomposition section.
A = np.array([[1, 2, 3],
              [2, 1, 2],
              [3, 2, 1]])
# numpy.linalg.svd returns the right singular vectors already transposed,
# so the third value (named V here) is what the book writes as V^T.
U, s, V = svd(A)

In [65]:
U

array([[ -6.05912800e-01,   7.07106781e-01,   3.64512933e-01],
       [ -5.15499134e-01,   5.22029675e-17,  -8.56890100e-01],
       [ -6.05912800e-01,  -7.07106781e-01,   3.64512933e-01]])

In [66]:
s

array([ 5.70156212,  2.        ,  0.70156212])

In [67]:
D = np.diag(s)
D

array([[ 5.70156212,  0.        ,  0.        ],
       [ 0.        ,  2.        ,  0.        ],
       [ 0.        ,  0.        ,  0.70156212]])

In [68]:
V

array([[ -6.05912800e-01,  -5.15499134e-01,  -6.05912800e-01],
       [ -7.07106781e-01,  -5.88338788e-17,   7.07106781e-01],
       [ -3.64512933e-01,   8.56890100e-01,  -3.64512933e-01]])

#### A = UDV<sup>T</sup> (or A = UDV with numpy.linalg.svd)

In [69]:
U.dot(D).dot(V)

array([[ 1.,  2.,  3.],
       [ 2.,  1.,  2.],
       [ 3.,  2.,  1.]])

In [70]:
A

array([[1, 2, 3],
       [2, 1, 2],
       [3, 2, 1]])

## 2.9 The Moore-Penrose Pseudoinverse

https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.pinv.html

In [71]:
from numpy.linalg import pinv

In [72]:
# Wide (2x3) matrix: more columns than rows.
A = np.array([[1, 2, 3], [4, 0, 6]])

In [73]:
pinv(A)

array([[-0.14754098,  0.13934426],
       [ 0.42622951, -0.18032787],
       [ 0.09836066,  0.07377049]])

In [74]:
# Tall (3x2) matrix: more rows than columns.
A = np.array([[1, 2], [3, 4], [5, 6]])

In [75]:
pinv(A)

array([[-1.33333333, -0.33333333,  0.66666667],
       [ 1.08333333,  0.33333333, -0.41666667]])

## 2.10 Trace

#### The trace operator gives the sum of all of the diagonal entries of a matrix

In [76]:
# Redefine the three 2x2 matrices (A was overwritten in earlier sections)
# so the trace examples start from known values.
A = np.array([[1, 5], [3, 0]])
B = np.array([[3, 2], [4, 3]])
C = np.array([[4, 0], [2, 1]])

In [77]:
np.trace(A)

1

In [78]:
np.trace(B)

6

In [79]:
np.trace(C)

5

#### Tr(ABC) = Tr(CAB) = Tr(BCA)

In [80]:
np.trace(A.dot(B).dot(C))

132

In [81]:
np.trace(C.dot(A).dot(B))

132

In [82]:
np.trace(B.dot(C).dot(A))

132

#### Tr(AB) = Tr(BA) for m x n A and n x m B even if m ≠ n

In [83]:
# A is 3x2 and B is 2x3, so AB is 3x3 while BA is 2x2.
A = np.array([[1, 2], [3, 4], [5, 6]])
B = np.array([[9, 8, 7], [6, 5, 4]])

In [84]:
np.trace(A.dot(B))

124

In [85]:
np.trace(B.dot(A))

124

## 2.11 Determinant

#### The determinant is equal to the product of all the eigenvalues of the matrix.

https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.det.html

In [86]:
from numpy.linalg import det

In [87]:
# 2x2 matrix with determinant 1*0 - 5*3 = -15.
A = np.array([[1, 5], [3, 0]])

In [88]:
val, Q = eig(A)
val, Q

(array([ 4.40512484, -3.40512484]), array([[ 0.82653237, -0.75033198],
        [ 0.56288918,  0.66106121]]))

In [89]:
det(A)

-15.0

In [90]:
# Product of *all* eigenvalues; np.prod works for any matrix size instead of
# hard-coding the two entries of a 2x2 case.
np.prod(val)

-15.0

## 2.12 Example: Principal Components Analysis

http://alexhwilliams.info/itsneuronalblog/2016/03/27/pca/#some-things-you-maybe-didnt-know-about-pca