In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

import numpy as np
import scipy.stats as stats
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
import numpy.linalg as la

## Linear Algebra

https://docs.scipy.org/doc/numpy-1.13.0/reference/routines.linalg.html

* Vectors
    * Norm
    * Dot Product
    * Similarity
    * Projection
    * Linear Independence
    
* Matrices
    * Operations
    * System of linear equations
    * Eigenvalues eigenvectors
    * Matrix Decomposition


## Vectors

* **Definition** A vector is an object that has a magnitude and a direction

###  Magnitude of a vector 

* Length of a vector v = $(v_1,v_2,...,v_n)$ is called its **norm**
 
 <div style="font-size: 115%;">
$$ ||v|| = \sqrt{\sum_{i=1}^{n}v_i^2}$$
</div>

* Often referred to as the L2 Norm, $||v||_2$

#### Unit Vector
* Denote the unit vector for a vector v by $\hat{v}$

<div style="font-size: 115%;">
$$ \hat{v} = \frac{v}{||v||}$$
</div>

In [None]:
def Norm(v):
    """Return the Euclidean (L2) norm of a vector.

    Parameters
    ----------
    v : array_like
        Vector components. NumPy arrays, lists and tuples are accepted.

    Returns
    -------
    numpy.floating
        The square root of the sum of the squared components.
    """
    v = np.asarray(v)
    # Square integer input in floating point: fixed-width integers would
    # otherwise wrap around silently when a squared component overflows.
    if np.issubdtype(v.dtype, np.integer):
        v = v.astype(float)
    return np.sqrt(np.sum(v**2))
# Worked example: scale v by its norm and confirm the result has length 1.
v = np.arange(1, 6)
Norm_v = Norm(v)
Unit_v = v / Norm_v
print(f'Norm v : {Norm_v} \nUnit v: {Unit_v}\nNorm Unit v: {Norm(Unit_v)}' )

### Direction of a vector

Given a vector $v = (v_1,v_2,...,v_n)$, the direction of $v$ is the vector $(\frac{v_1}{||v||},\frac{v_2}{||v||},...,\frac{v_n}{||v||})$

In [None]:
# The direction of v is its unit vector v/||v|| (Unit_v, computed in an earlier cell)
Unit_v

### Dot product 

* Multiply vectors to get a scalar

<div style="font-size: 115%;">
$$  v = (v_1,v_2,...,v_n), w = (w_1,w_2,...,w_n)$$
</div>    

#### Algebraic Definition

<div style="font-size: 115%;">
$$  v\centerdot{ w} = \sum_i^n v_i*w_i $$
</div>

#### Geometric Definition

<div style="font-size: 115%;">
$$  v\centerdot{ w} = cos(\theta)\Vert v \Vert \Vert w\Vert$$
</div>

In [None]:
# Three equivalent spellings of the same dot product; the order of the
# operands does not matter.
v = np.arange(1, 6)
w = 2 * v
np.vdot(v, w), v.dot(w), w.dot(v)

In [None]:
# Algebraic definition on a small 2-D example: 2*2 + 2*0
v, w = np.array([2, 2]), np.array([2, 0])
np.vdot(v, w)

In [None]:
import math

# Geometric definition: cos(theta) * ||v|| * ||w|| with theta = 45 degrees.
# Rounded to 10 decimals to hide floating-point noise.
cos_theta = math.cos(math.radians(45))
round(cos_theta * la.norm(v) * la.norm(w), 10)


### Measures of similarity of two vectors

* Support Vector Machines 
* NLP: semantic similarity

In [None]:
# Cosine similarity: the dot product divided by the product of the norms.
v = np.ones(4, dtype=int)
w = np.full(4, 2)
np.dot(v, w) / (la.norm(v) * la.norm(w))

#### Orthogonal (perpendicular) vectors
 
* $a\cdot{b} = 0$

In [None]:
# Perpendicular vectors: the dot product is 1*(-1) + 1*1 = 0
a, b = np.array([1, 1]), np.array([-1, 1])
adotb = np.vdot(a, b)
adotb

### Projection

![](proj.png)

#### Vector Projection of x onto y

<div style="font-size: 115%;">
$$ proj_y x = \frac{x\cdot{y}}{||y||^2}y$$
</div>

#### Scalar Projection of x onto y

* The length of the projection

<div style="font-size: 115%;">
$$ comp_y x = \frac{x\cdot{y}}{||y||}$$
</div>

* **$x\cdot\hat{y}$ is the length of the projection of x onto y, where $\hat{y}$ is the unit vector in the direction of y** ( $\frac{y}{||y||}$ )

In [None]:
# Scalar projection of x onto y, computed as the dot product of x with the
# unit vector along y (equivalent to (x . y) / ||y||).
y = np.array([3,0])
x = np.array([2,1])
unit_y = y/Norm(y)
np.vdot(x,unit_y)

In [None]:
# Trigonometric cross-check of the scalar projection: ||x|| * cos(theta).
# The literal 1 is taken as the component of x perpendicular to y, so that
# sin(theta) = 1/||x|| -- assumes x = (2, 1) and y along the first axis, as
# set in the previous cell.
theta = np.arcsin(1/Norm(x))
Norm(x)*np.cos(theta)                  

### Linear Independence

* A set of vectors $(v_1,v_2,...,v_n)$ is linearly dependent if at least one of the vectors can be expressed as a linear combination of the others.
    - For two vectors this means they lie on the same line (one is a scalar multiple of the other)

* Vectors that are not linearly dependent are linearly independent.
* A set of vectors are linearly independent if:
<div style="font-size: 115%;"> 
$$ a_1 v_1 + a_2 v_2 + ...+ a_n v_n = 0 \text{ iff all } a_i = 0$$
</div>


In [None]:
# Not linearly independent: w = 2v, so the combination 2v - 1w (with non-zero
# coefficients) is the zero vector.
v = np.array([1,2,3,4])
w = np.array([2,4,6,8])

# Display the whole vector rather than its sum: a component sum of 0 would
# not prove every component is 0 (e.g. [1, -1] also sums to 0).
2*v - w


In [None]:
# Here w = 2v + 1, so 2v - (w - 1) is the zero vector and the sum below is 0.
# NOTE(review): that is an affine relation (it needs the constant shift), not a
# linear combination of v and w, so it does not make v and w linearly
# dependent. Presumably this cell is the linearly independent example -- confirm.
v = np.array([1,2,3,4])
w = np.array([3,5,7,9])
np.sum(2*v + (-1*(w - 1)))

In [None]:
# Four decimals: for nearly parallel vectors the cosine is so close to 1 that
# rounding to two decimals prints 1.0 and hides that they are not collinear.
print(f'cosine of angle between the vectors: {np.round(np.dot(v,w)/(la.norm(v)*la.norm(w)),4)}')

## Matrices


### Matrix Multiplication Operations

#### Matrix Multiplication

* Must be compatible: number of columns of 1st matrix = number of rows of second matrix
* Result: number of rows of 1st matrix and number of columns of 2nd 
    

In [None]:
# numpy matmul: (4x2) times (2x3) gives (4x3)
A = np.arange(1, 9).reshape(4, 2)
B = np.arange(1, 7).reshape(2, 3)
M = np.matmul(A, B)
print("Shape: ", M.shape)
print("M\n",M)

In [None]:
# numpy dot product: when both arguments are 2-D arrays, np.dot performs
# matrix multiplication
np.dot(A,B)

In [None]:
# Python's @ operator (PEP 465). On NumPy arrays it is carried out by NumPy
# as a matrix multiplication, not by a pure-Python implementation.
print(A@B)

#### Matrix by a scalar

In [None]:
# Multiplying an array by a scalar scales every element
A*2

In [None]:
# With a scalar argument np.dot is an element-wise multiplication
np.dot(A,2)

#### Hadamard Product

* Element by element multiplication

* Often denoted as $A \circ B$

* In python, it is the * operator



In [None]:
A = np.array([[1, 1],
              [0, 1]])
B = np.array([[2, 0],
              [3, 4]])

print("A\n",A)
print("B\n",B)
# Hadamard product: entry (i, j) of M is A[i, j] * B[i, j]
M = np.multiply(A, B)
print("Shape of A*B: ", M.shape)
print("M\n", M)

* Matrices must have same shape or can broadcast one into the other

In [None]:
# C is 4x2. Element-wise multiplication needs equal or broadcast-compatible
# shapes, so a mismatch is reported instead of aborting "Run All" with an
# uncaught ValueError.
C = np.array([5]*8).reshape(4,2)
try:
    print(A*C)
except ValueError as err:
    print(f"Cannot multiply shapes {A.shape} and {C.shape}: {err}")

In [None]:
# D is a (2, 1) column; the nested-list literal already has that shape.
D = np.array([[5], [6]])
print("A\n",A)
print("D\n",D)
print(A.shape,D.shape)
print(np.multiply(A, D))

#### Matrix times a vector

* Matrix as a transformation operator on a vector
    - Changes orientation and length of a vector (rotates and stretches or shrinks a vector)
* np.matmul or np.dot

In [None]:
# Apply the 5x2 matrix A to the 2-component vector v; b has 5 components.
A = np.arange(10).reshape(5, 2)
v = np.array([11, 12])
print("A\n",A)
print("v\n",v)
b = np.dot(A, v)
print("b\n",b)

In [None]:
# np.matmul as the alternative to A.dot(v) for a matrix-vector product
print(np.matmul(A,v))

### Diagonal and Trace

* Diagonal: $a_{ij}$ where i=j elements
* trace: sum of diagonal elements

In [None]:
# Given a 1-D array, np.diag builds a square matrix with those values on the
# main diagonal and zeros elsewhere
A = np.array([1,2,3])
np.diag(A)


In [None]:
# 5x5 identity matrix: ones on the diagonal, zeros elsewhere
np.eye(5)

In [None]:
# Trace = sum of the diagonal entries (1 + 5 + 9)
A = np.arange(1, 10).reshape(3, 3)
print("A\n",A)
print(f'Trace of A: {np.trace(A)}')

### Transpose

In [None]:
A = np.arange(1, 7).reshape(2, 3)
A_t = A.T
print("A\n",A)
print("A-transpose\n",A_t)
# A (2x3) times its transpose (3x2) is a square 2x2 matrix
print("<AA-transpose>\n",np.dot(A, A_t))


### Determinant
 
* Only for square matrices
* For 2x2 matrix $\begin{bmatrix}a & b \\ c & d\end{bmatrix}$, det = ad-bc 
* If non-zero, then matrix has an inverse

In [None]:
# det([[0, 1], [2, 3]]) = 0*3 - 1*2 = -2
A = np.array([[0, 1],
              [2, 3]])
la.det(A)

###  Matrix Inversion ($A^{-1}$)

$$ AA^{-1} = I$$

#### Must be a square matrix

* To be invertible, A must have non-zero determinant
    - If det(A) = 0 then A is called a singular matrix

In [None]:
# A non-singular 3x3 matrix (det = -10), so the inverse exists.
A = np.array([[1, 1, 2],
              [3, 4, 5],
              [6, 7, 1]])
print("A\n",A)
B = la.inv(A)
print("A Inverse\n",B)
# Rounding removes floating-point noise so the product reads as the identity
print("AA-inverse\n",np.round(A @ B))


In [None]:
# The second row is twice the first, so det(A) = 1*6 - 3*2 = 0: A is singular.
A = np.array([[1,3],[2,6]])
try:
    print(la.inv(A))
except la.LinAlgError as err:
    # The failure is the point of this cell; showing the error instead of
    # letting it propagate keeps "Restart & Run All" working.
    print(f"det(A) = {la.det(A)}; A has no inverse: {err}")

#### Moore-Penrose Inverse (for real-valued matrices)

* For any mxn matrix (not necessarily square) A that has full rank (i.e. independent rows or columns)
* If columns of A are linearly independent then:
$$ A^+ = (A^TA)^{-1}A^T$$
$$\text{Left Inverse } A^+A = I$$
* If the rows of A are linearly independent:
$$ A^+ = A^T(AA^T)^{-1}$$
$$\text{Right Inverse } AA^+ = I$$

In [None]:
# Tall 4x2 matrix, written out row by row so its entries are visible.
A = np.array([[1, 2],
              [3, 4],
              [5, 7],
              [9, 10]])

print("Linearly independent columns\n",A)

# Left pseudo-inverse from the formula (A^T A)^{-1} A^T
AT_A = A.T @ A
A_plus = la.inv(AT_A) @ A.T
print(f'A+\n{A_plus}')
# A+ A should be the 2x2 identity; rounding hides floating-point noise
I = (A_plus @ A).round(2)
I

In [None]:
# The same pseudo-inverse via NumPy's built-in routine
A_plus = la.pinv(A)
I = A_plus @ A

print(f'Left inverse: {A_plus}\nI = {I.round(4)}')

### Linear operations on vectors (in a vector space)
 
* Matrices are linear operators acting on column vectors
       
#### Linear system of equations
<div style="font-size: 115%;"> 
$$  Ax = b$$
</div>

$$
\begin{bmatrix}
    2 & 4 & 3 \\
    6 & 16 & 10 \\
    4 & 12 & 9
\end{bmatrix}
\begin{bmatrix} x_1 \\ x_2 \\ x_3\end{bmatrix}
=
\begin{bmatrix} 21 \\ 32 \\ 43\end{bmatrix}
$$
  
* To solve $x = A^{-1}b$ you must find $A^{-1}$ 
* Finding the inverse means solving $AA^{-1} = I$
* This is inefficient (and can induce numerical error) because you still had to solve a linear system of equations
* More efficient algorithms use matrix decomposition such as LU decomposition
    - Decomposes A into an Upper Triangular (U) and a Lower Triangular Matrix (L)
    - Solves L(U(x)) = b
   


In [None]:
A = np.array([[2, 4, 3],
              [6, 16, 10],
              [4, 12, 9]])
b = np.array([21, 32, 43])
# Solve Ax = b with la.solve rather than forming the inverse of A explicitly
x = la.solve(A, b)
print("Solution\n",x)

### Eigenvalues and Eigenvectors

<div style="font-size: 115%;">
$$Ax =  \lambda x$$
</div>

* $\lambda$ is an eigenvalue, x is an eigenvector
* The eigenvectors of a linear operator (matrix) are those vectors that don't change direction under the linear transformation.    
* They stretch (or shrink) by the amount indicated by the eigenvalue.  
* nxn matrices only   
* An nxn matrix has n eigenvalues (counted with multiplicity, possibly complex) and at most n linearly independent eigenvectors  
* Sum of eigenvalues = trace of A
* Product of eigenvalues = det(A) 
*   Many uses of eigenvalues/eigenvectors in statistics, machine learning, etc
    - Principal Component Analysis
    
#### To find all n eigenvalues and eigenvectors solve:
 <div style="font-size: 115%;">   
$$(A - \lambda I)x = 0$$ 
</div>

* $\lambda$ is the vector of eigenvalues
* x is the matrix of eigenvectors (i.e. the columns of x)

* numpy linear algebra module has function eig
     

In [None]:
A = np.arange(9).reshape(3,3)
print("A\n",A)
# la.eig returns the eigenvalues first and the eigenvector matrix second;
# column i of that matrix belongs to eigenvalue i.
E = la.eig(A)

# Eigenvalues
print("The eigenvalues are: ", np.round(E[0],5))

# Eigenvectors

print("The eigenvectors are: \n" , np.round(E[1],3))

In [None]:
# Unpack the result of la.eig directly: e holds the eigenvalues and the
# columns of v are the eigenvectors. Note that this rebinds the name v, which
# earlier cells used for a vector.
e,v = la.eig(A)
print("Eigenvalues\n",e)
print("Eigenvectors\n",v)

In [None]:
# Pair each eigenvalue with its eigenvector (the matching column of E[1])
for i, eigenvalue in enumerate(E[0]):
    print(f"Eigenvalue {i+1}: {eigenvalue}, Eigenvector: {E[1][:,i]} \n")

##### Positive Definite Matrices
* Symmetric
* A symmetric matrix is positive definite if its eigenvalues are all > 0
    - $\ge0$ for semi-definite
* A positive definite matrix guarantees that:

<div style="font-size: 115%;"> 
$$x^tAx > 0 \text{ for all non-zero x} \in R^n $$
</div>
* A positive semi-definite matrix guarantees:

<div style="font-size: 115%;"> 
$$x^tAx \ge 0 \text{ for all non-zero x} \in R^n $$  
</div>

* For 2x2 matrix, 
$\begin{bmatrix}a & b \\b & c\end{bmatrix}\text{ Positive Definite if } ac-b^2 > 0 \text{ for a > 0}$
 
* Generalization of positive real numbers to matrices
    - A positive times a positive is positive
    - Can take square roots



### Eigendecomposition

* If an n×n matrix A has n linearly independent eigenvectors, then A may be decomposed as follows:

<div style="font-size: 115%;"> 
$$A = B \Lambda B^{-1}$$
</div>

* $\Lambda$ is a diagonal matrix of the eigenvalues
* B is a matrix whose columns are the corresponding linearly independent eigenvectors



In [None]:
A = np.array([[0, 1, 1],
              [2, 1, 0],
              [3, 4, 5]])
print("A\n",A)
# u: eigenvalues (placed on the diagonal of LAMBDA); V: eigenvectors as columns (B)
u, V = la.eig(A)
print(f'B\n {V}\nLAMBDA\n {np.diag(u)}\nB-inverse\n {la.inv(V)}')

#### $B \Lambda B^{-1}$

In [None]:
# Multiply the three factors back together; rounding removes floating-point noise
print((V @ (np.diag(u) @ la.inv(V))).round(5))


### Singular Value Decomposition
$$A = UDV^t$$

* Where U and V are orthogonal, i.e. 
      
$$U^{-1}=U^t$$ 

$$V^{-1} = V^t$$

* D is a diagonal matrix whose diagonal entries are the singular values of A
* Used in Latent Semantic Analysis, Principal Component Analysis

In [None]:
# 3x3 example with entries X[i, j] = 1 / (i + j + 1)
X = np.array([[1,   1/2, 1/3],
              [1/2, 1/3, 1/4],
              [1/3, 1/4, 1/5]])
print("X\n",X)
# la.svd returns U, the singular values as a 1-D array, and V-transpose
S = la.svd(X)
print("U: \n", np.round(S[0],3))
print("D: \n", np.round(S[1],3))
print("V-transpose: \n", np.round(S[2],3))

### Exercises

Show that Linear Regression has a closed-form solution by calculating the coefficients for the data below using the normal equation

In [None]:
from sklearn.preprocessing import LabelEncoder,OneHotEncoder,StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score,mean_squared_error

In [None]:
# Read Howell.csv from the working directory; the file is parsed as
# semicolon-delimited.
howell = pd.read_csv("Howell.csv",sep=';')


# Keep only the rows with age > 17 and preview the last few.
# NOTE(review): if age can be fractional, this also keeps ages such as 17.5 --
# confirm whether "age >= 18" was intended.
adult = howell.query("age > 17")
adult.tail()

In [None]:
# Features: weight and age; target: height
X = adult[['weight','age']].values
y = adult['height'].values

# Reference fit to compare the normal-equation answer against
model = LinearRegression()
model.fit(X, y)
print(f'Intercept: {model.intercept_} \nCoefficients:: {model.coef_} ')

In [None]:
# Your code here

#### Show that the sum of the eigenvalues equals the trace of the matrix A

In [None]:
# Your code here

In [None]:
# Your code here

#### Show that the product of the eigenvalues equals the determinant of the matrix A

In [None]:
# Recompute the eigenvalues from the current A. Reusing `e` from the earlier
# eigenvalue cell would compare the eigenvalues of one matrix with the
# determinant of another, because A has been reassigned since that cell ran.
eigenvalues = la.eigvals(A)
np.isclose(np.prod(eigenvalues), la.det(A))