## Eigenvectors and Eigenvalues
### Use numpy only
#### Find the eigenvalues and eigenvectors of the following matrices:

$$\begin{bmatrix} 1&0 \\ 0&2 \end{bmatrix}$$
$$\begin{bmatrix} 1&2 \\ 3&-4 \end{bmatrix}$$

In [1]:
import numpy as np

In [8]:
# First exercise matrix: it is diagonal, so the eigenvalues are simply the
# diagonal entries.
a = np.array([
    [1, 0],
    [0, 2],
])

In [6]:
# np.linalg.eig returns (eigenvalues, eigenvectors); the eigenvector that
# belongs to val[i] is the COLUMN vec[:, i].
val, vec = np.linalg.eig(a)

In [7]:
# Display the eigenvalues together with the matrix whose columns are the
# corresponding eigenvectors.
val, vec

(array([1., 2.]),
 array([[1., 0.],
        [0., 1.]]))

In [12]:
# Second exercise matrix (non-symmetric).
a = np.array([
    [1, 2],
    [3, -4],
])

In [13]:
# Eigen-decomposition of the second matrix; vec[:, i] pairs with val[i].
val, vec = np.linalg.eig(a)

In [14]:
# Display the eigenvalues and the (unit-length, column-wise) eigenvectors.
val, vec

(array([ 2., -5.]),
 array([[ 0.89442719, -0.31622777],
        [ 0.4472136 ,  0.9486833 ]]))

# PCA Using Eigen Decomposition

### Create a matrix containing the following data

In [26]:
# Toy dataset: 5 observations (rows) by 4 features (columns).
x = np.array([
    [1, 2, 3, 4],
    [5, 5, 6, 7],
    [1, 4, 2, 3],
    [5, 3, 2, 1],
    [8, 1, 2, 2],
])
x

array([[1, 2, 3, 4],
       [5, 5, 6, 7],
       [1, 4, 2, 3],
       [5, 3, 2, 1],
       [8, 1, 2, 2]])

### Step 1: Standardize the dataset (Subtract mean and divide by standard deviation).

In [28]:
# Column-wise (per-feature) statistics. ndarray.std defaults to ddof=0, i.e.
# the population standard deviation.
mean, std = x.mean(axis=0), x.std(axis=0)
mean, std

(array([4. , 3. , 3. , 3.4]),
 array([2.68328157, 1.41421356, 1.54919334, 2.05912603]))

In [29]:
# Standardize every column to zero mean and unit (population) variance.
#
# The statistics are taken from the current `x` instead of the `mean`/`std`
# variables computed in an earlier cell. On the first run this yields exactly
# the same result; if this cell is re-run on its own, it becomes a no-op (up
# to rounding) rather than re-applying the raw-data statistics to data that
# has already been standardized.
x = (x - x.mean(axis=0)) / x.std(axis=0)
x

array([[-1.11803399, -0.70710678,  0.        ,  0.29138576],
       [ 0.372678  ,  1.41421356,  1.93649167,  1.74831455],
       [-1.11803399,  0.70710678, -0.64549722, -0.19425717],
       [ 0.372678  ,  0.        , -0.64549722, -1.16554303],
       [ 1.49071198, -1.41421356, -0.64549722, -0.6799001 ]])

### Step 2: Calculate the covariance matrix for the features in the dataset.
#### Use the formula (X.T@X) / n (valid because the standardized columns have zero mean), then confirm using np.cov()

In [39]:
# Covariance matrix of the standardized data via X^T X / n, where n is the
# number of rows (population normalization).
con_m = (x.T @ x) / len(x)

In [42]:
# Cross-check the manual formula against NumPy. np.cov treats each ROW as a
# variable, hence the transpose; ddof=0 selects the same divide-by-n
# normalization used in (X.T @ X) / n.
cov_np = np.cov(x.T, ddof=0)

# Actually confirm the two agree instead of only eyeballing the printout.
assert np.allclose(cov_np, (x.T @ x) / x.shape[0]), "manual covariance disagrees with np.cov"
cov_np

array([[ 1.        , -0.31622777,  0.04811252, -0.18098843],
       [-0.31622777,  1.        ,  0.63900965,  0.61812254],
       [ 0.04811252,  0.63900965,  1.        ,  0.94044349],
       [-0.18098843,  0.61812254,  0.94044349,  1.        ]])

Using np.cov():
[[ 1.         -0.31622777  0.04811252 -0.18098843]
 [-0.31622777  1.          0.63900965  0.61812254]
 [ 0.04811252  0.63900965  1.          0.94044349]
 [-0.18098843  0.61812254  0.94044349  1.        ]]


### Step 3: Calculate the eigenvalues and eigenvectors for the covariance matrix.
### Step 4: Sort eigenvalues and their corresponding eigenvectors.

In [43]:
# Step 3: eigen-decompose the covariance matrix. Column i of the eigenvector
# matrix belongs to eigenvalue i.
eig_vals, eig_vecs = np.linalg.eig(con_m)

# Step 4: np.linalg.eig does not promise any ordering of the eigenvalues, so
# sort explicitly by decreasing eigenvalue and reorder the eigenvector
# columns to match.
order = np.argsort(eig_vals)[::-1]

# NOTE: `con_m` is rebound from the covariance matrix to the sorted
# (eigenvalues, eigenvectors) pair because the following cells index it as
# con_m[0] / con_m[1]. As a consequence this cell cannot be re-run without
# first re-running the cell that builds the covariance matrix.
con_m = (eig_vals[order], eig_vecs[:, order])
con_m

(array([2.51579324, 1.0652885 , 0.39388704, 0.02503121]),
 array([[ 0.16195986, -0.91705888, -0.30707099,  0.19616173],
        [-0.52404813,  0.20692161, -0.81731886,  0.12061043],
        [-0.58589647, -0.3205394 ,  0.1882497 , -0.72009851],
        [-0.59654663, -0.11593512,  0.44973251,  0.65454704]]))

In [44]:
# Eigenvalues of the covariance matrix (first element of the decomposition).
con_m[0]

array([2.51579324, 1.0652885 , 0.39388704, 0.02503121])

In [45]:
# Eigenvectors of the covariance matrix, one per column.
con_m[1]

array([[ 0.16195986, -0.91705888, -0.30707099,  0.19616173],
       [-0.52404813,  0.20692161, -0.81731886,  0.12061043],
       [-0.58589647, -0.3205394 ,  0.1882497 , -0.72009851],
       [-0.59654663, -0.11593512,  0.44973251,  0.65454704]])

### Step 5: Pick k eigenvalues and form a matrix of eigenvectors.

#### Select the first 2 eigenvectors

In [49]:
# Keep all rows and the first two columns, i.e. the first two eigenvectors
# (k = 2).
con_m[1][:,:2]

array([[ 0.16195986, -0.91705888],
       [-0.52404813,  0.20692161],
       [-0.58589647, -0.3205394 ],
       [-0.59654663, -0.11593512]])

array([[ 0.16195986, -0.91705888],
       [-0.52404813,  0.20692161],
       [-0.58589647, -0.3205394 ],
       [-0.59654663, -0.11593512]])

### Step 6: Transform the original matrix.

In [61]:
# Project the standardized data onto the first two eigenvectors:
# (5 x 4) @ (4 x 2) -> (5 x 2).
x @ con_m[1][:, :2]

array([[ 1.56561741e-02,  8.45205482e-01],
       [-2.85829190e+00, -8.72549250e-01],
       [-5.75566043e-02,  1.40104719e+00],
       [ 1.13385419e+00,  2.66995807e-04],
       [ 1.76633814e+00, -1.37397042e+00]])

Transformed Data:
[[ 0.01565617  0.84520548]
 [-2.8582919  -0.87254925]
 [-0.0575566   1.40104719]
 [ 1.13385419  0.000267  ]
 [ 1.76633814 -1.37397042]]


## SVD
### Repeat using SVD and compare the results

In [54]:
# Full SVD of the standardized data: x = U @ diag(s) @ Vh.
# svd[0] is U, svd[1] holds the singular values, and svd[2] is Vh, whose ROWS
# are the right singular vectors (the eigen-decomposition stores its vectors
# as COLUMNS instead).
svd = np.linalg.svd(x)
v = svd[-1]
v

array([[ 0.16195986, -0.52404813, -0.58589647, -0.59654663],
       [-0.91705888,  0.20692161, -0.3205394 , -0.11593512],
       [-0.30707099, -0.81731886,  0.1882497 ,  0.44973251],
       [ 0.19616173,  0.12061043, -0.72009851,  0.65454704]])

In [80]:
# Share of the SUM OF SINGULAR VALUES carried by the first two components.
# NOTE(review): if the intent is the fraction of variance explained, the
# singular values must be squared first (variance_i = s_i**2 / n) — confirm.
(svd[1]/svd[1].sum(axis=0))[:2].sum()


0.7691537684455487

In [33]:
# Singular values of x.
# NOTE(review): the saved output under this cell is a matrix labelled "V:",
# not the singular values — it looks stale; re-run the cell to refresh it.
svd[1]

V:
[[ 0.16195986 -0.91705888 -0.30707099  0.19616173]
 [-0.52404813  0.20692161 -0.81731886  0.12061043]
 [-0.58589647 -0.3205394   0.1882497  -0.72009851]
 [-0.59654663 -0.11593512  0.44973251  0.65454704]]


In [56]:
# Singular values of x; np.linalg.svd returns them in descending order.
svd[1]

array([3.54668383, 2.30790869, 1.40336568, 0.35377402])

In [82]:
# Link between the two approaches: eigenvalue_i = s_i**2 / n.
# Use the computed leading singular value and the actual sample count rather
# than digits copied from a printout, so the check follows the data.
svd[1][0] ** 2 / x.shape[0]

2.515793237996694

In [58]:
# U: the left singular vectors (5 x 5 here, since np.linalg.svd computes the
# full matrices by default).
svd[0]

array([[ 4.41431344e-03,  3.66221370e-01,  7.49835326e-01,
        -3.21884149e-01,  4.47213595e-01],
       [-8.05905470e-01, -3.78069225e-01, -8.51411283e-02,
        -1.81940451e-02,  4.47213595e-01],
       [-1.62282873e-02,  6.07063528e-01, -3.16021938e-01,
         5.75621968e-01,  4.47213595e-01],
       [ 3.19694183e-01,  1.15687336e-04, -5.41652025e-01,
        -6.35931364e-01,  4.47213595e-01],
       [ 4.98025260e-01, -5.95331360e-01,  1.92979765e-01,
         4.00387590e-01,  4.47213595e-01]])

In [63]:
# Project onto the first two right singular vectors. They are the first two
# ROWS of Vh, so transpose them into columns before multiplying:
# (5 x 4) @ (4 x 2) -> (5 x 2).
x @ svd[2][:2].T

array([[ 1.56561741e-02,  8.45205482e-01],
       [-2.85829190e+00, -8.72549250e-01],
       [-5.75566043e-02,  1.40104719e+00],
       [ 1.13385419e+00,  2.66995807e-04],
       [ 1.76633814e+00, -1.37397042e+00]])

Transformed Data:
[[ 0.01565617  0.84520548]
 [-2.8582919  -0.87254925]
 [-0.0575566   1.40104719]
 [ 1.13385419  0.000267  ]
 [ 1.76633814 -1.37397042]]
