# PCA Using Eigen Decomposition

### Import the required libraries

In [None]:
import numpy as np

### Create a matrix contains the following data

![image.png](attachment:image.png)

In [None]:
matrix = np.array([[1,2,3,4],
                   [5,5,6,7],
                   [1,4,2,3],
                   [5,3,2,1],
                   [8,1,2,2]])
matrix

array([[1, 2, 3, 4],
       [5, 5, 6, 7],
       [1, 4, 2, 3],
       [5, 3, 2, 1],
       [8, 1, 2, 2]])

## Step 1: Standardize the dataset.

### Define a function to calculate data mean.
#### Hint: do not use np.mean() built in function. However, you can use np.sum().

In [None]:
def get_mean(matrix):
    list_mean = []
    for col in range(matrix.shape[1]):
         list_mean.append(matrix[:,col].sum()/matrix.shape[0])
    return np.array(list_mean)

### Check your function on the first and second features

In [None]:
mean_vec = get_mean(matrix)
mean_vec

array([4. , 3. , 3. , 3.4])

In [None]:
mean_vec[0:2]

array([4., 3.])

In [None]:
matrix_mean = matrix - mean_vec
matrix_mean

array([[-3. , -1. ,  0. ,  0.6],
       [ 1. ,  2. ,  3. ,  3.6],
       [-3. ,  1. , -1. , -0.4],
       [ 1. ,  0. , -1. , -2.4],
       [ 4. , -2. , -1. , -1.4]])

### Define a function to calculate standard deviation of the data.
#### Hint: do not use np.std() built in function. However, you can use np.sum().

![image-3.png](attachment:image-3.png)

In [None]:
def get_std(matrix, mean_vec):
    res_list = []
    for col in range(matrix.shape[1]):
        vec_mean = ((matrix[:,col] - mean_vec[col])**2).sum()
        vec_mean_over_n = vec_mean/(matrix.shape[0]-1)
        res_list.append(np.sqrt(vec_mean_over_n))
        
    return np.array(res_list)

### Check your function on the given features

In [None]:
get_std(matrix, mean_vec)

array([3.        , 1.58113883, 1.73205081, 2.30217289])

In [None]:
std_vec = get_std(matrix, mean_vec)

![image.png](attachment:image.png)

### Define a function to standardize the dataset.

In [None]:
def standardize_data(matrix):
    mean_vec = get_mean(matrix)
    std_vec = get_std(matrix, mean_vec)
    new_matrix = []
    for col in range(matrix.shape[1]):
        new_sample = [((val-mean_vec[col])/std_vec[col]) for val in matrix[:,col]]
        new_matrix.append(new_sample)
        
    return np.array(new_matrix).T

### Use your function to standardize the data

In [None]:
standardize_matrix = standardize_data(matrix)

In [None]:
standardize_matrix

array([[-1.        , -0.63245553,  0.        ,  0.26062335],
       [ 0.33333333,  1.26491106,  1.73205081,  1.56374007],
       [-1.        ,  0.63245553, -0.57735027, -0.1737489 ],
       [ 0.33333333,  0.        , -0.57735027, -1.04249338],
       [ 1.33333333, -1.26491106, -0.57735027, -0.60812114]])

![image.png](attachment:image.png)

## Step 2: Calculate the covariance matrix for the features in the dataset.

### Calculate covariance matrix using the formula: ![image-2.png](attachment:image-2.png)
##### This formula is used because the data has <b>zero</b> mean and <b>1</b> standard deviation.
<b>Note:</b> with respect to this formula our matrix is already transposed <b>(5*4)</b>

In [None]:
def get_covariance_matrix(matrix_stnd):
    return (matrix_stnd.T@matrix_stnd)/(matrix_stnd.shape[0])

In [None]:
cov_matrix = get_covariance_matrix(standardize_matrix)
cov_matrix

array([[ 0.8       , -0.25298221,  0.03849002, -0.14479075],
       [-0.25298221,  0.8       ,  0.51120772,  0.49449803],
       [ 0.03849002,  0.51120772,  0.8       ,  0.75235479],
       [-0.14479075,  0.49449803,  0.75235479,  0.8       ]])

![image.png](attachment:image.png)

## Step 3: Calculate the eigenvalues and eigenvectors for the covariance matrix.
## Step 4: Sort eigenvalues and their corresponding eigenvectors.

### Find eigen values and the corresponding eigen vectors for the covariance matrix
#### You can use NumPy built in function for this purpose 

In [None]:
eigen_value, eigen_vec = np.linalg.eig(cov_matrix)

In [None]:
def get_sorted_eigen(cov_matrix):
    eigen_value, eigen_vec = np.linalg.eig(cov_matrix)
    sorted_eigen_index = eigen_value.argsort()[::-1]
    sorted_eigen_value = []
    sorted_eigen_vec = []
    for index in sorted_eigen_index:
        sorted_eigen_value.append(eigen_value[index])
        sorted_eigen_vec.append(eigen_vec[index])
        
    return sorted_eigen_value, np.array(sorted_eigen_vec)

In [None]:
eigen_value, eigen_vec = get_sorted_eigen(cov_matrix)
print(eigen_value)
print(eigen_vec)

[2.012634592647956, 0.8522308028065152, 0.3151096350419572, 0.020024969503572694]
[[ 0.16195986 -0.91705888 -0.30707099  0.19616173]
 [-0.52404813  0.20692161 -0.81731886  0.12061043]
 [-0.58589647 -0.3205394   0.1882497  -0.72009851]
 [-0.59654663 -0.11593512  0.44973251  0.65454704]]


![image.png](attachment:image.png)

## Step 5: Pick k eigenvalues and form a matrix of eigenvectors.

### Select the first eigen vectors

In [None]:
eigen_vec[:,0]

array([ 0.16195986, -0.52404813, -0.58589647, -0.59654663])

![image.png](attachment:image.png)

## Step 6:Transform the original matrix.

In [None]:
def get_pca_features(standardize_matrix, eigen_vec, n_comp):
    return standardize_matrix @ eigen_vec[0:n_comp].T

![image.png](attachment:image.png)

## Congratulations 
![image.png](attachment:image.png)

In [None]:
n_cmpt = 2

new_features = get_pca_features(standardize_matrix, eigen_vec, n_cmpt)
new_features

array([[ 0.46916343,  0.42461331],
       [-1.33112789, -1.13997971],
       [-0.59875419,  1.10584019],
       [ 0.02677683,  0.17146098],
       [ 1.43394182, -0.56193477]])

In [None]:
np.set_printoptions(suppress=True)
new_features

array([[ 0.46916343,  0.42461331],
       [-1.33112789, -1.13997971],
       [-0.59875419,  1.10584019],
       [ 0.02677683,  0.17146098],
       [ 1.43394182, -0.56193477]])