In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# STEP 1: STANDARDIZATION
# Rescale every column of `features` to zero mean and unit sample standard
# deviation.

features = np.array([
    [1, 2, 3, 4],
    [5, 5, 6, 7],
    [1, 4, 2, 3],
    [5, 3, 2, 1],
    [8, 1, 2, 2],
])

# Column-wise statistics (axis=0); ddof=1 selects the n - 1 normalisation.
column_means = features.mean(axis=0)
column_stds = features.std(axis=0, ddof=1)

new_features = (features - column_means) / column_stds

new_features

array([[-1.        , -0.63245553,  0.        ,  0.26062335],
       [ 0.33333333,  1.26491106,  1.73205081,  1.56374007],
       [-1.        ,  0.63245553, -0.57735027, -0.1737489 ],
       [ 0.33333333,  0.        , -0.57735027, -1.04249338],
       [ 1.33333333, -1.26491106, -0.57735027, -0.60812114]])

In [3]:
# STEP 2: COMPUTE THE COVARIANCE MATRIX
# rowvar=False tells np.cov that each column of new_features is one variable.
# NOTE(review): ddof=0 normalises by n here, while STEP 1 standardises with
# ddof=1 (n - 1), so the diagonal comes out as (n - 1) / n instead of 1.
# That only rescales the eigenvalues; confirm whether ddof=1 was intended.

covar_mat = np.cov(new_features, rowvar=False, ddof=0)

covar_mat

array([[ 0.8       , -0.25298221,  0.03849002, -0.14479075],
       [-0.25298221,  0.8       ,  0.51120772,  0.49449803],
       [ 0.03849002,  0.51120772,  0.8       ,  0.75235479],
       [-0.14479075,  0.49449803,  0.75235479,  0.8       ]])

In [4]:
# STEP 3: COMPUTE THE EIGENVECTORS AND EIGENVALUES OF THE COVARIANCE MATRIX
# np.linalg.eig returns the eigenvalues together with a matrix whose column i
# is the eigenvector paired with eigenvalue i.
# NOTE(review): covar_mat is symmetric, so np.linalg.eigh would also apply, but
# it may order and sign the results differently — confirm before switching.

eigenValues, eigenVectors = np.linalg.eig(covar_mat)

# The "Feature" wording in the label is only the running index of the
# eigenpair; column idx of eigenVectors is not tied to input feature idx.
for idx, eigval in enumerate(eigenValues):
    # reshape(-1, 1) infers the length, so the column view works for any
    # number of features instead of exactly four.
    eigvec = eigenVectors[:, idx].reshape(-1, 1)
    print(f'Eigenvector for Feature {idx+1}: \n{eigvec}')
    print(f'Eigenvalue: {eigval}')
    print("-"*40)

Eigenvector for Feature 1: 
[[ 0.16195986]
 [-0.52404813]
 [-0.58589647]
 [-0.59654663]]
Eigenvalue: 2.0126345926479567
----------------------------------------
Eigenvector for Feature 2: 
[[-0.91705888]
 [ 0.20692161]
 [-0.3205394 ]
 [-0.11593512]]
Eigenvalue: 0.8522308028065159
----------------------------------------
Eigenvector for Feature 3: 
[[-0.30707099]
 [-0.81731886]
 [ 0.1882497 ]
 [ 0.44973251]]
Eigenvalue: 0.31510963504195705
----------------------------------------
Eigenvector for Feature 4: 
[[ 0.19616173]
 [ 0.12061043]
 [-0.72009851]
 [ 0.65454704]]
Eigenvalue: 0.02002496950357272
----------------------------------------


In [5]:
# STEP 4: SORTING EIGENVALUES AND THEIR CORRESPONDING EIGENVECTORS
# Pair each eigenvalue magnitude with its eigenvector (column i of
# eigenVectors) and order the pairs from largest to smallest eigenvalue.

eig_sorted = [(np.abs(eigval), eigenVectors[:, i]) for i, eigval in enumerate(eigenValues)]
# Sort on the eigenvalue only. Comparing whole tuples would fall back to
# comparing the eigenvector arrays whenever two eigenvalues are equal, and
# that raises "The truth value of an array ... is ambiguous".
eig_sorted.sort(key=lambda pair: pair[0], reverse=True)
for eigval, eigvec in eig_sorted:
    print(f"Eigenvalue = {eigval}, Eigenvector = {eigvec}")

Eigenvalue = 2.0126345926479567, Eigenvector = [ 0.16195986 -0.52404813 -0.58589647 -0.59654663]
Eigenvalue = 0.8522308028065159, Eigenvector = [-0.91705888  0.20692161 -0.3205394  -0.11593512]
Eigenvalue = 0.31510963504195705, Eigenvector = [-0.30707099 -0.81731886  0.1882497   0.44973251]
Eigenvalue = 0.02002496950357272, Eigenvector = [ 0.19616173  0.12061043 -0.72009851  0.65454704]


In [6]:
# STEP 5: PICK K VALUE
# Keep the first k eigenvectors from eig_sorted and place them side by side
# as the columns of the projection matrix.

n_components = 2  # k: number of leading eigenvectors kept

# column_stack turns each 1-D eigenvector into one column, giving an
# (n_features, k) matrix without hard-coding either dimension.
matrix_k = np.column_stack([eigvec for _, eigvec in eig_sorted[:n_components]])
print (matrix_k)

# Shapes of the two operands multiplied in the next step.
print(new_features.shape , " * " , matrix_k.shape)

[[ 0.16195986 -0.91705888]
 [-0.52404813  0.20692161]
 [-0.58589647 -0.3205394 ]
 [-0.59654663 -0.11593512]]
(5, 4)  *  (4, 2)


In [7]:
# STEP 6: TRANSFORM THE ORIGINAL MATRIX
# Multiply the standardized data by the selected eigenvectors. The result has
# one row per row of new_features and one column per column of matrix_k.

multiplication = new_features.dot(matrix_k)
multiplication

array([[ 1.40033078e-02,  7.55974765e-01],
       [-2.55653399e+00, -7.80431775e-01],
       [-5.14801919e-02,  1.25313470e+00],
       [ 1.01415002e+00,  2.38808310e-04],
       [ 1.57986086e+00, -1.22891650e+00]])