## PCA with singular value decomposition (Without SVD libraries).

In [9]:
# Importing all the neccasary libraries
import numpy as np
import pandas as pd
import seaborn as sb
from numpy.linalg import multi_dot
from sklearn.preprocessing import StandardScaler
import statistics
import scipy
import time

start = time.process_time()

#Reading the provided dataset
df = pd.read_csv('DataB.csv')

X = df.iloc[:,1:784].values
y = df.iloc[:,785].values

# Centering of the training data ( Crucial step before performing PCA, as we need training data with zero expectation)
X= X.astype(float)
y= y.astype(float)

meanPoint = X.mean()

# Obtaining Centered matrix
m = X - meanPoint
m_t = m.T

# When it comes to SVD we need to tabulate the left and right singular matrices
s,U = np.linalg.eig(m.dot(m_t)) #computation of left singular matrix 
s = s.real
U = U.real
U_transpose=U.T

#Sorting out the top 20 eigenvalues 
idx = s.argsort()[::-1]   
s = s[idx[0:20]]
sigma = scipy.linalg.sqrtm(np.diag(s))
sigma = sigma.real
U = U[:,idx[0:20]]
U_transpose = U.T

print("===============================================")
print('Eigenvalues in descending order, for top 20 eigen-vectors:-')
print("===============================================")
print(s)
print("===============================================")
print('Left Singular Matrix :-')
print("===============================================")
print(U)
print("===============================================")

#Computation of right singular matrix 

z,V = np.linalg.eig((m_t.dot(m)))
z = z.real
V = V.real
idx = z.argsort()[::-1]   
z = z[idx[0:20]]
V = V[:,idx[0:20]]
V_transpose = V.T
print('Right Singular Matrix')
print("===============================================")
print(V_transpose)
print("===============================================")

SVD = multi_dot([U,sigma,V_transpose])

#Projection of data in PCA

X_Projected = U_transpose.dot(m)
print('PCA Projected_data:-')
print("===============================================")
print(X_Projected)
print("===============================================")

#time taken for the entire process
print('Time taken for the entire process in seconds :-')
print(time.process_time() - start)
print("===============================================")

Eigenvalues in descending order, for top 20 eigen-vectors:-
[2.87613387e+09 9.03999185e+08 5.67843707e+08 4.30183899e+08
 3.93447228e+08 3.01072918e+08 2.54144322e+08 2.11899603e+08
 1.86220206e+08 1.53945926e+08 1.43250024e+08 1.30837622e+08
 1.19055124e+08 1.11290034e+08 1.00947783e+08 9.36732324e+07
 8.33023059e+07 8.27355540e+07 7.88396661e+07 7.43448480e+07]
Left Singular Matrix :-
[[ 0.02608079 -0.03098419  0.02459456 ... -0.00326053 -0.02320769
   0.01853263]
 [ 0.02833284 -0.03053049  0.02607502 ... -0.0120465  -0.02389437
  -0.00341775]
 [ 0.03525069 -0.0098541   0.00039236 ...  0.06079212 -0.02777242
  -0.03670641]
 ...
 [ 0.01724118 -0.00524751 -0.03461332 ... -0.0013231   0.01247733
   0.00058542]
 [ 0.01360467  0.00639588 -0.02522251 ... -0.01632475 -0.0097955
   0.00940989]
 [ 0.02280473  0.02145591 -0.02320778 ...  0.03323365 -0.00653174
  -0.04841733]]
Right Singular Matrix
[[-0.02546045 -0.02544036 -0.02550589 ... -0.02546466 -0.02543407
  -0.02556298]
 [ 0.00709293  0

## Comparison :-
### Computation of SVD using SVD library for comparision with the obtained vectors

In [10]:
df = pd.read_csv('DataB.csv')

X = df.iloc[:,1:784].values
y = df.iloc[:,785].values

#Computing SVD with the SVD library
a,b,c = np.linalg.svd(m)
print("===============================================")
print('Eigenvalues in descending order, for top 20 eigen-vectors:-')
print("===============================================")
b= np.square(np.diag(b[0:20]))
b= b.dot([1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1])
print(b)
print("===============================================")
print('Left Singular Matrix :-')
print("===============================================")
print(a)
print("===============================================")
print('Right Singular Matrix')
print("===============================================")
print(c)
print("===============================================")

Eigenvalues in descending order, for top 20 eigen-vectors:-
[2.87613387e+09 9.03999185e+08 5.67843707e+08 4.30183899e+08
 3.93447228e+08 3.01072918e+08 2.54144322e+08 2.11899603e+08
 1.86220206e+08 1.53945926e+08 1.43250024e+08 1.30837622e+08
 1.19055124e+08 1.11290034e+08 1.00947783e+08 9.36732324e+07
 8.33023059e+07 8.27355540e+07 7.88396661e+07 7.43448480e+07]
Left Singular Matrix :-
[[-2.60807859e-02  3.09841851e-02  2.45945641e-02 ... -3.52717451e-02
  -2.71579576e-03 -8.97115684e-03]
 [-2.83328350e-02  3.05304894e-02  2.60750178e-02 ...  1.24074976e-02
   1.47695924e-03  1.35266948e-02]
 [-3.52506869e-02  9.85410151e-03  3.92362518e-04 ... -4.38225561e-03
  -4.06832561e-03 -3.73096861e-02]
 ...
 [-1.72411829e-02  5.24751043e-03 -3.46133154e-02 ...  7.13589357e-01
   1.59510338e-02  8.12573500e-03]
 [-1.36046717e-02 -6.39587571e-03 -2.52225091e-02 ...  2.46999093e-02
   6.43524877e-01  3.72682925e-02]
 [-2.28047318e-02 -2.14559141e-02 -2.32077812e-02 ...  8.55771480e-03
   3.39799

### SVD performed with the SVD library is good match  with the one carried out without the library in the initial stages of the problem.
### The computed eigen vectors, have different directions as opposed to the one's tabulated with the SVD library, but the magnitude of the top 20 eigen vectors match with the top vectors computed by the SVD library.