
# Question 3.



# Importing Libraries

In [151]:
import numpy as np
from scipy.linalg import orth
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds,eigs
import matplotlib.pyplot as plt

# PCA class

In [152]:
class Denoiser:
  """PCA projector/reconstructor for a samples-by-features data matrix.

  The data given to the constructor is standardized per column (mean
  subtracted, divided by the column standard deviation) and its leading
  principal directions are obtained either from a truncated SVD
  (``get_svs``) or from the eigen-decomposition of the feature covariance
  matrix (``get_eigvs``).
  """

  def __init__(self,data,final_dim) :
    """Store the data together with its column-wise mean and std.

    Args:
      data: 2-D array with samples in rows and features in columns.
      final_dim: number of principal directions to keep.
    """
    self.data=data
    self.mean=data.mean(axis=0)
    self.std=data.std(axis=0)
    self.dim=final_dim

  def _standardize(self,D):
    """Center ``D`` with the stored mean and scale it by the stored std."""
    # A constant column has std 0; dividing by 1 instead leaves it at exactly 0
    # after centering rather than filling the whole matrix with NaN.
    scale=np.where(self.std==0,1.0,self.std)
    return (D-self.mean)/scale

  #SVD Based
  def get_svs(self) :
    """Return the top ``final_dim`` right singular vectors as columns.

    Returns:
      Array of shape (n_features, final_dim), ordered by decreasing
      singular value. ``svds`` requires ``final_dim`` to be smaller than
      both dimensions of the data.
    """
    zero_mean=self._standardize(self.data)
    u,s,vt=svds(zero_mean,k=self.dim)
    # Sort explicitly instead of assuming svds returns ascending singular values.
    order=np.argsort(s)[::-1]
    return vt[order].T

  #Covariance_matrix Based
  def get_eigvs(self):
    """Return the top ``final_dim`` eigenvectors of the feature covariance.

    Returns:
      Array of shape (n_features, final_dim), ordered by decreasing
      eigenvalue.
    """
    zero_mean=self._standardize(self.data)
    l=self.data.shape[0]
    # Features x features covariance (X^T X / n). The samples x samples product
    # X X^T would yield vectors of the wrong length for projecting data rows.
    cov=(1/l)*(zero_mean.T.dot(zero_mean))
    # The covariance is symmetric, so eigh gives real eigenpairs (ascending order).
    vals,eig=np.linalg.eigh(cov)
    order=np.argsort(vals)[::-1][:self.dim]
    return eig[:,order]

  def _project(self,D,svs,eigvs):
    """Return ``(projected, basis)`` for the selected method, or None if none is selected."""
    if svs:
      v=self.get_svs()
    elif eigvs:
      v=self.get_eigvs()
    else:
      return None
    return self._standardize(D).dot(v),v

  def get_dim(self,D,svs=False,eigvs=False):
    """Project ``D`` onto the principal directions.

    Args:
      D: 2-D array with the same feature columns as the training data.
      svs: use the SVD-based directions; takes precedence over ``eigvs``.
      eigvs: use the covariance-eigenvector directions.

    Returns:
      ``(projected, basis)`` when ``svs`` is set, ``projected`` alone when
      only ``eigvs`` is set, and ``None`` when neither flag is set.
    """
    projected=self._project(D,svs,eigvs)
    if projected is None:
      return None
    final,v=projected
    if svs:
      return final,v
    return final

  def Reconstruct(self,D,svs=False,eigvs=False) :
    """Project ``D`` to ``final_dim`` dimensions and map it back to feature space.

    Args:
      D: 2-D array with the same feature columns as the training data.
      svs: use the SVD-based directions; takes precedence over ``eigvs``.
      eigvs: use the covariance-eigenvector directions.

    Returns:
      Array shaped like ``D`` in the original (un-standardized) units.

    Raises:
      ValueError: if neither ``svs`` nor ``eigvs`` is set.
    """
    projected=self._project(D,svs,eigvs)
    if projected is None:
      raise ValueError("Reconstruct requires svs=True or eigvs=True")
    Final,v=projected
    # Undo the standardization: rescale by the std, then add the mean back.
    Recons=Final.dot(v.T)*self.std+self.mean
    return Recons

In [153]:
def Reconstructor(lower_dim_data,v):
  """Map low-dimensional coordinates back to feature space.

  ``lower_dim_data`` holds one sample per row and ``v`` holds one basis
  vector per column; the result is ``lower_dim_data`` times ``v`` transposed.
  """
  basis_as_rows=v.T
  return np.dot(lower_dim_data,basis_as_rows)
def dis_by_rec(data,rec_data):
  """Return the total reconstruction error.

  The error is the Euclidean distance between each row of ``data`` and the
  matching row of ``rec_data``, summed over all rows.
  """
  residual=data-rec_data
  squared_row_norms=np.sum(residual**2,axis=1)
  row_norms=squared_row_norms**0.5
  return np.sum(row_norms,axis=0)

# Computing the Distance

In [154]:
def get_total_distance(data,projected_data):
  """Sum of distances between each sample and its projection onto a subspace.

  Uses Pythagoras: for an orthonormal basis, the squared distance from a
  sample to its projection equals the squared length of the sample minus
  the squared length of its coordinates in that basis.

  Args:
    data: 2-D array, one sample per row.
    projected_data: coordinates of the same samples in the subspace basis,
      one sample per row. The formula is only valid when that basis is
      orthonormal.

  Returns:
    The sum over all samples of the sample-to-projection distance.
  """
  # Use the ``data`` argument, not the notebook-level ``A``, so the function
  # works for any input matrix.
  length_squared=np.sum(data*data,axis=1)
  proj_length_squared=np.sum(projected_data*projected_data,axis=1)
  # Rounding can push an exactly-zero distance slightly below zero, which would
  # turn the square root into NaN; clamp at zero.
  distance_squared=np.maximum(length_squared-proj_length_squared,0)
  Sum_of_distance=np.sum(distance_squared**0.5)
  return Sum_of_distance

# Lower Dimension Converter

In [155]:
def get_lower_dim(data,basis):
  """Express ``data`` in the coordinates of ``basis``.

  Expects ``data`` with samples in rows and features in columns, and
  ``basis`` with one basis vector per column. Returns ``data`` times ``basis``.
  """
  coordinates=np.dot(data,basis)
  return coordinates

# Randomly Generated Data out of Gaussian Distribution

In [156]:
# Synthetic data: 10 features, each an independent draw of 200 standard-normal values.
np.random.seed(0)
# Each draw is one feature; transposing puts samples in rows and features in columns.
A=np.array([np.random.normal(size=200) for _ in range(10)]).T
# Standardize every column to zero mean and unit variance.
A=(A-np.mean(A,axis=0))/np.std(A,axis=0)

In [157]:
# Basis of the PCA plane: the top two right singular vectors of A, one per column.
PCA = Denoiser(A, final_dim=2).get_svs()

In [158]:
# Coordinates of every sample in the two-dimensional PCA basis.
Proj_A = get_lower_dim(A, PCA)

**Total distance between the data points and their projections onto the plane spanned by the top two PCs**

In [159]:
# Total distance between each sample and its projection when the basis is the 2 PCs.
get_total_distance(A, Proj_A)

524.377669166967

**Reconstruction Error**

In [160]:
# Same quantity measured directly: project onto the PCs, reconstruct, and sum the row-wise errors.
dis_by_rec(
  A,
  Reconstructor(get_lower_dim(A, PCA), PCA),
)

524.377669166967

**Total distance between the data points and their projections onto planes spanned by random orthonormal bases**

In [161]:
# Repeat the two distance measurements for 50 random two-dimensional planes.
np.random.seed(1)
G=[]  # one [plane number, Pythagoras distance, reconstruction error] row per plane
for i in range(1,51):
  # Orthonormalize a random 10x10 Gaussian matrix and keep its first two columns as the plane's basis.
  plane=orth(np.random.randn(10,10).astype(np.float64))[:,:2]
  Proj_A=get_lower_dim(A,plane)
  d=get_total_distance(A,Proj_A)
  # Proj_A already holds get_lower_dim(A,plane), so reuse it for the reconstruction.
  D=dis_by_rec(A,Reconstructor(Proj_A,plane))
  G.append([i,d,D])
  print(f"For plane {i:02d} the distance computed by Pythagoras formula is:  {d:.3f}     by reconstruction error is:  {D:.3f}")

For plane 01 the distance computed by Pythagoras formula is:  544.267     by reconstruction error is:  544.267
For plane 02 the distance computed by Pythagoras formula is:  551.709     by reconstruction error is:  551.709
For plane 03 the distance computed by Pythagoras formula is:  554.627     by reconstruction error is:  554.627
For plane 04 the distance computed by Pythagoras formula is:  550.303     by reconstruction error is:  550.303
For plane 05 the distance computed by Pythagoras formula is:  541.405     by reconstruction error is:  541.405
For plane 06 the distance computed by Pythagoras formula is:  548.053     by reconstruction error is:  548.053
For plane 07 the distance computed by Pythagoras formula is:  543.413     by reconstruction error is:  543.413
For plane 08 the distance computed by Pythagoras formula is:  547.161     by reconstruction error is:  547.161
For plane 09 the distance computed by Pythagoras formula is:  546.619     by reconstruction error is:  546.619
F

In [162]:
# Tabulate the per-plane results, indexed by plane number, and preview the first rows.
D=pd.DataFrame(
  G,
  columns=['Plane Number','Total Distance (By Pythagoras)','Reconstruction Error'],
).set_index('Plane Number')
D.head()

Unnamed: 0_level_0,Total Distance (By Pythagoras),Reconstruction Error
Plane Number,Unnamed: 1_level_1,Unnamed: 2_level_1
1,544.267034,544.267034
2,551.708526,551.708526
3,554.626542,554.626542
4,550.302629,550.302629
5,541.404525,541.404525


In [163]:
# Persist the results table as CSV in the working directory.
D.to_csv('distance.csv')