
# Question 3.



# Importing Libraries

In [12]:
import numpy as np
from scipy.linalg import orth
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds,eigs
import matplotlib.pyplot as plt

# PCA class

In [13]:
class Denoiser:
  """PCA-style projector/denoiser working on standardized data.

  The constructor stores the column-wise mean and standard deviation of
  ``data`` (statistics are taken along ``axis=0``, i.e. rows are samples
  and columns are features). Every method standardizes its input with
  those statistics before using the principal directions.

  Parameters
  ----------
  data : array-like with ``mean(axis=0)`` / ``std(axis=0)``
      Training data used to estimate the statistics and the directions.
  final_dim : int
      Number of principal directions to keep.
  """

  def __init__(self,data,final_dim) :
    self.data=data
    self.mean=data.mean(axis=0)
    self.std=data.std(axis=0)
    self.dim=final_dim

  def _standardized(self):
    """Return the training data, centered and scaled, as a float ndarray."""
    return np.asarray((self.data-self.mean)/self.std,dtype=float)

  #SVD Based
  def get_svs(self) :
    """Return the top ``self.dim`` right singular vectors as columns.

    Returns
    -------
    ndarray of shape (n_features, self.dim); columns are ordered by
    decreasing singular value.
    """
    zero_mean=self._standardized()
    _,s,vt=svds(zero_mean,k=self.dim)
    # Sort explicitly instead of assuming the order svds happens to return.
    order=np.argsort(s)[::-1]
    return vt[order].T

  #Covariance_matrix Based
  def get_eigvs(self):
    """Return the top ``self.dim`` eigenvectors of the feature covariance.

    Returns
    -------
    ndarray of shape (n_features, self.dim); columns are ordered by
    decreasing eigenvalue.
    """
    zero_mean=self._standardized()
    n_samples=zero_mean.shape[0]
    # Feature-by-feature covariance (Z^T Z); Z Z^T would be samples-by-samples
    # and its eigenvectors could not be used to project the data.
    cov=zero_mean.T.dot(zero_mean)/n_samples
    # The covariance is symmetric, so eigh yields real eigenpairs directly.
    vals,vecs=np.linalg.eigh(cov)
    order=np.argsort(vals)[::-1][:self.dim]
    return vecs[:,order]

  def get_dim(self,D,svs=False,eigvs=False):
    """Project ``D`` onto the principal directions.

    Parameters
    ----------
    D : data with the same feature layout as the training data.
    svs : use the SVD-based directions (takes precedence if both are set).
    eigvs : use the covariance-eigenvector directions.

    Returns
    -------
    (projected, basis) : the reduced coordinates of ``D`` and the basis
    matrix whose columns are the directions used.

    Raises
    ------
    ValueError
        If neither ``svs`` nor ``eigvs`` is selected.
    """
    if svs:
      v=self.get_svs()
    elif eigvs:
      v=self.get_eigvs()
    else:
      raise ValueError("Select a method: pass svs=True or eigvs=True")
    Dt=(D-self.mean)/self.std
    return Dt.dot(v),v

  def Reconstruct(self,D,svs=False,eigvs=False) :
    """Project ``D`` to ``self.dim`` dimensions and map it back.

    The projection is lifted back with the transpose of the basis and the
    standardization is undone, so the result has the shape of ``D``.

    Raises
    ------
    ValueError
        If neither ``svs`` nor ``eigvs`` is selected.
    """
    Final,v=self.get_dim(D,svs=svs,eigvs=eigvs)
    return Final.dot(v.T)*self.std+self.mean

# Distance Computation

In [14]:
def get_total_distance(data,projected_data):
  """Sum, over all rows, of the distance from a vector to its projection.

  For each row the squared distance is computed as
  ``||data_row||^2 - ||projected_row||^2``, which equals the squared
  distance to the projection when ``projected_data`` holds coordinates in
  an orthonormal basis.

  Parameters
  ----------
  data : array-like, shape (n_samples, n_features)
      Original vectors, one per row.
  projected_data : array-like, shape (n_samples, k)
      Coordinates of the same rows in the projection basis.

  Returns
  -------
  float
      Sum of the per-row distances.
  """
  data=np.asarray(data,dtype=float)
  projected_data=np.asarray(projected_data,dtype=float)
  length_squared=np.sum(data*data,axis=1)
  proj_length_squared=np.sum(projected_data*projected_data,axis=1)
  # Rounding can push the difference marginally below zero; clip so the
  # square root never produces NaN.
  distance_squared=np.maximum(length_squared-proj_length_squared,0.0)
  return np.sum(np.sqrt(distance_squared))

# Lower Dimension Converter

In [15]:
def get_lower_dim(data,basis):
  """Express ``data`` in the coordinates of ``basis``.

  ``data`` is expected to hold samples in its rows and features in its
  columns; each column of ``basis`` is one basis vector. The result is the
  matrix product ``data . basis``.
  """
  coordinates=np.dot(data,basis)
  return coordinates

# Randomly Generated Data out of Gaussian Distribution

In [16]:
# Fixed seed so the data matrix is reproducible.
np.random.seed(0)
# 200 samples (rows) x 10 features (columns) of standard Gaussian noise,
# drawn in row-major order from the seeded global generator.
A=np.random.normal(size=(200,10))

In [17]:
# Principal components of A: the two leading directions, one per column.
PCA = Denoiser(A, final_dim=2).get_svs()

In [18]:
# Coordinates of every row of A in the basis given by the columns of PCA.
Proj_A = get_lower_dim(A, PCA)

**Total distance between the original vectors and their projections onto the plane spanned by the 2 PCs**

In [19]:
# Total distance between the rows of A and their projections when the basis is the 2 PCs.
get_total_distance(A, Proj_A)

520.2545438182989

**Total distance between the original vectors and their projections onto planes spanned by random orthonormal bases**

In [20]:
# Compare against 50 random planes: each plane is spanned by two orthonormal
# vectors obtained by orthonormalising a random Gaussian square matrix.
np.random.seed(1)
n_features=A.shape[1]
G=[]  # rows of [plane number, total distance]
for i in range(1,51):
  plane=orth(np.random.randn(n_features,n_features))[:,:2]# generating the plane with 2 orthonormal Basis vectors
  # Use a loop-local name so the PCA projection stored in Proj_A is not overwritten.
  plane_proj=get_lower_dim(A,plane)
  d=get_total_distance(A,plane_proj)
  G.append([i,d])
  print("For plane {}  the distance is {} ".format(i,d))

For plane 1  the distance is 537.2295955476261 
For plane 2  the distance is 538.1080904812743 
For plane 3  the distance is 535.6530741995253 
For plane 4  the distance is 532.4919754316178 
For plane 5  the distance is 536.5445393665382 
For plane 6  the distance is 538.7546013210087 
For plane 7  the distance is 530.1999758865302 
For plane 8  the distance is 528.661625324711 
For plane 9  the distance is 537.211124980598 
For plane 10  the distance is 538.9288694066157 
For plane 11  the distance is 536.5780756860167 
For plane 12  the distance is 533.4718355432108 
For plane 13  the distance is 532.1988908939346 
For plane 14  the distance is 536.7741409783705 
For plane 15  the distance is 540.832625973793 
For plane 16  the distance is 529.0779368039488 
For plane 17  the distance is 535.4514262851429 
For plane 18  the distance is 532.4285991796196 
For plane 19  the distance is 537.4535684178197 
For plane 20  the distance is 532.861883557267 
For plane 21  the distance is 536

In [21]:
# Tabulate the per-plane distances, indexed by plane number, and preview the first rows.
D = pd.DataFrame(G, columns=['Plane Number', 'Total Distance']).set_index('Plane Number')
D.head()

Unnamed: 0_level_0,Total Distance
Plane Number,Unnamed: 1_level_1
1,537.229596
2,538.10809
3,535.653074
4,532.491975
5,536.544539


In [None]:
# Write the distance table to CSV.
D.to_csv('distance.csv')