# INF 552 Assignment 3 -- PCA and FastMap Dimensionality Reduction

Author: Zongdi Xu, Wenkai Xu

Date: Feb 28, 2019

## Part 1
### (1) PCA Implementation

In [1]:
import numpy as np

# Read the data set: one sample per line, whitespace-separated values.
# The `with` block closes the file handle as soon as the lines are read
# (the previous bare open() never closed it).
with open('pca-data.txt', 'r') as input_f:
    data = input_f.readlines()

# Split every line into its string fields, then convert the whole table to
# floats. The builtin `float` is used as the dtype because the `np.float`
# alias was deprecated in NumPy 1.20 and removed in 1.24.
x = np.array([record.split() for record in data]).astype(float)

In [2]:
# Column-wise mean of the samples: x has one row per sample and one column
# per dimension, so summing over axis 0 and dividing by the row count gives
# one mean value per dimension.
n, dimension = x.shape
mean = x.sum(axis=0) / n
mean

array([ 0.04641608, -0.0356265 ,  0.06334316])

In [3]:
# Step 1&2: get the covariance matrix
# Centre the data once and reuse it for both factors of the product
# (previously `x - mean` was evaluated twice).
centered_x = x - mean
# Sample covariance: sum of outer products of the centred samples,
# normalised by (n - 1).
cov_mat = np.dot(centered_x.T, centered_x) / (n - 1)
print('NumPy covariance matrix: \n%s' % (cov_mat,))

NumPy covariance matrix: 
[[ 81.24199811 -15.84081415  31.66840483]
 [-15.84081415  13.70181418 -15.26445036]
 [ 31.66840483 -15.26445036  31.36677137]]


In [4]:
# Step 3: eigen-decomposition of the covariance matrix.
# np.linalg.eig returns the eigenvalues first and the eigenvector matrix
# second; column i of the eigenvector matrix belongs to eigenvalue i.
eig_result = np.linalg.eig(cov_mat)
eig_vals = eig_result[0]
eig_vecs = eig_result[1]

# The 1-tuple keeps the array as the single %s argument.
print('Eigenvectors \n%s' % (eig_vecs,))
print('\nEigenvalues \n%s' % (eig_vals,))

Eigenvectors 
[[ 0.86667137 -0.4962773  -0.0508879 ]
 [-0.23276482 -0.4924792   0.83862076]
 [ 0.44124968  0.71496368  0.54233352]]

Eigenvalues 
[101.61980038  19.89921519   4.79156808]


In [5]:
# Make a list of (eigenvalue magnitude, eigenvector) tuples; column i of
# eig_vecs is the eigenvector that belongs to eig_vals[i].
eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:, i]) for i in range(len(eig_vals))]

# Sort the tuples from the largest to the smallest eigenvalue.
# The key restricts the comparison to the eigenvalue: a plain tuple sort
# falls back to comparing the ndarray eigenvectors whenever two eigenvalues
# are equal, and that comparison raises ValueError.
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)

# Visually confirm that the list is correctly sorted by decreasing eigenvalues
print('Eigenvalues in descending order:')
eig_pairs

Eigenvalues in descending order:


[(101.61980038291973, array([ 0.86667137, -0.23276482,  0.44124968])),
 (19.899215194176584, array([-0.4962773 , -0.4924792 ,  0.71496368])),
 (4.791568080870482, array([-0.0508879 ,  0.83862076,  0.54233352]))]

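In this case, the first two eigenvalues (about 101.62 and 19.90) are the largest, so their eigenvectors -- the first two principal components -- are kept as the projection axes, and the third component (eigenvalue about 4.79) is discarded.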

In [6]:
# Step 4: apply projection to input data points
# Build the projection matrix U: its rows are the eigenvectors of the
# `target_dimension` largest eigenvalues (eig_pairs is sorted in descending
# order, so these are simply the leading entries).
target_dimension = 2
U = np.array([pair[1] for pair in eig_pairs[:target_dimension]])
# print() with a single parenthesised argument behaves the same on Python 2
# and Python 3; the bare `print U` statement is a syntax error on Python 3.
print(U)

[[ 0.86667137 -0.23276482  0.44124968]
 [-0.4962773  -0.4924792   0.71496368]]


In [7]:
# Project every sample onto the retained eigenvectors (the rows of U);
# row k of result_x holds the reduced coordinates of x[k].
# NOTE(review): the covariance matrix was built from mean-centred data, but
# the raw x is projected here, so the scores are shifted by the projection of
# `mean` compared with projecting (x - mean). Confirm which one is intended.
result_x = np.dot(x, U.T)
# print() call form so the cell also runs on Python 3.
print(result_x)

[[ 10.95314032   7.41375984]
 [-12.60962969  -4.2089934 ]
 [  0.50902129   0.30680664]
 ...
 [ -2.84606985   2.45894692]
 [ 11.25964147   4.24329087]
 [ 14.30637164   5.68389356]]


### (2) Fastmap Implementation

## Part 2

### (1) PCA Software Familiarization

In [8]:
import numpy as np
from sklearn.decomposition import PCA

# Load the whitespace-separated data set (one sample per line). The `with`
# block closes the file handle once the lines have been read.
with open('pca-data.txt', 'r') as input_f:
    data = input_f.readlines()

# Convert the string fields to floats. The builtin `float` replaces the
# `np.float` alias, which was removed in NumPy 1.24.
x = np.array([record.split() for record in data]).astype(float)
# Keep an untouched copy of the input alongside the working array.
original_x = x.copy()

n, dimension = x.shape
target_dimension = 2
# Reduce the data to `target_dimension` components with scikit-learn's PCA
# and show the transformed samples.
pca = PCA(n_components=target_dimension)
# print() call form so the cell also runs on Python 3.
print(pca.fit_transform(x))

[[-10.87667009   7.37396173]
 [ 12.68609992  -4.24879151]
 [ -0.43255106   0.26700852]
 ...
 [  2.92254009   2.41914881]
 [-11.18317124   4.20349275]
 [-14.2299014    5.64409544]]
