In [None]:
import numpy as np
import scipy
import matplotlib.pyplot as plt
from skimage.transform import resize
import scipy.misc

In [None]:
def PCA_func(data, num_components):
    """
    Performs Principal Component Analysis via an SVD of the mean-centred data.

    Parameters:
    data (array-like of shape (num_samples, num_features)): data to perform analysis on,
                                                            one sample per row

    num_components (int): number of leading components to keep; must satisfy
                          0 <= num_components <= min(num_samples, num_features)

    Returns:
    mean (ndarray of shape (num_features,)): Mean of the input data

    S (ndarray of shape (min(num_samples, num_features),)): ALL singular values of the centred
                                                            data, in descending order (not
                                                            truncated to num_components)

    principal_components (ndarray of shape (num_components, num_features)): Principal axes in feature
                                                                            space, representing the
                                                                            directions of maximum
                                                                            variance in the data.

    variance (ndarray of shape (min(num_samples, num_features),)): variance along EVERY principal axis,
                                                                   S**2 / (num_samples - 1); like S it
                                                                   is not truncated to num_components

    reduced_data (ndarray of shape (num_samples, num_components)): Input data transformed to lower
                                                                   dimensions

    reconstructed_data (ndarray of shape (num_samples, num_features)): reduced_data transformed back to
                                                                       the original space

    energy (float): fraction of energy (sum of squared singular values) captured in num_components;
                    defined as 1.0 when the centred data is identically zero, because the
                    reconstruction is then exact

    Raises:
    ValueError: if data is not two-dimensional or num_components is outside the valid range

    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(
            "data must be 2-dimensional (num_samples, num_features), got shape %s" % (data.shape,)
        )

    max_components = min(data.shape)
    if not 0 <= num_components <= max_components:
        raise ValueError(
            "num_components must be between 0 and %d, got %r" % (max_components, num_components)
        )

    mean = np.mean(data, axis=0)  # per-feature mean of the input data
    centred_data = data - mean    # centering input data

    # Thin SVD: only the first min(num_samples, num_features) singular vectors are ever used,
    # so avoid building the full square U / Vh matrices (U alone would be num_samples x num_samples).
    U, S, Vh = np.linalg.svd(centred_data, full_matrices=False)

    S_subset = S[:num_components]

    # Coordinates of every sample along the kept principal axes (U_k * S_k == centred_data @ V_k)
    reduced_data = U[:, :num_components] * S_subset

    principal_components = Vh[:num_components, :]  # rows are the principal axes

    variance = np.square(S) / (data.shape[0] - 1)  # sample variance along each principal axis

    # Map the reduced representation back to feature space and undo the centering
    reconstructed_data = np.dot(reduced_data, principal_components) + mean

    # Fraction of the total squared singular values retained by the truncation
    total_energy = np.sum(S**2)
    if total_energy > 0:
        energy = np.sum(S_subset**2) / total_energy
    else:
        # Constant data: nothing to lose, and 0/0 would otherwise yield nan
        energy = 1.0

    return mean, S, principal_components, variance, reduced_data, reconstructed_data, energy
    

# Part 1

In [None]:
# Read the Part 1 dataset from a whitespace-separated text file
fname = "pca_dataset.txt"
pca_dataset = np.loadtxt(fname)

# Run PCA keeping two components and unpack every result for the cells below
(mean, S, prin_comp, var,
 pca_dataset_reduced, pca_dataset_reconstructed, energy) = PCA_func(pca_dataset, 2)

In [None]:
# Plotting the pca_dataset
plt.scatter(pca_dataset[:, 0], pca_dataset[:, 1], alpha=0.5)
plt.xlabel('x')
plt.ylabel('f(x)')

# Plotting the two principal components as line segments starting at the data mean.
# Each segment is scaled to three standard deviations along its axis (sqrt of the
# variance returned by PCA_func) so its length reflects the spread of the data.
for length, vector in zip(var, prin_comp):
    v = vector * 3 * np.sqrt(length)
    v1 = mean
    v2 = mean + v
    plt.plot((v1[0], v2[0]), (v1[1], v2[1]), color='black')

#plt.savefig("Task1.png")
plt.show()

# Energy of a component is its squared singular value relative to the total, the same
# definition PCA_func uses; the raw singular value S[i] is not an energy.
component_energy = np.square(S) / np.sum(np.square(S))
print("Energy contained in first component: ", component_energy[0] * 100, "%")
print("Energy contained in second component: ", component_energy[1] * 100, "%")

# Part 2


In [None]:
# Get the gray scale racoon image.
# scipy.misc.face was deprecated in SciPy 1.10 and removed in 1.12; scipy.datasets.face
# serves the same image but needs the optional `pooch` package to download it.
try:
    from scipy.datasets import face as load_face
    racoon = load_face(gray=True)
except ImportError:
    # Older SciPy without scipy.datasets, or pooch not installed: use the legacy loader
    racoon = scipy.misc.face(gray=True)
print("Original shape ",racoon.shape)
racoon_resized = resize(racoon, (249, 185)) # resize the image
plt.gray() # make gray the default colormap; the later imshow calls rely on it
plt.imshow(racoon_resized)
#plt.savefig("2_org.png")
plt.show()

print("Resized shape ",racoon_resized.shape)

## (a) PCA with all components


In [None]:
# Perform PCA on racoon image with num_components = all components.
# The number of available components equals the smaller image dimension, so derive it
# from the image instead of hard-coding it; it stays correct if the resize shape changes.
num_all_components = min(racoon_resized.shape)
_, _, _, _, _, racoon_recons_all, energy_all = PCA_func(racoon_resized.transpose(), num_all_components)

# PCA ran on the transposed image, so transpose the reconstruction back for display
plt.imshow(racoon_recons_all.transpose())
#plt.savefig("part2_a.png")
plt.show()

print("Percentage of energy lost in truncation: ", (1-energy_all)*100, "%")

## (b) PCA with 120 components

In [None]:
# Reconstruct the racoon image from its 120 leading principal components;
# only the reconstruction and the captured energy fraction are needed here
*_, racoon_recons_120, energy_120 = PCA_func(racoon_resized.T, 120)

# PCA ran on the transposed image, so transpose back before displaying
plt.imshow(racoon_recons_120.T)
#plt.savefig("part2_b.png")
plt.show()

energy_lost_120 = (1 - energy_120) * 100
print("Percentage of energy lost in truncation: ", energy_lost_120, "%")

## (c) PCA with 50 components

In [None]:
# Reconstruct the racoon image from its 50 leading principal components;
# only the reconstruction and the captured energy fraction are needed here
*_, racoon_recons_50, energy_50 = PCA_func(racoon_resized.T, 50)

# PCA ran on the transposed image, so transpose back before displaying
plt.imshow(racoon_recons_50.T)
#plt.savefig("part2_c.png")
plt.show()

energy_lost_50 = (1 - energy_50) * 100
print("Percentage of energy lost in truncation: ", energy_lost_50, "%")

## (d) PCA with 10 components

In [None]:
# Reconstruct the racoon image from its 10 leading principal components;
# only the reconstruction and the captured energy fraction are needed here
*_, racoon_recons_10, energy_10 = PCA_func(racoon_resized.T, 10)

# PCA ran on the transposed image, so transpose back before displaying
plt.imshow(racoon_recons_10.T)
#plt.savefig("part2_d.png")
plt.show()

energy_lost_10 = (1 - energy_10) * 100
print("Percentage of energy lost in truncation: ", energy_lost_10, "%")

# Part 3

In [None]:
# Load the vadere trajectory data from its text file and report its dimensions
vadere_fname = "data_DMAP_PCA_vadere.txt"
vadere_dataset = np.loadtxt(vadere_fname)
print(vadere_dataset.shape)

# Columns 0-1 hold the positions of the first pedestrian over time,
# columns 2-3 those of the second pedestrian
first_pedestrian_path = vadere_dataset[:, :2]
second_pedestrian_path = vadere_dataset[:, 2:4]

# Draw both trajectories into the same figure
for pedestrian_path in (first_pedestrian_path, second_pedestrian_path):
    plt.plot(pedestrian_path[:, 0], pedestrian_path[:, 1])
#plt.savefig("3_paths.png")
plt.show()

In [None]:
# Project the trajectory data onto its two leading principal components;
# only the reduced coordinates and the captured energy fraction are kept
*_, vadere_optimized2, _, energy_2comp = PCA_func(vadere_dataset, 2)

# Trajectory in the plane of the first two components (one column per plot axis)
plt.plot(*vadere_optimized2.T)
#plt.savefig("3_2comp.png")
plt.show()

energy_2comp_percent = energy_2comp * 100
print("Energy contained in the two components: ", energy_2comp_percent, "%")

In [None]:
# Repeat the trajectory PCA keeping three components and report the captured energy
*_, vadere_optimized3, _, energy_3comp = PCA_func(vadere_dataset, 3)
energy_3comp_percent = energy_3comp * 100
print("Energy contained in the three components: ", energy_3comp_percent, "%")

Therefore, 3 components are needed to capture most (>90%) of the energy.