# In [None]:
#1

import numpy as np
from scipy import linalg as la
import matplotlib.pyplot as plt
import random

# Dataset generation: three correlated features with N observations each.
N = 1000
x1 = np.random.normal(0., 1., N)
x2 = x1 + np.random.normal(0., 3., N)
# x3 is an exact linear combination of x1 and x2, so one principal
# component carries (numerically) zero variance.
x3 = 2 * x1 + x2

# Each ROW of A is a variable and each COLUMN an observation: shape (3, N).
A = np.array([x1, x2, x3])


# Find the eigenvectors and eigenvalues of the covariance matrix of the dataset.
# np.cov uses rowvar=True by default (each row is a variable, each column an
# observation), which matches the layout of A.
covariance = np.cov(A)
print('The covariance matrix is')
print(covariance, '\n')

e_values, e_vectors = np.linalg.eig(covariance)
print('The eigenvalues of the covariance matrix are')
print(e_values, '\n')
print('And the eigenvectors are')
# eig returns the eigenvectors as COLUMNS: e_vectors[:, i] belongs to e_values[i].
print(e_vectors[:, 0], '\n')
print(e_vectors[:, 1], '\n')
print(e_vectors[:, 2], '\n')

# Find the eigenvectors and eigenvalues using SVD.
# The identity cov(A) = Ac @ Ac.T / (N - 1) only holds for the mean-removed
# data Ac, so every variable is centred before the decomposition. Without
# this step the squared singular values only approximate the eigenvalues.
A_centered = A - A.mean(axis=1, keepdims=True)
U, spectrum, Vt = la.svd(A_centered)
# Squared singular values, rescaled with the same N - 1 normalisation np.cov
# uses, are the covariance eigenvalues (sorted in descending order).
eigen_values = spectrum**2/(N-1)
print('The eigenvalues of the covariance matrix obtained using SVD are')
print(eigen_values, '\n')
print('And the eigenvectors obtained using SVD are')
# The left singular vectors are the COLUMNS of U.
print(U[:, 0], '\n')
print(U[:, 1], '\n')
print(U[:, 2], '\n')


# What percent of the total dataset's variability is explained by the principal components?
PVE = eigen_values/eigen_values.sum()   # The Proportion of Variance Explained (PVE)
print('The vector of PVE for each principal component is \n', PVE)
# PVE entries refer to principal components, not to the original features.
print('The first principal component explains', PVE[0]*100,
      'percent of the variability, while the second explains', PVE[1]*100, 'percent.')



# Redefine the data in the basis yielded by the PCA procedure: row i of A_rot
# holds the coordinates of every observation along the i-th principal axis.
A_rot = np.dot(U.T, A)


# 2 x 3 grid of scatter plots: the top row shows the data in the original
# basis, the bottom row the same data expressed in the PCA basis.
fig, axs = plt.subplots(2, 3, figsize=(12, 8))

# One entry per column: (x row index, y row index, x label, y label, scatter kwargs).
# The first panel passes no colour so matplotlib's default is used.
panel_specs = (
    (0, 1, 'x1', 'x2', {}),
    (0, 2, 'x1', 'x3', {'color': 'orange'}),
    (1, 2, 'x2', 'x3', {'color': 'green'}),
)

# One entry per grid row: (data matrix, panel title, y-axis limits).
basis_specs = (
    (A, 'Original base', [-15, 15]),
    (A_rot, 'New base', [-5, 5]),
)

for grid_row, (data, title, y_limits) in enumerate(basis_specs):
    for grid_col, (x_idx, y_idx, x_label, y_label, style) in enumerate(panel_specs):
        panel = axs[grid_row, grid_col]
        panel.scatter(data[x_idx, :], data[y_idx, :], **style)
        panel.set_ylim(y_limits)
        panel.set_title(title)
        panel.set_xlabel(x_label)
        panel.set_ylabel(y_label)

plt.tight_layout()
plt.show()