### Importing Libraries and Initializing Variables

In [42]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Number of principal components (leading eigenvectors) to keep when
# projecting the data onto the lower-dimensional space.
n_components = 2

### Loading in Dataset

In [43]:
# Use a forward slash so the relative path resolves on Windows, Linux and
# macOS alike; a backslash separator is only understood on Windows.
df = pd.read_csv("data/pca.csv")
df

Unnamed: 0,x,y,z
0,0.35,1.43,4.23
1,3.71,2.98,1.32
2,2.1,1.29,0.11
3,4.2,0.99,4.67
4,3.63,2.01,0.19


### Converting the DataFrame to a NumPy array

In [44]:
# Extract the DataFrame's values as a plain NumPy array (one row per sample,
# one column per feature) for the linear-algebra steps that follow.
data = df.to_numpy()
data

array([[0.35, 1.43, 4.23],
       [3.71, 2.98, 1.32],
       [2.1 , 1.29, 0.11],
       [4.2 , 0.99, 4.67],
       [3.63, 2.01, 0.19]])

### Centering the Data with a Standard Scaler (mean removal only)

In [45]:
# with_std=False makes the scaler subtract the per-feature mean only, leaving
# the variances untouched. fit() returns the fitted estimator, so construction
# and fitting can be chained.
scaler = StandardScaler(with_std=False).fit(data)
scaler.mean_

array([2.798, 1.74 , 2.104])

In [46]:
# Apply the fitted scaler: subtract the per-feature means stored in
# scaler.mean_ so that every column of `centered` has zero mean.
centered = scaler.transform(data)
centered

array([[-2.448, -0.31 ,  2.126],
       [ 0.912,  1.24 , -0.784],
       [-0.698, -0.45 , -1.994],
       [ 1.402, -0.75 ,  2.566],
       [ 0.832,  0.27 , -1.914]])

### Creating Covariance Matrix

In [47]:
# rowvar=False: each column is a variable (feature) and each row an
# observation, so the result is a features-by-features covariance matrix.
cov_mat = np.cov(centered, rowvar=False)
cov_mat

array([[ 2.49237,  0.34425, -0.63064],
       [ 0.34425,  0.6179 , -0.7938 ],
       [-0.63064, -0.7938 ,  4.83958]])

### Finding eigenvectors and eigenvalues and retrieving top k eigenvectors with the largest eigenvalues

In [48]:
# A covariance matrix is symmetric, so use eigh: it is the solver intended for
# symmetric/Hermitian matrices and always returns real eigenvalues together
# with orthonormal eigenvectors (eig is for general matrices and makes no such
# guarantee).
eig_vals, eig_vecs = np.linalg.eigh(cov_mat)

# Order the eigenpairs from largest to smallest eigenvalue. Column i of
# eig_vecs is the eigenvector belonging to eig_vals[i], so both are reordered
# with the same index to keep them aligned.
sort_idx = np.argsort(eig_vals)[::-1]
eig_vals = eig_vals[sort_idx]
eig_vecs = eig_vecs[:, sort_idx]

# The principal components are the leading n_components eigenvectors.
# NOTE: an eigenvector is only defined up to sign, so individual columns may
# be negated relative to another solver's output.
components = eig_vecs[:, :n_components]
print(f"Eigenvalues: \n{eig_vals}\n")
print(f"Eigenvectors: \n{eig_vecs}\n")
print(f"Principal Components: \n{components}\n")

Eigenvalues: 
[5.15906349 2.34347792 0.44730859]

Eigenvectors: 
[[ 0.24872458  0.96166423 -0.11549018]
 [ 0.18504419  0.06986171  0.98024384]
 [-0.95073379  0.26518153  0.16057406]]

Principal Components: 
[[ 0.24872458  0.96166423]
 [ 0.18504419  0.06986171]
 [-0.95073379  0.26518153]]



### Projecting data onto lower dimensional space using principal components

In [49]:
# Project every centered sample onto the principal axes:
# (n_samples, n_features) x (n_features, n_components)
#   -> (n_samples, n_components).
transformed = np.matmul(centered, components)
transformed

array([[-2.6875015 , -1.81203525],
       [ 1.2016669 ,  0.75576398],
       [ 1.63888353, -1.23145137],
       [-2.22965418,  1.97631277],
       [ 2.07660525,  0.31140986]])

### Using scikit-learn's PCA for comparison

In [None]:
# Reuse the notebook-level n_components so this cell stays in sync with the
# manual implementation instead of hard-coding the value a second time.
pca = PCA(n_components=n_components)
sk_transformed = pca.fit_transform(centered)

# Principal axes are only defined up to sign, so a column of the scikit-learn
# projection may be the negative of the corresponding manually computed
# column. Compare magnitudes to confirm both approaches agree.
np.testing.assert_allclose(np.abs(sk_transformed), np.abs(transformed), atol=1e-8)
sk_transformed

array([[ 2.6875015 , -1.81203525],
       [-1.2016669 ,  0.75576398],
       [-1.63888353, -1.23145137],
       [ 2.22965418,  1.97631277],
       [-2.07660525,  0.31140986]])