# Development of PCA code
This notebook serves as a debugging and development environment for the PCA calculation itself.

In [1]:
# Imports
import pandas as pd
import numpy as np

In [5]:
# Read in data
# The CSV path is relative to the directory the kernel was started in.
data = pd.read_csv("src/assets/data_points.csv")
# Pull the frame's values out as a plain NumPy array (columns assumed to be x, y, z).
points_data = data.values
# Show the extracted array.
points_data

array([[1.566945  , 2.35562819, 1.5278859 ],
       [2.15638114, 2.53802701, 0.91921348],
       [3.95435659, 3.20565333, 0.7149398 ],
       [2.46249943, 2.65987346, 1.42995661],
       [3.51021533, 3.5427914 , 0.92307503],
       [0.63012198, 0.90930669, 1.19497778],
       [2.71514147, 2.65903694, 1.4500855 ],
       [1.94354145, 2.35981616, 1.18227095],
       [3.9452154 , 3.58172707, 1.26396111]])

In [6]:
# Center the data
# Mean of every column (axis=0 averages over the rows), subtracted from each row by broadcasting.
column_means = np.mean(points_data, axis=0)
points_meaned = points_data - column_means

In [28]:
# Get transformation matrix
# Covariance of the centered points; rowvar=False treats each column as one variable.
cov_matrix = np.cov(points_meaned, rowvar=False)
# The covariance matrix is symmetric, so the symmetric eigensolver is used;
# the eigenvectors are the columns of eigen_vectors.
eigen_values, eigen_vectors = np.linalg.eigh(cov_matrix)

# Reorder the eigenpairs from largest to smallest eigenvalue.
ascending_indices = np.argsort(eigen_values)
sorted_indices = ascending_indices[::-1]
eigen_values_sorted, eigen_vectors_sorted = eigen_values[sorted_indices], eigen_vectors[:, sorted_indices]
# Keep an independent copy: a later cell reassigns eigen_vectors_sorted and compares against this one.
eigen_vectors_sorted_1 = eigen_vectors_sorted.copy()
eigen_vectors_sorted_1

array([[-0.81568263, -0.28542268, -0.50318559],
       [-0.57309181,  0.51733715,  0.63555334],
       [ 0.07891526,  0.80678136, -0.58555633]])

In [26]:
# Different approach (using the calculation from the video script)
# Build the covariance matrix by hand as (1/n) * X^T X, where X holds the centered points
# (one row per sample) and n is the number of samples.
# The normalization must use the number of rows (samples), not the number of columns
# (features): X^T X sums over the samples. Dividing by the feature count mis-scales the
# eigenvalues, although the eigenvectors are unaffected by any positive scale factor.
# NOTE(review): np.cov divides by n - 1 by default, so these eigenvalues differ from the
# np.cov ones by the constant factor (n - 1) / n; confirm against the video script whether
# it normalizes by n or by n - 1.
num_samples = points_meaned.shape[0]
cov_matrix = (1 / num_samples) * (points_meaned.T @ points_meaned)
# Symmetric eigensolver; the eigenvectors are the columns of eigen_vectors.
eigen_values, eigen_vectors = np.linalg.eigh(cov_matrix)

sorted_indices = np.argsort(eigen_values)[::-1]  # Get indices that would sort eigen_values in descending order
eigen_values_sorted = eigen_values[sorted_indices]
eigen_vectors_sorted = eigen_vectors[:, sorted_indices]
eigen_vectors_sorted

array([[-0.81568263, -0.28542268, -0.50318559],
       [-0.57309181,  0.51733715,  0.63555334],
       [ 0.07891526,  0.80678136, -0.58555633]])

In [30]:
# Check that both approaches to calculating the covariance matrix yield the same eigenbasis.
# The two covariance matrices differ by a scale factor and each eigenvector is only defined
# up to its sign, so exact == on the floats is fragile. Instead, align the sign of every
# column and compare element-wise with a floating-point tolerance.
# NOTE: this is only a real comparison when the cells above were run top to bottom, so that
# eigen_vectors_sorted holds the second approach's result and eigen_vectors_sorted_1 the
# first's; otherwise an array is compared with its own copy.
column_signs = np.sign(np.sum(eigen_vectors_sorted * eigen_vectors_sorted_1, axis=0))
np.isclose(eigen_vectors_sorted, eigen_vectors_sorted_1 * column_signs)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

## Transform the data

In [34]:
# Express every centered point in the sorted eigenbasis.
# Step 1: eigenvectors (as rows) times points (as columns) gives one column of coordinates per point.
components_by_point = eigen_vectors_sorted.T @ points_meaned.T
# Step 2: transpose back so the result has one row per point again.
transformed = components_by_point.T
transformed

array([[ 0.98976359,  0.41029925,  0.10200294],
       [ 0.35640595, -0.15464308,  0.27774334],
       [-1.50890288, -0.48724235, -0.08304612],
       [ 0.07718681,  0.23307756, -0.09791994],
       [-1.32341059, -0.01814069,  0.23283369],
       [ 2.55651733, -0.33913013, -0.15087923],
       [-0.12682104,  0.17677464, -0.23736401],
       [ 0.65290602,  0.02614097,  0.11754373],
       [-1.67364519,  0.15286383, -0.16091439]])