# Principal Components Analysis (PCA)

Resources:
- [Scikit-learn PCA](https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html)
- [Scikit-learn iris data](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html)
- [A tutorial on Principal Components Analysis](http://www.cs.otago.ac.nz/cosc453/student_tutorials/principal_components.pdf)
- [What is the difference between whitening and PCA?](https://www.physicsforums.com/threads/what-is-the-difference-between-whitening-and-pca.635358/)
- [How to reverse PCA and reconstruct original variables from several principal components?](https://stats.stackexchange.com/questions/229092/how-to-reverse-pca-and-reconstruct-original-variables-from-several-principal-com)
- [PCA scores for only the first principal components are of "wrong" signs](https://stackoverflow.com/questions/67258885/pca-scores-for-only-the-first-principal-components-are-of-wrong-signs)


## Imports

In [None]:
# Libraries
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Scripts
from pca import TwoDimensionStandardizer, PrincipalComponentAnalysis


In [None]:
# TESTING: Standardizer
# Check the custom TwoDimensionStandardizer against scikit-learn's
# StandardScaler on a small two-feature sample.
raw_data = np.array(
    [
        [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2, 1, 1.5, 1.1],
        [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9],
    ]
).T  # shape: (10, 2)

display(raw_data)

# Fit/transform once per scaler and reuse the result, so the arrays shown
# below are exactly the arrays that get compared at the end of the cell.
skl_scaler = StandardScaler()
skl_scaled = skl_scaler.fit_transform(raw_data)
display(skl_scaled)

my_scaler = TwoDimensionStandardizer()
my_scaled = my_scaler.fit_transform(data=raw_data)
display(my_scaled)

# Last expression of the cell: True when both outputs agree within tolerance.
np.allclose(skl_scaled, my_scaled, rtol=1e-09, atol=1e-09)


In [None]:
# TESTING: PCA
# Compare the custom PCA projection with scikit-learn's while keeping as many
# components as there are input features (no reduction, no whitening).

n_comp = raw_data.shape[1]
whiten_bool = False

skl_pca = PCA(n_components=n_comp, whiten=whiten_bool)
skl_pca.fit(raw_data)

my_pca = PrincipalComponentAnalysis(n_components=n_comp, whiten=whiten_bool)
my_pca.fit(raw_data)

# Project once per model and reuse the result, so the displayed scores are
# exactly the ones passed to the comparison below.
skl_scores = skl_pca.transform(raw_data)
display(skl_scores, skl_pca.components_)

my_scores = my_pca.transform(raw_data)
display(my_scores, my_pca.components)

# Last expression of the cell: True when both projections agree within tolerance.
np.allclose(skl_scores, my_scores, rtol=1e-09, atol=1e-09)


In [None]:
# TESTING: PCA (feature reduction)
# Same comparison as the plain PCA check, but keeping one component fewer
# than the number of input features.

n_comp = raw_data.shape[1] - 1
whiten_bool = False

skl_pca = PCA(n_components=n_comp, whiten=whiten_bool)
skl_pca.fit(raw_data)

my_pca = PrincipalComponentAnalysis(n_components=n_comp, whiten=whiten_bool)
my_pca.fit(raw_data)

# Project once per model and reuse the result, so the displayed scores are
# exactly the ones passed to the comparison below.
skl_scores = skl_pca.transform(raw_data)
display(skl_scores, skl_pca.components_)

my_scores = my_pca.transform(raw_data)
display(my_scores, my_pca.components)

# Last expression of the cell: True when both projections agree within tolerance.
np.allclose(skl_scores, my_scores, rtol=1e-09, atol=1e-09)


In [None]:
# TESTING: Whitener
# Compare the custom PCA with scikit-learn's when whitening is switched on,
# keeping as many components as there are input features.

n_comp = raw_data.shape[1]
whiten_bool = True

skl_pca = PCA(n_components=n_comp, whiten=whiten_bool)
skl_pca.fit(raw_data)

my_pca = PrincipalComponentAnalysis(n_components=n_comp, whiten=whiten_bool)
my_pca.fit(raw_data)

# Project once per model and reuse the result, so the displayed scores are
# exactly the ones passed to the comparison below.
skl_scores = skl_pca.transform(raw_data)
display(skl_scores, skl_pca.components_)

my_scores = my_pca.transform(raw_data)
display(my_scores, my_pca.components)

# Last expression of the cell: True when both whitened projections agree
# within tolerance.
np.allclose(skl_scores, my_scores, rtol=1e-09, atol=1e-09)


In [None]:
# TESTING: Reverse PCA transform (full recovery)
# Round-trip check: transform with every component kept, then reverse the
# transform and compare the result with the original data.

n_comp = raw_data.shape[1]
whiten_bool = False

my_pca = PrincipalComponentAnalysis(n_components=n_comp, whiten=whiten_bool)
my_pca.fit(raw_data)
trans_data = my_pca.transform(raw_data)

# Reverse once and reuse the result, so the displayed reconstruction is
# exactly the array compared against raw_data below.
recovered = my_pca.reverse_transform(data=trans_data)

display(raw_data)
display(recovered)

# Last expression of the cell: True when the round trip reproduces raw_data
# within tolerance.
np.allclose(raw_data, recovered, rtol=1e-09, atol=1e-09)


In [None]:
# TESTING: Reverse PCA transform (partial recovery)
# Round-trip check with one component fewer than the number of input
# features, then compare the reconstruction with the original data.

n_comp = raw_data.shape[1] - 1
whiten_bool = False

my_pca = PrincipalComponentAnalysis(n_components=n_comp, whiten=whiten_bool)
my_pca.fit(raw_data)
trans_data = my_pca.transform(raw_data)

# Reverse once and reuse the result, so the displayed reconstruction is
# exactly the array compared against raw_data below.
recovered = my_pca.reverse_transform(data=trans_data)

display(raw_data)
display(recovered)

# Tolerances are loosened to 0.1 here, presumably because dropping a
# component makes the reconstruction only approximate.
np.allclose(raw_data, recovered, rtol=0.1, atol=0.1)


In [None]:
# TESTING: Reverse PCA transform (with whitening)
# Round-trip check with whitening enabled and every component kept: the
# reversed data is compared against the original at tight tolerance.

n_comp = raw_data.shape[1]
whiten_bool = True

my_pca = PrincipalComponentAnalysis(n_components=n_comp, whiten=whiten_bool)
my_pca.fit(raw_data)
trans_data = my_pca.transform(raw_data)

# Reverse once and reuse the result, so the displayed reconstruction is
# exactly the array compared against raw_data below.
recovered = my_pca.reverse_transform(data=trans_data)

display(raw_data)
display(recovered)

# Last expression of the cell: True when the round trip reproduces raw_data
# within tolerance.
np.allclose(raw_data, recovered, rtol=1e-09, atol=1e-09)
