# Principal Components Analysis (PCA)

Task:
- Download the Iris data set.
- Use the handmade `pca.py` to process the data.
- Compare the results to the scikit-learn version.

Resources:
- [Scikit-learn PCA](https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html)
- [Scikit-learn iris data](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html)
- [A tutorial on Principal Components Analysis](http://www.cs.otago.ac.nz/cosc453/student_tutorials/principal_components.pdf)
- [What is the difference between whitening and PCA?](https://www.physicsforums.com/threads/what-is-the-difference-between-whitening-and-pca.635358/)


## Imports

In [None]:
# Libraries
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Scripts
from pca import TwoDimensionStandardizer, PrincipalComponentAnalysis


In [None]:
# TESTING: standardizer
#
# Sanity check: the handmade TwoDimensionStandardizer is run next to
# scikit-learn's StandardScaler on the same small data set, and the two
# standardized outputs are compared element-wise.

# Seed kept so that any later cell drawing random numbers stays reproducible.
np.random.seed(0)

# Ten observations of two features; the transpose turns the two rows written
# below into two columns, giving an array of shape (10, 2).
# NOTE(review): this looks like the worked example from the PCA tutorial
# listed under Resources -- confirm.
raw_data = np.array([
    [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2, 1, 1.5, 1.1],
    [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9],
]).T

display(raw_data)

# Standardize once per implementation so that the arrays shown are exactly
# the arrays compared below.
skl_scaler = StandardScaler()
skl_scaled = skl_scaler.fit_transform(raw_data)
display(skl_scaled)

my_scaler = TwoDimensionStandardizer()
my_scaled = my_scaler.fit_transform(data=raw_data)
display(my_scaled)

# Last expression of the cell, so the notebook prints the boolean verdict.
np.allclose(skl_scaled, my_scaled, rtol=1e-09, atol=1e-09)


In [None]:
# TESTING: PCA
#
# Fit both PCA implementations on raw_data with two components and no
# whitening, show the projected data and the component vectors of each, and
# compare the projections element-wise.

skl_pca = PCA(n_components=2, whiten=False)
skl_pca.fit(raw_data)

my_pca = PrincipalComponentAnalysis(n_components=2, whiten=False)
my_pca.fit(raw_data)

# Transform once per model so that the arrays shown are exactly the arrays
# compared below.
skl_projection = skl_pca.transform(raw_data)
display(skl_projection, skl_pca.components_)

my_projection = my_pca.transform(raw_data)
display(my_projection, my_pca.feature_vector)

# NOTE(review): principal axes are only defined up to sign, so a False result
# here may be a sign flip of a component rather than a numerical error --
# compare the displayed component vectors before concluding anything.
# Last expression of the cell, so the notebook prints the boolean verdict.
np.allclose(skl_projection, my_projection, rtol=1e-09, atol=1e-09)


In [None]:
# TESTING: whitener
#
# Same comparison as the plain PCA check, but with whiten=True on both
# implementations: fit on raw_data, show the transformed data and the
# component vectors of each, and compare the transformed data element-wise.

skl_pca = PCA(n_components=2, whiten=True)
skl_pca.fit(raw_data)

my_pca = PrincipalComponentAnalysis(n_components=2, whiten=True)
my_pca.fit(raw_data)

# Transform once per model so that the arrays shown are exactly the arrays
# compared below.
skl_whitened = skl_pca.transform(raw_data)
display(skl_whitened, skl_pca.components_)

my_whitened = my_pca.transform(raw_data)
display(my_whitened, my_pca.feature_vector)

# NOTE(review): a mismatch here can come from a component sign flip or from a
# different variance normalisation in the whitening step -- verify against
# the scikit-learn PCA documentation before treating it as a bug.
# Last expression of the cell, so the notebook prints the boolean verdict.
np.allclose(skl_whitened, my_whitened, rtol=1e-09, atol=1e-09)
