In [14]:
import numpy as np

from sklearn.datasets import fetch_mldata
from sklearn.datasets import make_moons
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
 

##Some data
X, y = make_moons()
X_train, X_test, y_train, y_test = X[:80], X[20:], y[:80], y[20:]
##

##PCA
#manually using singular value decomposition.  X = U · Σ · VT
X_centered = X - X.mean(axis=0)
U, s, V = np.linalg.svd(X_centered)
c1 = V.T[:, 0]
c2 = V.T[:, 1]
# X d‐proj = X · Wd
W2 = V.T[:, :2]
X2D = X_centered.dot(W2)
#automatically using sklearn pca (uses svd)
pca = PCA(n_components = 2)
X2D = pca.fit_transform(X) 

print(pca.explained_variance_ratio_)

#computing the minimum reduction needed to preserve 95% of variance before reduction
pca = PCA()
pca.fit(X)
cumsum = np.cumsum(pca.explained_variance_ratio_)
d = np.argmax(cumsum >= 0.95) + 1
print(d)
pca = PCA(n_components=0.95)
pca.fit(X)
print(pca.explained_variance_ratio_)
#setting n_components to be a float between 0.0 and 1.0, indicating ratio of variance to preserve:
pca = PCA(n_components=0.95)
X_reduced = pca.fit_transform(X)
print(pca.explained_variance_ratio_)
##



##MNIST
mnist = fetch_mldata('MNIST original')
print ( mnist )
X, y = mnist["data"], mnist["target"]
pca = PCA(n_components = 154)
X_mnist_reduced = pca.fit_transform(X)
X_mnist_recovered = pca.inverse_transform(X_mnist_reduced)

[0.81968748 0.18031252]
2
[0.81968748 0.18031252]
[0.81968748 0.18031252]
{'data': array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8), 'DESCR': 'mldata.org dataset: mnist-original', 'target': array([0., 0., 0., ..., 9., 9., 9.]), 'COL_NAMES': ['label', 'data']}
