In [None]:
import numpy as np
import sklearn

In [None]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

In [None]:
# Synthetic two-class "moons" data set: 1000 noisy samples, seeded.
X, y = make_moons(n_samples=1000, noise=0.3, random_state=42)
# Hold out 30% of the rows for testing; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# PCA by hand: centre each feature, then take the SVD of the centred matrix.
X_centered = X - np.mean(X, axis=0)
U, s, Vt = np.linalg.svd(X_centered)
# Row i of Vt is column i of Vt.T, so these are the same two vectors.
c1, c2 = Vt[0], Vt[1]

In [None]:
# Display c1, i.e. column 0 of Vt.T from the SVD of the centred data.
c1

array([ 0.95277471, -0.30367805])

In [None]:
# Display c2, i.e. column 1 of Vt.T from the SVD of the centred data.
c2

array([0.30367805, 0.95277471])

In [None]:
# Projection matrix: the first two columns of Vt.T (first two rows of Vt).
W2 = Vt[:2].T
# Project the centred samples onto those two directions.
X2D = X_centered @ W2

In [None]:
# Display the projection of X_centered onto W2.
X2D

array([[-0.74345522,  0.1348873 ],
       [ 0.87605593, -0.25329722],
       [ 0.21999332,  0.05960384],
       ...,
       [ 1.44338725, -0.32858951],
       [-1.41233029, -0.69698356],
       [ 0.12859924,  0.47131034]])

# PCA using sklearn

In [None]:
from sklearn.decomposition import PCA

# Same reduction through scikit-learn, keeping two components.
pca = PCA(n_components=2)

In [None]:
# Fit the PCA on X and project X in a single call (rebinds X2D).
X2D = pca.fit_transform(X)

In [None]:
# Display the result of pca.fit_transform(X).
X2D

array([[-0.74345522,  0.1348873 ],
       [ 0.87605593, -0.25329722],
       [ 0.21999332,  0.05960384],
       ...,
       [ 1.44338725, -0.32858951],
       [-1.41233029, -0.69698356],
       [ 0.12859924,  0.47131034]])

# Choosing the right number of dimensions

In [None]:
# Fit PCA without a component limit so every variance ratio is available.
pca = PCA().fit(X_train)
# Running total of the explained-variance ratios.
cumsum = pca.explained_variance_ratio_.cumsum()
# argmax gives the first index where the running total reaches 95%;
# adding 1 turns that zero-based index into a component count.
d = (cumsum >= 0.95).argmax() + 1

In [None]:
# Display d, the component count at which cumsum first reaches 0.95.
d

2

In [None]:
# Pass the fraction of variance to preserve (0.95) instead of an explicit
# component count, and let PCA pick how many components that requires.
pca = PCA(n_components=0.95)
# Fit on the training split and project it in one call.
X_reduced = pca.fit_transform(X_train)

# Decompressing the data

In [None]:
# n_components is left at its default here, so this PCA is not asked to
# drop any components before the round trip.
pca = PCA()
x_reduced = pca.fit_transform(X_train)

In [None]:
# Map the projected rows back into the original feature space.
x_reconstructed = pca.inverse_transform(x_reduced)

# Incremental PCA

In [None]:
from sklearn.datasets import load_digits

# Load the digits data set and flatten each image into one feature row.
digits = load_digits()
n_samples = digits.images.shape[0]
data = digits.images.reshape(n_samples, -1)

In [None]:
# Ordered (unshuffled) 70/30 split of the digits. This rebinds
# X_train / X_test / y_train / y_test, which held the moons split until now.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    digits.target,
    test_size=0.3,
    shuffle=False,
)

In [None]:
from sklearn.decomposition import IncrementalPCA

# IncrementalPCA rejects an n_components larger than the number of features
# or larger than the number of rows in a batch. The flattened 8x8 digit
# images have only 64 features, so the previous n_components=154 (with 100
# batches of about a dozen rows each) raised ValueError on the first
# partial_fit. Use fewer, larger batches and a component count that fits.
n_batches = 10  # roughly 125 training rows per batch
inc_pca = IncrementalPCA(n_components=32)
# Feed the training set to the model one mini-batch at a time.
for X_batch in np.array_split(X_train, n_batches):
    inc_pca.partial_fit(X_batch)
# Project the full training set with the incrementally fitted model.
X_reduced = inc_pca.transform(X_train)

# Kernel PCA

In [None]:
from sklearn.decomposition import KernelPCA

# RBF-kernel PCA down to two components. random_state is fixed (matching the
# seed used for the data above) so that any randomly initialised
# eigen-solver gives the same embedding on every run.
rbf_pca = KernelPCA(n_components=2, kernel="rbf", gamma=0.04, random_state=42)
X_reduced = rbf_pca.fit_transform(X)

# Selecting a Kernel

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

In [None]:
# Two-step model: kernel PCA down to 2 components, then logistic regression.
clf = Pipeline(
    steps=[
        ("kpca", KernelPCA(n_components=2)),
        ("log_reg", LogisticRegression()),
    ]
)
# Grid over the "kpca" step: ten evenly spaced gamma values in [0.03, 0.05]
# crossed with two kernel choices.
param_grid = [
    {
        "kpca__gamma": np.linspace(0.03, 0.05, 10),
        "kpca__kernel": ["rbf", "sigmoid"],
    }
]

In [None]:
# 3-fold cross-validated search over param_grid for the pipeline.
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=3)
# Left as the final expression so the fitted estimator is displayed.
grid_search.fit(X, y)

GridSearchCV(cv=3, error_score=nan,
             estimator=Pipeline(memory=None,
                                steps=[('kpca',
                                        KernelPCA(alpha=1.0, coef0=1,
                                                  copy_X=True, degree=3,
                                                  eigen_solver='auto',
                                                  fit_inverse_transform=False,
                                                  gamma=None, kernel='linear',
                                                  kernel_params=None,
                                                  max_iter=None, n_components=2,
                                                  n_jobs=None,
                                                  random_state=None,
                                                  remove_zero_eig=False,
                                                  tol=0)),
                                       ('log_reg',
                                 

In [None]:
# Display the parameter combination selected by the grid search.
grid_search.best_params_

{'kpca__gamma': 0.05, 'kpca__kernel': 'rbf'}

## LLE

In [None]:
from sklearn.manifold import LocallyLinearEmbedding

# Locally Linear Embedding to 2 components using 10 neighbours per point.
# random_state is fixed (same seed as the data generation above) so that a
# randomly initialised eigen-solver yields the same embedding on every run.
lle = LocallyLinearEmbedding(n_components=2, n_neighbors=10, random_state=42)
X_reduced = lle.fit_transform(X)