### PCA using singular value decomposition (SVD)

In [1]:
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import numpy as np
from sklearn import datasets

iris = datasets.load_iris()
# Feature matrix: columns 0-3 of the data array.
X = iris.data[:, [0, 1, 2, 3]]
# Binary target: 1.0 where the class label equals 2, 0.0 for every other class.
y = np.asarray(iris.target == 2, dtype=np.float64)

In [3]:
# Centre every feature (column) on zero before decomposing.
X_center = X - np.mean(X, axis=0)
U, s, Vt = np.linalg.svd(X_center)
# The rows of Vt are the right singular vectors; the first two are the
# first and second principal directions.
c1, c2 = Vt[0], Vt[1]

In [4]:
# Projection matrix: the first two principal directions as columns.
W2 = Vt[:2].T
# Project the centred data onto those two directions.
X2D_using_svd = np.dot(X_center, W2)
X2D = X2D_using_svd

In [5]:
from sklearn.decomposition import PCA

# Same 2-D reduction via scikit-learn. The raw (uncentred) X is passed in;
# the recorded output matches the manual centred-SVD projection up to sign.
pca = PCA(n_components=2)
X2D_using_pca = pca.fit_transform(X)

In [6]:
# First five rows of the scikit-learn PCA projection.
X2D_using_pca[:5]

array([[-2.68420713,  0.32660731],
       [-2.71539062, -0.16955685],
       [-2.88981954, -0.13734561],
       [-2.7464372 , -0.31112432],
       [-2.72859298,  0.33392456]])

In [7]:
# First five rows of the manual SVD projection. The second column has the
# opposite sign to the scikit-learn result: the sign of a singular vector is
# arbitrary, so both projections describe the same axis.
X2D_using_svd[:5]

array([[-2.68420713, -0.32660731],
       [-2.71539062,  0.16955685],
       [-2.88981954,  0.13734561],
       [-2.7464372 ,  0.31112432],
       [-2.72859298, -0.33392456]])

In [8]:
# Share of the total variance carried by each retained component. `pca` is
# still the 2-component model fitted above; the two ratios sum to about 0.978.
pca.explained_variance_ratio_

array([0.92461621, 0.05301557])

In [9]:
# Fit with every component kept, then find the smallest number of components
# whose cumulative explained-variance ratio reaches 95 %.
pca = PCA().fit(X)
cumsum = pca.explained_variance_ratio_.cumsum()
# argmax on a boolean array gives the index of the first True; +1 turns that
# zero-based index into a component count.
d = (cumsum >= 0.95).argmax() + 1

In [10]:
# A float n_components between 0 and 1 is a variance target: PCA keeps as many
# components as are needed to explain at least that fraction of the variance.
pca = PCA(n_components=0.95)
X_reduced = pca.fit_transform(X)

In [11]:
# Two columns come back, with the same values as the explicit n_components=2
# projection: two components are enough to pass the 95 % threshold.
X_reduced[:5]

array([[-2.68420713,  0.32660731],
       [-2.71539062, -0.16955685],
       [-2.88981954, -0.13734561],
       [-2.7464372 , -0.31112432],
       [-2.72859298,  0.33392456]])

In [12]:
from sklearn.decomposition import IncrementalPCA

n_batch = 5
inc_pca = IncrementalPCA(n_components=2)

# Fit incrementally: each partial_fit call updates the model with one of the
# n_batch roughly equal row-chunks of X.
batches = np.array_split(X, n_batch)
for X_batch in batches:
    inc_pca.partial_fit(X_batch)

# Project the full data set with the incrementally fitted model. The recorded
# values are close to, but not identical with, the batch PCA projection.
X_reduced = inc_pca.transform(X)
X_reduced[:5]

array([[-2.68416482,  0.32646263],
       [-2.71525244, -0.17241011],
       [-2.88984408, -0.13680588],
       [-2.7464986 , -0.31021911],
       [-2.72863867,  0.33556948]])

In [13]:
from sklearn.decomposition import KernelPCA

# Kernel PCA with an RBF kernel (gamma=0.04), reduced to two components.
rbf_pca = KernelPCA(n_components=2, kernel="rbf", gamma=0.04)
X_reduced = rbf_pca.fit_transform(X)
# First five rows of the kernel-PCA projection.
X_reduced[:5]

array([[-0.6166177 , -0.09180361],
       [-0.61589654, -0.04889014],
       [-0.64084145, -0.08454319],
       [-0.61808503, -0.04323666],
       [-0.62264344, -0.10107493]])

In [14]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# Pipeline: kernel-PCA reduction to two components, then logistic regression.
clf = Pipeline([
        ("kpca", KernelPCA(n_components=2)),
        ("log_reg", LogisticRegression())
    ])

# Hyper-parameter grid for the pipeline. Parameters of a pipeline step must be
# addressed as "<step name>__<parameter>" (double underscore); with a single
# underscore the key is rejected as an invalid parameter of the pipeline.
param_grid = [{
        "kpca__gamma": np.linspace(0.03, 0.05, 10),
        "kpca__kernel": ["rbf", "sigmoid"]
    }]

# NOTE(review): clf and param_grid are never handed to GridSearchCV in this
# cell. The hard-coded gamma below presumably came from such a search --
# confirm, e.g. with GridSearchCV(clf, param_grid).fit(X, y).best_params_.

# RBF kernel PCA that also learns an inverse mapping
# (fit_inverse_transform=True), so inverse_transform() is available afterwards.
rbf_pca = KernelPCA(n_components=2, kernel="rbf", gamma=0.0433,
                    fit_inverse_transform=True)
X_reduced = rbf_pca.fit_transform(X)
# First five rows of the kernel-PCA projection.
X_reduced[:5]

array([[-0.63201218, -0.09390954],
       [-0.6307634 , -0.05236269],
       [-0.65547145, -0.089506  ],
       [-0.63270681, -0.04741483],
       [-0.63793516, -0.10359277]])

In [15]:
from sklearn.manifold import LocallyLinearEmbedding

# Locally Linear Embedding down to two dimensions, using 10 neighbours per sample.
lle = LocallyLinearEmbedding(n_neighbors=10, n_components=2)
# The fitted embedding of the training data is exposed as `embedding_`.
X_reduced = lle.fit(X).embedding_
# NOTE(review): the recorded output shows exact zeros for the first five rows,
# which looks like a degenerate embedding -- duplicate rows in X are one
# possible cause; confirm before relying on this result.
X_reduced[:5]

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])