# Setup

In [None]:
import numpy as np
import pylab as plt

from sklearn.datasets import make_moons, make_circles, fetch_openml

In [None]:
# Vanilla PCA for comparison
class PCA:
    """Principal component analysis via the eigendecomposition of the
    sample covariance matrix.

    Args:
        n_components (int or float): If ``>= 1``, the number of principal
            components to keep (must be integer-valued, e.g. ``2`` or
            ``2.0``). If in ``(0, 1)``, the smallest number of components
            whose cumulative explained-variance ratio reaches this value.

    Attributes set by ``fit``:
        mu_: Per-feature mean of the training data.
        lmbdas_: Eigenvalues of the covariance matrix, sorted descending.
        U_: Matching eigenvectors, stored as columns.
        M_: Number of components actually used for the projection.
    """
    def __init__(self, n_components):
        self.n_components = n_components

    def fit(self, X):
        """Estimate mean, principal axes and the number of components.

        Args:
            X (array-like): Training samples of shape (N, D).

        Returns:
            The fitted PCA object.

        Raises:
            ValueError: If `n_components` is not a valid setting.
        """
        X = np.array(X)
        N, _ = X.shape
        self.mu_ = np.mean(X, axis=0)
        # Center once and reuse the result for the covariance estimate
        X_centered = X - self.mu_
        S = (X_centered.T @ X_centered) / N
        lmbdas, U = np.linalg.eigh(S)
        # `eigh` returns ascending eigenvalues; PCA wants the largest first
        sort_idx = np.argsort(-lmbdas)
        self.lmbdas_ = lmbdas[sort_idx]
        self.U_ = U[:, sort_idx]
        self._determine_M()
        return self

    def transform(self, X):
        """Project samples onto the first `M_` principal axes.

        Args:
            X (array-like): Samples of shape (N, D) in the input space.

        Returns:
            ndarray of shape (N, M_) with the projected samples.
        """
        B = self.U_[:, :self.M_]
        return (np.array(X) - self.mu_) @ B

    def inverse_transform(self, Z):
        """Map projected samples back into the input space.

        Args:
            Z (array-like): Projected samples of shape (N, M_).

        Returns:
            ndarray of shape (N, D) with the reconstructed samples.
        """
        B = self.U_[:, :self.M_]
        return np.array(Z) @ B.T + self.mu_

    def _determine_M(self):
        """Translate `n_components` into the integer component count `M_`.

        Raises:
            ValueError: If `n_components` is neither an integer-valued
                number >= 1 nor a fraction in (0, 1).
        """
        n_components = self.n_components
        if n_components >= 1:
            # `M_` is used as a slice bound, so it has to be a real int;
            # reject values such as 2.5 (or inf) here instead of failing
            # later inside `transform`.
            if not float(n_components).is_integer():
                raise ValueError('Invalid `n_components` parameter.')
            self.M_ = int(n_components)
        elif 0 < n_components < 1:
            total = np.sum(self.lmbdas_)
            if not total > 0:
                # No variance at all: the ratio is undefined, one component
                # is as good as any other.
                self.M_ = 1
                return
            cum_lmbdas = np.cumsum(self.lmbdas_ / total)
            reached = np.flatnonzero(cum_lmbdas >= n_components)
            # If round-off keeps the cumulative ratio just below the
            # requested fraction, keep every component rather than one.
            self.M_ = int(reached[0]) + 1 if reached.size else len(self.lmbdas_)
        else:
            raise ValueError('Invalid `n_components` parameter.')

# **Principal Component Analysis - Extensions**
In this notebook, we will implement two extensions to the *principal component analysis* (PCA) algorithm.
First, we will implement the high-dimensional data version, which takes care of the case where the number of samples $N$ is smaller than the number of features $D$. Subsequently, we will implement the kernel version, which allows us to obtain the PCA projection of the data in a feature space that is defined only implicitly through a kernel function.

Mathematically, we denote an obtained data set consisting of $N$ samples as a matrix $\mathbf{X} \in \mathbb{R}^{N \times D}$, where the $n$-th row of this matrix represents the $n$-th sample, a $D$-dimensional feature vector $\mathbf{x}_n = (x_{n1}, \dots, x_{nD})^\mathrm{T}$.

## **High-dimensional PCA**

In [None]:
X_img_all, y_img_all = fetch_openml('mnist_784', version=1, return_X_y=True)
# Recent scikit-learn versions return pandas objects here. Indexing a
# DataFrame with an integer array selects *columns* (and raises KeyError),
# so convert to plain NumPy arrays before picking rows by position.
X_img_all = np.asarray(X_img_all)
y_img_all = np.asarray(y_img_all)

# Draw 20 distinct samples so that there are far fewer samples than features
idx = np.random.choice(len(X_img_all), size=20, replace=False)
X_img = X_img_all[idx]
y_img = y_img_all[idx]

> A corresponding class for high-dimensional PCA is to be implemented below.

In [None]:
class HighDimPCA(PCA):
    """ This class implements the principal component analysis for N < D,
        where N is the number of samples and D is the number of features.

        Instead of the D x D covariance matrix, the N x N matrix
        X_c X_c^T / N is decomposed. Both share the same non-zero
        eigenvalues, and an eigenvector v_i of the small matrix yields the
        principal axis u_i = X_c^T v_i / sqrt(N * lambda_i).
    """
    def fit(self, X):
        """Estimate mean, principal axes and the number of components.

        Args:
            X (array-like): Training samples of shape (N, D).

        Returns:
            The fitted PCA object.
        """
        X = np.array(X)
        N, _ = X.shape
        self.mu_ = np.mean(X, axis=0)
        X_centered = X - self.mu_

        # Small (N x N) eigenproblem instead of the (D x D) one
        S_small = (X_centered @ X_centered.T) / N
        lmbdas, V = np.linalg.eigh(S_small)
        sort_idx = np.argsort(-lmbdas)
        lmbdas, V = lmbdas[sort_idx], V[:, sort_idx]

        # Centering makes the small matrix rank deficient, so at least one
        # eigenvalue is (numerically) zero. Such directions carry no variance
        # and cannot be normalized, hence they are dropped.
        largest = max(lmbdas[0], 0.0) if lmbdas.size else 0.0
        keep = lmbdas > largest * N * np.finfo(float).eps
        lmbdas, V = lmbdas[keep], V[:, keep]

        self.lmbdas_ = lmbdas
        # Map back to the input space and rescale to unit-length axes
        self.U_ = (X_centered.T @ V) / np.sqrt(N * lmbdas)
        self._determine_M()
        return self

> Compare its runtime against the vanilla PCA implementation (e.g., using `%timeit`)

In [None]:
# Example for timeit: `%timeit` is an IPython line magic that runs the
# statement after it repeatedly and reports how long it took.
%timeit np.dot(5, 5)

In [None]:
####################
# Your Code Here   #
####################

## **Kernel PCA**

In [None]:
# Two 2-D toy data sets with two classes each
X_moons, y_moons = make_moons(n_samples=100, random_state=0)
# Seeded like the moons: with noise > 0 the circles would otherwise change
# on every run, which makes results impossible to compare.
X_circles, y_circles = make_circles(
    n_samples=200, factor=.2, noise=.1, random_state=0
)

plt.subplot(121)
plt.title('moons')
plt.scatter(X_moons[:, 0], X_moons[:, 1], c=y_moons)

plt.subplot(122)
plt.title('circles')
plt.scatter(X_circles[:, 0], X_circles[:, 1], c=y_circles)
plt.show()

> Implement a standard PCA algorithm using sklearn. Reduce the number of components and try to explain why standard PCA is not working here.

In [None]:
####################
# Your Code Here   #
####################

> Implement the Gaussian kernel given by: $$k_\sigma(\mathbf{x},\mathbf{x}^{\prime})=\exp(-\left\Vert \mathbf{x}-\mathbf{x}^{\prime}\right\Vert ^{2}/\sigma) $$

In [None]:
class GaussianKernel:
    """Gaussian kernel k(x, x') = exp(-||x - x'||^2 / sigma).

    Args:
        sigma (float): Positive width parameter that divides the squared
            Euclidean distance.

    Raises:
        ValueError: If `sigma` is not strictly positive.
    """
    def __init__(self, sigma):
        # `not sigma > 0` also rejects NaN
        if not sigma > 0:
            raise ValueError('`sigma` must be positive.')
        self.sigma = sigma

    def __call__(self, x, y):
        """Evaluate the kernel for a pair of samples.

        Args:
            x (array-like): First sample (feature vector).
            y (array-like): Second sample (feature vector).

        Returns:
            The kernel value. The squared distance is summed over the last
            axis, so broadcastable batches of vectors are accepted as well.
        """
        diff = np.atleast_1d(np.asarray(x, dtype=float) - np.asarray(y, dtype=float))
        sq_dist = np.sum(diff ** 2, axis=-1)
        return np.exp(-sq_dist / self.sigma)

> A corresponding class for the KernelPCA is to be implemented below.

In [None]:
class KernelPCA:
    """This class implements the kernel version of the principal component analysis.

    Args:
        n_components (int): Number of components to consider.
        kernel_func (callable): Kernel `k(x, y)` that maps two samples to a
            scalar similarity.

    Attributes set by ``fit``:
        X_fit_: Training samples (needed to evaluate the kernel later on).
        K_col_means_: Column means of the training gram matrix.
        K_mean_: Mean over all entries of the training gram matrix.
        lmbdas_: Leading eigenvalues of the centered gram matrix divided by N.
        alphas_: Expansion coefficients of the components, one per column.
        M_: Number of components.
    """
    def __init__(self, n_components, kernel_func):
        self.n_components = n_components
        self.kernel_func = kernel_func

    def fit(self, X):
        """Determine required parameters of the Kernel PCA.

        Args:
            X (array-like): Input samples.

        Returns:
            The fitted PCA object.

        Raises:
            ValueError: If `n_components` is not an integer in [1, N].
        """
        X = np.asarray(X)
        N = len(X)
        M = int(self.n_components)
        if M != self.n_components or not 1 <= M <= N:
            raise ValueError('Invalid `n_components` parameter.')

        K = self.get_gram_matrix(X)
        # Remember the training statistics: they are needed to center the
        # kernel values of unseen samples in `transform`.
        self.X_fit_ = X
        self.K_col_means_ = K.mean(axis=0)
        self.K_mean_ = K.mean()
        K_centered = self.center_gram_matrix(K)

        eigvals, V = np.linalg.eigh(K_centered)
        top = np.argsort(-eigvals)[:M]
        eigvals, V = eigvals[top], V[:, top]

        # Scale each unit eigenvector by 1/sqrt(eigenvalue) so that the
        # corresponding axis has unit length in feature space. Directions
        # with a (numerically) zero eigenvalue carry no variance; they get
        # zero coefficients instead of a division by ~0.
        tol = max(eigvals[0], 0.0) * N * np.finfo(float).eps
        informative = eigvals > tol
        scale = np.zeros_like(eigvals)
        scale[informative] = 1.0 / np.sqrt(eigvals[informative])

        self.lmbdas_ = eigvals / N
        self.alphas_ = V * scale
        self.M_ = M
        return self

    def get_gram_matrix(self, X1, X2=None):
        """ Computes the gram matrix.

        Args:
            X1 (ndarray): Samples in the input space.
            X2 (ndarray, optional): Second set of samples. Defaults to `X1`.

        Returns
            The gram matrix of shape (len(X1), len(X2)) whose entry (i, j)
            is `kernel_func(X1[i], X2[j])`.
        """
        X1 = np.asarray(X1)
        X2 = X1 if X2 is None else np.asarray(X2)
        K = np.empty((len(X1), len(X2)))
        # Evaluated pair by pair so that any scalar kernel function works
        for i, x1 in enumerate(X1):
            for j, x2 in enumerate(X2):
                K[i, j] = self.kernel_func(x1, x2)
        return K

    def center_gram_matrix(self, K):
        """ Computes the gram matrix with centered features.

        Once the model is fitted, the training statistics are used, so `K`
        may hold kernel values between arbitrary samples (rows) and the
        training samples (columns). Before fitting, the statistics of `K`
        itself are used.

        Args:
            K (ndarray): Gram matrix computed by a kernel.

        Returns
            Transformed gram matrix with mean zero in the projection space.
        """
        K = np.asarray(K, dtype=float)
        if hasattr(self, 'K_col_means_'):
            col_means, total_mean = self.K_col_means_, self.K_mean_
        else:
            col_means, total_mean = K.mean(axis=0), K.mean()
        row_means = K.mean(axis=1, keepdims=True)
        return K - col_means - row_means + total_mean

    def transform(self, X):
        """ Transforms samples from the D-dimensional input space into
            the M-dimensional projection space.

        Args:
            X (ndarray): Samples in the input space.

        Returns
            Transformed samples in the projection space.
        """
        K = self.get_gram_matrix(X, self.X_fit_)
        return self.center_gram_matrix(K) @ self.alphas_

> Apply the Kernel PCA to the two-dimensional toy data sets and plot the data projected onto some of the principal components in feature space.

In [None]:
####################
# Your Code Here   #
####################

In [None]:
####################
# Your Code Here   #
####################