# Machine Learning (Summer 2024)

## Practice Session 7

May 23rd, 2022

Ulf Krumnack & Lukas Niehaus

Institute of Cognitive Science,
University of Osnabrück

## Today's Session

* Show Sheet06
* PCA

## PCA Introduction
* Dimension Reduction
* Project the data onto the orthonormal directions with the highest variance
* Minimize the mean squared reconstruction error between the original data $\vec{x}_i$ and the projected data $\vec{z}_i$:


$$
E = \frac{1}{|D|}\sum_{i=1}^{|D|}\left\|\vec{z}_i - \vec{x}_i\right\|^2
$$

## PCA Visualization

The following cells provide some visualizations related to PCA.

### PCA Visualization 1: Mean squared reconstruction error and variance

Preparation: provide some 2D-data (and plot it).

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Draw 100 samples from a standard 2-D normal distribution (one sample per row).
X = np.random.randn(100, 2)

# Correlate the samples. With the Cholesky factor L of the target covariance
# C = L @ L.T, the rows of X @ L.T have covariance C; X @ L would give
# L.T @ L instead, which is a different matrix.
X = X @ np.linalg.cholesky([[1, 0.6], [0.6, 0.6]]).T

# Centre every feature (column) separately. X.mean() without an axis is the
# scalar mean over all entries and would leave both columns off-centre.
X -= X.mean(axis=0)

# Eigendecomposition of the symmetric sample covariance matrix. eigh returns
# the eigenvalues in ascending order, so reverse both outputs: afterwards
# eigenval[0] is the largest eigenvalue and eigenvec[:, 0] its eigenvector.
eigenval, eigenvec = np.linalg.eigh(np.cov(X.T))
eigenval, eigenvec = eigenval[::-1], eigenvec[:, ::-1]

# Scatter plot of the 2-D samples; equal axis scaling keeps the shape of the
# point cloud undistorted.
plt.figure()
plt.axis('equal')
plt.scatter(*X.T, c='blue')
plt.show()

An interactive plot showing the mean squared reconstruction error and the variance depending on the chosen projection space.

In [None]:
%matplotlib ipympl 
import ipywidgets as widgets

# One figure with two side-by-side panels: the 2-D data on the left and the
# 1-D projection on the right. Both axes objects are redrawn by plot().
plt.figure(figsize=(8, 4))
axes = plt.subplot(1, 2, 1, label='2D Data')
axes.set_title('2D Data')
axes2 = plt.subplot(1, 2, 2, label='Projection')
axes2.set_title('Projection')


def plot(p1):
    """Redraw both panels for a projection line at angle ``p1``.

    Left panel (``axes``): the samples in ``X``, the projection line through
    the origin and its normal, every sample's reconstruction on the line, and
    the direction of the eigenvector with the largest eigenvalue. Its x-label
    shows the mean squared reconstruction error (MSRE).

    Right panel (``axes2``): the 1-D coordinates of the samples along the
    projection line together with a normalised histogram. Its x-label shows
    the variance of those coordinates.

    If the columns of ``X`` are centred, variance and MSRE add up to a
    constant, so the angle that maximises the variance minimises the MSRE.

    Parameters
    ----------
    p1 : int or float
        Angle of the projection line in degrees, measured from the x-axis.

    Notes
    -----
    Reads the module-level names ``X``, ``eigenval``, ``eigenvec``, ``axes``
    and ``axes2``; it draws into whatever these names are bound to at call
    time.
    """
    axes.cla()
    axes2.cla()

    # angle of the projection line in radians
    alpha = p1*np.pi/180

    # unit vector along the projection line, as a (2, 1) column
    w = np.array([np.cos(alpha), np.sin(alpha)]).reshape(2, 1)

    # coordinate of every sample along w, shape (n, 1) ...
    z1 = X @ w
    # ... and the corresponding reconstructed points in the plane, shape (n, 2)
    z = z1 @ w.T

    # plot the projection line (long) and its normal (short) through the origin
    m = np.array([-1, 1])
    axes.plot(w[0]*3.5*m, w[1]*3.5*m, c='seagreen', linewidth=.8)
    axes.plot(-w[1]*2*m, w[0]*2*m, c='mediumseagreen', linewidth=.8)

    # plot projection lines: one segment from each sample to its
    # reconstruction. For 2-D inputs, plot() draws one line per column, so a
    # single call replaces a Python loop over the samples.
    axes.plot(np.vstack([X[:, 0], z[:, 0]]), np.vstack([X[:, 1], z[:, 1]]),
              color='cornflowerblue', linewidth=.8, alpha=.7)

    # plot projection points
    axes.scatter(z[:, 0], z[:, 1], c='orange', s=3, zorder=4)

    # plot data points
    axes.scatter(X[:, 0], X[:, 1], c='royalblue', s=3, zorder=3)

    # plot the origin
    axes.scatter(0, 0, c='springgreen', marker='*', s=50, zorder=5)

    # plot direction of eigenvector with largest eigenvalue as two short
    # markers outside the point cloud. Pick the column by eigenvalue instead
    # of assuming that the decomposition returned it first.
    lead = eigenvec[:, np.argmax(eigenval)]
    b = np.array([3.5, 4.5])
    axes.plot(lead[0]*-b[::-1], lead[1]*-b[::-1], c='darkred', linewidth=.8)
    axes.plot(lead[0]*b, lead[1]*b, c='darkred', linewidth=.8)
    axes.set_xlim([-4, 4])
    axes.axis('equal')

    # projection: the 1-D coordinates on the baseline plus their histogram
    axes2.scatter(z1, np.zeros_like(z1), c='orange', zorder=2)
    axes2.hist(z1, bins=30, range=(-4, 4), density=True, zorder=1)

    axes2.set_xlim([-4, 4])
    axes2.set_ylim([-.03, 1.1])
    axes2.spines['bottom'].set_position('zero')
    axes2.spines['bottom'].set_color('seagreen')

    # variance of the projected (1-D) coordinates; np.var(z) would instead mix
    # the x- and y-coordinates of the reconstructed points
    variance = np.var(z1)
    axes2.set_xlabel('Variance: {:1.3f}'.format(variance))

    # mean squared reconstruction error: squared distance between each sample
    # and its reconstruction, averaged over the samples
    msre = np.mean(np.sum((z - X)**2, axis=1))

    axes.set_xlabel('MSRE: {:1.3f}'.format(msre))
    plt.show()


# Slider for the angle of the projection line (0..180, initially 13); the plot
# is refreshed only when the slider is released.
p1 = widgets.IntSlider(value=13, min=0, max=180, step=1,
                       continuous_update=False)
ui = widgets.HBox(children=[p1])

# Connect the slider to plot(); the drawing goes into the output widget.
out = widgets.interactive_output(plot, {'p1': p1})

display(ui, out)


### PCA Visualization 2

Vary the data set to see how the principal components change:

In [None]:
%matplotlib ipympl 
import ipywidgets as widgets
from sklearn.datasets import make_blobs
from sklearn.decomposition import PCA

# Number of points drawn for every redraw.
n_samples = 500

# A single square panel that plot() clears and redraws.
plt.figure(figsize=(5, 5))
axes = plt.subplot(1, 1, 1, label='pca')
axes.set_title('pca')

# The PCA estimator is created once and refitted on every redraw.
pca = PCA()

def plot(std1, std2, angle):
    """Draw a rotated 2-D blob together with its principal components.

    Generates ``n_samples`` points around the origin, rotates them by
    ``angle``, fits the module-level ``pca`` estimator and draws the points
    plus one arrow per principal component into the module-level ``axes``.
    Each arrow starts at the origin and its length is the standard deviation
    of the data along that component.

    Parameters
    ----------
    std1, std2 : float
        Standard deviations passed to ``make_blobs`` for the single cluster.
    angle : float
        Rotation angle in radians; positive values rotate counter-clockwise.
    """
    axes.cla()

    # create data: one cluster centred at the origin. The nested list is meant
    # to give the cluster std1 along x and std2 along y (this relies on
    # make_blobs broadcasting the inner list over the features - TODO confirm).
    # The fixed random_state keeps the underlying sample identical between
    # redraws. The returned labels are not needed.
    X, _ = make_blobs(n_samples=n_samples, cluster_std=[[std1, std2]],
                      centers=[[0, 0]], random_state=12)

    # Counter-clockwise rotation matrix for column vectors. The samples are
    # the rows of X, so multiply by the transpose; X @ rot_mat would rotate
    # by -angle instead.
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    rot_mat = np.array([[cos_a, -sin_a], [sin_a, cos_a]])
    X = X @ rot_mat.T

    # apply pca and scale every component (one per row) by the standard
    # deviation of the data along it
    pca.fit(X)
    pcs = pca.components_ * np.sqrt(pca.explained_variance_)[:, np.newaxis]

    # plot the samples, the component arrows (drawn in data coordinates) and
    # the origin
    axes.scatter(X[:, 0], X[:, 1], s=4, alpha=.5)
    origin = np.zeros_like(pcs)
    axes.quiver(origin[:, 0], origin[:, 1], pcs[:, 0], pcs[:, 1], zorder=1,
                angles='xy', scale_units='xy', scale=1,
                color=['red', 'darkred'])
    axes.plot(0, 0, '*y', markersize=12, alpha=1)

    axes.set_xlim([-4, 4])
    axes.set_ylim([-4, 4])
    # equal scaling on both axes, so the orthogonal components also look
    # orthogonal on screen
    axes.set_aspect('equal', adjustable='box')
    plt.show()
    
# Sliders for the two standard deviations of the blob and for its rotation
# angle (in radians). The labels tell the three sliders apart.
p1 = widgets.FloatSlider(min=0, max=2, step=.1, value=.5,
                         description='std1', continuous_update=True)
p2 = widgets.FloatSlider(min=0, max=2, step=.1, value=.5,
                         description='std2', continuous_update=True)
p3 = widgets.FloatSlider(min=0, max=np.pi, step=.1, value=.5*np.pi,
                         description='angle', continuous_update=True)
ui = widgets.HBox([p1, p2, p3])

# interactive_output draws the plot once for the initial slider values and
# again on every change. No additional manual plot(...) call follows, because
# it would leave the figure showing values that differ from the sliders.
out = widgets.interactive_output(plot, {'std1': p1, 'std2': p2, 'angle': p3})

display(ui, out)