In [None]:
from time import time

import numpy as np
import matplotlib.pyplot as plt

import mnist
from sklearn.decomposition import PCA

from ipywidgets import interact, interactive
import ipywidgets as widgets

from pathlib import Path

In [None]:
# NOTE: this import rebinds the name `time` to the module. The first cell bound
# it to the function (`from time import time`); every `time.time()` call in the
# notebook relies on the module binding established here.
import time
from pathlib import Path

tic = time.time()

# Make sure the data directory exists.
datadir = './mnist_data/'
Path(datadir).mkdir(exist_ok=True)

# temporary_dir is a function that returns the temporary directory;
# override it so that it returns our data directory instead.
mnist.temporary_dir = lambda: datadir

# Prepare the training data.
train_images = mnist.train_images()  # This is an m x 28 x 28 tensor
train_labels = mnist.train_labels().astype(int)

# Flatten every image into one row and rescale the values by 1/255.
m, h, w = train_images.shape
train_data = train_images.reshape(m, h * w).astype(float) / 255  # This is an m x 784 matrix

In [None]:
# Working names for the design matrix and the labels. `np.asarray` reuses an
# existing ndarray instead of duplicating it (`np.array` copies by default), so
# X shares memory with train_data -- do not modify X in place.
X = np.asarray(train_data)
y = np.asarray(train_labels)

# Bind the figure to a real name: assigning to `_` at top level would overwrite
# IPython's "last output" variable.
fig, axes = plt.subplots(1, 4)
images_and_labels = list(zip(X, y))
# Preview the first four training samples, using each label as the panel title.
for ax, (image, label) in zip(axes, images_and_labels[:4]):
    ax.set_axis_off()
    # Each row of X is a flattened image; restore the 28 x 28 layout for display.
    ax.imshow(image.reshape(28, 28), cmap=plt.cm.gray, interpolation='nearest')
    ax.set_title('Training: %s' % label)

In [None]:
n_components = 150

print("Extracting the top %d eigenfaces from %d faces"
      % (n_components, X.shape[0]))
t0 = time.time()
# The randomized SVD solver is stochastic; pinning random_state makes the
# fitted components identical on every Restart & Run All.
pca = PCA(n_components=n_components, svd_solver='randomized',
          whiten=False, random_state=0).fit(X)
print("done in %0.3fs" % (time.time() - t0))

# View every principal axis as an h x w image so it can be plotted.
h = 28
w = 28
eigenfaces = pca.components_.reshape((n_components, h, w))

In [None]:
# Project every training sample onto the fitted principal axes, timing the call.
print("Projecting the input data on the eigenfaces orthonormal basis")
t0 = time.time()
X_pca = pca.transform(X)
elapsed = time.time() - t0
print("done in %0.3fs" % elapsed)

In [None]:
# Display the mean vector stored on the fitted PCA (pca.mean_) as an h x w image.
mean_image = pca.mean_.reshape(h, w)
plt.imshow(mean_image, cmap=plt.cm.gray)

In [None]:
def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
    """Plot the leading images of a collection on an n_row x n_col grid.

    Parameters
    ----------
    images : sequence
        Items to draw; each one must support ``.reshape((h, w))``.
    titles : sequence
        ``titles[i]`` is drawn above ``images[i]``.
    h, w : int
        Height and width every image is reshaped to before drawing.
    n_row, n_col : int, optional
        Grid shape; at most ``n_row * n_col`` images are drawn.

    Notes
    -----
    When fewer images or titles than grid cells are supplied, only the
    available ones are drawn and the remaining cells are left empty.
    The figure is created on the current pyplot state; nothing is returned.
    """
    plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    # Clamp to what is actually available so a short gallery does not raise
    # IndexError part-way through drawing.
    n_plots = min(n_row * n_col, len(images), len(titles))
    for i in range(n_plots):
        plt.subplot(n_row, n_col, i + 1)
        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.title(titles[i], size=12)
        # Hide the tick marks: pixel coordinates carry no information here.
        plt.xticks(())
        plt.yticks(())
        
# Label every component by its index, then show the leading ones in a grid.
eigenface_titles = ["eigenface %d" % k for k in range(len(eigenfaces))]
plot_gallery(eigenfaces, eigenface_titles, h=h, w=w)

plt.show()

In [None]:
# The component slider keeps its previous upper bound of 50 but is clamped to
# the last fitted component, so lowering n_components cannot make the callback
# index past the end of pca.components_.
@interact(c=widgets.IntSlider(min=0, max=min(50, len(pca.components_) - 1), step=1, value=0,
                              continuous_update=False),
          f=widgets.FloatSlider(min=-3, max=3, step=0.1, value=0, continuous_update=True))
def eigenmnist(c, f):
    """Show the mean image displaced along one principal component.

    Parameters
    ----------
    c : int
        Index of the principal component to move along.
    f : float
        Multiplier applied to the square root of that component's explained
        variance; 0 shows the mean image unchanged.
    """
    preimage = pca.mean_ + f * np.sqrt(pca.explained_variance_[c]) * pca.components_[c]
    # The flattened vector is viewed as a 28 x 28 image for display.
    plt.imshow(preimage.reshape(28, 28), cmap='gray')
    

In [None]:
## TO-DO: Plot the spectrum (cumulative explained variance vs. number of components)

In [None]:
## TO-DO: Given a set of components, add sliders and a button that randomly selects a digit;
## compute the digit's principal components and reconstruct it, while letting the
## sliders change the values of those components.