# (Convolutional) Autoencoder on MNIST

### Outline
1. Data Preparation
2. Model Definition
3. Training
4. Visual Evaluation (Comparison, t-SNE, ...)
5. Generate Plots for different encoding dimensionalities

In [None]:
import os
import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import numpy as np

# Silence warnings notebook-wide: warnings.warn is replaced by the no-op below,
# so any later call that goes through warnings.warn does nothing.
def warn(*args, **kwargs):
    """Accept any arguments and do nothing (drop-in replacement for warnings.warn)."""
    pass
import warnings
warnings.warn = warn

#### Check if the server is using the GPU

In [None]:
# Check if using gpu: print the devices returned by device_lib.list_local_devices().
# NOTE(review): tensorflow.python.* is not part of the public API; presumably
# tf.config.list_physical_devices() is the supported equivalent — confirm for
# the installed TensorFlow version before switching.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

## 1. Data Preparation
Add new dimension (color channel) to MNIST images and scale to [0,1]

In [None]:
# DATA
from tensorflow.keras.datasets.mnist import load_data

(X_train, y_train), (X_test, y_test) = load_data()

# Append a trailing channel axis and divide the pixel values by 255.
X_train = np.expand_dims(X_train, axis=-1) / 255.0
X_test = np.expand_dims(X_test, axis=-1) / 255.0

# Keep the first 50000 samples for training; everything after that becomes
# the validation split.
X_train, X_val = X_train[:50000], X_train[50000:]
y_train, y_val = y_train[:50000], y_train[50000:]

#### Plot random images to check if images were loaded and prepared correctly

In [None]:
# Draw ten random training indices and show the matching images in one row.
sample_indices = np.random.randint(0, len(X_train), size=(10))

fig, axis = plt.subplots(1, 10)
fig.set_size_inches(16, 10)
for subplot, img_index in zip(axis, sample_indices):
    subplot.imshow(X_train[img_index].reshape(28, 28), cmap='gray')

## 2. Define Model 

In [None]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Dense, Flatten, Reshape, Conv2DTranspose
from tensorflow.keras.models import Model


def build_model(activation='relu', dense_embedding_size=0):
    """ Creates the computation graph for the autoencoder.
    Architecture inspired by blog.keras.io/building-autoencoders-in-keras.html

    Parameters
    ----------
    activation: ['relu', 'softmax', 'sigmoid', ...]
        Activation function used throughout the network (except for the
        output layer, which always uses 'sigmoid').
        See keras.io/activations/ for possible values.
    dense_embedding_size: int
        If larger than 0, the bottleneck of the autoencoder additionally uses two fully connected layers.
        The value specifies the size of the bottleneck layer.
        Otherwise no fully connected layers are used and the bottleneck comprises 4*4*8=128 values.

    Returns
    -------
    (encoder, autoencoder): tuple
        Two keras.models.Model objects. `encoder` maps an image to its
        (flattened) encoding; `autoencoder` maps an image to its
        reconstruction and is already compiled with Adam and
        binary cross-entropy. Both models share the same layers.

    """

    # Define Computation Graph
    input_img = Input(shape=(28, 28, 1))

    # Encoder: three conv/pool stages shrink 28x28 -> 14x14 -> 7x7 -> 4x4
    # ('same' padding rounds 7/2 up to 4).
    x = Conv2D(16, (3, 3), activation=activation, padding='same')(input_img)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(8, (3, 3), activation=activation, padding='same')(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(8, (3, 3), activation=activation, padding='same')(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
    encoded = Flatten()(x)

    if dense_embedding_size > 0:
        # Optional dense bottleneck: compress to the requested size, then
        # expand back to 128 values so the decoder sees a 4x4x8 tensor again.
        encoded = Dense(dense_embedding_size, activation=activation)(encoded)
        x = Dense(128, activation=activation)(encoded)
        x = Reshape((4, 4, 8))(x)

    # Decoder. No input_shape argument is needed here: in the functional API
    # the shape is taken from the incoming tensor.
    x = Conv2D(8, (3, 3), activation=activation, padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(8, (3, 3), activation=activation, padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    # Deliberately unpadded: trims 16x16 to 14x14 so that the final
    # upsampling yields the original 28x28 resolution.
    x = Conv2D(16, (3, 3), activation=activation)(x)
    x = UpSampling2D((2, 2))(x)
    decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

    # Use default parameters of Adam
    opt = tf.keras.optimizers.Adam()

    # Define Models
    encoder = Model(input_img, encoded)
    autoencoder = Model(input_img, decoded)
    autoencoder.compile(optimizer=opt, loss='binary_crossentropy', metrics=[])

    return encoder, autoencoder

## 3. Train Model
We now specify a few hyperparameters and create and train our model.  
We could also perform, e.g., a grid search and parameterize more design choices of our model. For the sake of simplicity, we don't do this here.

In [None]:
# Size of the dense bottleneck (passed to build_model as dense_embedding_size)
EMBED_DIM = 40
# Activation function passed to build_model
ACTIVATION = 'relu'
# Upper bound on training epochs; early stopping may end training sooner
MAX_EPOCHS = 80
# Patience value handed to the EarlyStopping callback (monitors val_loss)
EARLY_STOPPING_PATIENCE = 3
# Mini-batch size used for fitting
BATCH_SIZE = 32

In [None]:
# Build the encoder and the full autoencoder from the module-level hyperparameters.
encoder, autoencoder = build_model(activation=ACTIVATION, dense_embedding_size=EMBED_DIM)

In [None]:
# Show a layer-by-layer overview of the full autoencoder.
autoencoder.summary()

In [None]:
# Stop training once the validation loss has not improved for
# EARLY_STOPPING_PATIENCE epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=EARLY_STOPPING_PATIENCE,
)

# Input and target are identical: the network learns to reproduce its input.
history = autoencoder.fit(
    x=X_train,
    y=X_train,
    batch_size=BATCH_SIZE,
    epochs=MAX_EPOCHS,
    validation_data=(X_val, X_val),
    callbacks=[early_stopping],
)

In [None]:
# Reconstruction loss on the test set (input and target are the same images).
loss = autoencoder.evaluate(X_test, X_test, verbose=False)
print("Loss: %f" % loss)

In [None]:
def plot_history(history, filename=None):
    """ Plot the training and validation loss for each epoch.

    The curves are drawn on a freshly created figure, so the plot never ends
    up on (or gets saved together with) a figure that another function left
    open.

    Parameters
    ----------
    history: History
        Contains evolution of training and validation loss
        (`history.history['loss']` and `history.history['val_loss']`).
    filename: string
        If not None, additionally saves plot as "Comparison/<filename>.png".
        The "Comparison" directory is created if it does not exist.
    """
    # A dedicated figure instead of plt.subplot(111), which would draw onto
    # whatever figure happens to be current.
    fig, ax = plt.subplots()
    ax.plot(history.history['loss'], label='Training Loss')
    ax.plot(history.history['val_loss'], label='Validation Loss')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()
    if filename:
        os.makedirs("Comparison", exist_ok=True)
        # Save this figure explicitly rather than the "current" one.
        fig.savefig("Comparison/" + filename + ".png")

In [None]:
# Plot the loss curves of the training run (no filename given, so nothing is saved).
plot_history(history)

## 4. Visual Evaluation

In [None]:
from scipy import ndimage

def compare_reconstruction(reconstructions, n_images=8, filename=None, indices=None):
    """ Plot a comparison of the network's reconstructions and the original images from the test set.

    Parameters
    ----------
    reconstructions: sequence of images
    n_images: int
        Number of random images to show
    filename: string
        If not None, additionally saves plot as png
    indices: list
        Contains indices of images from the test set, used for visualization.
        Set to random values if not specified.
    
    """
    %matplotlib inline
    if indices is None:
        indices = np.random.random_integers(0, 10000, n_images)
    
    fig, axis = plt.subplots(3, len(indices))
    fig.set_size_inches(15, 8)
    
    for i, img_index in enumerate(indices):
        axis[0, i].set_title("Original")
        axis[0, i].imshow(X_test[img_index].reshape(28, 28), cmap='gray')
        
        axis[1, i].set_title("Reconstruction")
        axis[1, i].imshow(reconstructions[img_index].reshape(28, 28), cmap='gray')
        

        tresholded_reconstruction = reconstructions[img_index].reshape(28, 28) > 0.3
        tresholded_reconstruction = np.asmatrix(tresholded_reconstruction, dtype=np.float32)
        blurred = ndimage.gaussian_filter(tresholded_reconstruction, sigma=.8)
        axis[2, i].set_title("Thresh/Blur")
        axis[2, i].imshow(blurred, cmap='gray')
        
    if filename:
        fig.savefig("Comparison/" + filename + ".png")
        
    return None

In [None]:
%matplotlib inline

# Reconstruct every test image, then plot a random selection next to the originals.
reconstr = autoencoder.predict(X_test)
compare_reconstruction(reconstr)

## Latent Space Visualization via t-SNE


In [None]:
from sklearn.manifold import TSNE

# Calculate encoding for images in test set
encodings = encoder.predict(X_test)

# Project the encodings down to three dimensions for plotting.
# CAUTION! Might take several minutes
tsne = TSNE(n_components=3, verbose=True)
projections = tsne.fit_transform(encodings)

In [None]:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation
from mpl_toolkits.mplot3d import Axes3D
%matplotlib notebook

# Only the first n_samples projections are drawn.
n_samples = 3000

fig = plt.figure(figsize=(5, 5))
ax = fig.add_subplot(111, projection='3d')

# Hide the tick labels on all three axes.
for tick_labels in (ax.get_xticklabels(), ax.get_yticklabels(), ax.get_zticklabels()):
    plt.setp(tick_labels, visible=False)

# One point per test image, coloured by its label.
shown = projections[:n_samples]
ax.scatter(shown[:, 0], shown[:, 1], shown[:, 2], c=y_test[:n_samples])


def rotate(angle):
    """Set the azimuth of the 3D view to `angle` (called once per frame)."""
    ax.view_init(azim=angle)


# 360 frames, passed to rotate as 0..359, written out as an animated GIF.
anim = FuncAnimation(fig, rotate, frames=360, interval=60)
anim.save('t-SNE_dim_%i.gif' % EMBED_DIM, dpi=80, writer='imagemagick')

### Save Models to Disk

In [None]:
# Make sure the target directory exists before anything is written into it.
os.makedirs("Models", exist_ok=True)

# Save Autoencoder first (architecture only, as JSON)
with open("Models/autoencoder.json", "w") as f:
    f.write(autoencoder.to_json())

# Save Encoder separately. Could be improved to reduce redundancy
with open("Models/encoder.json", "w") as f:
    f.write(encoder.to_json())

# Save weights from whole graph
autoencoder.save_weights("Models/autoencoder_weights.h5")

In [None]:
# Restore both models from disk instead of training from scratch.
from tensorflow.keras.models import model_from_json

# Read the two architecture descriptions.
with open("Models/autoencoder.json", "r") as arch_file:
    autoencoder_json = arch_file.read()
with open("Models/encoder.json", "r") as arch_file:
    encoder_json = arch_file.read()

autoencoder = model_from_json(autoencoder_json)
encoder = model_from_json(encoder_json)

# Both models read the same weight file; for the encoder the weights are
# loaded with by_name=True.
autoencoder.load_weights('Models/autoencoder_weights.h5')
encoder.load_weights('Models/autoencoder_weights.h5', by_name=True)

### Compare models with different encoding sizes

In [None]:
# Bottleneck sizes to compare; 0 means "no dense bottleneck" (see build_model).
dimensions = [0, 5, 10, 20, 40, 100, 200]

# Draw the test images once so every model is shown on the same examples.
# randint excludes the upper bound, so all indices are valid for X_test.
i_test_images = np.random.randint(0, len(X_test), 8)

for dim in dimensions:
    enc, auto = build_model(activation=ACTIVATION, dense_embedding_size=dim)

    # Early stopping is driven by the validation split, not the test set,
    # so the test loss reported below stays an unbiased estimate.
    history = auto.fit(
          X_train,
          X_train,
          batch_size=BATCH_SIZE,
          epochs=MAX_EPOCHS,
          validation_data=(X_val, X_val),
          callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=EARLY_STOPPING_PATIENCE)])

    name = "dim_%i" % dim
    reconstr = auto.predict(X_test)
    compare_reconstruction(reconstr, filename=name + "_reconstruction", indices=i_test_images)

    # Encode the rounded test loss in the file name ('.' replaced by '_').
    loss = auto.evaluate(X_test, X_test, verbose=False)
    loss_str = ("%f" % np.round(loss, 5)).replace('.', "_")
    plot_history(history, filename=name + "_history_loss_" + loss_str)

In [None]:
# Briefly compare performance of a linear model using encodings vs original features

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Logistic regression trained on the encoder's output instead of raw pixels.
train_encodings = encoder.predict(X_train)
test_encodings = encoder.predict(X_test)

clf = LogisticRegression()
clf.fit(train_encodings, y_train)
predictions = clf.predict(test_encodings)

# accuracy_score expects (y_true, y_pred) in that order.
score = accuracy_score(y_test, predictions)
print(score)

In [None]:
# Baseline: logistic regression on the flattened raw pixels.
# The sample count is taken from the arrays themselves, because X_train no
# longer holds all 60000 images once the validation split has been removed.
clf = LogisticRegression()
clf.fit(X_train.reshape(len(X_train), -1), y_train)
predictions = clf.predict(X_test.reshape(len(X_test), -1))

# accuracy_score expects (y_true, y_pred) in that order.
score = accuracy_score(y_test, predictions)
print(score)