In [1]:
from math import sqrt

import matplotlib.pyplot as plt
from matplotlib.offsetbox import TextArea, AnnotationBbox, OffsetImage
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Conv2D, Conv2DTranspose, MaxPool2D, Dense, Flatten, Reshape
from tensorflow.keras.models import Model

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [None]:
# Fashion-MNIST ships with Keras; load_data() yields a (train, test) pair,
# each of which is an (images, labels) pair.
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Human-readable class names, indexed by the integer label.
class_names = [
    'T-shirt/top',  # 0
    'Trouser',      # 1
    'Pullover',     # 2
    'Dress',        # 3
    'Coat',         # 4
    'Sandal',       # 5
    'Shirt',        # 6
    'Sneaker',      # 7
    'Bag',          # 8
    'Ankle boot',   # 9
]

In [None]:
# Preview the first 25 training images in a 5x5 grid, each captioned with
# its class name.
plt.figure(figsize=(10, 10))
for position in range(25):
    ax = plt.subplot(5, 5, position + 1)
    # Hide ticks and grid lines so only the image and its caption remain.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(train_images[position], cmap=plt.cm.binary)
    ax.set_xlabel(class_names[train_labels[position]])
plt.show()

In [None]:
# Rescale pixel values from [0, 255] to [0, 1].
# The division is guarded on dtype so that re-running this cell does not
# divide by 255 a second time: the arrays are only scaled while they are still
# integer-typed (after scaling they are floating point).
if np.issubdtype(train_images.dtype, np.integer):
    train_images = train_images / 255.0
if np.issubdtype(test_images.dtype, np.integer):
    test_images = test_images / 255.0

# Add a trailing channel dimension of size 1
# (We only need 1 channel because the images are grayscale).
# -1 lets NumPy infer the number of images instead of hard-coding
# 60000 / 10000, so the cell also works on a subset of the data.
train_images = train_images.reshape(-1, 28, 28, 1)
test_images = test_images.reshape(-1, 28, 28, 1)

In [None]:
# Define the encoder CNN using the tf.keras functional API.
# It should have 2 conv/pool stages and a dense layer.
def create_encoder(input_shape, encoding_dim):
    """Build the convolutional encoder and print its summary.

    Args:
        input_shape: Shape of a single input image, forwarded to
            `Input(shape=...)`.
        encoding_dim: Number of units in the final Dense layer, i.e. the
            length of the encoding vector.

    Returns:
        A Keras `Model` mapping an input image to its encoding.
    """
    inputs = Input(shape=input_shape)

    # Two Conv2D -> MaxPool2D stages with 32 and then 64 filters.
    features = inputs
    for n_filters in (32, 64):
        features = Conv2D(n_filters, 3, padding="same", activation="relu")(features)
        features = MaxPool2D()(features)

    flat = Flatten()(features)
    # NOTE(review): softmax forces every encoding to be non-negative and sum
    # to 1 -- confirm this is the intended bottleneck activation.
    encoding = Dense(encoding_dim, activation="softmax")(flat)

    encoder = Model(inputs, encoding)
    encoder.summary()

    return encoder

In [None]:
# Define the decoder CNN using the tf.keras functional API.
# It should have a reshape layer, 2 conv transpose layers, and a final
# 3x3 conv layer with a sigmoid output.
def create_decoder(encoding_dim):
    """Build the convolutional decoder and print its summary.

    The flat encoding is reshaped into a square single-channel feature map of
    side `sqrt(encoding_dim)`. Each stride-2 transposed convolution with
    "same" padding doubles the height and width, so the output side is
    4 * sqrt(encoding_dim) (e.g. encoding_dim=49 -> 7 -> 14 -> 28).

    Args:
        encoding_dim: Length of the encoding vector; must be a perfect square.

    Returns:
        A Keras `Model` mapping an encoding vector to a single-channel image
        with values in [0, 1] (sigmoid output).

    Raises:
        ValueError: If `encoding_dim` is not a perfect square.
    """
    # Compute the side once and validate with exact integer arithmetic rather
    # than trusting the floating-point square root to be exactly integral.
    side = int(round(sqrt(encoding_dim)))
    if side * side != encoding_dim:
        raise ValueError("Encoding dim must be a perfect square.")

    # Input expects a shape tuple; a bare int is rejected by newer Keras.
    inputs = Input(shape=(encoding_dim,))
    reshape = Reshape((side, side, 1))(inputs)
    conv1 = Conv2DTranspose(64, 3, strides=2, padding="same", activation="relu")(reshape)
    conv2 = Conv2DTranspose(32, 3, strides=2, padding="same", activation="relu")(conv1)
    # Collapse to one channel; sigmoid keeps outputs in the same [0, 1] range
    # as the rescaled input pixels.
    conv3 = Conv2D(1, 3, padding="same", activation="sigmoid")(conv2)

    decoder = Model(inputs, conv3)
    decoder.summary()

    return decoder

In [None]:
# Shape of a single example (everything after the leading batch axis).
input_shape = train_images.shape[1:]

# Why did I pick 49?
encoding_dim = 49

In [None]:
# Build the two halves of the autoencoder
encoder = create_encoder(input_shape, encoding_dim)
decoder = create_decoder(encoding_dim)

# Chain them with the tf.keras functional API:
# input -> encoder -> decoder
inputs = Input(shape=input_shape)
encoded = encoder(inputs)
decoded = decoder(encoded)

# Wrap the full pipeline in a single model and print its structure
autoencoder = Model(inputs=inputs, outputs=decoded)
autoencoder.summary()

# Which loss function? (See slides)
autoencoder.compile(loss="mse", optimizer="adam")

In [None]:
# Train for 250 epochs (make sure you're on the GPU).
# The images serve as both input and target: the model learns to reproduce them.
autoencoder.fit(
    x=train_images,
    y=train_images,
    epochs=250,
    batch_size=256,
)

In [None]:
# Reconstruction loss on the held-out test set (input and target are the same images)
test_loss = autoencoder.evaluate(
    x=test_images,
    y=test_images,
    batch_size=256,
)

In [None]:
# Reconstruct the first 5 test images.
# The autoencoder outputs are stored in a variable called preds.
visual = test_images[:5]
preds = autoencoder.predict(visual)

# Plot the originals (first row) above their reconstructions (second row)
plt.figure(figsize=(10,10))
rows = [(test_images, "Original"), (preds, "Reconstructed")]
for row, (images, caption) in enumerate(rows):
    for col in range(5):
        # Subplot slots are 1-based and filled row by row on a 5x5 grid
        plt.subplot(5, 5, row * 5 + col + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        # squeeze() drops the size-1 channel axis so imshow gets a 2-D array
        plt.imshow(images[col].squeeze(), cmap=plt.cm.binary)
        plt.xlabel(caption)
plt.show()

In [None]:
# This function plots the latent space of the autoencoder.
# Note that we isolate the encoder half of the autoencoder.
# Also note that we are only plotting 2 dimensions of
# a multidimensional space.
def plot_latent(mode, count):
    """Plot the first two latent dimensions for a random sample of test images.

    Reads the notebook-level `encoder`, `test_images`, `test_labels` and
    `class_names`.

    Args:
        mode: 'imgs' draws each sampled image at its latent coordinates;
            'dots' draws a scatter plot coloured by class label and annotates
            the mean position of each class.
        count: Number of test images to sample.

    Raises:
        ValueError: If `mode` is neither 'imgs' nor 'dots'.
    """
    # Fail fast instead of silently showing an empty figure.
    if mode not in ('imgs', 'dots'):
        raise ValueError("mode must be 'imgs' or 'dots', got {!r}".format(mode))

    # Sample without replacement whenever possible so no image is plotted
    # twice (count == len(test_images) then covers the whole test set); fall
    # back to sampling with replacement only if more points are requested
    # than exist.
    n_available = len(test_images)
    idx = np.random.choice(n_available, count, replace=count > n_available)
    inputs = test_images[idx]

    fig, ax = plt.subplots(figsize=(10, 7))
    ax.set_title("Autoencoder Latent Space")
    # Keep only the first two latent dimensions for the 2-D plot.
    coords = encoder.predict(inputs)[:, :2]

    if mode == 'imgs':
        for image, (x, y) in zip(inputs, coords):
            im = OffsetImage(image.reshape(28, 28), zoom=1, cmap='gray')
            ab = AnnotationBbox(im, (x, y), xycoords='data', frameon=False)
            ax.add_artist(ab)
        # Artists added via add_artist do not affect the data limits, so
        # update them explicitly before autoscaling.
        ax.update_datalim(coords)
        ax.autoscale()
    else:
        classes = test_labels[idx]
        points = ax.scatter(coords[:, 0], coords[:, 1], c=classes)
        fig.colorbar(points, ax=ax)
        for i, name in enumerate(class_names):
            members = coords[classes == i]
            # A small sample may contain no image of this class; skip it
            # rather than annotating a NaN position.
            if len(members) == 0:
                continue
            class_center = members.mean(axis=0)
            text = TextArea('{} ({})'.format(name, i))
            ab = AnnotationBbox(text, class_center, xycoords='data', frameon=True)
            ax.add_artist(ab)
    plt.show()

In [None]:
# Visualise the latent space in both modes, with different sample sizes.
# What do you observe?
plot_latent(mode="dots", count=10000)
plot_latent(mode="imgs", count=1000)

In [None]:
# Try varying the architecture to correspond with different dimensions of latent space
# What happens as the latent space becomes larger? Is there a "sweet spot"?
# In particular, try with a latent space dimension of 2 to obtain the most accurate visualization
# And try with a larger latent space dimension to obtain a more accurate prediction.
