In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import json

# Utilities
from PIL import Image
import glob # Khoa's stuff
from IPython import display
from tqdm import tqdm # for training progress

# Parameters
Adjust the system-dependent values below (batch size, dataset path) to match the machine you are running on.

In [4]:
# --- System-dependent settings: tune these per machine ---
DATASET_PATH = "./dataset/"  # Directory the images are read from
BATCH_SIZE = 200  # Images per batch

# --- Experiment settings ---
IMAGE_SHAPE = (64, 64, 3)  # (height, width, channels); should not change
LATENT_DIM = 512  # Width of the encoded vector; do not change unless testing (the smaller the better)
EPOCHS = 10  # Training epochs; do not change unless testing

# Load Data
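Images are read from `DATASET_PATH` in batches of `BATCH_SIZE`, with 10% of the files held out as a test/validation set.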

In [6]:
from keras.utils import image_dataset_from_directory

# Fixed seed for shuffling. Keras requires a seed whenever `shuffle=True` is
# combined with `validation_split`, so that the training and validation
# subsets are guaranteed not to overlap.
SPLIT_SEED = 42

# Build the training and held-out datasets in a single call.
# Shuffling DOES matter here: without it the files are taken in alphanumeric
# order, so every epoch sees identical batches and the held-out 10% is simply
# the tail of the sorted file list rather than a random sample.
x_train, x_test = image_dataset_from_directory(
    DATASET_PATH,
    labels=None,  # Unlabelled: each batch is just a tensor of images
    batch_size=BATCH_SIZE,
    image_size=IMAGE_SHAPE[:-1],  # (height, width) without the channel axis
    shuffle=True,
    seed=SPLIT_SEED,
    validation_split=0.1,
    subset="both",  # Return (training, validation) together
    crop_to_aspect_ratio=True)

In [None]:
def _scale_to_unit_range(images):
    """Divide every pixel value in the batch by 255."""
    return images / 255


# Normalize dataset.
# NOTE: the datasets are rebound to their own mapped versions, so running this
# cell a second time divides by 255 again. Re-run the loading cell first.
x_train = x_train.map(_scale_to_unit_range)
x_test = x_test.map(_scale_to_unit_range)

Show a few images from the dataset

In [None]:
# Need to investigate memory usage, but should be good
train_iter, test_iter = x_train.as_numpy_iterator(), x_test.as_numpy_iterator()

fig = plt.figure(figsize=(2, 2))

# One entry per panel: which iterator to pull the next batch from, and which
# image of that batch to show. Each panel consumes a fresh batch, so the two
# panels of a dataset come from batch 0 and batch 1 respectively (for
# variety's sake).
panels = [
    (train_iter, 0),  # First image of batch 0 of x_train
    (train_iter, 1),  # Second image of batch 1 of x_train
    (test_iter, 0),   # First image of batch 0 of x_test
    (test_iter, 1),   # Second image of batch 1 of x_test
]

for slot, (batch_iter, image_idx) in enumerate(panels, start=1):
    plt.subplot(2, 2, slot)
    plt.imshow(next(batch_iter)[image_idx])
    plt.axis('off')

plt.show()

# Model Definition

In [None]:
from keras import models, layers, Model

### Encoder Model

In [None]:
# Encoder: flattens an image of shape IMAGE_SHAPE and projects it onto a
# LATENT_DIM-wide vector. The input shape comes from the shared IMAGE_SHAPE
# constant so it cannot drift from the size the dataset loader produces.
encoder = models.Sequential([
    layers.InputLayer(input_shape=IMAGE_SHAPE),
    # Fill in the rest of the model
    layers.Flatten(),
    layers.Dense(LATENT_DIM, activation='sigmoid') # Number of features we're condensing down to
], name="face_encoder")

### Decoder Model

In [None]:
# Decoder: maps a LATENT_DIM-wide vector back to an image of shape IMAGE_SHAPE.
decoder = models.Sequential([
    # `(LATENT_DIM,)` with the trailing comma is a 1-tuple; `(LATENT_DIM)` is
    # just an int and is not a valid shape.
    layers.InputLayer(input_shape=(LATENT_DIM,)),
    # Fill in the rest of the model
    # Expand the flat latent vector to one value per output pixel so it can be
    # reshaped into the 3-D feature map that Conv2DTranspose requires.
    # No bias: the BatchNormalization that follows supplies its own offset.
    layers.Dense(int(np.prod(IMAGE_SHAPE)), use_bias=False),
    layers.BatchNormalization(),
    layers.ReLU(),
    layers.Reshape(IMAGE_SHAPE),
    # Sigmoid keeps the reconstruction in [0, 1], the same range as the
    # normalized input images it is compared against.
    layers.Conv2DTranspose(IMAGE_SHAPE[-1], (5, 5), strides=1, padding='same', activation='sigmoid'),
], name="face_decoder")

### Variational Autoencoder Subclassing
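Subclassing `Model` lets the encoder/decoder pair be driven through the standard Keras model API (`compile`, `fit`, `predict`). Note: as written, the class is a plain autoencoder — it has no sampling step or KL-divergence term.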

In [None]:
class Autoencoder(Model):
  """Keras model that runs an encoder followed by a decoder.

  The train and test steps receive a batch of inputs only and reuse that
  batch as its own target, so the model can be fed datasets without labels.
  """

  def __init__(self, encoder_model, decoder_model):
    """Store the two sub-models.

    Args:
      encoder_model: model applied first, to the input batch.
      decoder_model: model applied to the encoder's output.
    """
    super().__init__()
    self.encoder, self.decoder = encoder_model, decoder_model

  def call(self, x):
    """Encode `x`, then decode the result and return it."""
    return self.decoder(self.encoder(x))

  def train_step(self, data):
    """Run the stock training step with `data` as both input and target."""
    # Pairing the batch with itself here avoids customizing the dataset.
    return super().train_step((data, data))

  def test_step(self, data):
    """Run the stock evaluation step with `data` as both input and target."""
    # Pairing the batch with itself here avoids customizing the dataset.
    return super().test_step((data, data))

# Wire the encoder and decoder defined above into a single trainable model.
autoencoder = Autoencoder(encoder_model=encoder, decoder_model=decoder)

# Paper points: loss functions
autoencoder.compile(loss='mae', optimizer='adam')

# Train Model

In [None]:
# Pass the datasets on their own: Keras does not accept a separate `y` when
# the input is a tf.data.Dataset, and the same applies to `validation_data`.
# The targets are produced inside Autoencoder.train_step / test_step, which
# pair each batch with itself.
autoencoder.fit(x_train,
                epochs=EPOCHS,
                validation_data=x_test)

# Test Model 

In [None]:
# Reconstruct every image in the held-out dataset; the plotting cell below
# reads the first few entries of `decoded_imgs`.
decoded_imgs = autoencoder.predict(x_test)

In [None]:
# Pull the first batch of the test set to show beside its reconstructions.
x_test_iterator = x_test.as_numpy_iterator()
test_batch = next(x_test_iterator)

# Show up to 10 pairs, but never more than are actually available, so a
# small batch or a short prediction array cannot cause an IndexError.
n = min(10, len(test_batch), len(decoded_imgs))

# squeeze=False keeps `axes` two-dimensional even when n == 1.
recon_fig, axes = plt.subplots(2, n, figsize=(20, 4), squeeze=False)

# Top row: originals. Bottom row: the model's reconstructions.
rows = (("original", test_batch), ("decompressed", decoded_imgs))
for row_axes, (title, images) in zip(axes, rows):
  # zip stops after n images because each row has exactly n axes.
  for ax, image in zip(row_axes, images):
    ax.imshow(image)
    ax.set_title(title)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()