In [1]:
# model
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Conv2DTranspose
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Reshape
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import numpy as np

class ConvAutoencoder:
  """Builder for a convolutional autoencoder expressed with the Keras functional API."""

  @staticmethod
  def build(width, height, depth, filters=(32, 64), latent_dim=16):
    """Wire an encoder and a decoder together into one Keras model.

    Args:
      width: width of the input images.
      height: height of the input images.
      depth: number of channels of the input images.
      filters: filter count for each stride-2 convolution of the encoder; the
        decoder uses the same counts in reverse order.
      latent_dim: number of units in the dense bottleneck layer.

    Returns:
      A Model named 'autoencoder'. Its bottleneck is the Dense layer named
      'encoder' and its output is the sigmoid Activation layer named 'decoder'.

    NOTE(review): every encoder stage uses stride 2, so the reconstruction
    presumably only matches the input size when height and width are divisible
    by 2**len(filters) -- confirm before using non-MNIST image sizes.
    """
    # channels-last layout, so the channel axis is the trailing one
    channel_axis = -1
    inputs = Input(shape=(height, width, depth))

    # encoder: one CONV => LeakyReLU => BN stage per requested filter count
    net = inputs
    for n_filters in filters:
      net = Conv2D(n_filters, (3, 3), strides=2, padding='same')(net)
      net = LeakyReLU(alpha=0.2)(net)
      net = BatchNormalization(axis=channel_axis)(net)

    # remember the shape of the last feature map (batch axis dropped) so the
    # decoder can restore it, then flatten and project onto the latent vector
    feature_map_shape = tuple(K.int_shape(net)[1:])
    net = Flatten()(net)
    latent = Dense(latent_dim, name='encoder')(net)

    # decoder: expand the latent vector back to the saved feature-map shape
    net = Dense(np.prod(feature_map_shape))(latent)
    net = Reshape(feature_map_shape)(net)

    # mirror the encoder: CONV_TRANSPOSE => LeakyReLU => BN, filters reversed
    for n_filters in reversed(filters):
      net = Conv2DTranspose(n_filters, (3, 3), strides=2, padding='same')(net)
      net = LeakyReLU(alpha=0.2)(net)
      net = BatchNormalization(axis=channel_axis)(net)

    # a final transposed convolution brings the channel count back to `depth`
    net = Conv2DTranspose(depth, (3, 3), padding='same')(net)
    outputs = Activation('sigmoid', name='decoder')(net)

    # the autoencoder is the encoder followed by the decoder
    return Model(inputs, outputs, name='autoencoder')

In [2]:
# training autoencoders

import matplotlib
matplotlib.use('Agg')
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
import numpy as np
import cv2 as cv

# Function to help visualize predictions made by unsupervised autoencoder 
def visualize_prediction(decode, gt, samples=10):
  """Build a montage comparing original images with their reconstructions.

  Every row of the montage holds one original image (left) next to its
  reconstruction (right); rows are stacked top to bottom in index order.

  Args:
    decode: reconstructed images, indexable by sample; each one is multiplied
      by 255 and cast to uint8.
    gt: original (ground-truth) images, indexed and converted the same way.
    samples: maximum number of rows to render. It is clamped to the number of
      images actually available so a short batch no longer raises IndexError.

  Returns:
    The stacked uint8 montage, or None when there is nothing to render.
  """
  # never index past the end of either collection
  count = min(samples, len(gt), len(decode))

  rows = []
  for i in range(count):
    # rescale both images to 8-bit pixel values
    original = (gt[i]*255).astype('uint8')
    recon = (decode[i]*255).astype('uint8')

    # original on the left, reconstruction on the right
    rows.append(np.hstack([original, recon]))

  # nothing requested / nothing available
  if not rows:
    return None

  # stack once at the end instead of re-copying the montage on every iteration
  return np.vstack(rows)

# standard-library import used below to create the output directory
import os

# arguments
model_name = 'output/autoencoder.model'         # path to our output trained autoencoder
visu = "recon_vis.png"                          # path to the original-vs-reconstruction montage
plot = "plot.png"                               # path to the training-history figure
epochs = 20
init_lr = 1e-3
batch_size = 32

# make sure the directory that will receive the model exists *before* training,
# so a missing folder fails fast instead of after every epoch has run
model_dir = os.path.dirname(model_name)
if model_dir:
  os.makedirs(model_dir, exist_ok=True)

# load mnist dataset (labels are discarded: the autoencoder is trained on images only)
print('[INFO] loading MNIST dataset...')
((trainX, _), (testX, _)) = mnist.load_data()

# add channel dimension to every image in dataset and scale pixel intensities to range [0, 1]
trainX = np.expand_dims(trainX, axis=-1)
testX = np.expand_dims(testX, axis=-1)
trainX = trainX.astype('float32')/255.0
testX = testX.astype('float32')/255.0

# construct our Conv autoencoder
print('[INFO] building autoencoder...')
autoencoder = ConvAutoencoder.build(28, 28, 1)
# NOTE(review): the `decay` keyword is presumably only accepted by older/legacy
# Keras optimizers -- confirm against the installed TensorFlow version
opt = Adam(learning_rate=init_lr, decay=init_lr/epochs)
autoencoder.compile(loss='mse', optimizer=opt)

# train convolution autoencoder: the images are both the input and the target
H=autoencoder.fit(trainX, trainX, validation_data=(testX, testX), epochs=epochs, 
                    batch_size=batch_size)

# use the convolution auto encoder to make the predictions on the testing images, construct the 
# visualizations and save it to disk
decoded = autoencoder.predict(testX)
vis = visualize_prediction(decoded, testX)
cv.imwrite(visu, vis)

# construct a plot that plots and saves training history
# (only the loss is tracked: the model is compiled without any accuracy metric)
N = np.arange(0, epochs)
plt.style.use('ggplot')
plt.figure()
plt.plot(N, H.history['loss'], label='train_loss')
plt.plot(N, H.history['val_loss'], label='validation_loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs #')
plt.ylabel('Loss')
plt.legend(loc='lower left')
plt.savefig(plot)

# serialize the autoencoder model to disk
print('[INFO] saving autoencoder')
autoencoder.save(model_name, save_format='h5')

[INFO] loading MNIST dataset...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[INFO] building autoencoder...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[INFO] saving autoencoder


In [3]:
# indexing images

from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
from tensorflow.keras.datasets import mnist
import numpy as np
import pickle

model_path = 'output/autoencoder.model'   # path to trained autoencoder model
index_path = 'output/index.pickle'        # path to output features index file

# load mnist dataset (testX is unpacked here but not used below)
print('[INFO] loading MNIST dataset...')
((trainX, _), (testX, _)) = mnist.load_data()

# add channel dimension to every training image and scale pixel intensities to range [0, 1]
trainX = np.expand_dims(trainX, axis=-1)
trainX = trainX.astype('float32')/255.0

# load the trained autoencoder model from disk
print('[INFO] loading autoencoder model...')
autoencoder = load_model(model_path)

# create the encoder model: same input as the autoencoder, but its output is
# taken at the layer named 'encoder'
encoder = Model(inputs=autoencoder.input, outputs=autoencoder.get_layer('encoder').output)

# quantify the contents of our input images using encoder
print('[INFO] encoding images...')
features = encoder.predict(trainX)

# construct a dictionary that maps index of MNIST training image to its corresponding latent-space
# representation
indexes = list(range(0, trainX.shape[0]))
data = {'indexes': indexes, 'features' : features}

# write data dictionary to disk; the context manager closes the file even if
# pickling or writing raises
print('[INFO] Saving index...')
with open(index_path, 'wb') as f:
  pickle.dump(data, f)

[INFO] loading MNIST dataset...
[INFO] loading autoencoder model...
[INFO] encoding images...
[INFO] Saving index...
