In [1]:
import tensorflow.contrib.layers as lays

def autoencoder(inputs):
    # encoder
    # 32 x 32 x 1   ->  16 x 16 x 32
    # 16 x 16 x 32  ->  8 x 8 x 16
    # 8 x 8 x 16    ->  2 x 2 x 8
    net = lays.conv2d(inputs, 32, [5, 5], stride=2, padding='SAME')
    net = lays.conv2d(net, 16, [5, 5], stride=2, padding='SAME')
    net = lays.conv2d(net, 8, [5, 5], stride=4, padding='SAME')
    # decoder
    # 2 x 2 x 8    ->  8 x 8 x 16
    # 8 x 8 x 16   ->  16 x 16 x 32
    # 16 x 16 x 32  ->  32 x 32 x 1
    net = lays.conv2d_transpose(net, 16, [5, 5], stride=4, padding='SAME')
    net = lays.conv2d_transpose(net, 32, [5, 5], stride=2, padding='SAME')
    net = lays.conv2d_transpose(net, 1, [5, 5], stride=2, padding='SAME', activation_fn=tf.nn.tanh)
    return net

Reshape and Resize a batch of images

In [2]:
import numpy as np
from skimage import transform

def resize_batch(imgs):
    # A function to resize a batch of MNIST images to (32, 32)
    # Args:
    #   imgs: a numpy array of size [batch_size, 28 X 28].
    # Returns:
    #   a numpy array of size [batch_size, 32, 32].
    imgs = imgs.reshape((-1, 28, 28, 1))
    resized_imgs = np.zeros((imgs.shape[0], 32, 32, 1))
    for i in range(imgs.shape[0]):
        resized_imgs[i, ..., 0] = transform.resize(imgs[i, ..., 0], (32, 32))
    return resized_imgs

Now we create an autoencoder, define a square error loss and an optimizer.

In [3]:
import tensorflow as tf

lr = 0.001

# input to the network (Messy images)
ae_inputs = tf.placeholder(tf.float32, (None, 32, 32, 1))

# create the Autoencoder network
ae_outputs = autoencoder(ae_inputs)

# calculate the loss and optimize the network
# claculate the mean square error loss
loss = tf.reduce_mean(tf.square(ae_outputs - ae_inputs))  
train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)

# initialize the network
init = tf.global_variables_initializer()

Now we can read the batches, train the network and finally test the network by reconstructing a batch of test images.

In [5]:
from tensorflow.examples.tutorials.mnist import input_data
import os
import matplotlib.pyplot as plt

batch_size = 500  # Number of samples in each batch
epoch_num = 5     # Number of epochs to train the network
lr = 0.001        # Learning rate

# read MNIST dataset
mnist = input_data.read_data_sets("/home/geoffrey/ml/ML_env/converter_vhs_pal/dataset/test/", one_hot=True)

# calculate the number of batches per epoch
batch_per_ep = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(init)
    # epochs loop
    for ep in range(epoch_num):  
        # batches loop
        for batch_n in range(batch_per_ep):  
            batch_img, batch_label = mnist.train.next_batch(batch_size)  # read a batch
            batch_img = batch_img.reshape((-1, 28, 28, 1))               # reshape each sample to an (28, 28) image
            batch_img = resize_batch(batch_img)                          # reshape the images to (32, 32)
            _, c = sess.run([train_op, loss], feed_dict={ae_inputs: batch_img})
            print('Epoch: {} - cost= {:.5f}'.format((ep + 1), c))
    
    # test the trained network
    print('start test')
    batch_img, batch_label = mnist.test.next_batch(50)
    batch_img = resize_batch(batch_img)
    recon_img = sess.run([ae_outputs], feed_dict={ae_inputs: batch_img})[0]
    print('end test')
    
    # plot the reconstructed images and their ground truths (inputs)
    print('start reconstruction')
    plt.figure(1)
    plt.title('Reconstructed Images')
    for i in range(50):
        plt.subplot(5, 10, i+1)
        plt.imshow(recon_img[i, ..., 0], cmap='gray')
    plt.figure(2)
    plt.title('Input Images')
    for i in range(50):
        plt.subplot(5, 10, i+1)
        plt.imshow(batch_img[i, ..., 0], cmap='gray')
    plt.show()
    print('end plt show')

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /home/geoffrey/ml/ML_env/converter_vhs_pal/dataset/test/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /home/geoffrey/ml/ML_env/converter_vhs_pal/dataset/test/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /home/geoffrey/ml/ML_env/converter_vhs_pal/dataset/test/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /home/geoffrey/ml/ML_env/converter_vhs_pal/dataset/test/t10k-labels-idx1-ubyte.gz


  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


Epoch: 1 - cost= 0.10009
Epoch: 1 - cost= 0.10087
Epoch: 1 - cost= 0.10181
Epoch: 1 - cost= 0.09659
Epoch: 1 - cost= 0.09532
Epoch: 1 - cost= 0.09505
Epoch: 1 - cost= 0.09063
Epoch: 1 - cost= 0.08652
Epoch: 1 - cost= 0.08317
Epoch: 1 - cost= 0.08007
Epoch: 1 - cost= 0.07680
Epoch: 1 - cost= 0.07935
Epoch: 1 - cost= 0.07735
Epoch: 1 - cost= 0.07378
Epoch: 1 - cost= 0.07097
Epoch: 1 - cost= 0.06995
Epoch: 1 - cost= 0.06862
Epoch: 1 - cost= 0.06714
Epoch: 1 - cost= 0.06716
Epoch: 1 - cost= 0.06546
Epoch: 1 - cost= 0.06573
Epoch: 1 - cost= 0.06403
Epoch: 1 - cost= 0.06242
Epoch: 1 - cost= 0.06323
Epoch: 1 - cost= 0.06325
Epoch: 1 - cost= 0.06115
Epoch: 1 - cost= 0.06122
Epoch: 1 - cost= 0.06033
Epoch: 1 - cost= 0.05859
Epoch: 1 - cost= 0.05994
Epoch: 1 - cost= 0.05686
Epoch: 1 - cost= 0.05695
Epoch: 1 - cost= 0.05819
Epoch: 1 - cost= 0.05582
Epoch: 1 - cost= 0.05647
Epoch: 1 - cost= 0.05468
Epoch: 1 - cost= 0.05489
Epoch: 1 - cost= 0.05439
Epoch: 1 - cost= 0.05366
Epoch: 1 - cost= 0.05460


Epoch: 3 - cost= 0.01008
Epoch: 3 - cost= 0.01016
Epoch: 4 - cost= 0.00994
Epoch: 4 - cost= 0.00984
Epoch: 4 - cost= 0.01017
Epoch: 4 - cost= 0.00980
Epoch: 4 - cost= 0.00980
Epoch: 4 - cost= 0.01000
Epoch: 4 - cost= 0.01006
Epoch: 4 - cost= 0.00963
Epoch: 4 - cost= 0.00976
Epoch: 4 - cost= 0.00987
Epoch: 4 - cost= 0.00987
Epoch: 4 - cost= 0.00977
Epoch: 4 - cost= 0.00969
Epoch: 4 - cost= 0.00969
Epoch: 4 - cost= 0.00940
Epoch: 4 - cost= 0.00980
Epoch: 4 - cost= 0.00968
Epoch: 4 - cost= 0.00960
Epoch: 4 - cost= 0.01017
Epoch: 4 - cost= 0.00951
Epoch: 4 - cost= 0.00945
Epoch: 4 - cost= 0.00940
Epoch: 4 - cost= 0.00944
Epoch: 4 - cost= 0.00906
Epoch: 4 - cost= 0.00944
Epoch: 4 - cost= 0.00956
Epoch: 4 - cost= 0.00942
Epoch: 4 - cost= 0.00938
Epoch: 4 - cost= 0.00926
Epoch: 4 - cost= 0.00918
Epoch: 4 - cost= 0.00944
Epoch: 4 - cost= 0.00952
Epoch: 4 - cost= 0.00902
Epoch: 4 - cost= 0.00948
Epoch: 4 - cost= 0.00968
Epoch: 4 - cost= 0.00911
Epoch: 4 - cost= 0.00925
Epoch: 4 - cost= 0.00939


<Figure size 640x480 with 50 Axes>

<Figure size 640x480 with 50 Axes>