# Autoencoder for Images

This autoencoder has four convolution layers, followed by three fully connected layers, and then four transposed-convolution layers that reuse the encoder's convolution kernels.


In [3]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

%matplotlib inline
# Notebook-wide plotting defaults: a 10x8 figure size and nearest-neighbour
# interpolation for displayed images.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),
    'image.interpolation': 'nearest',
})

# Presumably training hyperparameters; neither constant is referenced in the
# cells visible here -- TODO confirm against the training cell.
BATCHSIZE = 128
LEARNING_RATE = 0.001

In [7]:
def encoder(input_image, filter_sizes=(3, 3, 3, 3), n_filters=(16, 32, 64, 64),
            fc_dims=(4096, 1024, 4096)):
    """Build the convolutional encoder followed by a fully connected stack.

    Each convolution uses stride 2 with 'SAME' padding, adds a bias and
    applies a ReLU. The final feature map is flattened and passed through a
    chain of ReLU fully connected layers.

    Args:
        input_image: 4-D tensor laid out as [N, H, W, C].
        filter_sizes: square kernel size of each convolution layer; entry i
            is used for layer i, so it needs at least len(n_filters) entries.
        n_filters: number of output channels of each convolution layer.
        fc_dims: widths of the fully connected layers applied after
            flattening; the last entry is the width of the returned code.

    Returns:
        A tuple (code, encoder_weights, encoder_shapes) where
        code is the 2-D output of the last fully connected layer,
        encoder_weights is the list of convolution kernels in layer order, and
        encoder_shapes is the list of static input shapes (as Python lists)
        seen by each convolution layer, in layer order.

    Raises:
        AssertionError: if input_image is not 4-dimensional.
    """
    # input shape must be [N, H, W, C]
    assert len(input_image.shape) == 4, 'Unsupport input dimensions.'

    encoder_weights = []
    encoder_shapes = []

    def _uniform_init(shape, num_input):
        # Uniform initial values in [-1/sqrt(num_input), 1/sqrt(num_input)].
        bound = 1.0 / (num_input ** 0.5)
        return tf.random_uniform(shape, -bound, bound)

    def _conv2d_relu(prev_layer, filter_size, num_output, layer_name):
        # One stride-2 convolution + bias + ReLU; the kernel is recorded in
        # encoder_weights so it can be handed back to the caller.
        num_input = int(prev_layer.shape[3])
        with tf.variable_scope(layer_name) as scope:
            w = tf.Variable(
                _uniform_init([filter_size, filter_size, num_input, num_output], num_input),
                name='W')
            encoder_weights.append(w)
            b = tf.Variable(tf.zeros([num_output]), name='b')
            conv = tf.nn.conv2d(prev_layer, w, strides=[1, 2, 2, 1], padding='SAME')
            return tf.nn.relu(conv + b, name=scope.name)

    # Build the convolution layers, remembering the input shape of each one.
    prev_layer = input_image
    for i, num_output in enumerate(n_filters):
        encoder_shapes.append(prev_layer.shape.as_list())
        prev_layer = _conv2d_relu(prev_layer, filter_sizes[i], num_output,
                                  layer_name='conv_relu_%d' % i)

    # Flatten [N, H, W, C] to [N, H*W*C]; H, W and C must be statically known.
    _, height, width, channels = prev_layer.shape.as_list()
    prev_layer = tf.reshape(prev_layer, [-1, height * width * channels])

    # Build the fully connected layers.
    for i, num_units in enumerate(fc_dims):
        num_input = int(prev_layer.shape[1])
        with tf.variable_scope('encoder_fc_%d' % i) as scope:
            w = tf.Variable(_uniform_init([num_input, num_units], num_input), name='w')
            b = tf.Variable(tf.zeros([num_units]))

        # The activation is created outside the variable scope, as before, so
        # the generated op names stay the same.
        prev_layer = tf.nn.relu(tf.matmul(prev_layer, w) + b, name=scope.name)

    return prev_layer, encoder_weights, encoder_shapes

In [5]:
# define the decoder
def decoder(prev_layer, encoder_weights, encoder_shapes, bottleneck_shape=(8, 8, 64)):
    """Build the transposed-convolution decoder, reusing the encoder kernels.

    The flat input is reshaped to a feature map and then passed through one
    stride-2 'SAME' transposed convolution (+ bias + ReLU) per encoder layer,
    walking the encoder layers in reverse order. Each transposed convolution
    uses the matching encoder kernel and targets the input shape recorded for
    that encoder layer.

    Args:
        prev_layer: 2-D tensor [N, F]; F must equal the product of
            bottleneck_shape.
        encoder_weights: convolution kernels in encoder layer order.
        encoder_shapes: input shape of each encoder convolution layer, in
            encoder layer order; only entries 1..3 (H, W, C) are used.
        bottleneck_shape: (height, width, channels) the flat input is reshaped
            to before the first transposed convolution. The default keeps the
            previously hard-coded 8x8x64 (4096 features).

    Returns:
        The output tensor of the last transposed-convolution layer.

    Raises:
        AssertionError: if the width of prev_layer does not match
            bottleneck_shape.
    """
    decoder_weights = encoder_weights[::-1]
    decoder_shapes = encoder_shapes[::-1]

    height, width, channels = bottleneck_shape
    assert int(prev_layer.shape[1]) == height * width * channels, 'Error with input dimension in decoder.'
    prev_layer = tf.reshape(prev_layer, [-1, height, width, channels])

    # NOTE(review): with the encoder defaults in this file (four stride-2
    # convolutions on a 64x64 input) the last recorded encoder shape is
    # [None, 8, 8, 64], so the first transposed convolution below is asked to
    # produce an 8x8 map from an 8x8 input at stride 2. A stride-2 'SAME'
    # transposed convolution normally expects an input of half the target
    # size (4x4 here) -- confirm that the bottleneck shape and the number of
    # layers are consistent before running the graph.

    # The batch size is only known at run time and is the same for every layer.
    batch_size = tf.shape(prev_layer)[0]

    for i, shape in enumerate(decoder_shapes):
        w = decoder_weights[i]
        # Kernel layout is [k, k, in, out] from the encoder's point of view, so
        # axis 2 is the channel count produced by the transposed convolution.
        b = tf.Variable(tf.zeros([int(w.get_shape().as_list()[2])]))
        deconv = tf.nn.conv2d_transpose(prev_layer, w,
                                        output_shape=tf.stack([batch_size, shape[1], shape[2], shape[3]]),
                                        strides=[1, 2, 2, 1], padding='SAME')
        prev_layer = tf.nn.relu(deconv + b)
    return prev_layer


In [8]:
# Build the autoencoder graph.
# Inputs are batches of 64x64 images with 3 channels; the batch dimension is
# left unspecified (None).
input_shape = [None, 64, 64, 3]
input_image = tf.placeholder(tf.float32, shape=input_shape, name='Input')

# The encoder returns its output tensor together with the convolution kernels
# and the per-layer input shapes; all three are handed to the decoder.
prev_layer, encoder_weights, encoder_shapes = encoder(input_image)
# prev_layer is rebound here and afterwards refers to the decoder output.
prev_layer = decoder(prev_layer, encoder_weights, encoder_shapes)

In [10]:
# Show the input shape recorded for each encoder convolution layer.
# The parenthesised form prints the same text on Python 2 and also runs on
# Python 3.
print(encoder_shapes)

[[None, 64, 64, 3], [None, 32, 32, 16], [None, 16, 16, 32], [None, 8, 8, 64]]
