# Simple Autoencoder in TensorFlow, PyTorch & MXNet

An implementation of a simple autoencoder with TensorFlow, PyTorch and MXNet using the same architecture and hyperparameters as much as possible.

# Setup

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [17]:
import os
import numpy as np
import matplotlib.pyplot as plt

from __future__ import division, print_function, absolute_import

In [72]:
# Training Parameters
num_epochs    = 10
batch_size    = 128
weight_decay  = 1e-5
learning_rate = 1e-3

# Network Parameters
num_input    = 784 # MNIST data input (img shape: 28*28)
num_hidden_1 = 128
num_hidden_2 = 65
num_hidden_3 = 12
num_hidden_4 = 3

# MXNet

# TensorFlow

In [50]:
import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data

In [51]:
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [52]:
display_step = 1000
num_steps = num_epochs * 1000

In [54]:
# tf Graph input (only pictures)
X = tf.placeholder('float', [None, num_input])

In [55]:
weights = {
    'encoder_h1': tf.Variable(tf.random_normal([num_input, num_hidden_1])),
    'encoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_hidden_2])),
    'encoder_h3': tf.Variable(tf.random_normal([num_hidden_2, num_hidden_3])),
    'encoder_h4': tf.Variable(tf.random_normal([num_hidden_3, num_hidden_4])),
    'decoder_h1': tf.Variable(tf.random_normal([num_hidden_4, num_hidden_3])),
    'decoder_h2': tf.Variable(tf.random_normal([num_hidden_3, num_hidden_2])),
    'decoder_h3': tf.Variable(tf.random_normal([num_hidden_2, num_hidden_1])),
    'decoder_h4': tf.Variable(tf.random_normal([num_hidden_1, num_input])),
}
biases = {
    'encoder_b1': tf.Variable(tf.random_normal([num_hidden_1])),
    'encoder_b2': tf.Variable(tf.random_normal([num_hidden_2])),
    'encoder_b3': tf.Variable(tf.random_normal([num_hidden_3])),
    'encoder_b4': tf.Variable(tf.random_normal([num_hidden_4])),
    'decoder_b1': tf.Variable(tf.random_normal([num_hidden_3])),
    'decoder_b2': tf.Variable(tf.random_normal([num_hidden_2])),
    'decoder_b3': tf.Variable(tf.random_normal([num_hidden_1])),
    'decoder_b4': tf.Variable(tf.random_normal([num_input])),
}

In [56]:
# Building the encoder
def encoder(x):
    layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['encoder_h1']),
                                   biases['encoder_b1']))
    layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
                                   biases['encoder_b2']))
    layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2, weights['encoder_h3']),
                                   biases['encoder_b3']))
    layer_4 = tf.nn.relu(tf.add(tf.matmul(layer_3, weights['encoder_h4']),
                                   biases['encoder_b4']))
    return layer_4

# Building the decoder
def decoder(x):
    layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['decoder_h1']),
                                   biases['decoder_b1']))
    layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
                                   biases['decoder_b2']))
    layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2, weights['decoder_h3']),
                                   biases['decoder_b3']))
    layer_4 = tf.nn.tanh(tf.add(tf.matmul(layer_3, weights['decoder_h4']),
                                   biases['decoder_b4']))
    return layer_4

In [57]:
# Construct model
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)

In [58]:
# Prediction
y_pred = decoder_op

# Targets (Labels) are the input data.
y_true = X

In [59]:
saver = tf.train.Saver()

# Define loss and optimizer, minimize the squared error
loss = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss, weight_decay=weight_decay)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

In [62]:
# Start Training
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    # Training
    for i in range(1, num_steps+1):
        # Prepare Data
        # Get the next batch of MNIST data (only images are needed, not labels)
        batch_x, _ = mnist.train.next_batch(batch_size)

        # Run optimization op (backprop) and cost op (to get loss value)
        _, l = sess.run([optimizer, loss], feed_dict={X: batch_x})
        
        # Display logs per step
        if i % display_step == 0 or i == 1:
            print('Step %i: Minibatch Loss: %f' % (i, l))
    
    # Save model
    saver.save(sess, 'tf_autoencoder.model')

Step 1: Minibatch Loss: 1.117130
Step 1000: Minibatch Loss: 1.082487
Step 2000: Minibatch Loss: 1.080882
Step 3000: Minibatch Loss: 1.073486
Step 4000: Minibatch Loss: 1.074969
Step 5000: Minibatch Loss: 1.071067
Step 6000: Minibatch Loss: 1.054731
Step 7000: Minibatch Loss: 1.040796
Step 8000: Minibatch Loss: 1.014696
Step 9000: Minibatch Loss: 1.004351
Step 10000: Minibatch Loss: 1.005870


# PyTorch

In [65]:
import torch
import torchvision

from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import save_image

In [66]:
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [67]:
dataset = MNIST('./data', transform=img_transform, download=True)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [68]:
class SimpleAutoencoder(nn.Module):
    def __init__(self):
        super(SimpleAutoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(num_input, num_hidden_1),
            nn.ReLU(True),
            nn.Linear(num_hidden_1, num_hidden_2),
            nn.ReLU(True),
            nn.Linear(num_hidden_2, num_hidden_3),
            nn.ReLU(True),
            nn.Linear(num_hidden_3, num_hidden_4)
        )
        self.decoder = nn.Sequential(
            nn.Linear(num_hidden_4, num_hidden_3),
            nn.ReLU(True),
            nn.Linear(num_hidden_3, num_hidden_2),
            nn.ReLU(True),
            nn.Linear(num_hidden_2, num_hidden_1),
            nn.ReLU(True),
            nn.Linear(num_hidden_1, num_input),
            nn.Tanh()
        )
    
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [70]:
model = SimpleAutoencoder() # .cuda() for GPU

loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

In [71]:
# Training
for epoch in range(num_epochs):
    for data in dataloader:
        img, _ = data
        img = img.view(img.size(0), -1)
        img = Variable(img) # .cuda() for GPU
        
        # Forward pass
        output = model(img)
        loss = loss_fn(output, img)
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print('Epoch [{}/{}], Loss: {:.4f}'
          .format(epoch + 1, num_epochs, loss.data[0]))
    
# Recommended best practice: saves model parameters
torch.save(model.state_dict(), './pt_autoencoder.model')

# Saves entire model
# torch.save(model, './pt_autoencoder.model')

Epoch [1/10], Loss: 0.1921
Epoch [2/10], Loss: 0.1741
Epoch [3/10], Loss: 0.1842
Epoch [4/10], Loss: 0.1677
Epoch [5/10], Loss: 0.1496
Epoch [6/10], Loss: 0.1468
Epoch [7/10], Loss: 0.1553
Epoch [8/10], Loss: 0.1486
Epoch [9/10], Loss: 0.1482
Epoch [10/10], Loss: 0.1495
