Deep Learning using Rectified Linear Units
===

## Overview

In this notebook, we explore the performance of a neural net with varying activation functions on an image classification task.

We load our dependencies.

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

__version__ = '1.0.0'
__author__ = 'Abien Fred Agarap'

import matplotlib.pyplot as plt
from models.neural_net import NeuralNet
import numpy as np
import tensorflow as tf

We set up the GPU memory growth.

In [2]:
tf.config.experimental.set_memory_growth(tf.config.experimental.list_physical_devices('GPU')[0], True)

We set the random seeds for reproducibility.

In [3]:
np.random.seed(42)
tf.random.set_seed(42)

## Data Preparation

We load the MNIST dataset.

In [4]:
(train_features, train_labels), (test_features, test_labels) = tf.keras.datasets.mnist.load_data()

We scale the images.

In [5]:
train_features = train_features.astype('float32').reshape(-1, 784) / 255.
test_features = test_features.astype('float32').reshape(-1, 784) / 255.

We one-hot encode labels.

In [6]:
train_labels = tf.one_hot(train_labels, 10)
test_labels = tf.one_hot(test_labels, 10)

We create a `tf.data.Dataset` object for the training dataset.

In [7]:
dataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels))
dataset = dataset.prefetch(4096).shuffle(2048).batch(1024, True)

W0901 23:35:52.758524 140481895573312 deprecation.py:323] From /home/darth/tf2/lib/python3.6/site-packages/tensorflow/python/data/util/random_seed.py:58: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


## Model

Let's write our helper functions for training our model.

We define our loss function.

In [8]:
def loss_fn(logits, labels):
    softmax_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    return tf.reduce_mean(softmax_loss)

We define a function for a training step.

In [9]:
def train_step(model, loss, features, labels):
    with tf.GradientTape() as tape:
        logits = model(features)
        train_loss = loss(logits=logits, labels=labels)
    gradients = tape.gradient(train_loss, model.trainable_variables)
    model.optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return train_loss

We define a training function.

In [10]:
def train(model, loss_fn, dataset, epochs):
    epoch_accuracy = []
    epoch_loss = []
    for epoch in range(epochs):
        train_accuracy = []
        train_loss = 0
        for batch_features, batch_labels in dataset:
            batch_features += tf.random.normal(stddev=(1. / (1. + epoch)**0.55), shape=batch_features.shape)
            loss = train_step(model, loss_fn, batch_features, batch_labels)
            
            accuracy = tf.metrics.Accuracy()
            predictions = tf.nn.softmax(model(batch_features))
            accuracy(tf.argmax(predictions, 1), tf.argmax(batch_labels, 1))
            
            train_loss += loss
            train_accuracy.append(accuracy.result())
        
        epoch_loss.append(tf.reduce_mean(train_loss))
        epoch_accuracy.append(tf.reduce_mean(train_accuracy))
        
        if (epoch != 0) and ((epoch + 1) % 50 == 0):
            print('epoch {}/{} : mean loss = {}, mean accuracy = {}'.format(epoch + 1,
                                                                            epochs,
                                                                            tf.reduce_mean(train_loss),
                                                                            tf.reduce_mean(train_accuracy)))
    return epoch_accuracy, epoch_loss

### Logistic-based Model

#### 2-layer Neural Net

We define a 2-layer NN with Logistic activation function.

In [11]:
model = NeuralNet(num_layers=2, neurons=[512, 512], activation=tf.nn.sigmoid, num_classes=10)

We train our model for 300 epochs.

In [12]:
logistic_performance = train(model, loss_fn, dataset, epochs=300)

epoch 50/300 : mean loss = 99.00557708740234, mean accuracy = 0.7573410272598267
epoch 100/300 : mean loss = 94.01123809814453, mean accuracy = 0.8447097539901733
epoch 150/300 : mean loss = 88.87132263183594, mean accuracy = 0.9380893111228943
epoch 200/300 : mean loss = 86.87881469726562, mean accuracy = 0.9682280421257019
epoch 250/300 : mean loss = 86.22598266601562, mean accuracy = 0.9786166548728943
epoch 300/300 : mean loss = 85.86920928955078, mean accuracy = 0.9838699102401733


We produce predictions for the test data.

In [13]:
predictions = tf.nn.softmax(model(test_features))

We compute the test accuracy.

In [14]:
accuracy = tf.metrics.Accuracy()
test_accuracy = accuracy(tf.argmax(predictions, 1), tf.argmax(test_labels, 1))

Check for the test accuracy.

In [15]:
print('Test accuracy : {}'.format(test_accuracy.numpy()))

Test accuracy : 0.970300018787384


#### 3-layer Neural Net

We define a 3-layer NN with Logistic activation function.

In [11]:
model = NeuralNet(num_layers=3, neurons=[512, 256, 128], activation=tf.nn.sigmoid, num_classes=10)

We train our model for 300 epochs.

In [12]:
logistic_performance = train(model, loss_fn, dataset, epochs=300)

epoch 50/300 : mean loss = 99.00557708740234, mean accuracy = 0.7573410272598267
epoch 100/300 : mean loss = 94.01123809814453, mean accuracy = 0.8447097539901733
epoch 150/300 : mean loss = 88.87132263183594, mean accuracy = 0.9380893111228943
epoch 200/300 : mean loss = 86.87881469726562, mean accuracy = 0.9682280421257019
epoch 250/300 : mean loss = 86.22598266601562, mean accuracy = 0.9786166548728943
epoch 300/300 : mean loss = 85.86920928955078, mean accuracy = 0.9838699102401733


We produce predictions for the test data.

In [13]:
predictions = tf.nn.softmax(model(test_features))

We compute the test accuracy.

In [14]:
accuracy = tf.metrics.Accuracy()
test_accuracy = accuracy(tf.argmax(predictions, 1), tf.argmax(test_labels, 1))

Check for the test accuracy.

In [15]:
print('Test accuracy : {}'.format(test_accuracy.numpy()))

Test accuracy : 0.970300018787384


#### 5-layer Neural Net

We define a 5-layer NN with Logistic activation function.

In [11]:
model = NeuralNet(num_layers=5, neurons=[512, 512, 256, 256, 128], activation=tf.nn.sigmoid, num_classes=10)

We train our model for 300 epochs.

In [12]:
logistic_performance = train(model, loss_fn, dataset, epochs=300)

epoch 50/300 : mean loss = 99.00557708740234, mean accuracy = 0.7573410272598267
epoch 100/300 : mean loss = 94.01123809814453, mean accuracy = 0.8447097539901733
epoch 150/300 : mean loss = 88.87132263183594, mean accuracy = 0.9380893111228943
epoch 200/300 : mean loss = 86.87881469726562, mean accuracy = 0.9682280421257019
epoch 250/300 : mean loss = 86.22598266601562, mean accuracy = 0.9786166548728943
epoch 300/300 : mean loss = 85.86920928955078, mean accuracy = 0.9838699102401733


We produce predictions for the test data.

In [13]:
predictions = tf.nn.softmax(model(test_features))

We compute the test accuracy.

In [14]:
accuracy = tf.metrics.Accuracy()
test_accuracy = accuracy(tf.argmax(predictions, 1), tf.argmax(test_labels, 1))

Check for the test accuracy.

In [15]:
print('Test accuracy : {}'.format(test_accuracy.numpy()))

Test accuracy : 0.970300018787384


### TanH-based Model

We define a 2-layer NN with Hyperbolic Tangent activation function.

In [None]:
model = NeuralNet(units=[512, 512], activation=tf.nn.tanh, num_classes=10)

We train our model for 300 epochs.

In [None]:
tanh_performance = train(model, loss_fn, dataset, epochs=300)

We produce predictions for the test data.

In [None]:
predictions = tf.nn.softmax(model(test_features))

We compute the test accuracy.

In [None]:
accuracy = tf.metrics.Accuracy()
test_accuracy = accuracy(tf.argmax(predictions, 1), tf.argmax(test_labels, 1))

Check for the test accuracy.

In [None]:
print('Test accuracy : {}'.format(test_accuracy.numpy()))

### ReLU-based Model

We define a 2-layer NN with ReLU activation function.

In [None]:
model = NeuralNet(units=[512, 512], activation=tf.nn.relu, num_classes=10)

We train our model for 300 epochs.

In [None]:
relu_performance = train(model, loss_fn, dataset, epochs=300)

We produce predictions for the test data.

In [None]:
predictions = tf.nn.softmax(model(test_features))

We compute the test accuracy.

In [None]:
accuracy = tf.metrics.Accuracy()
test_accuracy = accuracy(tf.argmax(predictions, 1), tf.argmax(test_labels, 1))

Check for the test accuracy.

In [None]:
print('Test accuracy : {}'.format(test_accuracy.numpy()))

### Leaky ReLU-based Model

We define a 2-layer NN with Leaky ReLU activation function.

In [None]:
model = NeuralNet(units=[512, 512], activation=tf.nn.leaky_relu, num_classes=10)

We train our model for 300 epochs.

In [None]:
lrelu_performance = train(model, loss_fn, dataset, epochs=300)

We produce predictions for the test data.

In [None]:
predictions = tf.nn.softmax(model(test_features))

We compute the test accuracy.

In [None]:
accuracy = tf.metrics.Accuracy()
test_accuracy = accuracy(tf.argmax(predictions, 1), tf.argmax(test_labels, 1))

Check for the test accuracy.

In [None]:
print('Test accuracy : {}'.format(test_accuracy.numpy()))

### Softplus-based Model

We define a 2-layer NN with Softplus activation function.

In [None]:
model = NeuralNet(units=[512, 512], activation=tf.nn.softplus, num_classes=10)

We train our model for 300 epochs.

In [None]:
softplus_performance = train(model, loss_fn, dataset, epochs=300)

We produce predictions for the test data.

In [None]:
predictions = tf.nn.softmax(model(test_features))

We compute the test accuracy.

In [None]:
accuracy = tf.metrics.Accuracy()
test_accuracy = accuracy(tf.argmax(predictions, 1), tf.argmax(test_labels, 1))

Check for the test accuracy.

In [None]:
print('Test accuracy : {}'.format(test_accuracy.numpy()))

### ELU-based Model

We define a 2-layer NN with ELU activation function.

In [None]:
model = NeuralNet(units=[512, 512], activation=tf.nn.elu, num_classes=10)

We train our model for 300 epochs.

In [None]:
elu_performance = train(model, loss_fn, dataset, epochs=300)

We produce predictions for the test data.

In [None]:
predictions = tf.nn.softmax(model(test_features))

We compute the test accuracy.

In [None]:
accuracy = tf.metrics.Accuracy()
test_accuracy = accuracy(tf.argmax(predictions, 1), tf.argmax(test_labels, 1))

Check for the test accuracy.

In [None]:
print('Test accuracy : {}'.format(test_accuracy.numpy()))

## Training Performance

We lay down the training performance of each model.

In [None]:
plt.figure(figsize=(16, 8))
plt.rcParams.update({'font.size': 14})

plt.subplot(121)
plt.plot(range(len(logistic_performance[0])), logistic_performance[0], label='logistic')
plt.plot(range(len(tanh_performance[0])), tanh_performance[0], label='tanh')
plt.plot(range(len(relu_performance[0])), relu_performance[0], label='relu')
plt.plot(range(len(lrelu_performance[0])), lrelu_performance[0], label='leaky_relu')
plt.plot(range(len(softplus_performance[0])), softplus_performance[0], label='softplus')
plt.plot(range(len(elu_performance[0])), elu_performance[0], label='elu')
plt.xlabel('epochs')
plt.ylabel('train accuracy')
plt.legend(loc='lower right')
plt.grid()

plt.subplot(122)
plt.plot(range(len(logistic_performance[1])), logistic_performance[1], label='logistic')
plt.plot(range(len(tanh_performance[1])), tanh_performance[1], label='tanh')
plt.plot(range(len(relu_performance[1])), relu_performance[1], label='relu')
plt.plot(range(len(lrelu_performance[1])), lrelu_performance[1], label='leaky_relu')
plt.plot(range(len(softplus_performance[1])), softplus_performance[1], label='softplus')
plt.plot(range(len(elu_performance[1])), elu_performance[1], label='elu')
plt.xlabel('epochs')
plt.ylabel('train loss')
plt.legend(loc='upper right')
plt.grid()

plt.savefig('mnist_performance.png', dpi=300)
plt.show()