Deep Learning using Rectified Linear Units
===

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

__version__ = '1.0.0'
__author__ = 'Abien Fred Agarap'

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

In [3]:
tf.config.experimental.set_memory_growth(tf.config.experimental.list_physical_devices('GPU')[0], True)

In [4]:
np.random.seed(42)
tf.random.set_seed(42)

In [5]:
class NeuralNet(tf.keras.Model):
    def __init__(self, **kwargs):
        super(NeuralNet, self).__init__()
        self.hidden_layer_1 = tf.keras.layers.Dense(units=kwargs['units'][0], activation=kwargs['activation'])
        self.hidden_layer_2 = tf.keras.layers.Dense(units=kwargs['units'][1], activation=kwargs['activation'])
        self.output_layer = tf.keras.layers.Dense(units=kwargs['num_classes'])
    
    @tf.function
    def call(self, features):
        activation = self.hidden_layer_1(features)
        activation = self.hidden_layer_2(activation)
        output = self.output_layer(activation)
        return output

In [6]:
def loss_fn(logits, labels):
    softmax_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    return tf.reduce_mean(softmax_loss)

In [7]:
def train_step(model, loss, optimizer, features, labels):
    with tf.GradientTape() as tape:
        logits = model(features)
        train_loss = loss(logits=logits, labels=labels)
    gradients = tape.gradient(train_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return train_loss

In [37]:
def train(model, loss_fn, optimizer, dataset, epochs):
    for epoch in range(epochs):
        epoch_loss = []
        train_loss = 0
        for batch_features, batch_labels in dataset:
            batch_features += tf.random.normal(stddev=(1. / (1. + epoch)**0.55), shape=batch_features.shape)
            loss = train_step(model, loss_fn, optimizer, batch_features, batch_labels)
            train_loss += loss
        epoch_loss.append(tf.reduce_mean(train_loss))
        if (epoch != 0) and ((epoch + 1) % 10 == 0):
            print('epoch {}/{} : mean loss = {}'.format(epoch + 1, epochs, tf.reduce_mean(train_loss)))

In [38]:
model = NeuralNet(units=[512, 512], activation=tf.nn.relu, num_classes=10)

In [11]:
(train_features, train_labels), (test_features, test_labels) = tf.keras.datasets.mnist.load_data()

In [12]:
train_features = train_features.astype('float32').reshape(-1, 784) / 255.
test_features = test_features.astype('float32').reshape(-1, 784) / 255.

In [13]:
train_labels = tf.one_hot(train_labels, 10)
test_labels = tf.one_hot(test_labels, 10)

In [14]:
dataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels))
dataset = dataset.prefetch(1024).shuffle(1024).batch(512, True)

W0721 18:26:34.324304 140103405123392 deprecation.py:323] From /home/darth/tf2/lib/python3.6/site-packages/tensorflow/python/data/util/random_seed.py:58: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [15]:
optimizer = tf.optimizers.Adam(learning_rate=3e-4)

In [39]:
train(model, loss_fn, optimizer, dataset, epochs=100)

epoch 10/100 : mean loss = 12.640436172485352
epoch 20/100 : mean loss = 4.981122016906738
epoch 30/100 : mean loss = 2.0374670028686523
epoch 40/100 : mean loss = 0.8174115419387817
epoch 50/100 : mean loss = 0.37886688113212585
epoch 60/100 : mean loss = 0.19943363964557648
epoch 70/100 : mean loss = 0.09841873496770859
epoch 80/100 : mean loss = 0.051297999918460846
epoch 90/100 : mean loss = 0.08284129947423935
epoch 100/100 : mean loss = 0.07814887166023254


In [40]:
predictions = tf.nn.softmax(model(test_features))

In [41]:
accuracy = tf.metrics.Accuracy()

In [42]:
accuracy(tf.argmax(predictions, 1), tf.argmax(test_labels, 1))

<tf.Tensor: id=1932214, shape=(), dtype=float32, numpy=0.9875>