A neural network example from Coding the Deep Learning Revolution V2 from Adventures in Machine Learning, pp. 14-21

In [33]:
import tensorflow as tf
import numpy as np

In [34]:
# Load the MNIST dataset through the Keras dataset helper. load_data() returns
# two (images, labels) pairs: one for the training split, one for the test split.
from tensorflow.keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

One final thing that needs to be considered is how to extract the training data in batches of
samples. The function below can handle this:

In [35]:
def get_batch(x_data, y_data, batch_size):
    """Draw a random mini-batch of samples together with their labels.

    Indices are drawn uniformly *with replacement*, so a batch may contain the
    same sample more than once and a sequence of batches is not guaranteed to
    visit every sample.

    Args:
        x_data: NumPy array of samples, indexed along its first axis. Any
            number of trailing dimensions is accepted.
        y_data: NumPy array of labels aligned with ``x_data`` along axis 0.
        batch_size: Number of samples to draw.

    Returns:
        Tuple ``(x_batch, y_batch)`` with the selected samples and labels.
    """
    idxs = np.random.randint(0, len(y_data), batch_size)
    # Index only the leading axis so the helper works for inputs of any rank
    # (e.g. 28x28 images or already-flattened feature vectors), instead of
    # assuming exactly three dimensions.
    return x_data[idxs], y_data[idxs]

In [36]:
# Python optimisation variables
epochs = 10
batch_size = 100

# Seed NumPy (used for batch sampling) and TensorFlow (used for weight
# initialisation) so that a fresh Restart & Run All repeats the same run.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Normalize the input images by dividing by 255 (scales the data so it's between 0 and 1).
# Note: 255 is the max of the x values (my observation).
# The data is converted to float32 first: a plain division would promote it to
# float64, doubling memory use, and the model casts its input to float32 anyway.
# NOTE: this cell rebinds x_train / x_test, so it is not meant to be re-run on
# its own; re-run the data-loading cell first.
x_train = x_train.astype(np.float32) / 255
x_test = x_test.astype(np.float32) / 255

# Convert x_test to a tensor to pass through the model (train data will be
# converted to tensors on the fly). The test data is never updated, so an
# immutable tensor is used rather than a tf.Variable.
x_test = tf.convert_to_tensor(x_test)

In [37]:
# The next step is to setup the weight and bias variables for the three-layer neural network.
# There are always L - 1 weight/bias tensor pairs, where L is the number of layers.
# These variables are defined in the code below:

In [38]:
# Input -> hidden layer parameters: a [784, 300] weight matrix and a 300-element bias.
# The weights are drawn from a normal distribution with stddev 0.03; no stddev
# is passed for the bias, so it uses the library default.
W1 = tf.Variable(
    initial_value=tf.random.normal(shape=[784, 300], stddev=0.03),
    name="W1",
)
b1 = tf.Variable(
    initial_value=tf.random.normal(shape=[300]),
    name="b1",
)
# Hidden -> output layer parameters: a [300, 10] weight matrix and a 10-element bias.
W2 = tf.Variable(
    initial_value=tf.random.normal(shape=[300, 10], stddev=0.03),
    name="W2",
)
b2 = tf.Variable(
    initial_value=tf.random.normal(shape=[10]),
    name="b2",
)


In [39]:
# The standard deviation (stddev) sets the spread of the normal distribution that the random initial values are drawn from.
# The 300 in W1 is the number of nodes in the hidden layer.
# The W2 variable is a [300, 10] tensor, connecting the 300-node hidden layer to the 10-node output layer.

In [40]:
# The next step in the code is to create the computations that occur within the nodes of the network.
# If the reader recalls, the computations within the nodes of a neural network are of the following form:
# z = Wx + b
# h = f(z)
# Where W is the weights matrix, x is the layer input vector, b is the bias and f is the activation
# function of the node. These calculations comprise the feed-forward pass of the input data
# through the neural network. To execute these calculations, a dedicated feed forward function
# is created:

In [41]:
def nn_model(x_input, W1, b1, W2, b2):
    """Run the feed-forward pass of the two-layer network and return the logits.

    Args:
        x_input: Batch of inputs exposing ``.shape``; every axis after the
            first is flattened into a single feature axis.
        W1: Weights connecting the input to the hidden layer.
        b1: Bias of the hidden layer.
        W2: Weights connecting the hidden layer to the output layer.
        b2: Bias of the output layer.

    Returns:
        The un-activated output-layer values (logits), one row per sample.
    """
    # Keep the leading (batch) axis and collapse the rest, e.g. 28 x 28 -> 784.
    batch_dim = x_input.shape[0]
    flat = tf.cast(tf.reshape(x_input, (batch_dim, -1)), tf.float32)
    # Hidden layer: affine transform followed by ReLU.
    hidden = tf.nn.relu(tf.matmul(flat, W1) + b1)
    # Output layer: affine transform only, no activation.
    return tf.matmul(hidden, W2) + b2
# Note that no activation function has been applied to this output layer of nodes (yet). 
# In machine/deep learning, the term “logits” refers to the un-activated output of a layer of nodes.

In [42]:
def loss_fn(logits, labels):
    """Return the mean softmax cross-entropy between ``logits`` and ``labels``.

    Args:
        logits: Un-activated network outputs.
        labels: Target values per sample (the training loop passes one-hot vectors).

    Returns:
        The per-sample cross-entropy losses averaged into a single value.
    """
    per_sample_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=labels,
        logits=logits,
    )
    return tf.reduce_mean(per_sample_loss)

In [43]:
# Set up the optimizer that the training loop uses to apply gradient updates.
optimizer = tf.keras.optimizers.Adam()
# The Adam object can take a learning rate as input, but no arguments are passed
# here, so the library's default settings are used.

In [45]:
# The Training Loop
# Number of mini-batches drawn per epoch. Batches come from get_batch, so an
# "epoch" here is total_batch random draws rather than one pass over the data.
total_batch = len(y_train) // batch_size
trainable_vars = [W1, b1, W2, b2]

for epoch in range(epochs):
    avg_loss = 0
    for i in range(total_batch):
        batch_x, batch_y = get_batch(x_train, y_train, batch_size=batch_size)
        # Batches are read-only inputs, so convert them to plain tensors.
        # Wrapping them in tf.Variable would allocate new mutable state every
        # step and make the gradient tape watch the input as if it were trainable.
        batch_x = tf.convert_to_tensor(batch_x)
        # Create a one hot vector for each label (10 classes)
        batch_y = tf.one_hot(tf.convert_to_tensor(batch_y), 10)
        # Record the forward pass so gradients of the loss can be taken.
        with tf.GradientTape() as tape:
            logits = nn_model(batch_x, W1, b1, W2, b2)
            loss = loss_fn(logits, batch_y)
        gradients = tape.gradient(loss, trainable_vars)
        optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Running mean of the batch losses over the epoch.
        avg_loss += loss / total_batch
    # Evaluate on the full test set after each epoch: the predicted class is
    # the index of the largest logit.
    test_logits = nn_model(x_test, W1, b1, W2, b2)
    max_idxs = tf.argmax(test_logits, axis=1)
    test_acc = np.mean(max_idxs.numpy() == y_test)
    print(f"Epoch: {epoch + 1}, loss={avg_loss:.3f}, test set accuracy={test_acc*100:.3f}%")

print("\n Training Complete!")
    

Epoch: 1, loss=0.363, test set accuracy=94.460%
Epoch: 2, loss=0.147, test set accuracy=96.210%
Epoch: 3, loss=0.102, test set accuracy=96.950%
Epoch: 4, loss=0.076, test set accuracy=97.270%
Epoch: 5, loss=0.058, test set accuracy=97.580%
Epoch: 6, loss=0.047, test set accuracy=97.580%
Epoch: 7, loss=0.038, test set accuracy=97.760%
Epoch: 8, loss=0.032, test set accuracy=97.810%
Epoch: 9, loss=0.024, test set accuracy=97.950%
Epoch: 10, loss=0.020, test set accuracy=97.960%

 Training Complete!
