A neural network example from Coding the Deep Learning Revolution V2 from Adventures in Machine Learning, pp. 14-21

In [19]:
import tensorflow as tf
import numpy as np

In [5]:
from tensorflow.keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# LN
# load_data() returns the data as a pair of tuples: (training data, test data)
# Each of those two categories is further divided into (images, labels) - hence the pair of tuples
# x_train is an array of images [a1, ... a60000] # each a is an image
# ai = [r1, .., r28]             # an image is 28 rows
# ri = [pixel1,..., pixel28]     # a row is 28 pixel values

In [9]:
epochs = 10
batch_size = 100
# LN
# 60,000 training images / 100 images per batch = 600 batches per epoch,
# so the weights are going to be updated 6000 times: 600 batches * 10 epochs

# Seed both random number generators so that Restart & Run All reproduces the same
# weight initialisation (tf.random.normal) and the same batches (np.random.randint).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Normalize the input images by dividing by 255 (scales the data so it is between 0 and 1).
# Note: 255 is the max of the x values (my observation)
# NOTE: this cell is not idempotent - running it a second time divides by 255 again,
# so re-run the data loading cell first if this cell has to be executed again.
x_train = x_train / 255.0
x_test = x_test / 255.0
# LN: the division is element-wise - it reaches every pixel of every row of every image

In [10]:
# Convert x_test to a tensor to pass through the model (train data will be converted to
# tensors on the fly). A constant tensor is used instead of tf.Variable because the test
# images are fixed inputs, not state that is trained or updated.
x_test = tf.convert_to_tensor(x_test)

In [37]:
# The next step is to setup the weight and bias variables for the three-layer neural network.
# There are always L - 1 number of weights/bias tensors, where L is the number of layers.
# These variables are defined in the code below:

In [11]:
# Parameters linking the input layer (784 pixels) to the hidden layer (300 nodes)
W1 = tf.Variable(initial_value=tf.random.normal(shape=(784, 300), stddev=0.03), name='W1')
b1 = tf.Variable(initial_value=tf.random.normal(shape=(300,)), name='b1')
# Parameters linking the hidden layer (300 nodes) to the output layer (10 nodes)
W2 = tf.Variable(initial_value=tf.random.normal(shape=(300, 10), stddev=0.03), name='W2')
b2 = tf.Variable(initial_value=tf.random.normal(shape=(10,)), name='b2')

# LN
# W1 is a 784 x 300 matrix: every one of the 784 inputs is connected to every one of the 300 hidden nodes
# b1 has a single dimension of size 300 - one bias value is fed into each node of the hidden layer,
# . . . . so it needs to be 300 in this case
# W2 goes from the hidden layer of 300 to the output layer of 10 (and b2 holds one bias per output node)
# Together these configure a fully connected nn

In [39]:
# The standard deviation (stddev) is that of the normal distribution the random initial values are drawn from
# The 300 in W1 is the number of nodes in the hidden layer
# The W2 variable is a [300, 10] tensor, connecting the 300-node hidden layer to the 10-node output layer.

In [40]:
# The next step in the code is to create the computations that occur within the nodes of the network.
# If the reader recalls, the computations within the nodes of a neural network are of the following form:
# z = Wx + b
# h = f(z)
# Where W is the weights matrix, x is the layer input vector, b is the bias and f is the activation
# function of the node. These calculations comprise the feed-forward pass of the input data
# through the neural network. To execute these calculations, a dedicated feed forward function
# is created:

In [21]:
def nn_model(x_input, W1, b1, W2, b2):
    """Run the feed-forward pass of the network and return the un-activated outputs.

    x_input is a batch whose first dimension is the number of samples; W1/b1 are the
    weights and bias applied before the ReLU, W2/b2 the weights and bias applied after it.
    """
    n_samples = x_input.shape[0]
    # collapse each sample into one row (28 x 28 image -> 784 values)
    flat = tf.reshape(x_input, (n_samples, -1))
    flat = tf.cast(flat, tf.float32)
    # hidden layer: z = xW + b, then the ReLU activation
    hidden = tf.nn.relu(tf.add(tf.matmul(flat, W1), b1))
    # output layer: z = hW + b with no activation applied
    return tf.add(tf.matmul(hidden, W2), b2)
# Note that no activation function has been applied to this output layer of nodes (yet). 
# In machine/deep learning, the term “logits” refers to the un-activated output of a layer of nodes.

# LN
# This function produces y-bar, the nn's prediction before the output layer is activated

# x_input = tf.reshape(x_input, (x_input.shape[0], -1)) Explained
# x-input is a list of size 100
# tf.reshape(tensor, shape, name=None) - 
# if you have t = 
# [[1,2,3],
#  [4,5,6]] 
# and do tf.reshape(t, [3,2]) you get:
# [[1,2],
#  [3,4],
#  [5,6]]
# x_input = tf.reshape(x_input, (x_input.shape[0], -1)) 
#.shape returns the shape of the input (one size per dimension) ex: [28, 28] for a single image
# here x_input.shape = [100,28,28] (literally the shape 100 x 28 x 28) so x_input.shape[0] = 100
# tf.reshape(x_input, (100, -1)) - the -1 means "work out whatever size fits", which here is 28*28 = 784, so
# x_input becomes a list of size 100 (100 lists, each one of size 784)
# https://www.tensorflow.org/api_docs/python/tf/reshape
# https://www.tensorflow.org/api_docs/python/tf/shape

# x = tf.add(tf.matmul(tf.cast(x_input,tf.float32), W1), b1) - Explained
# tf.cast(x_input, tf.float32) - x_input comes in as 64 bit and this converts it to 32 bit, because
# tf.matmul needs both operands to have the same dtype and W1 is float32 (the default of tf.random.normal)
# then matrix multiplication (tf.matmul) multiplies x_input * W1, and tf.add adds b1 to the result

# x_inputT (dim:100x784) * W (dim: 784 x 300) + b1 (dim: 1 x 300) = dim: 100 x 300 

# x = tf.nn.relu(x) - puts x through the ReLU activation function

# logits = tf.add(tf.matmul(x,W2), b2) 
# x (dim: 100 x 300) * W2 (dim: 300 x 10) + b2 (dim: 1 x 10) = result dim (100 x 10)

# This takes 100 images at once and computing their unactivated outputs at once



In [22]:
def get_batch(x_data, y_data, batch_size):
    """Draw a random mini-batch of samples together with their matching labels.

    Args:
        x_data: NumPy array of samples, indexed by sample along its first axis
            (any number of trailing dimensions).
        y_data: NumPy array of labels, one per sample in x_data.
        batch_size: number of samples to draw.

    Returns:
        Tuple (x_batch, y_batch) in which x_batch[i] is the sample labelled y_batch[i].

    Indices are drawn with replacement, so a batch may contain the same sample twice.
    """
    idxs = np.random.randint(0, len(y_data), batch_size)  # batch_size random sample indices
    # Index the first axis only, so the trailing dimensions are kept whatever the rank
    # of x_data is (28 x 28 images, already-flattened 784-vectors, ...).
    return x_data[idxs], y_data[idxs]

# LN
# randint(lower, upper, size) ex: randint(1,6,4) generates 4 random integers from 1 up to (not including) 6
# in this notebook x_data is a NumPy array dim(60,000 x 28 x 28)
# y_data has dim(60,000), so len(y_data) = 60,000
# idxs is an array of len 100 (the batch size)
# x_data[idxs] picks those 100 images -> dim(100 x 28 x 28); y_data[idxs] picks their 100 labels
# result - a tuple (images, labels) where each image lines up with its label (y_data is the labels)

In [23]:
def loss_fn(logits, labels):
    """Return the mean softmax cross-entropy between the logits and the labels."""
    per_sample_loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    return tf.reduce_mean(per_sample_loss)

# LN
#cross entropy can be replaced with Euclidean Distance

In [24]:
# setup the optimizer
optimizer = tf.keras.optimizers.Adam()
# The Adam object can take a learning rate as input, but for the present purposes, the default value is used.
# LN
# the optimizer does the gradient descent step (it applies the gradients to the weights)
# Adam - adaptive learning rate optimizer - a variant of stochastic gradient descent

In [25]:
# The Training Loop
# Number of batches per epoch. Floor division keeps this an int (plain "/" returns a
# float, which range() rejects) and drops an incomplete final batch if there is one.
total_batch = len(y_train) // batch_size
# the parameters the optimizer is allowed to update
trainable_vars = [W1, b1, W2, b2]

for epoch in range(epochs):
    avg_loss = 0.0
    for batch_idx in range(total_batch):
        batch_x, batch_y = get_batch(x_train, y_train, batch_size=batch_size)
        # Convert the batch to a constant tensor. tf.Variable is meant for state that is
        # trained/updated (the weights); wrapping every batch in a new Variable allocates
        # a fresh variable on each step and makes the tape watch the input images too.
        batch_x = tf.convert_to_tensor(batch_x)
        # Create a one hot vector: for categorical data the true class is 1 and all the rest are 0
        # (the same layout as the softmax output, so the two can be compared by the loss)
        batch_y = tf.one_hot(batch_y, 10)
        # the tape records the operations run inside this block so the gradient of the
        # loss with respect to the weights can be computed afterwards
        with tf.GradientTape() as tape:
            logits = nn_model(batch_x, W1, b1, W2, b2)
            loss = loss_fn(logits, batch_y)
        # the next 2 lines are doing gradient descent
        # how does the loss change with each weight/bias?
        gradients = tape.gradient(loss, trainable_vars)
        # update those weights in the direction that reduces the loss
        optimizer.apply_gradients(zip(gradients, trainable_vars))
        # running mean: each batch contributes 1/total_batch of the epoch's average loss
        avg_loss += float(loss) / total_batch
    # evaluate on the test set once per epoch
    test_logits = nn_model(x_test, W1, b1, W2, b2) # unactivated result that is not through softmax
    # the largest logit is also the largest softmax probability, so argmax gives the
    # predicted class directly (softmax is only needed for the loss during learning)
    max_idxs = tf.argmax(test_logits, axis=1)
    # accuracy = fraction of test images whose predicted class equals the integer label
    test_acc = np.mean(max_idxs.numpy() == y_test)
    print(f"Epoch: {epoch + 1}, loss={avg_loss:.3f}, test set accuracy={test_acc*100:.3f}%")

print("\n Training Complete!")
    

Epoch: 1, loss=2.067, test set accuracy=59.000%
Epoch: 2, loss=1.151, test set accuracy=79.700%
Epoch: 3, loss=0.720, test set accuracy=85.400%
Epoch: 4, loss=0.544, test set accuracy=87.670%
Epoch: 5, loss=0.456, test set accuracy=89.000%
Epoch: 6, loss=0.412, test set accuracy=89.390%
Epoch: 7, loss=0.377, test set accuracy=90.140%
Epoch: 8, loss=0.356, test set accuracy=90.470%
Epoch: 9, loss=0.341, test set accuracy=90.680%
Epoch: 10, loss=0.332, test set accuracy=90.920%

 Training Complete!
