In [31]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
@author: DeRafael
In this part we construct the fully connect neural network models on  MNIST
"""
# import packages
import tensorflow as tf
import os

In [32]:
# hyperparameter setting
training_epochs = 30
step_size = 1e-2
batch_size = 600

In [33]:
# load data
# MNIST 60000 training data & 10000 testing data  x: (?, 28, 28); y (?,)
# data are stored as numpy format
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
Feature_dimension = 784
N_train = x_train.shape[0] # 60000
N_test = x_test.shape[0] # 10000

In [34]:
# build training dataset and testing dataset
# We reshape the training data to [?, 784] and convert them to tensor
# .batch() sets the batch size and shuffle the data
train_dataset = (
    tf.data.Dataset.from_tensor_slices((tf.reshape(x_train, [-1, Feature_dimension]), y_train))
    .batch(batch_size).shuffle(buffer_size=N_train, seed=0)
)

# we preprocess the data: divide the images by 255 and cast the data format to tf.float32 other than tf.int
# onehot will change the label from a number to a one hot vector
# e.g. 6 -> [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]
train_dataset = (
    train_dataset.map(lambda x, y:
                      (tf.divide(tf.cast(x, tf.float32), 255.0),
                       tf.reshape(tf.one_hot(y, 10), (-1, 10))))
)

test_dataset = (
    tf.data.Dataset.from_tensor_slices((tf.reshape(x_test, [-1, Feature_dimension]), y_test)).batch(N_test)
)

test_dataset = (
    test_dataset.map(lambda x, y:
                      (tf.divide(tf.cast(x, tf.float32), 255.0),
                       tf.reshape(tf.one_hot(y, 10), (-1, 10))))
)

# the data type of the two datasets are MapDatasets

In [35]:
# define neural networks
# we build a class
class neural_netowrk(tf.keras.Model):
    def __init__(self, seed=1):
        super(neural_netowrk, self).__init__()
        # use random seed to make the initialization repeat
        tf.random.set_seed(seed)
        # define fully connected layers
        # function: tf.keras.layers.Dense(number_of_output_nodes, activation=activation_functions, name=layer_name)
        # the input tensor is batch_size x feature(784)
        self.fc1 = tf.keras.layers.Dense(100, activation = 'sigmoid', name='fc1')
        # the output of the first layer is batch_size x 100
        self.fc2 = tf.keras.layers.Dense(10, activation='softmax', name='fc2')
        # the output of the second layer is batch_size x 10 (we have already include the softmax function)

    def forward(self, input):
        '''
        here we define the forward function
        :param input: the input data
        :return: output tensor
        '''
        # For each layer, a bias will also be initialized and add to the output after matrix multiply.
        x = self.fc1(input)
        output = self.fc2(x)
        return output

In [36]:
# tf.losses.binary_crossentropy is the cross entropy function function
def compute_loss(true, pred):
    '''
    :param true: true labels
    :param pred: output tensor
    :return: loss
    '''
    return tf.reduce_mean(tf.reduce_sum(tf.keras.metrics.categorical_crossentropy(true, pred), axis=-1))  # cross entropy

# compute accuracy
# we use the function tf.keras.metrics.categorical_accuracy() to compute the accuracy
def compute_accuracy(true, pred):
    '''
    :param true: true labels
    :param pred: output tensor
    :return: accuracy
    '''
    return tf.reduce_mean(tf.keras.metrics.categorical_accuracy(true, pred))

In [37]:
# we define a function:
def check(model):
    '''
    :param model: the neural network
    :return: training loss; training accuracy; testing loss; testing accuracy
    '''
    loss = []
    acc = []
    for x, y in train_dataset:
        output = model.forward(x)
        loss.append(compute_loss(y, output))
        acc.append(compute_accuracy(y, output))
    train_loss = sum(loss) / len(loss)
    train_acc = sum(acc) / len(acc)
    # we can use zip function to seperate data and labels
    test_x, test_y = zip(*test_dataset)
    test_x = test_x[0]
    test_y = test_y[0]

    output = model.forward(test_x)
    test_loss = compute_loss(test_y, output)
    test_acc = compute_accuracy(test_y, output)
    # tesnor.numpy() can convert the tensor to numpy format
    return train_loss.numpy(), train_acc.numpy(), test_loss.numpy(), test_acc.numpy()

In [38]:
def training():
    epoch = 0
    # define the model
    model = neural_netowrk()
    # define the optimizer
    # here we use the adam optimizer
    optimizer = tf.keras.optimizers.SGD(learning_rate=step_size)
    # set the loop conditions
    while epoch < training_epochs:
        epoch += 1
        # training
        for x, y in train_dataset: # go through all the training data
            # we come to the most important part, using tf.GradientTape
            # tensorflow will compute the gradient of the parameter by tf.GradientTape
            with tf.GradientTape() as tape:
                # go forward we get the output
                output = model.forward(x)
                # compute training loss
                loss = compute_loss(y, output)
            # ask for the gradient
            # tape.gradient(target=loss, source=parameter)
            grads = tape.gradient(target=loss, sources=model.trainable_variables) # the type of the gradients is list
            # optimize parameter
            # zip function is needed to align the gradients and parameters
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # every 5 epochs, we compute the training loss on the whole training data and the testing loss
        if epoch % 5 == 1:
            train_loss, train_acc, test_loss, test_acc = check(model)
            # print the results
            print('training loss: %.3f, training accuracy: %.3f, testing loss: %.3f, testing accuracy: %.3f'
                  %(train_loss/batch_size, float(train_acc), test_loss / N_test, float(test_acc)))

In [39]:
training()

training loss: 0.330, training accuracy: 0.904, testing loss: 0.316, testing accuracy: 0.910
training loss: 0.148, training accuracy: 0.955, testing loss: 0.160, testing accuracy: 0.950
training loss: 0.095, training accuracy: 0.973, testing loss: 0.120, testing accuracy: 0.963
training loss: 0.084, training accuracy: 0.975, testing loss: 0.121, testing accuracy: 0.964
training loss: 0.064, training accuracy: 0.983, testing loss: 0.111, testing accuracy: 0.967
training loss: 0.056, training accuracy: 0.985, testing loss: 0.107, testing accuracy: 0.969
