### Codes are inspired from: https://github.com/aymericdamien/TensorFlow-Examples/blob/master/tensorflow_v2/notebooks/3_NeuralNetworks/convolutional_network.ipynb

In [1]:
import tensorflow as tf
from tensorflow.keras import Model, layers
import numpy as  np

In [2]:


# MNIST dataset parameters.
num_classes = 10 # total classes (0-9 digits).

# Training parameters.
learning_rate = 0.01
training_steps = 200
batch_size = 128
display_step = 10

# Network parameters.
conv1_filters = 32 # number of filters for 1st conv layer.
conv2_filters = 64 # number of filters for 2nd conv layer.
fc1_units = 1024 # number of neurons for 1st fully-connected layer.



In [3]:
# Prepare MNIST data.
from tensorflow.keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Convert to float32.
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)
# Normalize images value from [0, 255] to [0, 1].
x_train, x_test = x_train / 255., x_test / 255.


In [4]:
# Use tf.data API to shuffle and batch data.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1)

In [5]:
class CNN(Model):
    def __init__(self):
        super(CNN, self).__init__()
        
        # Adding one convolutional layer
        self.conv1 = layers.Conv2D(32, kernel_size = 3, activation = tf.nn.relu)
        self.maxpool1 = layers.MaxPool2D(2, strides=2)
        
         # Flatten the data to a 1-D vector for the fully connected layer.
        self.flatten = layers.Flatten()

        # Fully connected layer.
        self.fc1 = layers.Dense(1024)
        # Apply Dropout (if is_training is False, dropout is not applied).
        self.dropout = layers.Dropout(rate=0.5)

        # Output layer, class prediction.
        self.out = layers.Dense(num_classes)
    
    def call(self, x, is_training=False):
        x = tf.reshape(x, [-1, 28, 28, 1])
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.dropout(x, training=is_training)
        x = self.out(x)
        if not is_training:
            # tf cross entropy expect logits without softmax, so only
            # apply softmax when not training.
            x = tf.nn.softmax(x)
        return x

In [6]:

def cross_entropy_loss(x, y):
    y = tf.cast(y, tf.int64)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=x)
    return tf.reduce_mean(loss)

# Accuracy metric.
def accuracy(y_pred, y_true):
    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)



In [7]:
# Training process. 
def train(x, y):
    # Wrap computation inside a GradientTape for automatic differentiation.
    with tf.GradientTape() as g:
        # Forward pass.
        pred = cnn(x, is_training=True)
        # Compute loss.
        loss = cross_entropy_loss(pred, y)
        
    # Variables to update, i.e. trainable variables.
    trainable_variables = cnn.trainable_variables

    # Compute gradients.
    gradients = g.gradient(loss, trainable_variables)
    
    # Update W and b following gradients.
    optimizer.apply_gradients(zip(gradients, trainable_variables))

In [8]:
cnn = CNN()
# Stochastic gradient descent optimizer.
optimizer = tf.optimizers.SGD(learning_rate)
# Run training for the given number of steps.
for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
    # Run the optimization to update W and b values.
    train(batch_x, batch_y)
    
    if step % display_step == 0:
        pred = cnn(batch_x)
        loss = cross_entropy_loss(pred, batch_y)
        acc = accuracy(pred, batch_y)
        print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))

step: 10, loss: 2.296375, accuracy: 0.289062
step: 20, loss: 2.289256, accuracy: 0.421875
step: 30, loss: 2.278344, accuracy: 0.570312
step: 40, loss: 2.259719, accuracy: 0.757812
step: 50, loss: 2.251052, accuracy: 0.695312
step: 60, loss: 2.230496, accuracy: 0.671875
step: 70, loss: 2.174964, accuracy: 0.734375
step: 80, loss: 2.161464, accuracy: 0.734375
step: 90, loss: 2.063108, accuracy: 0.835938
step: 100, loss: 2.048398, accuracy: 0.765625
step: 110, loss: 1.978615, accuracy: 0.804688
step: 120, loss: 1.974553, accuracy: 0.781250
step: 130, loss: 1.951821, accuracy: 0.773438
step: 140, loss: 1.933967, accuracy: 0.765625
step: 150, loss: 1.855328, accuracy: 0.851562
step: 160, loss: 1.836396, accuracy: 0.789062
step: 170, loss: 1.840333, accuracy: 0.789062
step: 180, loss: 1.814095, accuracy: 0.843750
step: 190, loss: 1.799976, accuracy: 0.820312
step: 200, loss: 1.805807, accuracy: 0.765625


In [9]:
pred = cnn(x_test)
print(f"Test Accuracy:  {accuracy(pred, y_test)}")

Test Accuracy:  0.8698999881744385


In [10]:
cnn = CNN()
# Adam optimizer.
optimizer = tf.optimizers.Adam(learning_rate)
# Run training for the given number of steps.
for step, (batch_x, batch_y) in enumerate(train_data.take(200), 1):
    # Run the optimization to update W and b values.
    train(batch_x, batch_y)
    
    if step % display_step == 0:
        pred = cnn(batch_x)
        loss = cross_entropy_loss(pred, batch_y)
        acc = accuracy(pred, batch_y)
        print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))

step: 10, loss: 2.110892, accuracy: 0.687500
step: 20, loss: 1.588778, accuracy: 0.898438
step: 30, loss: 1.567721, accuracy: 0.921875
step: 40, loss: 1.572155, accuracy: 0.921875
step: 50, loss: 1.545868, accuracy: 0.953125
step: 60, loss: 1.543453, accuracy: 0.953125
step: 70, loss: 1.536548, accuracy: 0.929688
step: 80, loss: 1.510620, accuracy: 0.984375
step: 90, loss: 1.550764, accuracy: 0.929688
step: 100, loss: 1.499901, accuracy: 0.976562
step: 110, loss: 1.515895, accuracy: 0.968750
step: 120, loss: 1.514361, accuracy: 0.968750
step: 130, loss: 1.519545, accuracy: 0.960938
step: 140, loss: 1.522086, accuracy: 0.960938
step: 150, loss: 1.496347, accuracy: 0.984375
step: 160, loss: 1.506718, accuracy: 0.960938
step: 170, loss: 1.517227, accuracy: 0.960938
step: 180, loss: 1.533745, accuracy: 0.945312
step: 190, loss: 1.507281, accuracy: 0.960938
step: 200, loss: 1.509618, accuracy: 0.976562


In [12]:
pred = cnn(x_test)
print(f"Test Accuracy:  {accuracy(pred, y_test)}")

Test Accuracy:  0.9635000228881836
