In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras import Model

### Load the MNIST dataset

In [2]:
# Load the MNIST train/test splits; each split is an (images, labels) pair.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide the pixel intensities by 255.0 (true division, so the result is floating point).
x_train = x_train / 255.0
x_test = x_test / 255.0

# Sanity check: image-array shapes first, then label-array shapes.
print(x_train.shape, x_test.shape)
print(y_train.shape, y_test.shape)

(60000, 28, 28) (10000, 28, 28)
(60000,) (10000,)


In [3]:
# Add a trailing channel axis: (N, H, W) -> (N, H, W, 1), then cast to float32.
# The target shape is built from the first three dimensions only, so this cell
# is idempotent: re-running it on arrays that already carry the channel axis
# leaves the shape unchanged instead of appending a second size-1 axis.
x_train = x_train.reshape(x_train.shape[:3] + (1,)).astype('float32')
x_test = x_test.reshape(x_test.shape[:3] + (1,)).astype('float32')

### TensorFlow input pipeline

In [4]:
BUFFER_SIZE = 10000  # size of the shuffle buffer used for the training data
BATCH_SIZE = 32      # examples per batch, shared by both pipelines

# Training pipeline: one (image, label) pair per element, shuffled, then batched.
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
)

# Evaluation pipeline: same slicing and batching, but no shuffling.
test_ds = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(BATCH_SIZE)
)

### Convolutional neural network for classification

In [5]:
class mnist_cnn(Model):
    """Small convolutional classifier producing 10 unnormalized class scores.

    Forward order: conv (32 filters) -> max-pool -> conv (64 filters)
    -> flatten -> dense (64 units) -> dense (10 units).
    The final Dense layer has no activation, so the outputs are raw logits.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: two 3x3 ReLU convolutions; the 2x2 pooling layer
        # is applied once, between them (see `call`).
        self.conv1 = Conv2D(filters=32, kernel_size=3, strides=1, activation='relu')
        self.maxp = MaxPooling2D(pool_size=2, strides=2)
        self.conv2 = Conv2D(filters=64, kernel_size=3, strides=1, activation='relu')
        # Classifier head.
        self.flat = Flatten()
        self.dense1 = Dense(units=64, activation='relu')
        self.dense2 = Dense(units=10)

    def call(self, x):
        """Apply the layers in sequence to `x` and return the final layer's output."""
        stages = (
            self.conv1,
            self.maxp,
            self.conv2,
            self.flat,
            self.dense1,
            self.dense2,
        )
        for stage in stages:
            x = stage(x)
        return x

In [6]:
# The network to train.
model = mnist_cnn()

# from_logits=True: the loss receives raw scores, not probabilities
# (the model's last Dense layer has no activation).
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
)

# Adam optimizer, constructed with its default arguments.
optimizer = tf.keras.optimizers.Adam()

### Loss and accuracy metrics

In [7]:
# Running means of the per-batch loss, one per data split.
train_loss = tf.keras.metrics.Mean(name='train_loss')
test_loss = tf.keras.metrics.Mean(name='test_loss')

# Accuracy against integer labels, one per data split.
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy',
)
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='test_accuracy',
)

### Train and test step functions

In [8]:
@tf.function
def train_step(images, labels):
    """Run one optimization step on a batch and update the training metrics.

    Args:
        images: batch of inputs fed to the global `model`.
        labels: target labels for the batch, passed to `loss_object` and
            `train_accuracy`.

    Returns:
        A 2-tuple: the values returned by calling `train_loss` and
        `train_accuracy` with this batch's loss and predictions.
    """
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        logits = model(images, training=True)
        batch_loss = loss_object(labels, logits)

    # Backward pass and parameter update.
    variables = model.trainable_variables
    grads = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))

    # Fold this batch into the running training metrics.
    running_loss = train_loss(batch_loss)
    running_accuracy = train_accuracy(labels, logits)
    return running_loss, running_accuracy

In [9]:
@tf.function
def test_step(images, labels):
    """Evaluate one batch (no gradient update) and update the test metrics.

    Args:
        images: batch of inputs fed to the global `model`.
        labels: target labels for the batch, passed to `loss_object` and
            `test_accuracy`.

    Returns:
        A 2-tuple: the values returned by calling `test_loss` and
        `test_accuracy` with this batch's loss and predictions.
    """
    logits = model(images, training=False)
    batch_loss = loss_object(labels, logits)

    # Fold this batch into the running test metrics.
    running_loss = test_loss(batch_loss)
    running_accuracy = test_accuracy(labels, logits)
    return running_loss, running_accuracy

### Train the model and monitor the test loss

In [10]:
EPOCHS = 5

# Early stopping: `patience` is the number of consecutive epochs the monitored
# loss may be worse than the best value seen so far before training stops.
# With patience = 1 this stops on the first epoch whose loss exceeds the
# previous epoch's loss.
# NOTE(review): the monitored loss is computed on test_ds, so the reported
# test metrics also drive the stopping decision — consider a separate
# validation split.
patience = 1
val_loss_monitor = []            # monitored loss after each completed epoch
best_val_loss = float('inf')     # lowest monitored loss seen so far
epochs_without_improvement = 0   # consecutive epochs worse than the best

for epoch in range(EPOCHS):

    # The metric objects accumulate across calls; clear them so each epoch's
    # numbers cover that epoch only.
    # NOTE(review): newer Keras releases name this method `reset_state()` —
    # confirm against the installed TensorFlow version.
    for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_states()

    # One pass over the training data, then one pass over the test data.
    # The step functions update the metric objects as a side effect, so their
    # return values are not needed here.
    for image, label in train_ds:
        train_step(image, label)
    for image, label in test_ds:
        test_step(image, label)

    template = 'Epoch {}, Loss {:.4f}, Acc {:.2f}, Val_loss {:.4f}, Val_acc {:.2f}'
    print(template.format(epoch+1,
                          train_loss.result(),
                          train_accuracy.result()*100,
                          test_loss.result(),
                          test_accuracy.result()*100))

    val_loss = test_loss.result()
    val_loss_monitor.append(val_loss)

    if float(val_loss) > best_val_loss:
        # Worse than the best epoch so far: spend one unit of patience.
        epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            print('Early Stopped!!!')
            break
    else:
        # Matched or improved on the best loss: record it and reset the counter.
        best_val_loss = float(val_loss)
        epochs_without_improvement = 0

Epoch 1, Loss 0.1181, Acc 96.46, Val_loss 0.0419, Val_acc 98.66
Epoch 2, Loss 0.0388, Acc 98.77, Val_loss 0.0390, Val_acc 98.65
Epoch 3, Loss 0.0259, Acc 99.18, Val_loss 0.0317, Val_acc 99.00
Epoch 4, Loss 0.0173, Acc 99.42, Val_loss 0.0340, Val_acc 98.89
Early Stopped!!!
