In [1]:
# Standard library: used only to set an environment variable for TensorFlow
import os
# Lower TensorFlow's native log verbosity. This assignment is placed before
# the `import tensorflow` below so the setting is already in the environment
# when the library is loaded.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# TensorFlow / Keras modules used to build and train the models, plus the
# MNIST dataset loader
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

## Index
[Loading Data](#Loading-Data)

[Sequential API](#Sequential-API)

[Functional API](#Functional-API)

[Improving the Model](#Improving-the-Model)

## Loading Data

In [2]:
# Fetch the MNIST train/test splits, then report the dimensions of the
# training images and of their labels
(X_train, y_train), (X_test, y_test) = mnist.load_data()
for train_array in (X_train, y_train):
    print(train_array.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
(60000, 28, 28)
(60000,)


In [4]:
# Flatten every 28x28 image into one row of 784 values, cast to float32 and
# divide by 255.0 to rescale the pixel values.
# NOTE: this overwrites X_train / X_test, so running the cell a second time
# would divide by 255 again -- re-run the loading cell first.
X_train, X_test = (
    images.reshape(-1, 28 * 28).astype('float32') / 255.0
    for images in (X_train, X_test)
)

## Sequential API

In [10]:
# Sequential API (very convenient, not very flexible): a plain stack of layers
model = keras.models.Sequential(
    [
        # `shape` has to be a tuple. `(28 * 28)` is just the integer 784, so
        # the trailing comma is needed to describe one flattened image.
        keras.Input(shape=(28 * 28,)),
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        # No activation here: the layer outputs raw logits, which is why the
        # loss for this model is created with `from_logits=True`.
        layers.Dense(10)
    ]
)

In [11]:
# Model summary: print each layer with its output shape and parameter count
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 512)               401920    
_________________________________________________________________
dense_4 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_5 (Dense)              (None, 10)                2570      
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Model compile
model.compile(
    # The final Dense layer of this model has no activation, so the loss is
    # told that it receives logits rather than probabilities.
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [13]:
# Train for 5 epochs in mini-batches of 32; verbose=2 logs one line per epoch
model.fit(
    x=X_train,
    y=y_train,
    epochs=5,
    batch_size=32,
    verbose=2,
)

Train on 60000 samples
Epoch 1/5
60000/60000 - 4s - loss: 0.5858 - accuracy: 0.8239
Epoch 2/5
60000/60000 - 4s - loss: 0.2953 - accuracy: 0.9130
Epoch 3/5
60000/60000 - 3s - loss: 0.2268 - accuracy: 0.9324
Epoch 4/5
60000/60000 - 3s - loss: 0.1781 - accuracy: 0.9469
Epoch 5/5
60000/60000 - 3s - loss: 0.1430 - accuracy: 0.9567


<tensorflow.python.keras.callbacks.History at 0x24000d6d088>

In [14]:
# Score the trained model on the test split; the result is [loss, accuracy]
model.evaluate(
    x=X_test,
    y=y_test,
    batch_size=32,
    verbose=2,
)

10000/10000 - 1s - loss: 0.1342 - accuracy: 0.9582


[0.1342455362766981, 0.9582]

## Functional API


In [24]:
# Functional API (a bit more flexible): layers are called on tensors, so the
# graph from `inputs` to `outputs` is wired explicitly
# `shape` has to be a tuple describing one sample, hence `(784,)` and not 784.
inputs = keras.Input(shape=(784,))
hidden1 = layers.Dense(512, activation='relu', name='hidden1')(inputs)
hidden2 = layers.Dense(256, activation='relu', name='hidden2')(hidden1)
# Softmax output: the model emits class probabilities, not logits
outputs = layers.Dense(10, activation='softmax')(hidden2)

model = keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    # The output layer already applies softmax, so `from_logits` is not set
    loss=keras.losses.SparseCategoricalCrossentropy(),
    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [25]:
# Train for 5 epochs in mini-batches of 32; verbose=2 logs one line per epoch
model.fit(
    x=X_train,
    y=y_train,
    epochs=5,
    batch_size=32,
    verbose=2,
)

Train on 60000 samples
Epoch 1/5
60000/60000 - 4s - loss: 0.5831 - accuracy: 0.8254
Epoch 2/5
60000/60000 - 3s - loss: 0.2868 - accuracy: 0.9154
Epoch 3/5
60000/60000 - 4s - loss: 0.2201 - accuracy: 0.9348
Epoch 4/5
60000/60000 - 3s - loss: 0.1725 - accuracy: 0.9484
Epoch 5/5
60000/60000 - 3s - loss: 0.1393 - accuracy: 0.9579


<tensorflow.python.keras.callbacks.History at 0x23f3204e5c8>

In [26]:
# Score the trained model on the test split; the result is [loss, accuracy]
model.evaluate(
    x=X_test,
    y=y_test,
    batch_size=32,
    verbose=2,
)

10000/10000 - 1s - loss: 0.1332 - accuracy: 0.9580


[0.133151106640324, 0.958]

In [27]:
# Model summary: print each layer with its output shape and parameter count
model.summary()


Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
hidden1 (Dense)              (None, 512)               401920    
_________________________________________________________________
hidden2 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_15 (Dense)             (None, 10)                2570      
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________


## Improving the Model
This section tweaks the Functional API model to achieve higher accuracy on the MNIST
dataset.

In [41]:
# Optimizing: a Functional API network with an extra 256-unit sigmoid hidden
# layer, trained with a smaller batch size (24) for more epochs (20)
# `shape` has to be a tuple describing one sample, hence `(784,)` and not 784.
inputs = keras.Input(shape=(784,))
hidden1 = layers.Dense(512, activation='relu', name='hidden1')(inputs)
hidden2 = layers.Dense(256, activation='relu', name='hidden2')(hidden1)
hidden3 = layers.Dense(256, activation='sigmoid', name='hidden3')(hidden2)
# Softmax output: the model emits class probabilities, not logits
outputs = layers.Dense(10, activation='softmax')(hidden3)

model = keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    # The output layer already applies softmax, so `from_logits` is not set
    loss=keras.losses.SparseCategoricalCrossentropy(),
    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

# Model fit
model.fit(X_train, y_train, batch_size=24, epochs=20, verbose=2)

Train on 60000 samples
Epoch 1/20
60000/60000 - 5s - loss: 0.6315 - accuracy: 0.7919
Epoch 2/20
60000/60000 - 5s - loss: 0.2713 - accuracy: 0.9171
Epoch 3/20
60000/60000 - 5s - loss: 0.1939 - accuracy: 0.9395
Epoch 4/20
60000/60000 - 5s - loss: 0.1482 - accuracy: 0.9542
Epoch 5/20
60000/60000 - 5s - loss: 0.1207 - accuracy: 0.9633
Epoch 6/20
60000/60000 - 5s - loss: 0.1009 - accuracy: 0.9686
Epoch 7/20
60000/60000 - 5s - loss: 0.0864 - accuracy: 0.9740
Epoch 8/20
60000/60000 - 5s - loss: 0.0759 - accuracy: 0.9767
Epoch 9/20
60000/60000 - 5s - loss: 0.0659 - accuracy: 0.9793
Epoch 10/20
60000/60000 - 5s - loss: 0.0600 - accuracy: 0.9807
Epoch 11/20
60000/60000 - 5s - loss: 0.0525 - accuracy: 0.9831
Epoch 12/20
60000/60000 - 5s - loss: 0.0474 - accuracy: 0.9852
Epoch 13/20
60000/60000 - 5s - loss: 0.0424 - accuracy: 0.9856
Epoch 14/20
60000/60000 - 5s - loss: 0.0394 - accuracy: 0.9875
Epoch 15/20
60000/60000 - 5s - loss: 0.0350 - accuracy: 0.9885
Epoch 16/20
60000/60000 - 5s - loss: 0.03

<tensorflow.python.keras.callbacks.History at 0x24016bbb5c8>

In [42]:
# Score the tuned model on the test split; the result is [loss, accuracy]
model.evaluate(
    x=X_test,
    y=y_test,
    batch_size=24,
    verbose=2,
)

10000/10000 - 1s - loss: 0.0797 - accuracy: 0.9811


[0.07971823363120566, 0.9811]