#Classifying Handwritten Digits from the MNIST Dataset


> Using Keras



Load TensorFlow

In [1]:
import tensorflow as tf
print("TensorFlow version: ", tf.__version__)

TensorFlow version:  2.18.0


Load the MNIST dataset

In [2]:
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train/255.0, x_test/255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


### Define the model

In [4]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])

In [18]:
model.summary()

In [5]:
predictions = model(x_train[:1]).numpy()
predictions

array([[0.06199396, 0.05302403, 0.06337319, 0.13820148, 0.11010821,
        0.0645375 , 0.15385468, 0.14305127, 0.06090349, 0.15095218]],
      dtype=float32)

In [6]:
tf.nn.softmax(predictions).numpy()

array([[0.09619025, 0.09533129, 0.09632301, 0.10380721, 0.10093152,
        0.09643522, 0.10544492, 0.10431189, 0.09608541, 0.10513931]],
      dtype=float32)

Define a loss function

In [7]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [8]:
loss_fn(y_train[:1], predictions).numpy()

np.float32(2.3388836)

#### Compile the model.
Using the above loss function, Adam optimizer, measuring accuracy

In [9]:
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

### Train and evaluate the model.

In [10]:
model.fit(x_train, y_train, epochs=10)

Epoch 1/10


  output, from_logits = _get_logits(


[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.8611 - loss: 0.4796
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9533 - loss: 0.1555
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9665 - loss: 0.1109
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9740 - loss: 0.0858
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9776 - loss: 0.0718
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9783 - loss: 0.0674
Epoch 7/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9815 - loss: 0.0552
Epoch 8/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9826 - loss: 0.0527
Epoch 9/10
[1m1875/1875[0m [32m━

<keras.src.callbacks.history.History at 0x78da02c8c890>

### Model Performance

In [11]:
model.evaluate(x_test,  y_test, verbose=2)

313/313 - 1s - 5ms/step - accuracy: 0.9808 - loss: 0.0710


[0.0709957554936409, 0.9807999730110168]

#### The overall model accuracy is **98.08%**

#### Creating a probability model

Wrap the previously trained model and attach a softmax

In [12]:
probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])

In [13]:
probability_model(x_test[:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[0.08533683, 0.08533683, 0.08533712, 0.08533707, 0.08533683,
        0.08533683, 0.08533683, 0.23196809, 0.08533683, 0.08533683],
       [0.08533676, 0.08533683, 0.23196909, 0.08533677, 0.08533676,
        0.08533676, 0.08533676, 0.08533676, 0.08533676, 0.08533676],
       [0.08533935, 0.23192811, 0.08534653, 0.08533935, 0.0853394 ,
        0.08533945, 0.0853396 , 0.08534599, 0.08534284, 0.08533935],
       [0.23196803, 0.08533683, 0.08533697, 0.08533683, 0.08533683,
        0.08533683, 0.085337  , 0.08533683, 0.08533683, 0.08533707],
       [0.08536183, 0.08536182, 0.08536226, 0.08536182, 0.23157203,
        0.08536182, 0.08536207, 0.08536383, 0.08536182, 0.08553064]],
      dtype=float32)>

Train the new model.

In [14]:
probability_model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

In [17]:
probability_model.summary()

In [15]:
probability_model.fit(x_train, y_train, epochs=10)

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9861 - loss: 1.4792
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9853 - loss: 1.4789
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9871 - loss: 1.4770
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9869 - loss: 1.4766
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9875 - loss: 1.4753
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9873 - loss: 1.4756
Epoch 7/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9889 - loss: 1.4742
Epoch 8/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9884 - loss: 1.4740
Epoch 9/10
[1m1875/1875

<keras.src.callbacks.history.History at 0x78d93cf610d0>

Evaluate the performance.

In [16]:
probability_model.evaluate(x_test,  y_test, verbose=2)

313/313 - 2s - 5ms/step - accuracy: 0.9824 - loss: 1.4795


[1.479454517364502, 0.9824000000953674]

This model resulted in an accuracy of **98.24%** .

---
So, adding the extra softmax layer to the model resulted in a slight increase in accuracy. The first model, however, was more effective in minimizing loss.