# MNIST Classification

### Import the relevant packages

In [62]:
import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds

In [63]:
# as_supervised=True yields (image, label) tuples; with_info=True also returns dataset metadata
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    with_info=True,
    as_supervised=True,
)

In [64]:
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# hold out 10% of the training examples as validation data
num_validation_samples = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples, tf.int64
)

# number of examples in the test split, as an int64 tensor
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

# scale all the values for numerical stability

def scale(image, label):
    """Cast the image to float32 and divide it by 255; the label passes through unchanged."""
    return tf.cast(image, tf.float32) / 255., label

scaled_train_and_validation_data = mnist_train.map(scale)
scaled_test_data = mnist_test.map(scale)

# Shuffle once before splitting. The order has to be identical on every pass
# over the dataset: with the default reshuffle_each_iteration=True, take() and
# skip() would select different samples on each iteration, so samples held
# out for validation would also end up in the training set on later epochs.
BUFFER_SIZE = 10000

shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

# extract validation data
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
# the remaining samples form the training set; reshuffle only these every epoch
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)
train_data = train_data.shuffle(BUFFER_SIZE)

BATCH_SIZE = 100

train_data = train_data.batch(BATCH_SIZE)
# validation and test data are each served as one single batch
validation_data = validation_data.batch(num_validation_samples)
test_data = scaled_test_data.batch(num_test_samples)

# split the single validation batch into inputs and targets
validation_inputs, validation_targets = next(iter(validation_data))

2025-07-26 12:03:24.825109: W tensorflow/core/kernels/data/cache_dataset_ops.cc:916] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


# Model

### Outline the Model

In [68]:
input_size = 784
output_size = 10
hidden_layer_size = 200

# Build the network layer by layer: flatten the image, pass it through two
# ReLU hidden layers of equal width, and finish with a softmax output layer.
model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=(28, 28, 1)))  # input shape is declared up front
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model.add(tf.keras.layers.Dense(output_size, activation='softmax'))

### Choose the Optimizer and the Loss function

In [69]:
# The targets are in the form of integers, hence sparse_categorical_crossentropy.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

### Training

In [70]:
NUM_EPOCHS = 50
# stop training once the monitored metric has failed to improve for 2 epochs in a row
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

# `callbacks` is documented as a list of Callback instances, so wrap the single
# callback in a list. Keeping the returned History object makes the per-epoch
# metrics available afterwards and avoids printing its repr as cell output.
history = model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
    callbacks=[early_stopping],
)

Epoch 1/50
540/540 - 2s - 3ms/step - accuracy: 0.9028 - loss: 0.3556 - val_accuracy: 0.9477 - val_loss: 0.1760
Epoch 2/50
540/540 - 1s - 3ms/step - accuracy: 0.9595 - loss: 0.1372 - val_accuracy: 0.9660 - val_loss: 0.1192
Epoch 3/50
540/540 - 1s - 3ms/step - accuracy: 0.9724 - loss: 0.0949 - val_accuracy: 0.9742 - val_loss: 0.0896
Epoch 4/50
540/540 - 1s - 2ms/step - accuracy: 0.9788 - loss: 0.0717 - val_accuracy: 0.9760 - val_loss: 0.0784
Epoch 5/50
540/540 - 1s - 2ms/step - accuracy: 0.9830 - loss: 0.0563 - val_accuracy: 0.9835 - val_loss: 0.0579
Epoch 6/50
540/540 - 1s - 3ms/step - accuracy: 0.9869 - loss: 0.0439 - val_accuracy: 0.9838 - val_loss: 0.0555
Epoch 7/50
540/540 - 1s - 3ms/step - accuracy: 0.9894 - loss: 0.0361 - val_accuracy: 0.9858 - val_loss: 0.0422
Epoch 8/50
540/540 - 1s - 3ms/step - accuracy: 0.9915 - loss: 0.0287 - val_accuracy: 0.9887 - val_loss: 0.0346
Epoch 9/50
540/540 - 1s - 3ms/step - accuracy: 0.9923 - loss: 0.0244 - val_accuracy: 0.9907 - val_loss: 0.0274
E

<keras.src.callbacks.history.History at 0x35a1842f0>

### Test Model

In [61]:
# evaluate() returns the loss followed by the metrics passed to compile() (accuracy)
test_loss, test_accuracy = model.evaluate(x=test_data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step - accuracy: 0.9763 - loss: 0.0742
