Classifying MNIST with Tensorflow

In [1]:
#import required libraries
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.keras.datasets import mnist
import numpy as np

print(tf.__version__)

2.16.1


Importing and Preparing Data

In [2]:
# Getting train and test sets
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [3]:
# Check max pixel value for scaling
max_value = np.max(train_images[0])
print(max_value)

# Scale Images
train_images = train_images/max_value
test_images = test_images/max_value

255


# Training and Modeling
we will create a CNN architecture for this use-case. As we have image data CNN works better as convolving layers reduces the high dimension of images without loosing its information.

In [9]:
# Creating CNN architecture
def create_model(n_conv=1, n_filters=None, kernal_size=None, n_dense=None, dense=None):
    layers = [tf.keras.layers.InputLayer(shape=INPUT_SHAPE)]
    if n_conv==1:
         layers.extend([tf.keras.layers.InputLayer(input_shape=INPUT_SHAPE),
                                             tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='valid', input_shape=INPUT_SHAPE, name='conv1'),
                                             tf.keras.layers.MaxPool2D((2, 2), strides=(2,2), padding="valid",name="pool1"),
                                             tf.keras.layers.Flatten()])
    else:
        for i in range(n_conv-1):
            layers.extend([tf.keras.layers.Conv2D(n_filters[i], kernal_size[i], activation='relu', padding='valid', input_shape=INPUT_SHAPE, name=f'conv{i}'),
                           tf.keras.layers.MaxPool2D((2, 2), strides=(2,2), padding="valid",name=f"pool{i}"),])
            
        layers.append(tf.keras.layers.Flatten())
        
        if n_dense > 1:
            for i in range(n_dense-2):
                layers.append([tf.keras.layers.Dense(128, activation=tf.nn.relu),
                               tf.keras.layers.Dropout(0.2)])
    
    layers.append(tf.keras.layers.Dense(dense, activation=tf.nn.softmax))
        
    model = tf.keras.models.Sequential(layers)                                        
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

Define the callbacks to avoid over-fitting. Callback will stop the training when there is no improvement in the loss for 3 consecutive epocs.

In [6]:
from tensorflow.keras.callbacks import EarlyStopping
# monitor="val_loss" : This is default value
callbacks = [EarlyStopping(patience=2, mode='min')]

In [7]:
def get_k_fold_score(k_fold, model):
    fold_no = 1
    history_list = []
    
    for train_idx, test_idx in k_fold.split(train_images, train_labels):
        history = model.fit(train_images[train_idx], train_labels[train_idx], validation_data=(train_images[test_idx], train_labels[test_idx]), callbacks=callbacks, epochs=1)
        print(f'Fold {fold_no} : history.history')
        fold_no+=1
        history_list.append(history)
    return history_list

In [28]:
from sklearn.model_selection import KFold

num_folds = 10
n_epocs = 10

INPUT_SHAPE = (28, 28, 1)
n_conv = 3
n_dense = 1
n_classes = 10

filters = [256, 128, 128]
kernal_size = n_conv * [(3, 3)]

assert len(filters) == n_conv
assert len(kernal_size) == n_conv


cnn_model = create_model(n_conv, filters, kernal_size, n_dense, n_classes)
print(cnn_model.summary())

scores = get_k_fold_score(KFold(n_splits=num_folds, shuffle=True), cnn_model)

None
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 63ms/step - accuracy: 0.9185 - loss: 0.2754 - val_accuracy: 0.9802 - val_loss: 0.0594
Fold 1 : history.history
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 56ms/step - accuracy: 0.9867 - loss: 0.0446 - val_accuracy: 0.9908 - val_loss: 0.0320
Fold 2 : history.history
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 55ms/step - accuracy: 0.9902 - loss: 0.0318 - val_accuracy: 0.9862 - val_loss: 0.0455
Fold 3 : history.history
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 56ms/step - accuracy: 0.9932 - loss: 0.0215 - val_accuracy: 0.9917 - val_loss: 0.0262
Fold 4 : history.history
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 58ms/step - accuracy: 0.9948 - loss: 0.0185 - val_accuracy: 0.9937 - val_loss: 0.0196
Fold 5 : history.history
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 66ms/step - accuracy: 0.

In [29]:
from matplotlib import pyplot as plt

accuracy = []
loss = []
val_loss = []
val_accuracy = []

for i in range(num_folds):
    accuracy.append(scores[i].history['accuracy'])
    loss.append(scores[i].history['loss'])
    val_loss.append(scores[i].history['val_loss'])
    val_accuracy.append(scores[i].history['val_accuracy'])


print(np.mean(accuracy))
print(np.mean(val_accuracy))

predictions = cnn_model.predict(test_images)
print(cnn_model.compute_metrics(test_images, test_labels, predictions))

0.9910055518150329
0.9924333333969116
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 17ms/step
{'accuracy': 0.9935625195503235, 'loss': 0.005633053369820118}
