### Importing necessary libraries

In [1]:
import numpy as np
import random
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as k

from sklearn.model_selection import train_test_split

In [2]:
#### Loading the mnist datasets ####
(X, y), (X_test, y_test) = mnist.load_data()

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=.2, random_state=50)



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
print("--- Shape of training set ---")
print(f"X-train: {X_train.shape}")
print(f"y-train: {y_train.shape}")

print("--- Shape of validation test ---")
print(f"X_val: {X_val.shape}")
print(f"y_val: {y_val.shape}")
print("--- Shape of testing set ----")
print(f"X-test: {X_test.shape}")
print(f"y-test: {y_test.shape}")

--- Shape of training set ---
X-train: (48000, 28, 28)
y-train: (48000,)
--- Shape of validation test ---
X_val: (12000, 28, 28)
y_val: (12000,)
--- Shape of testing set ----
X-test: (10000, 28, 28)
y-test: (10000,)


In [4]:
# specify input dimensions of each image
img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)

# batch size, number of classes, epochs
batch_size = 128
num_classes = 10
epochs = 20

In [5]:
# reshape x_train and x_test
x_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
x_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
x_val = X_val.reshape(X_val.shape[0], img_rows, img_cols, 1)
print(x_train.shape)
print(x_val.shape)
print(x_test.shape)

(48000, 28, 28, 1)
(12000, 28, 28, 1)
(10000, 28, 28, 1)


In [6]:
# convert class labels (from digits) to one-hot encoded vectors
y_train = keras.utils.to_categorical(y_train, num_classes)
y_val = keras.utils.to_categorical(y_val, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
print(y_train.shape)

(48000, 10)


In [7]:
# originally, the pixels are stored as ints
x_train.dtype

# convert int to float
x_train = x_train.astype('float32')
x_val = x_val.astype('float32')
x_test = x_test.astype('float32')

# normalise
x_train /= 255
x_val /= 255
x_test /= 255

In [8]:
# model
model = Sequential()

# a keras convolutional layer is called Conv2D
# help(Conv2D)
# note that the first layer needs to be told the input shape explicitly

# first conv layer
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape)) # input shape = (img_rows, img_cols, 1)

# second conv layer
model.add(Conv2D(64, kernel_size=(3, 3),
                 activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# flatten and put a fully connected layer
model.add(Flatten())
model.add(Dense(128, activation='relu')) # fully connected
model.add(Dropout(0.5))

# softmax layer
model.add(Dense(num_classes, activation='softmax'))

# model summary
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 24, 24, 64)        18496     
                                                                 
 max_pooling2d (MaxPooling2  (None, 12, 12, 64)        0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 12, 12, 64)        0         
                                                                 
 flatten (Flatten)           (None, 9216)              0         
                                                                 
 dense (Dense)               (None, 128)               1179776   
                                                        

In [9]:
# usual cross entropy loss
# choose any optimiser such as adam, rmsprop etc
# metric is accuracy
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer='adam',
              metrics=['accuracy'])

In [11]:
# fit the model
# this should take around 10-15 minutes when run locally on a windows/mac PC
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=10,
          verbose=1,
          validation_data=(x_val, y_val))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7ec221645ea0>

In [12]:
# evaluate the model on test data
model.evaluate(x_test, y_test)



[0.027635052800178528, 0.9916999936103821]

In [None]:
print(model.metrics_names)

['loss', 'accuracy']
