In [1]:
#data courtesy of MNIST: http://yann.lecun.com/exdb/mnist/
import pandas as pd
import struct
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
import tensorflow as tf

In [2]:
# Load the training images from the MNIST IDX3 file.
# Format notes: https://stackoverflow.com/questions/39969045/parsing-yann-lecuns-mnist-idx-file-format
with open("train-images.idx3-ubyte", "rb") as f:
    # The header is four big-endian unsigned 32-bit integers.
    magic, size, nrows, ncols = struct.unpack(">IIII", f.read(16))
    # Everything after the header is read as single unsigned bytes and
    # arranged as (size, nrows, ncols).
    train_data = np.fromfile(f, dtype=np.uint8).reshape((size, nrows, ncols))

train_data.shape


(60000, 28, 28)

In [3]:
# Load the training target labels: an 8-byte header (two big-endian unsigned
# 32-bit integers) followed by one unsigned byte per label.
with open("train-labels.idx1-ubyte", "rb") as f:
    magic, size = struct.unpack(">II", f.read(8))
    train_label = np.fromfile(f, dtype=np.uint8)

train_label.shape

(60000,)

In [None]:
# Flatten every 28x28 image into a single row of 28*28 = 784 pixel values,
# which is the input width declared for the dense network.
train_data = train_data.reshape(len(train_data), 784)

# Preview the first five digits, each drawn in grayscale and titled with its label.
for i in range(5):
    plt.imshow(train_data[i].reshape(28, 28), cmap="gray")
    plt.title(train_label[i])
    plt.show()


In [None]:
# Fully connected classifier for flattened 28x28 digit images (784 inputs,
# 10 output classes).
#
# The pixel data is loaded as unsigned bytes in [0, 255]. The Rescaling layer
# maps it to [0, 1] inside the model, so both fit() and predict() can be given
# the raw, unscaled images and still see well-conditioned inputs.
#
# The output layer is linear, i.e. it emits raw logits instead of applying a
# softmax activation. The loss is configured with from_logits=True so that it
# applies softmax internally, which is numerically more stable than a softmax
# output layer followed by cross-entropy.
model = Sequential(
    [
        tf.keras.Input(shape=(784,)),
        tf.keras.layers.Rescaling(1.0 / 255),
        Dense(80, activation="relu"),
        Dense(40, activation="relu"),
        Dense(20, activation="relu"),
        Dense(10, activation="linear"),
    ]
)

model.summary()

In [None]:
# The labels are integer class ids, hence the sparse categorical cross-entropy;
# from_logits=True because the final Dense layer has a linear activation.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)

# Train on the whole training set for 100 epochs.
model.fit(x=train_data, y=train_label, epochs=100)

In [None]:
# Load the test images from the MNIST IDX3 file.
with open("t10k-images.idx3-ubyte", "rb") as f:
    # Header: four big-endian unsigned 32-bit integers.
    magic, size = struct.unpack(">II", f.read(8))
    nrows, ncols = struct.unpack(">II", f.read(8))
    test_data = np.fromfile(f, dtype=np.dtype(np.uint8).newbyteorder('>'))
    # Take the image dimensions from the header (as the training loader does)
    # rather than hard-coding 28x28, so the header fields are actually honoured.
    test_data = test_data.reshape((size, nrows, ncols))

# Flatten each image into one row of nrows*ncols pixels (784 for 28x28 images).
test_data = test_data.reshape(test_data.shape[0], nrows * ncols)
test_data.shape

In [None]:
# Load the test labels: an 8-byte header (two big-endian unsigned 32-bit
# integers) followed by one unsigned byte per label.
with open("t10k-labels.idx1-ubyte", "rb") as f:
    magic, size = struct.unpack(">II", f.read(8))
    test_label = np.fromfile(f, dtype=np.uint8)

test_label.shape

In [None]:
# Split the loaded t10k data into a final test set (25%) and a
# cross-validation set (the remaining 75%).
# random_state pins the shuffle so the split, and therefore the error rates
# reported below, are the same on every run.
# NOTE: this cell rebinds test_data/test_label to the 25% subset, so re-running
# it without first re-running the loading cells splits the reduced set again.
test_data, cv_data, test_label, cv_label = train_test_split(
    test_data,
    test_label,
    train_size=0.25,
    random_state=42,
)

print(f"Test sets and CV set shape:{test_data.shape, cv_data.shape}")

In [None]:
# The model outputs logits; softmax turns them into per-class probabilities
# for every cross-validation image.
predictions = tf.nn.softmax(model.predict(cv_data)).numpy()

# m: number of cross-validation samples.
# c: number of samples whose most probable class differs from the true label.
m = cv_data.shape[0]
c = int(np.count_nonzero(np.argmax(predictions, axis=1) != cv_label))

print(f"Cross validation set error rate: {c/m}")

In [None]:
# The model outputs logits; softmax turns them into per-class probabilities
# for every test image.
test_pred = tf.nn.softmax(model.predict(test_data)).numpy()

# m: number of test samples.
# c: number of samples whose most probable class differs from the true label.
m = test_data.shape[0]
c = int(np.count_nonzero(np.argmax(test_pred, axis=1) != test_label))

print(f"Test set error rate: {c/m}")