FROM https://www.kaggle.com/datasets/hojjatk/mnist-dataset

In [2]:
#
# This is a sample Notebook to demonstrate how to read "MNIST Dataset"
#
import numpy as np # linear algebra
import struct
from array import array
from os.path  import join

#
# MNIST Data Loader Class
#
class MnistDataloader(object):
    def __init__(self, training_images_filepath,training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath
    
    def read_images_labels(self, images_filepath, labels_filepath):        
        labels = []
        with open(labels_filepath, 'rb') as file:
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())        
        
        with open(images_filepath, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())        
        images = []
        for i in range(size):
            images.append([0] * rows * cols)
        for i in range(size):
            img = np.array(image_data[i * rows * cols:(i + 1) * rows * cols])
            img = img.reshape(28, 28)
            images[i][:] = img            
        
        return images, labels
            
    def load_data(self):
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (x_train, y_train),(x_test, y_test)

In [3]:
#
# Verify Reading Dataset via MnistDataloader class
#
%matplotlib inline
import random
import matplotlib.pyplot as plt

#
# Set file paths based on added MNIST Datasets
#
input_path = './input'
training_images_filepath = join(input_path, 'train-images-idx3-ubyte')
training_labels_filepath = join(input_path, 'train-labels-idx1-ubyte')
test_images_filepath = join(input_path, 't10k-images-idx3-ubyte')
test_labels_filepath = join(input_path, 't10k-labels-idx1-ubyte')

#
# Helper function to show a list of images with their relating titles
#
def show_images(images, title_texts):
    cols = 5
    rows = int(len(images)/cols) + 1
    plt.figure(figsize=(30,20))
    index = 1    
    for x in zip(images, title_texts):        
        image = x[0]        
        title_text = x[1]
        plt.subplot(rows, cols, index)        
        plt.imshow(image, cmap=plt.cm.gray)
        if (title_text != ''):
            plt.title(title_text, fontsize = 15);        
        index += 1

#
# Load MINST dataset
#
mnist_dataloader = MnistDataloader(training_images_filepath, training_labels_filepath, test_images_filepath, test_labels_filepath)
(x_train, y_train), (x_test, y_test) = mnist_dataloader.load_data()

#
# Show some random training and test images 
#
# images_2_show = []
# titles_2_show = []
# for i in range(0, 10):
#     r = random.randint(1, 60000)
#     images_2_show.append(x_train[r])
#     titles_2_show.append('training image [' + str(r) + '] = ' + str(y_train[r]))    

# for i in range(0, 5):
#     r = random.randint(1, 10000)
#     images_2_show.append(x_test[r])        
#     titles_2_show.append('test image [' + str(r) + '] = ' + str(y_test[r]))    

# show_images(images_2_show, titles_2_show)

In [356]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def softmax(x):
    return np.exp(x) / sum(np.exp(x))
def d_relu(x):
    return x>0

In [357]:
from itertools import chain
x_arr_train = np.array([list(chain(*x_train[0]))])
y_arr_train = np.array(y_train[0:5000])
x_arr_test = np.array([list(chain(*x_test[0]))])
y_arr_test = np.array(y_test[0:5000])
for i in range(1,5000):
    x_arr_train = np.concatenate((x_arr_train,np.array([list(chain(*x_train[i]))])), axis=0)
    x_arr_test = np.concatenate((x_arr_test,np.array([list(chain(*x_test[i]))])), axis=0)
x_arr_train = x_arr_train/255
x_arr_test = x_arr_test/255


In [363]:
def forward_prop(x_arr,W1,W2,b1,b2):    
    """hidden layer"""
    A1 = W1.dot(x_arr.T) + b1
    O1 = np.maximum(0,A1)
    """predict layer"""
    A2 = W2.dot(O1) + b2
    O2 = softmax(A2)
    return A1,O1,A2,O2

def back_prop(x_arr,y_arr,A1,O1,O2,W2):
    Y = np.zeros((y_arr.size,10))
    Y[np.arange(y_arr.size),y_arr] = 1
    Y = Y.T
    m = y_arr.size
    dA2 = O2 - Y
    dW2 = 1/m * dA2.dot(O1.T)
    db2 = 1/m * np.sum(dA2)
    dA1 = W2.T.dot(dA2)*d_relu(A1)
    dW1 = 1/m * dA1.dot(x_arr)
    db1 = 1/m * np.sum(dA1)
    return dW1,db1,dW2,db2

def get_predictions(O2):
    return np.argmax(O2, 0)

def get_accuracy(predictions, Y):
    return (np.sum(predictions == Y) / Y.size)*100


In [365]:
W1 = np.random.randn(10, 784) * np.sqrt(2 / 784)
W2 = np.random.randn(10, 10) * np.sqrt(2 / 10)
b1 = np.random.randn(10, 1)
b2 = np.random.randn(10, 1)
loop = 1000
learning_rate = 0.1
for i in range(loop):
    A1,O1,A2,O2 = forward_prop(x_arr_train,W1,W2,b1,b2)
    dW1,db1,dW2,db2 = back_prop(x_arr_train,y_arr_train,A1,O1,O2,W2)
    W1 = W1 - learning_rate * dW1
    W2 = W2 - learning_rate * dW2
    b1 = b1 - learning_rate * db1
    b2 = b2 - learning_rate * db2

In [366]:
A1,O1,A2,O2 = forward_prop(x_arr_test,W1,W2,b1,b2)

In [367]:
predictions = get_predictions(O2)
print(get_accuracy(predictions, y_arr_test))

84.74000000000001
