In [1]:
import numpy as np
import pandas as pd

In [2]:
def one_hot_encoding(Y):
    Y_len = np.shape(Y)[0]
    vector = np.zeros((Y_len, 10))
    vector[np.arange(0, Y_len, 1), Y] = 1
    return vector

In [3]:
train = pd.read_csv("mnist_train.csv")
train = np.array(train)
train_Y = one_hot_encoding(train[:, 0])
train_X = train[:, 1:]

In [4]:
weights_first_layer = np.random.rand(20, 784) - 0.5
bias_first_layer = np.zeros(20)
weights_second_layer = np.random.rand(10, 20) - 0.5
bias_second_layer = np.zeros(10)

In [5]:
def Sigmoid(X):
    X = np.clip(X, -700, 700) # to avoid overflow
    return 1 / (1 + np.exp(-X))

def softmax(X):
    X = X - np.max(X) # to avoid overflow
    return np.e**X / np.sum(np.e**X) 

def get_error(output, expected_output):
    return np.sum((output - expected_output) ** 2) / len(output)

def train(images, labels):
  
    epochs = 7
    learning_rate = 0.001
    
    global weights_first_layer
    global bias_first_layer
    global weights_second_layer
    global bias_second_layer
    
    for _ in range(epochs):
        no_of_correct_answers = 0
        for image, label in zip(images, labels):
            
            # Forwards propagation
            first_layer_values = Sigmoid(np.dot(image, weights_first_layer.transpose()) + bias_first_layer)
            second_layer_values = softmax(np.dot(first_layer_values, weights_second_layer.transpose()) + bias_second_layer)
            
            # Backwards propagation second_layer->first_layer
            delta_second_layer = second_layer_values - label
            weights_second_layer += -learning_rate * (np.dot(delta_second_layer.reshape(10, 1), first_layer_values.reshape(1, 20)))
            bias_second_layer += (-learning_rate * (np.dot(delta_second_layer.reshape(10, 1), 1))).flatten()
            
            # Backwards propagation first_layer->input
            delta_first_layer = np.dot(weights_second_layer.T, delta_second_layer) * (first_layer_values * (1 - first_layer_values))
            weights_first_layer += -learning_rate * (np.dot(delta_first_layer.reshape(20, 1), image.reshape(1, 784)))
            bias_first_layer += (-learning_rate * (np.dot(delta_first_layer.reshape(20, 1), 1))).flatten() 
                        
            no_of_correct_answers += (1 if np.argmax(second_layer_values) == np.argmax(label) else 0)
        print("Accuracy", no_of_correct_answers / images.shape[0] * 100, "%")
        
def test(images, labels):
    no_of_correct_answers = 0
    for image, label in zip(images, labels):
        first_layer_values = Sigmoid(np.dot(image, weights_first_layer.transpose()) + bias_first_layer)
        second_layer_values = softmax(np.dot(first_layer_values, weights_second_layer.transpose()) + bias_second_layer)
        
        no_of_correct_answers += (1 if np.argmax(second_layer_values) == np.argmax(label) else 0)
        get_error(second_layer_values, label)
    
    print("Accuracy", no_of_correct_answers / images.shape[0] * 100, "%")


In [6]:
train(train_X, train_Y)

Accuracy 57.483333333333334 %
Accuracy 73.495 %
Accuracy 77.73833333333333 %
Accuracy 79.41166666666668 %
Accuracy 79.99000000000001 %
Accuracy 80.26833333333333 %
Accuracy 81.70500000000001 %


In [7]:
test_date = pd.read_csv("mnist_test.csv")
test_date = np.array(test_date)
test_Y = one_hot_encoding(test_date[:, 0])
test_X = test_date[:, 1:]

In [8]:
test(test_X, test_Y)

Accuracy 81.76 %
