In [None]:
import numpy as np
from scipy.special import expit
from sklearn.metrics import confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
from mnist.loader import MNIST
import random

In [None]:
# Layer sizes. in_dim counts every input column fed to the network, including
# the constant-1 bias column that the data-loading cell prepends to each sample.
in_dim, hidden_dim, out_dim = 785, 100, 10

# Learning rate. You might try different rates (e.g. 0.001, 0.01, 0.1) to maximize the accuracy
eta = 0.1

# Scratch row holding the hidden-layer activations h1...hk used as input to the
# output layer. Column 0 is a constant 1 (bias input); columns 1..hidden_dim
# start at zero and are overwritten on every weight update.
hl_input = np.concatenate((np.ones((1, 1)), np.zeros((1, hidden_dim))), axis=1)

def weight_update(feature, label, weight_i2h, weight_h2o, learning_rate=None):
    """Perform one stochastic-gradient backpropagation step for a single sample.

    The network has one sigmoid hidden layer and a sigmoid output layer. Both
    error terms (deltas) are computed from the weights that produced the
    forward pass, and only afterwards are the two weight matrices updated.

    Parameters
    ----------
    feature : array-like
        One input sample; it is flattened to a single row whose length must
        match ``weight_i2h.shape[0]`` (bias column included).
    label : int
        Index of the target class; it is turned into a one-hot target vector.
    weight_i2h : ndarray, shape (n_inputs, n_hidden)
        Input-to-hidden weights. Updated IN PLACE.
    weight_h2o : ndarray, shape (n_hidden + 1, n_outputs)
        Hidden-to-output weights; row 0 multiplies the constant-1 bias input.
        Updated IN PLACE.
    learning_rate : float, optional
        Step size. Defaults to the module-level ``eta`` when omitted.

    Returns
    -------
    (weight_i2h, weight_h2o) : tuple of ndarray
        The same two array objects that were passed in, after the update.
    """
    if learning_rate is None:
        learning_rate = eta

    sample = np.asarray(feature).reshape(1, -1)

    # ---- forward pass ----
    sig_hl = expit(np.dot(sample, weight_i2h))

    # Hidden activations with a leading constant 1 so that row 0 of
    # weight_h2o acts as the output-layer bias. A local buffer is used so the
    # function carries no state between calls.
    hidden_in = np.ones((1, sig_hl.shape[1] + 1))
    hidden_in[0, 1:] = sig_hl

    sig_ol = expit(np.dot(hidden_in, weight_h2o))

    # ---- backward pass ----
    target = np.zeros_like(sig_ol)
    target[0, label] = 1
    delta_ol = sig_ol * (1 - sig_ol) * (sig_ol - target)

    # The hidden delta must use the hidden-to-output weights from the forward
    # pass, so compute it BEFORE weight_h2o is modified below. The bias row
    # (index 0) is skipped because no hidden unit feeds it.
    delta_hl = sig_hl * (1 - sig_hl) * np.dot(delta_ol, weight_h2o[1:].T)

    # ---- gradient step (in place, so callers' arrays are updated too) ----
    weight_h2o -= learning_rate * np.dot(hidden_in.T, delta_ol)
    weight_i2h -= learning_rate * np.dot(sample.T, delta_hl)

    return weight_i2h, weight_h2o


def get_predictions(dataset, weight_i2h, weight_h2o):
    """Predict a class index for every sample with a forward pass.

    Parameters
    ----------
    dataset : ndarray, shape (n_samples, n_inputs) or (n_inputs,)
        Input samples, one per row. A single 1-D sample is treated as a
        one-row batch.
    weight_i2h : ndarray, shape (n_inputs, n_hidden)
        Input-to-hidden weights.
    weight_h2o : ndarray, shape (n_hidden + 1, n_outputs)
        Hidden-to-output weights; row 0 multiplies the constant-1 bias input.

    Returns
    -------
    ndarray, shape (n_samples,)
        Index of the output unit with the largest sigmoid activation per sample.
    """
    samples = np.atleast_2d(dataset)

    sig_hl = expit(np.dot(samples, weight_i2h))

    # Size the hidden-layer buffer from the activations themselves rather than
    # a module-level constant, so the function works with any weight shapes.
    # Column 0 stays at 1 and serves as the bias input of the output layer.
    hl_in = np.ones((samples.shape[0], sig_hl.shape[1] + 1))
    hl_in[:, 1:] = sig_hl

    sig_ol = expit(np.dot(hl_in, weight_h2o))

    return np.argmax(sig_ol, axis=1)


def train(train_set, labels, weight_i2h, weight_h2o):
    """Run one pass over the training set, updating the weights sample by sample.

    Rows of ``train_set`` are visited in index order; for each row the matching
    entry of ``labels`` is passed to ``weight_update`` together with the
    current weights, and the weights it returns are carried to the next row.

    Returns the ``(weight_i2h, weight_h2o)`` pair after the last update (the
    inputs unchanged when ``train_set`` has no rows).
    """
    n_samples = train_set.shape[0]
    for row in range(n_samples):
        sample, target = train_set[row, :], labels[row]
        updated = weight_update(sample, target, weight_i2h, weight_h2o)
        weight_i2h, weight_h2o = updated
    return weight_i2h, weight_h2o

## Evaluating NN classifier

In [None]:
# Load the training and test splits from ./data. Each loader call yields an
# (images, labels) pair, and both parts are converted to NumPy arrays.
mndata = MNIST('./data')
X_train, Y_train = [np.array(part) for part in mndata.load_training()]
X_test, Y_test = [np.array(part) for part in mndata.load_testing()]

# Divide the inputs by 255.0 (presumably mapping 8-bit pixel intensities to
# [0, 1] -- confirm against the loader) and prepend a column of ones to each
# sample, so the first row of the input-to-hidden weights acts as a bias.
X_train = np.concatenate((np.ones((len(X_train), 1)), X_train / 255.0), axis=1)
X_test = np.concatenate((np.ones((len(X_test), 1)), X_test / 255.0), axis=1)

In [None]:
num_epochs = 5
init_seed = 42  # fixed seed so a fresh "Restart & Run All" reproduces the same run

# Draw the initial weights from a dedicated, seeded generator instead of the
# unseeded global NumPy RNG. This makes the reported accuracies reproducible
# without touching the global random state used elsewhere.
init_rng = np.random.RandomState(init_seed)
weight_1 = init_rng.uniform(-0.05, 0.05, (in_dim, hidden_dim))        # input -> hidden
weight_2 = init_rng.uniform(-0.05, 0.05, (hidden_dim + 1, out_dim))   # hidden (+ bias) -> output

# Per-epoch accuracy history (one entry appended per loop iteration).
arr_train_acc = []
arr_test_acc = []

# NOTE: inside the loop the network is evaluated BEFORE that iteration's
# training pass. The numbers printed for "Epoch i" therefore describe the
# network after i-1 passes over the training set ("Epoch 1" is the untrained,
# randomly initialised network). The result of the last pass is only reported
# by the final evaluation after the loop.
for i in range(1, num_epochs+1):
    # Test network on training set and get training accuracy
    pred_train_labels = get_predictions(X_train, weight_1, weight_2)
    curr_accu = accuracy_score(Y_train, pred_train_labels)

    print("Epoch " + str(i) + " :\tTraining Set Accuracy = " + str(curr_accu))
    # Test network on test set and get accuracy on test set
    pred_test_labels = get_predictions(X_test, weight_1, weight_2)
    test_accu = accuracy_score(Y_test, pred_test_labels)
    print("\t\tTest Set Accuracy = " + str(test_accu))
    # Train the network: one pass over the training set, one update per sample
    weight_1, weight_2 = train(X_train, Y_train, weight_1, weight_2)

    arr_train_acc.append(curr_accu)
    arr_test_acc.append(test_accu)

# Test network on test set and get test accuracy (after the final training pass)
pred_test_labels = get_predictions(X_test, weight_1, weight_2)
test_accu = accuracy_score(Y_test, pred_test_labels)

# Confusion Matrix
print("\t\tFinal Accuracy = " + str(test_accu) + "\n\nConfusion Matrix :\n")
print(confusion_matrix(Y_test, pred_test_labels))
print("\n")

Epoch 1 :	Training Set Accuracy = 0.10515
		Test Set Accuracy = 0.1035
Epoch 2 :	Training Set Accuracy = 0.929
		Test Set Accuracy = 0.9316
Epoch 3 :	Training Set Accuracy = 0.9537833333333333
		Test Set Accuracy = 0.9525
Epoch 4 :	Training Set Accuracy = 0.96395
		Test Set Accuracy = 0.9602
Epoch 5 :	Training Set Accuracy = 0.97065
		Test Set Accuracy = 0.9635
		Final Accuracy = 0.9676

Confusion Matrix :

[[ 972    0    0    1    0    2    2    1    1    1]
 [   0 1123    2    2    0    1    2    1    4    0]
 [   8    2  996    7    5    0    2    7    5    0]
 [   0    0    5  994    0    2    0    5    3    1]
 [   1    0    4    0  950    0    6    1    2   18]
 [   7    3    2   13    2  841   10    3    5    6]
 [  10    4    1    1    3    4  930    0    5    0]
 [   3   11   15    6    1    0    1  975    0   16]
 [   4    3    3    7    5    4    5    4  935    4]
 [   6    6    0   13    9    2    0    7    6  960]]


