In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
from sklearn.metrics import confusion_matrix
from scipy.special import expit # For sigmoid function


#class of neural networks and make multi layer perceptron
class Neural_Network:

#Loading the files and initializing the values
    def __init__(self, train_data, test_data): 

#read files 
        self.training_data = np.array(pd.read_csv("mnist_train.csv"))
        self.testing_data = np.array(pd.read_csv("mnist_test.csv"))

#(Exp 3)for 30,000 and 15,000
        np.random.shuffle(self.training_data)
        self.training_data = self.training_data[0:15000]

#number of hidden layer 20 units + 1 bias [20, 50, 100] (Exp 1)
        self.n = 100

#create random weights nd-matrix (785 X 20) ; (21 X 10) for n = (20,50,100)
        self.weights_xh = np.random.uniform(-0.05, 0.05, (785, self.n))
        self.weights_hk = np.random.uniform(-0.05, 0.05, (self.n+1, 10))

#bias value X0 = 1
        self.bias = 1

#store previous delta wt from hidden to output layer
        self.prev_wt_ho = np.zeros((self.n+1, 10))

#store previous delta wt from input to hidden layer
        self.prev_wt_ih = np.zeros((785, self.n))

#learning rates stays constant for all experiments
        self.learning_rate = 0.1

#selecting different momemtum valuesaccording to Experiment 2 and keeping hidden nodes constant [(n=100)--> 0.9, 0, 0.25, 0.5] (Exp 2)
        self.momentum = 0.9

#to store activations from hidden unint
        self.activation_h= np.zeros((1, self.n+1))
        self.activation_h[0,0] = 1 

    def process(self, train, epoch):
        actual_list=[]
        predicted_list = []

        if (train == 1): 
            data = self.training_data
        else:            
            data = self.testing_data

#loop to run through entire data set
        for i in range(0, data.shape[0]):  

            actual_value = data[i,0].astype('int')
            actual_list.append(actual_value)

#preprocessing the data
            xi = data[i].astype('float16')/255
            xi[0] = self.bias
            xi = xi.reshape(1, 785)

#Sigmoid activation calculations of hidden (20 + 1) and output (10) unit
            z_h = np.dot(xi, self.weights_xh)
#sigma fn
            act_h = expit(z_h)      
            self.activation_h[0,1:] =act_h

            z_k = np.dot(self.activation_h, self.weights_hk)
#sigma fn
            act_k = expit(z_k)      

#store max of output units activations
            predicted_list.append(np.argmax(np.array((act_k))))

#backpropagation using stochastic gradiant descent

            if (epoch > 0 and train == 1):

#error calculation
                tk = np.zeros((1, 10)) + 0.1
                tk[0, actual_value] = 0.9

#error at output
                error_op = act_k * (1 - act_k) * (tk - act_k)

#error at hidden units
                error_hu = act_h * (1 - act_h) * (np.dot(error_op, self.weights_hk[1:, :].T))

#update weights 

                delta_weight_ho = (self.learning_rate * error_op * self.activation_h.T) + (self.momentum* self.prev_wt_ho)
                self.prev_wt_ho = delta_weight_ho
                self.weights_hk = self.weights_hk + delta_weight_ho

                delta_weight_ih = (self.learning_rate * error_hu * xi.T) + (self.momentum * self.prev_wt_ih)
                self.prev_wt_ih = delta_weight_ih
                self.weights_xh = self.weights_xh + delta_weight_ih

#calculate accuracy of training and testing data for each epoch.
        accuracy_x = (np.array(predicted_list) == actual_list).sum() / float(len(actual_list)) * 100

#confusion matrix for testing data.
        if(epoch > 0):
                if(train == 1):
                    print("for testing")
#confusion matrix for training data
                else:
                    print("for training")
                print("confusion matrix for epoch ",epoch)
                print(confusion_matrix(actual_list, predicted_list))

        return accuracy_x


    def store_accuracy(self, epoch, x_accuracy, file_name):
#store accuracies of training and testing data
        with open(file_name, 'a', newline='') as file:
            wr = csv.writer(file)
            wr.writerow([epoch, x_accuracy])


train_data = "mnist_train.csv"
test_data = "mnist_test.csv"

#initialize values before sending testing set.
NN = Neural_Network(train_data, test_data)
for epoch in range(31): # run for epochs
    train_accuracy = NN.process(1, epoch)
    test_accuracy = NN.process(0, epoch)
    NN.store_accuracy(epoch, train_accuracy, 'train_acc' + str(NN.learning_rate) + '_' +str(NN.momentum) + '_' +str(NN.n) + '_' +'15k'+ '.csv')
    NN.store_accuracy(epoch, test_accuracy, 'test_acc' + str(NN.learning_rate) + '_' + str(NN.momentum)+ '_' +str(NN.n)+ '_' +'15k' + '.csv')


    
    
    



for testing
confusion matrix for epoch  1
[[1375    1    6    6   16   14   21    5   32    2]
 [   4 1621   18   12   11    7    3    5   25    6]
 [  21   19 1279   29   36    6   25   24   34   13]
 [  19   14   53 1275    7   67   13   15   60   19]
 [  10   10   10    1 1296    2   22    5   18  103]
 [  27   17   13   68   22 1055   31    7   47   16]
 [  22    8   18    5   26   19 1377    1   18    1]
 [  15   24   27   10   35    4    3 1368   11   69]
 [  16   31   26   37   13   29   14    8 1242   34]
 [  12    9   13   23  112   18    3   52   29 1220]]
for training
confusion matrix for epoch  1
[[ 972    0    0    1    1    3    1    1    1    0]
 [   0 1101    3    5    2    2    3    2   17    0]
 [  25    1  906   14   24    6    1   21   29    5]
 [   5    0    6  915    2   35    0   11   27    9]
 [   2    1    4    0  953    0    2    1    4   15]
 [  12    2    1    9   15  826    5    4   11    7]
 [  25    3    2    1   32   10  874    0   11    0]
 [   2    8  

for training
confusion matrix for epoch  8
[[ 971    0    1    1    0    1    1    2    3    0]
 [   0 1118    1    4    0    1    3    0    8    0]
 [  12    1  969   12    5    0    0   12   19    2]
 [   2    0    9  956    0   13    0    5   21    4]
 [   2    2    4    0  955    0    3    1    2   13]
 [   4    1    1   12    5  841    7    3   14    4]
 [  12    3    4    1   14    2  912    1    9    0]
 [   2    7   11    0    3    0    0  987    7   10]
 [  10    0    2    0   10    3    3    6  937    3]
 [   8    5    1   10   36    5    0    8   12  924]]
for testing
confusion matrix for epoch  9
[[1460    0    2    0    0    0    3    0   13    0]
 [   0 1694    7    0    2    1    1    2    4    1]
 [   0    0 1476    0    2    0    0    5    3    0]
 [   1    0    5 1509    0    4    0    6   12    5]
 [   1    0    2    0 1465    0    2    0    0    7]
 [   1    0    1    2    0 1290    3    1    3    2]
 [   5    1    0    0    0    2 1485    0    2    0]
 [   1    2  

for testing
confusion matrix for epoch  16
[[1470    0    1    0    0    0    1    0    6    0]
 [   0 1702    6    1    0    1    0    0    2    0]
 [   0    0 1481    0    0    0    0    2    3    0]
 [   0    0    2 1528    0    3    0    1    5    3]
 [   0    0    1    0 1473    0    0    0    0    3]
 [   0    0    1    3    1 1294    2    0    1    1]
 [   1    0    0    0    0    0 1493    0    1    0]
 [   1    0    2    0    0    0    0 1559    1    3]
 [   1    0    0    1    0    0    0    1 1447    0]
 [   2    0    1    2    0    0    0    0    4 1482]]
for training
confusion matrix for epoch  16
[[ 970    0    1    2    0    1    0    2    4    0]
 [   0 1120    1    4    0    2    2    0    6    0]
 [  11    3  978    9    3    0    0   10   16    2]
 [   1    0    8  956    0   18    0    6   17    4]
 [   4    2    3    0  951    0    3    1    2   16]
 [   5    0    1   13    4  836    7    1   19    6]
 [   8    3    3    1    8    4  918    1   12    0]
 [   4    6

for training
confusion matrix for epoch  23
[[ 961    0    3    3    0    0    2    2    7    2]
 [   0 1120    3    3    0    1    3    0    5    0]
 [   5    3  983    8    2    0    0   13   16    2]
 [   1    0   10  957    0   16    0    6   17    3]
 [   5    1    2    0  948    0    3    0    3   20]
 [   8    1    1   16    3  823   11    2   19    8]
 [   4    3    3    1    5    2  925    1   14    0]
 [   1    5   14    3    2    1    1  974   11   15]
 [  10    1    5    3    4    3    3    2  939    4]
 [   5    4    1    9   23    4    0    8   17  938]]
for testing
confusion matrix for epoch  24
[[1473    0    0    0    0    0    0    0    5    0]
 [   0 1705    3    1    1    0    0    0    2    0]
 [   0    0 1485    0    0    0    0    0    1    0]
 [   0    0    2 1534    0    0    0    0    3    3]
 [   0    0    0    0 1477    0    0    0    0    0]
 [   0    0    0    4    0 1296    2    0    1    0]
 [   1    0    0    0    0    0 1494    0    0    0]
 [   1    0