In [1]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.preprocessing as skp
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
import keras

Using TensorFlow backend.


In [2]:
dataset = tf.keras.datasets.mnist

In [3]:
(X_train, Y_train), (X_test, Y_test) = dataset.load_data()

In [4]:
X_train = keras.utils.normalize(X_train,axis=1)
X_test = keras.utils.normalize(X_test,axis=1)

In [5]:
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)


In [6]:
X_train = X_train.flatten().reshape(60000,784)
X_test = X_test.flatten().reshape(10000,784)
Y_train = Y_train.reshape(60000,1).astype('float32')
Y_test = Y_test.reshape(10000,1).astype('float32')

In [8]:
Enc = OneHotEncoder(sparse = False, categories = 'auto')
Y_train = Enc.fit_transform(Y_train)
Y_test = Enc.fit_transform(Y_test)

In [9]:
class Neural_Net:
    def __init__(self, ip_nodes, hdn_nodes, op_nodes):
        self.W_01 = np.random.normal(0,1,(hdn_nodes, ip_nodes))
        self.B_01 = np.random.normal(0,1,(hdn_nodes,1))
        self.W_12 = np.random.normal(0,1,(op_nodes, hdn_nodes))
        self.B_12 = np.random.normal(0,1,(op_nodes, 1))
    
    def Sigmoid(self, Z):
        return (1/(1+(np.exp(-Z))))
    
    def Softmax(self, Z):
        return ((np.exp(Z))/(np.exp(Z).sum()))
    
    def Predict(self, X):
        self.Z1 = (self.W_01 @ X.T) + self.B_01
        self.A1 = self.Sigmoid(self.Z1)
        self.Z2 = (self.W_12 @ self.A1) + self.B_12
        self.A2 = self.Softmax(self.Z2)
        return self.A2.T
    
    def Train(self, X_train, Y_train, Epochs, Batch_size, Alpha):
        e = Epochs
        while e>0:
            itr = (X_train.shape[0]) // Batch_size
            for i in range(itr):
                # Forward Propogation
                X = X_train[(Batch_size*i):(Batch_size*(i+1))]
                Y = Y_train[(Batch_size*i):(Batch_size*(i+1))].T
                self.Z1 =  (self.W_01 @ (X.T)) + self.B_01
                self.A1 = self.Sigmoid(self.Z1)
                self.Z2 = (self.W_12 @ self.A1) + self.B_12
                self.A2 = self.Softmax(self.Z2)
                
                self.A1_t = self.A1.T
                self.A2_t = self.A2.T
                
                if i == 0:
                    Predictions = np.array(self.A2_t)
                else:
                    Predictions = np.vstack((Predictions,self.A2_t))
                
                # Loss Calculation
                L = (Y.T * np.log(self.A2_t)).sum()
                Loss = -(1/Batch_size) * L
                    
                # Calculate Gradients
                dloss_B_12 = -(Y * (1 - self.A2))
                dloss_W_12 = dloss_B_12 @ self.A1.T
                dloss_B_01 = ((dloss_B_12.T @ self.W_12) * self.A1_t * (1 - self.A1_t)).T
                dloss_W_01 = dloss_B_01 @ X
                    
                # Backpropogation
                self.W_01 = self.W_01 - Alpha * dloss_W_01
                self.B_01 = self.B_01 - Alpha * dloss_B_01
                self.W_12 = self.W_12 - Alpha * dloss_W_12
                self.B_12 = self.B_12 - Alpha * dloss_B_12
                            
            # Calculate Overall Loss
            L_train = (Y_train * np.log(Predictions)).sum()
            Train_loss = -(1/Y_train.shape[0]) * L_train
            
            Y_train_Orig = np.argmax(Y_train, axis=1)
            Y_train_Predict = np.argmax(Predictions, axis=1)
            
            print("Epoch",Epochs-e+1,"    loss:",Loss,"    accuracy:",accuracy_score(Y_train_Orig,Y_train_Predict), "\n")
            e = e - 1
    
    def Test(self, X_test, Y_test, Batch_size):
        itr = (X_test.shape[0]) // Batch_size
        for i in range(itr):
            X = X_test[(Batch_size*i):(Batch_size*(i+1))]
            Y_pred = self.Predict(X)
            
            if i == 0:
                Y_predicted = np.array(Y_pred)
            else:
                Y_predicted = np.vstack((Y_predicted,Y_pred))
        
        L_test = (Y_test * np.log(Y_predicted)).sum()
        Test_loss = -(1/Y_test.shape[0]) * L_test
        
        Y_test_Orig = np.argmax(Y_test, axis=1)
        Y_test_Predict = np.argmax(Y_predicted, axis=1)
        
        print(classification_report(Y_test_Orig,Y_test_Predict))
        print(confusion_matrix(Y_test_Orig,Y_test_Predict))
        print("loss:",Test_loss, "accuracy:",accuracy_score(Y_test_Orig,Y_test_Predict))

In [10]:
ip = int(input("Enter the number of nodes in input layer: "))
hdn = int(input("Enter the number of nodes in 1st hidden layer: "))
op = int(input("Enter the number of nodes in output layer: "))
e = int(input("Enter the number of epochs: "))
batch = int(input("Enter the batch size: "))
lr = float(input("Enter the learning rate: "))
NN = Neural_Net(ip, hdn, op)

Enter the number of nodes in input layer: 784
Enter the number of nodes in 1st hidden layer: 256
Enter the number of nodes in output layer: 10
Enter the number of epochs: 5
Enter the batch size: 1
Enter the learning rate: 0.0001


In [11]:
NN.Train(X_train, Y_train, e, batch, lr)

Epoch 1     loss: 7.660795216156919     accuracy: 0.18545 

Epoch 2     loss: 4.099623898279769     accuracy: 0.3624 

Epoch 3     loss: 2.748261894439406     accuracy: 0.46576666666666666 

Epoch 4     loss: 2.6964890489739455     accuracy: 0.5219666666666667 

Epoch 5     loss: 2.2548829189067274     accuracy: 0.55435 



In [13]:
NN.Test(X_test, Y_test, batch)

              precision    recall  f1-score   support

           0       0.90      0.59      0.72       980
           1       0.95      0.55      0.69      1135
           2       0.70      0.54      0.61      1032
           3       0.50      0.63      0.55      1010
           4       0.67      0.55      0.60       982
           5       0.42      0.49      0.45       892
           6       0.82      0.53      0.64       958
           7       0.84      0.56      0.67      1028
           8       0.32      0.56      0.41       974
           9       0.34      0.59      0.43      1009

   micro avg       0.56      0.56      0.56     10000
   macro avg       0.65      0.56      0.58     10000
weighted avg       0.65      0.56      0.58     10000

[[583   0   4  46   6 112  16   2  70 141]
 [  0 620   2 192   0   7  14   0 179 121]
 [ 10  11 556  35  15  24  31   5 204 141]
 [  7   5  11 632   3 186   5   4  89  68]
 [  0   0  41  16 537  24  13  19 172 160]
 [ 15   2   6 143  38 441 