Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

Load Data

In [None]:
import tensorflow as tf

In [None]:
# Load the MNIST images and labels through Keras; the call returns a
# (train, test) pair of (images, labels) tuples, unpacked here.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [None]:
# List the distinct label values present in each split.
np.unique(y_train), np.unique(y_test)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8),
 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8))

In [None]:
# Flatten every training sample into a single row of features.
x_train = np.reshape(x_train, (x_train.shape[0], -1))

In [None]:
# Flatten every test sample into a single row of features.
x_test = np.reshape(x_test, (x_test.shape[0], -1))

Standardize Data

In [None]:
# Standardise every feature column to zero mean and unit variance.
# The statistics are estimated on the training split only and reused for the
# test split, so both splits go through exactly the same transformation.
feature_mean = np.mean(x_train, axis=0)
feature_std = np.std(x_train, axis=0)
# Columns that never vary in the training data have std == 0. Dividing by 1
# keeps them centred instead of dividing by (almost) zero.
feature_std[feature_std == 0] = 1.0
x_train = (x_train - feature_mean) / feature_std
x_test = (x_test - feature_mean) / feature_std

Apply one-hot encoding to the training labels

In [None]:
# One-hot encode the training labels: row i of the identity matrix is the
# encoding of class i, so indexing it with the labels builds the whole matrix.
num_classes = len(np.unique(y_train))
y_train_onehot = np.eye(num_classes)[y_train]

In [None]:
# Show the first three encoded training labels as a sanity check.
y_train_onehot[:3]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])

Apply one-hot encoding to the test labels

In [None]:
# One-hot encode the test labels: row i of the identity matrix is the
# encoding of class i, so indexing it with the labels builds the whole matrix.
num_classes = len(np.unique(y_test))
y_test_onehot = np.eye(num_classes)[y_test]

In [None]:
# Show the first three encoded test labels as a sanity check.
y_test_onehot[:3]

array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]])

Sigmoid Function

In [None]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^-z), applied element-wise."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

sigmoid_derivative Function

In [None]:
def sigmoid_derivative(x):
    """Return x * (1 - x) element-wise.

    This equals the sigmoid's slope when ``x`` is already a sigmoid *output*
    (an activation), which is how the network class in this notebook calls it.
    """
    complement = 1 - x
    return x * complement

softmax Function

In [None]:
def softmax(z):
    """Return the row-wise softmax of a 2-D array of scores.

    Each row's maximum is subtracted before exponentiating so large scores do
    not overflow; the shift cancels out in the normalisation.
    """
    shifted = z - np.max(z, axis=1, keepdims=True)
    numerators = np.exp(shifted)
    return numerators / np.sum(numerators, axis=1, keepdims=True)

Cost Function

In [None]:
def cost_function(phiz, y_train):
    """Return the mean binary cross-entropy between predictions and targets.

    Args:
        phiz: Predicted probabilities, same shape as ``y_train``.
        y_train: Target values in [0, 1] (typically 0/1 or one-hot rows).

    Returns:
        ``-mean(y * log(p) + (1 - y) * log(1 - p))``: non-negative, and
        smaller for better predictions.
    """
    # Keep probabilities strictly inside (0, 1) so log() never sees 0, which
    # would turn the loss into inf/NaN for fully confident predictions.
    eps = 1e-12
    p = np.clip(phiz, eps, 1 - eps)
    return -(y_train * np.log(p) + (1 - y_train) * np.log(1 - p)).mean()

mse_loss

In [None]:
def mse_loss(y_pred, y_true):
    """Return the mean of the squared differences between prediction and target."""
    squared_residuals = np.power(y_pred - y_true, 2)
    return np.mean(squared_residuals)


Building a Dynamic Neural Network (configurable number and size of layers)

In [None]:
class NeuralNetwork:
    """Fully connected feed-forward classifier trained by full-batch gradient descent.

    Hidden layers use the module-level ``sigmoid``; the last layer uses the
    module-level ``softmax``. Parameters are drawn from a standard normal
    distribution with ``np.random.randn``.

    Attributes:
        x, y: The arrays passed to the constructor (kept for reference).
        num_of_layers: Number of weight layers (hidden layers + output layer).
        size_of_layers: Number of units in each layer, in forward order.
        t_weights: One ``(units, fan_in)`` weight matrix per layer.
        t_bias: One ``(units,)`` bias vector per layer.
        t_activation: Activations of every layer from the latest forward pass.
        errors: Error terms from the latest ``backward`` call, output layer
            first, each shaped ``(samples, units)``.
        weights, bias: The parameters of the last layer created in ``__init__``.
    """

    def __init__(self, x, y, num_of_layers, size_of_layers):
        """Create randomly initialised parameters for every layer.

        Args:
            x: 2-D feature array; only ``x.shape[1]`` is used to size the
                first weight matrix.
            y: Target array, stored as ``self.y``.
            num_of_layers: Number of layers to create.
            size_of_layers: Sequence with the unit count of each layer.
        """
        self.x = x
        self.y = y
        self.num_of_layers = num_of_layers
        self.size_of_layers = size_of_layers
        self.t_weights = []
        self.t_bias = []
        self.t_activation = []

        fan_in = x.shape[1]
        for i in range(num_of_layers):
            units = size_of_layers[i]
            self.weights = np.random.randn(units, fan_in)
            self.t_weights.append(self.weights)
            self.bias = np.random.randn(units)
            self.t_bias.append(self.bias)
            # The next layer consumes this layer's output.
            fan_in = units

    def forward(self, x, y):
        """Propagate ``x`` through the network.

        Args:
            x: 2-D array of samples, one row per sample.
            y: Unused; accepted so existing ``forward(x, y)`` calls keep working.

        Returns:
            List with the activation of every layer (last entry: the softmax
            output). The same list is stored in ``self.t_activation``.
        """
        # Start from a fresh list on every pass so the stored activations
        # always belong to the current input and do not pile up across epochs.
        activations = []
        layer_input = x
        last_layer = self.num_of_layers - 1
        for i in range(self.num_of_layers):
            z = np.dot(layer_input, self.t_weights[i].T) + self.t_bias[i]
            layer_input = sigmoid(z) if i < last_layer else softmax(z)
            activations.append(layer_input)
        self.t_activation = activations
        return self.t_activation

    def backward(self, x, y, learning_rate):
        """Run one forward pass and one gradient-descent update.

        The output error is ``softmax_output - y``, i.e. the gradient of the
        mean cross-entropy with respect to the output pre-activations.

        Args:
            x: 2-D array of samples.
            y: One-hot targets with one row per sample.
            learning_rate: Step size of the update.

        Returns:
            The network output computed *before* the parameters were updated.
        """
        activations = self.forward(x, y)
        n_samples = len(x)
        # Input seen by layer i: the raw features for the first layer, the
        # previous layer's activation otherwise.
        layer_inputs = [x] + activations[:-1]

        delta = activations[-1] - y
        self.errors = [delta]
        gradients = []
        for i in range(self.num_of_layers - 1, -1, -1):
            grad_w = np.dot(delta.T, layer_inputs[i]) / n_samples
            # One bias gradient per unit (averaged over samples). A single
            # scalar mean would be ~0 at the output, where every row of
            # (softmax - one-hot) sums to zero.
            grad_b = np.mean(delta, axis=0)
            gradients.append((i, grad_w, grad_b))
            if i > 0:
                # Push the error through this layer's (not yet updated)
                # weights and the sigmoid slope of the layer below.
                delta = np.dot(delta, self.t_weights[i]) * sigmoid_derivative(activations[i - 1])
                self.errors.append(delta)

        # Apply all updates only after every gradient has been computed, so no
        # layer is back-propagated through already-modified weights.
        for i, grad_w, grad_b in gradients:
            self.t_weights[i] -= learning_rate * grad_w
            self.t_bias[i] -= learning_rate * grad_b

        return activations[-1]

    def predict(self, y_true, y_pred):
        """Return the percentage of rows whose arg-max class matches.

        Args:
            y_true: One-hot (or score) rows with the true classes.
            y_pred: Score rows predicted by the network.

        Returns:
            Accuracy in percent as a float; ``0.0`` when ``y_true`` is empty.
        """
        if len(y_true) == 0:
            return 0.0
        true_classes = np.argmax(np.asarray(y_true), axis=1)
        predicted_classes = np.argmax(np.asarray(y_pred), axis=1)
        hits = int(np.count_nonzero(true_classes == predicted_classes))
        return hits / len(y_true) * 100

    def fit(self, x, y, epochs, learning_rate):
        """Train for ``epochs`` full-batch updates and track the accuracy.

        The accuracy of each epoch is measured on the predictions made before
        that epoch's update and is printed every 100 epochs.

        Returns:
            List with one accuracy value (in percent) per epoch.
        """
        train_accs = []
        for epoch in range(epochs):
            y_pred = self.backward(x, y, learning_rate)
            train_acc = self.predict(y, y_pred)
            train_accs.append(train_acc)
            if epoch % 100 == 0:
                print('accuracy of',epoch,'is:',train_acc)
        return train_accs



In [None]:
# Accumulators for the summary table: experiment labels and their final accuracies.
name, Accuracy = [], []

Train the model on the training set using 2 layers (20 hidden units, 10 outputs)

In [None]:
def NN(x, y, num_of_layers, size_of_layers, epochs=700, learning_rate=0.9):
    """Build a NeuralNetwork for (x, y), train it and return the accuracy history.

    Args:
        x: 2-D feature array used for training.
        y: One-hot targets matching ``x``.
        num_of_layers: Number of layers of the network.
        size_of_layers: Unit count of each layer.
        epochs: Number of full-batch updates.
        learning_rate: Gradient-descent step size.

    Returns:
        List with the accuracy (in percent) of every epoch.
    """
    # Use the arguments rather than notebook globals, so the function trains
    # on whatever data the caller passes in.
    nn = NeuralNetwork(x, y, num_of_layers, size_of_layers)
    return nn.fit(x, y, epochs, learning_rate)

accuracy = NN(x_train, y_train_onehot, 2, [20, 10])

accuracy of 0 is: 11.651666666666667
accuracy of 100 is: 60.81166666666667
accuracy of 200 is: 70.14333333333333
accuracy of 300 is: 74.67
accuracy of 400 is: 77.21000000000001
accuracy of 500 is: 78.81833333333333
accuracy of 600 is: 80.05499999999999


In [None]:
# Report the accuracy recorded for the last training epoch.
print('accuracy is:',accuracy[-1])

accuracy is: 81.00166666666667



1 - Build a NN with only 2 layers: 1 hidden layer and 1 output layer (fitted and scored on the test split)

In [None]:
def NN(x, y, num_of_layers, size_of_layers, epochs=700, learning_rate=0.9):
    """Build a NeuralNetwork for (x, y), train it and return the accuracy history.

    Args:
        x: 2-D feature array used for training.
        y: One-hot targets matching ``x``.
        num_of_layers: Number of layers of the network.
        size_of_layers: Unit count of each layer.
        epochs: Number of full-batch updates.
        learning_rate: Gradient-descent step size.

    Returns:
        List with the accuracy (in percent) of every epoch.
    """
    # Use the arguments rather than notebook globals, so the function trains
    # on whatever data the caller passes in.
    nn = NeuralNetwork(x, y, num_of_layers, size_of_layers)
    return nn.fit(x, y, epochs, learning_rate)

# NOTE(review): this experiment fits and scores the network on the test split
# itself, so the reported figure is a training accuracy on that split.
accuracy = NN(x_test, y_test_onehot, 2, [20, 10])
name.append('1')

accuracy of 0 is: 9.28
accuracy of 100 is: 62.239999999999995
accuracy of 200 is: 71.65
accuracy of 300 is: 75.72
accuracy of 400 is: 77.92999999999999
accuracy of 500 is: 79.41
accuracy of 600 is: 80.43


In [None]:
# Report the last-epoch accuracy and store it for the summary table.
print('accuracy is:',accuracy[-1])
Accuracy.append(accuracy[-1])

accuracy is: 81.32000000000001


2 - Build a NN with 3 layers: 2 hidden layers and 1 output layer,
where the first hidden layer has fewer neurons than the second (10 < 20)

In [None]:
def NN(x, y, num_of_layers, size_of_layers, epochs=1000, learning_rate=0.9):
    """Build a NeuralNetwork for (x, y), train it and return the accuracy history.

    Args:
        x: 2-D feature array used for training.
        y: One-hot targets matching ``x``.
        num_of_layers: Number of layers of the network.
        size_of_layers: Unit count of each layer.
        epochs: Number of full-batch updates.
        learning_rate: Gradient-descent step size.

    Returns:
        List with the accuracy (in percent) of every epoch.
    """
    # Use the arguments rather than notebook globals, so the function trains
    # on whatever data the caller passes in.
    nn = NeuralNetwork(x, y, num_of_layers, size_of_layers)
    return nn.fit(x, y, epochs, learning_rate)

# NOTE(review): this experiment fits and scores the network on the test split
# itself, so the reported figure is a training accuracy on that split.
accuracy = NN(x_test, y_test_onehot, 3, [10, 20, 10])
print('accuracy is:',accuracy[-1])
name.append('2')

accuracy of 0 is: 8.27
accuracy of 100 is: 34.25
accuracy of 200 is: 45.410000000000004
accuracy of 300 is: 50.160000000000004
accuracy of 400 is: 52.800000000000004
accuracy of 500 is: 57.34
accuracy of 600 is: 60.019999999999996
accuracy of 700 is: 58.730000000000004
accuracy of 800 is: 61.89
accuracy of 900 is: 62.77
accuracy is: 63.949999999999996


In [None]:
# Report the last-epoch accuracy and store it for the summary table.
print('accuracy is:',accuracy[-1])
Accuracy.append(accuracy[-1])

accuracy is: 63.949999999999996


3 - Build a NN with 3 layers: 2 hidden layers and 1 output layer,
where the first hidden layer has more neurons than the second (30 > 20)

In [None]:
def NN(x, y, num_of_layers, size_of_layers, epochs=1000, learning_rate=0.9):
    """Build a NeuralNetwork for (x, y), train it and return the accuracy history.

    Args:
        x: 2-D feature array used for training.
        y: One-hot targets matching ``x``.
        num_of_layers: Number of layers of the network.
        size_of_layers: Unit count of each layer.
        epochs: Number of full-batch updates.
        learning_rate: Gradient-descent step size.

    Returns:
        List with the accuracy (in percent) of every epoch.
    """
    # Use the arguments rather than notebook globals, so the function trains
    # on whatever data the caller passes in.
    nn = NeuralNetwork(x, y, num_of_layers, size_of_layers)
    return nn.fit(x, y, epochs, learning_rate)

# NOTE(review): this experiment fits and scores the network on the test split
# itself, so the reported figure is a training accuracy on that split.
accuracy = NN(x_test, y_test_onehot, 3, [30, 20, 10])
print('accuracy is:',accuracy[-1])
name.append('3')

accuracy of 0 is: 6.279999999999999
accuracy of 100 is: 37.3
accuracy of 200 is: 43.26
accuracy of 300 is: 44.99
accuracy of 400 is: 45.83
accuracy of 500 is: 46.42
accuracy of 600 is: 55.44
accuracy of 700 is: 58.63
accuracy of 800 is: 59.96
accuracy of 900 is: 60.36
accuracy is: 61.260000000000005


In [None]:
# Report the last-epoch accuracy and store it for the summary table.
print('accuracy is:',accuracy[-1])
Accuracy.append(accuracy[-1])

accuracy is: 61.260000000000005


In [None]:
# Turn the collected experiment labels and final accuracies into one-column frames.
# NOTE(review): `name` holds the experiment labels '1', '2', '3' appended in the
# cells above, not layer counts, even though the column is called 'Number_Of_Layers'.
# NOTE(review): `Accuracy` is rebound from a list to a DataFrame here, which
# changes what the earlier `Accuracy.append(...)` cells do if they are re-run
# without first re-creating the list.
Number_Of_Layers=pd.DataFrame(name,columns=['Number_Of_Layers'])
Accuracy=pd.DataFrame(Accuracy,columns=['Accuracy'])

In [None]:
# Place the two one-column frames side by side (axis=1) to form the results table.
Table=pd.concat([Number_Of_Layers,Accuracy],axis=1)

In [None]:
# Display the results table.
Table

Unnamed: 0,Number_Of_Layers,Accuracy
0,1,81.32
1,2,63.95
2,3,61.26


In [None]:
# def NN(x, y, num_of_layers, size_of_layers):
#     nn = NeuralNetwork(x_train,y_train_onehot,num_of_layers, size_of_layers)
#     accuracy=nn.fit(x_train,y_train_onehot,600,0.9)
#     return accuracy
# accuracy=NN(x_train, y_train_onehot, 3 ,[10,20,10]) 
# print('accuracy is:',accuracy[-1])

In [None]:
# print('accuracy is:',accuracy[-1])