**Libraries**

In [16]:
import numpy as np
from keras.datasets import mnist
import warnings
warnings.filterwarnings('ignore')

**Load MNIST**

In [17]:

# Fetch the MNIST train/test split through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Cast the pixel arrays to float, then flatten every image into one
# 784-element row so each sample is a plain feature vector.
x_train = x_train.astype(float).reshape(-1, 784)
x_test = x_test.astype(float).reshape(-1, 784)


**Standardization**

In [18]:

# Turn the label vectors into column vectors; -1 lets NumPy infer the number
# of samples instead of hard-coding the 60000 / 10000 split sizes.
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# Standardize with statistics taken from the training set only and apply the
# very same shift/scale to the test set: both splits then share one feature
# scale and no test-set information leaks into preprocessing.
# The statistics are computed before x_train is overwritten.
train_mean = np.mean(x_train)
train_std = np.std(x_train)

x_train = (x_train - train_mean) / train_std
x_test = (x_test - train_mean) / train_std


**Sigmoid**

In [19]:

def sigmoid(z, der=False):
  """Element-wise logistic sigmoid, or its derivative (needed by the chain rule).

  The function is evaluated through exp(-|z|), which always lies in (0, 1],
  so it cannot overflow. The naive form exp(-z) overflows to inf for large
  negative z, and the derivative then becomes inf / inf = NaN.

  Args:
    z: Scalar or array-like of pre-activations.
    der: If True, return d(sigmoid)/dz instead of sigmoid(z).

  Returns:
    Values with the same shape as `z`: sigmoid(z) in [0, 1], or its
    derivative in [0, 0.25] when `der` is True.
  """
  z = np.asarray(z)
  e = np.exp(-np.abs(z))  # exp(-z) for z >= 0, exp(z) for z < 0

  if der:
    # sigmoid'(z) = e^{-z} / (1 + e^{-z})^2 is an even function of z,
    # so evaluating it at |z| gives the same value without overflow.
    return e / (1.0 + e) ** 2

  # z >= 0: 1 / (1 + e^{-z});   z < 0: e^{z} / (1 + e^{z})  (same function).
  return np.where(z >= 0, 1.0, e) / (1.0 + e)


**Parameters initialization function**

In [20]:
def initialize_parameters(size_of_layers, seed=None):
    """Create randomly initialized weights and biases for every layer.

    For layer i (1-based, layer 0 being the input) the returned dict holds
    'W<i>' with shape (size_of_layers[i], size_of_layers[i-1]) and
    'B<i>' with shape (size_of_layers[i], 1). Values are standard-normal
    draws scaled by sqrt(1 / size_of_layers[i]), so layers with more
    neurons start with smaller weights.

    Args:
        size_of_layers: Sequence of layer sizes, input layer first.
        seed: Optional seed for reproducible initialization. When None
            (the default) values are drawn from NumPy's global random
            stream, so a caller-side np.random.seed(...) still applies.
            When given, a private RandomState is used and the global
            stream is left untouched.

    Returns:
        dict mapping 'W1', 'B1', 'W2', 'B2', ... to NumPy arrays.
    """
    # The np.random module and a RandomState instance both expose randn().
    rng = np.random if seed is None else np.random.RandomState(seed)

    parameters = {}
    for i in range(1, len(size_of_layers)):
        n_units = size_of_layers[i]
        n_inputs = size_of_layers[i - 1]
        scale = np.sqrt(1. / n_units)
        # Draw W before B so the default path consumes the random stream in
        # the same order as before.
        parameters['W' + str(i)] = rng.randn(n_units, n_inputs) * scale
        parameters['B' + str(i)] = rng.randn(n_units, 1) * scale

    return parameters


**Forward Propagation**

In [21]:

def forwardprop(xtrain,parameters,size_of_layers):
  """Run one sample through the network and cache every layer's Z and A.

  Args:
    xtrain: Feature vector of a single sample (any shape; it is flattened
      into a column vector).
    parameters: dict holding 'W<i>' and 'B<i>' for each layer i >= 1. It is
      updated in place with 'A0' (the input column), and 'Z<i>' / 'A<i>'
      (pre-activation / sigmoid activation) for every layer.
    size_of_layers: Layer sizes, input layer first; only its length is used.

  Returns:
    The same `parameters` dict, now also containing the cached 'A*'/'Z*'
    entries that backpropagation reads.
  """
  # Column vector (n_features, 1) rather than (n_features,); -1 infers the
  # feature count from the sample instead of hard-coding 784.
  parameters['A0'] = np.asarray(xtrain).reshape(-1, 1)

  for i in range(1, len(size_of_layers)):
    z = np.dot(parameters['W'+str(i)], parameters['A'+str(i-1)]) + parameters['B'+str(i)]
    parameters['Z'+str(i)] = z
    parameters['A'+str(i)] = sigmoid(z)

  return parameters
  

**Backward Propagation**

In [22]:
#outer 18 23
def backwardprop(ytrain,output,parameters,size_of_layers):
  """Backpropagate the squared error of one sample through the network.

  Chain rule used per layer: dL/dW = dL/dA * dA/dZ * dZ/dW, where
  dL/dA = 2 * (A - y) / len(A) for the output layer (the error is taken per
  output neuron, so no summation is needed) and dA/dZ is the sigmoid
  derivative evaluated at the cached Z.

  Args:
    ytrain: Target for the sample; expected to broadcast against `output`.
    output: Activation of the last layer for this sample.
    parameters: dict with 'W<i>' plus the 'Z<i>' / 'A<i>' values cached by
      the forward pass.
    size_of_layers: Layer sizes, input layer first; only its length is used.

  Returns:
    dict with the gradients 'W<i>' and 'B<i>' for every layer i >= 1. The
    learning rate is not applied here.
  """
  last = len(size_of_layers) - 1

  def layer_gradients(delta, layer):
    # np.outer flattens both operands: an (m, 1) delta and an (n, 1)
    # previous activation yield the (m, n) weight gradient.
    return {
      'W' + str(layer): np.outer(delta, parameters['A' + str(layer - 1)]),
      'B' + str(layer): np.sum(delta, axis=1, keepdims=True),
    }

  # Output-layer error term dL/dZ.
  delta = 2*(output-ytrain)/output.shape[0]*sigmoid(parameters['Z'+str(last)],der=True)
  changes = layer_gradients(delta, last)

  # Walk the hidden layers from the last one back to the first, pushing the
  # error through the next layer's weights.
  for layer in reversed(range(1, last)):
    delta = np.dot(parameters['W'+str(layer+1)].T, delta)*sigmoid(parameters['Z'+str(layer)],der=True)
    changes.update(layer_gradients(delta, layer))

  return changes


**Update parameters function**

In [23]:

def update_parameters(changes,parameters,eta):
    """Apply one stochastic-gradient-descent step to the weights and biases.

    Args:
        changes: dict of gradients keyed like the parameters ('W1', 'B1', ...).
        parameters: dict of network state; besides 'W*'/'B*' it may also hold
            cached 'A*'/'Z*' entries, which are left untouched.
        eta: Learning rate.

    Returns:
        The same `parameters` dict, with each matching 'W*'/'B*' entry
        decreased in place by eta * gradient.
    """
    for name, gradient in changes.items():
        # Only entries whose key starts with W or B are trainable.
        if name[0] not in ('W', 'B'):
            continue
        parameters[name] -= eta * gradient

    return parameters


**Prediction function**

In [24]:

def predict(parameters,size_of_layers, x=None, y=None):
    """Classify every sample and report whether each prediction is correct.

    Args:
        parameters: Trained network dict ('W<i>' / 'B<i>'); the forward pass
            also writes its 'A*'/'Z*' caches into it.
        size_of_layers: Layer sizes, input layer first.
        x: Optional samples to classify, one per row. Defaults to the
            notebook-level `x_test`.
        y: Optional integer class labels aligned with `x`. Defaults to the
            notebook-level `y_test`.

    Returns:
        list with one boolean per sample: True when the index of the largest
        output activation equals the sample's label.
    """
    # Fall back to the notebook's test split so existing two-argument calls
    # keep working unchanged.
    if x is None:
        x = x_test
    if y is None:
        y = y_test

    output_key = 'A' + str(len(size_of_layers) - 1)

    predictions = []
    for j in range(len(y)):
        # np.asfarray was removed in NumPy 2.0; np.asarray with a float dtype
        # is the documented replacement.
        sample = np.asarray(x[j], dtype=float)
        parameters = forwardprop(sample, parameters, size_of_layers)
        predicted = np.argmax(parameters[output_key])

        # Labels may arrive as scalars or as 1-element rows; compare against
        # the class index directly instead of building a one-hot target only
        # to argmax it back.
        label = int(np.ravel(y[j])[0])
        predictions.append(predicted == label)

    return predictions


**Neural Network function**

In [25]:

def NN(x, y, num_of_layers, size_of_layers, epochs=15, eta=0.1):
    """Train a sigmoid MLP with per-sample SGD and evaluate it.

    Args:
        x: Training samples, one 784-feature sample per row.
        y: Integer class labels (0-9) aligned with `x`.
        num_of_layers: Number of non-input layers. Kept for backward
            compatibility only; the depth is derived from `size_of_layers`
            so the two can never disagree.
        size_of_layers: Sizes of the hidden layers followed by the output
            layer, e.g. [20, 10]. The input layer is added internally and
            the caller's list is NOT modified.
        epochs: Number of passes over the training data.
        eta: Learning rate.

    Returns:
        Whatever `predict` returns for the trained network: one correctness
        flag per evaluated sample.
    """
    # Build a private architecture list with the input layer in front.
    # Inserting into the caller's list would add another 784 on every re-run
    # of the calling cell and silently change the network.
    layer_sizes = [784] + list(size_of_layers)
    output_key = 'A' + str(len(layer_sizes) - 1)

    # Dictionary of weights and biases (later also the A/Z caches).
    parameters = initialize_parameters(layer_sizes)

    for _ in range(epochs):
        # Pass the data sample by sample (stochastic gradient descent).
        for j in range(len(x)):
            # np.asfarray was removed in NumPy 2.0; np.asarray with a float
            # dtype is the documented replacement.
            sample = np.asarray(x[j], dtype=float).reshape(784, 1)

            # Soft one-hot target: 0.99 for the true class, 0.01 elsewhere.
            target = np.zeros(10) + 0.01
            target[y[j]] = 0.99
            target = target.reshape(10, 1)

            # Forward pass.
            parameters = forwardprop(sample, parameters, layer_sizes)
            output = parameters[output_key]

            # Backward pass.
            changes = backwardprop(target, output, parameters, layer_sizes)

            # Gradient step.
            parameters = update_parameters(changes, parameters, eta)

    return predict(parameters, layer_sizes)


**Accuracy Function**

In [26]:

def accuracy(predictions):
    """Return the fraction of truthy entries in `predictions`.

    Args:
        predictions: Sequence of per-sample correctness flags.

    Returns:
        Number of truthy entries divided by the total number of entries.
    """
    # Counted with plain Python rather than np.mean, which the author found
    # to raise a "not callable" error in this notebook.
    correct = sum(1 for flag in predictions if flag)
    return correct / len(predictions)


**Run 3 Neural Networks dynamically**

In [27]:
# One entry per network: the number of non-input layers, and the sizes of the
# hidden layers followed by the 10-unit output layer.
num_of_layers = [2,3,3]
size_of_layers = [[30,10],[20,30,10],[80,50,10]]

# Train the three networks one after another, in order; each call returns the
# per-sample correctness flags produced by the evaluation step.
predictions1, predictions2, predictions3 = [
    NN(x_train, y_train, depth, sizes)
    for depth, sizes in zip(num_of_layers, size_of_layers)
]


**Evaluate Neural Networks**

In [28]:
accuracy1 = accuracy(predictions1)
# Express the accuracy as a percentage rounded to two decimals.
percent1 = np.round(accuracy1 * 100, 2)
print("Accuracy of first Neural Network (1 hidden layer with neurons=[30] ):", str(percent1) + "%")

Accuracy of first Neural Network (1 hidden layer with neurons=[30] ): 94.89%


In [31]:
accuracy2 = accuracy(predictions2)
# Express the accuracy as a percentage rounded to two decimals.
percent2 = np.round(accuracy2 * 100, 2)
print("Accuracy of second Neural Network (2 hidden layers with neurons=[20,30]):", str(percent2) + "%")

Accuracy of second Neural Network (2 hidden layers with neurons=[20,30]): 93.71%


In [32]:
accuracy3 = accuracy(predictions3)
# Express the accuracy as a percentage rounded to two decimals.
percent3 = np.round(accuracy3 * 100, 2)
print("Accuracy of third Neural Network (2 hidden layers with neurons=[80,50]):", str(percent3) + "%")

Accuracy of third Neural Network (2 hidden layers with neurons=[80,50]): 97.1%
