### BackPropagation Algorithm

In [1]:
import numpy as np
from numpy.linalg import norm
import copy
import os

In [3]:
def relu(x):  #Adds non-linearity and removes negative values, helping neural networks model complex patterns.
    return x*(np.sign(x)+1.)/2.
def sigmoid(x): #any real number to the range (0, 1)
    return 1./(1.+np.exp(-x))
def softmax(x): #converts a vector of values into a probability distribution
    return np.exp(x)/sum(np.exp(x))
def mynorm(z): #Root Mean Square (RMS)
    return np.sqrt(np.mean(z**2))

###### A three layer feed-forward network as in the Example given in Chapter 4. On each pass through the training loop, the training input is fed forward through the network to calculate the loss, then the gradient of the loss with respect to each of the weights is calculated and the weights updated for the next pass through the loop.

In [13]:
#Y->training output
#Xtrain->training input
#Xpred->input for prediction

def myANN(Y, Xtrain, Xpred, w01, w02, w03, b01, b02, b03):
    #Initialization
    w1 = copy.copy(w01)
    w2 = copy.copy(w02)
    w3 = copy.copy(w03)
    b1 = copy.copy(b01)
    b2 = copy.copy(b02)
    b3 = copy.copy(b03)

    k=1
    change = 999

    #training loop
    while(change>0.001 and k<201):
        print('Iteration:', k)

        #start feedforward
        z1 = sigmoid(w1 @ Xtrain + b1)  #hidden layer 1
        z2 = sigmoid(w2 @ z1 + b2)      #hidden layer 2
        Yhat = w3 @ z2 + b3             #output layer
        loss = -Y @ np.log(Yhat)        #cross-entropy loss
        print("current loss:", loss)

        ##find gradient of loss w.r.t. each weight
        #output layer
        dLdb3 = Yhat - Y 
        dLdW3 = np.outer(dLdb3, z2)
        # Hidden Layer 2
        dLdb2 = (w3.T @ (dLdb3)) * z2 * (1-z2)
        dLdW2 = np.outer(dLdb2,z1)
        # Hidden Layer 1
        dLdb1 = (w2.T @ (dLdb2)) * z1 * (1-z1)
        dLdW1 = np.outer(dLdb1, Xtrain)
        
        ## Update Weights by Back Propagation
        # Output Layer
        b3 -= dLdb3 # (learning rate is one)
        w3 -= dLdW3
        # Hidden Layer 2
        b2 -= dLdb2
        w2 -= dLdW2
        # Hidden Layer 1
        b1 -= dLdb1
        w1 -= dLdW1

        change = norm(dLdb1)+norm(dLdb2)+norm(dLdb3)+norm(dLdW1)+norm(dLdW2)+norm(dLdW3)
        k += 1
        
    Z1pred = w1 @ Xpred + b1
    Z2pred = w2 @ sigmoid(Z1pred) + b2
    Z3pred = w3 @ sigmoid(Z2pred) + b3
    Ypred = softmax(Z3pred)
    print("")
    print("Summary")
    print("Target Y \n", Y)
    print("Fitted Ytrain \n", Yhat)
    print("Xpred\n", Xpred)
    print("Fitted Ypred \n", Ypred)
    print("Weight Matrix 1 \n", w1)
    print("Bias Vector 1 \n", b1)
    print("Weight Matrix 2 \n", w2)
    print("Bias Vector 2 \n", b2)
    print("Weight Matrix 3 \n", w3)
    print("Bias Vector 3 \n", b3)


In [14]:
## Initial weights and biases
W0_1 = np.array([[0.1,0.3,0.7], [0.9,0.4,0.4]])
b_1 = np.array([1.,1.])

W0_2 = np.array([[0.4,0.3], [0.7,0.2]])
b_2 = np.array([1.,1.])

W0_3 = np.array([[0.5,0.6], [0.6,0.7], [0.3,0.2]])
b_3 = np.array([1.,1.,1.]) 

In [15]:
#training data
X_train = np.array([0.1, 0.7, 0.3])
YY      = np.array([1., 0., 0.])
X_pred  = X_train

In [17]:
myANN(YY, X_train, X_pred, W0_1, W0_2, W0_3, b_1, b_2, b_3)

Iteration: 1
current loss: -0.6539992473098665
Iteration: 2
current loss: nan
Iteration: 3
current loss: -0.4805511383308699
Iteration: 4
current loss: nan
Iteration: 5
current loss: -0.007541664749705356
Iteration: 6
current loss: nan

Summary
Target Y 
 [1. 0. 0.]
Fitted Ytrain 
 [ 9.99707392e-01 -2.66237750e-04 -5.08312047e-05]
Xpred
 [0.1 0.7 0.3]
Fitted Ypred 
 [0.57611829 0.2119417  0.21194001]
Weight Matrix 1 
 [[0.09153201 0.24072407 0.67459603]
 [0.90750974 0.45256817 0.42252921]]
Bias Vector 1 
 [0.91532011 1.07509738]
Weight Matrix 2 
 [[-0.92683361 -1.06146774]
 [-0.65490402 -1.19372961]]
Bias Vector 2 
 [-0.67822254 -0.71821671]
Weight Matrix 3 
 [[ 0.51128657  0.60029999]
 [ 0.41104025  0.44653699]
 [ 0.10371677 -0.05263121]]
Bias Vector 3 
 [ 0.89457823 -0.08124117 -0.00440959]


  loss = -Y @ np.log(Yhat)        #cross-entropy loss
