# *Experiment 8*

# *Back Propogation*

In [2]:
# import the required modules
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import math
import random
import string
import csv
import time

In [1]:
class BackPropagation():
    
    def __init__(self,networkFile="./inputs/structure.xlsx",dataFile="./inputs/backprop.xlsx",outputClasses=1,epochs=1000,alpha=0.1):
        
        # no of output classes of the network
        self.outputClasses = outputClasses
        # number of epochs 
        self.epochs = epochs
        # value of learning rate
        self.alpha = alpha
        # to obtain random seeds
        np.random.seed(time.time())

        # Call the input function to obtain the number of nodes of each layer
        self.networkExtract(networkFile)    
        # Call the input function to obtain the input values from training set
        self.dataExtract(dataFile)        
        
        # number of hidden layers in the network
        self.noHiddenLayers = len(self.nodesPerLayer)
        # add the first and last layer node counts
        self.nodesPerLayer = [self.no_features] + self.nodesPerLayer + [self.outputClasses]
        # total number of layers in the network
        self.totalLayers = len(self.nodesPerLayer)
        
        # initialize the structure for the neural net
        self.initialize()
        
        # to store the cost function for each epoch
        self.totalerror = []
        
    def networkExtract(self,networkFile:str) -> None:
        
        # read the input from excel file
        excel_file = networkFile
        # convert it into a pandas dataframe
        dataframe = pd.read_excel(excel_file)
        # stores the number of nodes in every layer
        self.nodesPerLayer = dataframe.columns.tolist()
    

    def dataExtract(self,dataFile:str) -> None:
        
        # read the input from excel file 
        excel_file = dataFile
        # convert it into a pandas dataframe
        dataframe = pd.read_excel(excel_file)
        # find out the number of features
        self.no_features = len(dataframe.columns) - 1
        # find out the number of inputs
        self.no_rows = len(dataframe.index)
        
        # Convert the dataframe into numpy array for analysis
        self.training_data = np.array([ dataframe.iloc[i,:self.no_features].tolist() for i in range(self.no_rows) ])
        # Obtain the output in a separate numpy column vector
        self.actual_op = np.array([dataframe['y'].tolist()]).T
        
    def initialize(self):
        
        self.weights = []
        # construct the weight matrices
        for i in range(self.totalLayers-1):
            # initialize weights randomly with mean 0 and range [-1, 1]
            self.weights.append(2*np.random.random((self.nodesPerLayer[i]+1,self.nodesPerLayer[i+1])) - 1)            
        
    def run(self):
        for i in range(self.epochs):
            ll = self.singleCycle()
        return ll    
        
    def singleCycle(self):
        
        # list of activations for all layers
        activations = []
        # list of errors for all layers
        errors = []
        # list of partial derivative matrices
        delta = []
        
        # Forward Propogation
        
        # initialize with training input
        inp = np.hstack((np.ones((self.training_data.shape[0], 1)), self.training_data))
        activations.append(inp)
        
        # Propogation for rest of the layers
        for i in range(self.totalLayers-2):
            newinp = np.hstack((np.ones((self.training_data.shape[0], 1)),self.sigmoid(np.dot(inp,self.weights[i]))))
            activations.append(newinp)
            inp = newinp
        # Do last layer separately to avoid adding bias term
        activations.append(self.sigmoid(np.dot(inp,self.weights[self.totalLayers-2])))
        
        # Backward Propogation
        err = activations[self.totalLayers-1] - self.actual_op
        self.totalerror.append(0.5*((err.sum())**2))
        
        errors.append(err)
        for i in range(self.totalLayers-2,0,-1):
            newerr = activations[i][:,1:] * (1 - activations[i][:,1:]) * np.dot(err,self.weights[i].T[:,1:])
            errors.append(newerr)
            err = newerr
        
        # Calculate the partial derivatives
        for i in range(self.totalLayers-1):
            delta.append(activations[0][:,:,np.newaxis] * errors[self.totalLayers-2-i][:,np.newaxis,:])
        
        # Take the average of the partial derivatives
        for i in range(self.totalLayers-1):
            delta[i] = np.average(delta[i],axis=0)
        
        # Update the weights
        for i in range(self.totalLayers-1):
            self.weights[i] += -self.alpha * delta[i]
            
        return activations[self.totalLayers-1]
    
    def forwardPropogate(self,activations:list,inparr:list):
                
        curinp = inparr
        for i in range(self.totalLayers-1):
                curinp = self.weights
                activations.append(np.dot(inparr))
                
        
    def sigmoid(self,x:float, derivative=False) -> float:
        
        if (derivative == True):
            return x * (1 - x)
        else:
            return 1 / (1 + np.exp(-x))
            

In [178]:
a = np.asarray([1,2,3])
a.sum() 

6

In [182]:
backprop = BackPropagation("../inputs/structure.xlsx","../inputs/backprop.xlsx")
print(backprop.run())

[[0.37851865]
 [0.7740483 ]
 [0.35766052]
 [0.76219351]
 [0.29301553]
 [0.47544113]]


In [185]:
suma = 0
for i in backprop.totalerror:
    suma += i
print(suma)    

9.138018559672842


In [None]:
import numpy as np

# define the sigmoid function
def sigmoid(x):
    return x * (1 - x)
    
# choose a random seed for reproducible results
np.random.seed(1)

# learning rate
alpha = .1

# number of nodes in the hidden layer
num_hidden = 3

# inputs
X = np.array([  
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 0],
    [1, 1, 0],
    [1, 0, 1],
    [1, 1, 1],
])

# outputs
# x.T is the transpose of x, making this a column vector
y = np.array([[0, 1, 0, 1, 1, 0]]).T

# initialize weights randomly with mean 0 and range [-1, 1]
# the +1 in the 1st dimension of the weight matrices is for the bias weight
hidden_weights = 2*np.random.random((X.shape[1] + 1, num_hidden)) - 1
output_weights = 2*np.random.random((num_hidden + 1, y.shape[1])) - 1

# number of iterations of gradient descent
num_iterations = 10000

# for each iteration of gradient descent
for i in range(num_iterations):

    # forward phase
    # np.hstack((np.ones(...), X) adds a fixed input of 1 for the bias weight
    input_layer_outputs = np.hstack((np.ones((X.shape[0], 1)), X))
    hidden_layer_outputs = np.hstack((np.ones((X.shape[0], 1)), sigmoid(np.dot(input_layer_outputs, hidden_weights))))
    output_layer_outputs = np.dot(hidden_layer_outputs, output_weights)

    # backward phase
    # output layer error term
    output_error = sigmoid(output_layer_outputs) - y
    # hidden layer error term
    # [:, 1:] removes the bias term from the backpropagation
    hidden_error = hidden_layer_outputs[:, 1:] * (1 - hidden_layer_outputs[:, 1:]) * np.dot(output_error, output_weights.T[:, 1:])

    # partial derivatives
    hidden_pd = input_layer_outputs[:, :, np.newaxis] * hidden_error[: , np.newaxis, :]
    output_pd = hidden_layer_outputs[:, :, np.newaxis] * output_error[:, np.newaxis, :]

    # average for total gradients
    total_hidden_gradient = np.average(hidden_pd, axis=0)
    total_output_gradient = np.average(output_pd, axis=0)

    # update weights
    hidden_weights += - alpha * total_hidden_gradient
    output_weights += - alpha * total_output_gradient

# print the final outputs of the neural network on the inputs X
print("Output After Training: \n{}".format(output_layer_outputs))

In [None]:
class BackPropagation():
    
    def __init__(self,networkFile,dataFile,outputClasses=1,epochs=5000,alpha=0.1):
        
        # no of output classes of the network
        self.outputClasses = outputClasses
        # number of epochs 
        self.epochs = epochs
        # value of learning rate
        self.alpha = alpha
        # choose a random seed for reproducible results
        np.random.seed(1)

        # Call the input function to obtain the number of nodes of each layer
        self.networkExtract(networkFile)    
        # Call the input function to obtain the input values
        self.dataExtract(dataFile)        
        
        # number of hidden layers in the network
        self.noHiddenLayers = len(self.nodesPerLayer)
        # add the first and last layer node counts
        self.nodesPerLayer = [self.no_features] + self.nodesPerLayer + [self.outputClasses]
        # total number of layers in the network
        self.totalLayers = len(self.nodesPerLayer)
        
        # initialize the structure for the neural net
        self.initialize()
        
    def networkExtract(self,networkFile:str) -> None:
        
        # read the input from excel file
        excel_file = networkFile
        # convert it into a pandas dataframe
        dataframe = pd.read_excel(excel_file)
        # stores the number of nodes in every layer
        self.nodesPerLayer = dataframe.columns.tolist()
    

    def dataExtract(self,dataFile:str) -> None:
        
        # read the input from excel file 
        excel_file = dataFile
        # convert it into a pandas dataframe
        dataframe = pd.read_excel(excel_file)
        # find out the number of features
        self.no_features = len(dataframe.columns) - 1
        # find out the number of inputs
        self.no_rows = len(dataframe.index)
        
        # Convert the dataframe into numpy array for analysis
        self.training_data = np.array([ dataframe.iloc[i,:self.no_features].tolist() for i in range(self.no_rows) ])
        # Obtain the output in a separate numpy column vector
        self.actual_op = np.array([dataframe['y'].tolist()]).T
        
    def initialize(self):
        
        self.weights = []
        # construct the weight matrices
        for i in range(self.totalLayers-1):
            # initialize weights randomly with mean 0 and range [-1, 1]
            self.weights.append(2*np.random.random((self.nodesPerLayer[i]+1,self.nodesPerLayer[i+1])) - 1)            
        
    def run(self):
        for i in range(self.epochs):
            ll = self.singleCycle()
        return ll    
        
    def singleCycle(self):
        
        # list of activations for all layers
        activations = []
        # list of errors for all layers
        errors = []
        # list of partial derivative matrices
        delta = []
        
        # Forward Propogation
        
        # initialize with training input
        inp = np.hstack((np.ones((self.training_data.shape[0], 1)), self.training_data))
        activations.append(inp)
        
        # Propogation for rest of the layers
        for i in range(self.totalLayers-2):
            newinp = np.hstack((np.ones((self.training_data.shape[0], 1)),self.sigmoid(np.dot(inp,self.weights[i]))))
            activations.append(newinp)
            inp = newinp
        # Do last layer separately to avoid adding bias term
        activations.append(self.sigmoid(np.dot(inp,self.weights[self.totalLayers-2])))
        
        # Backward Propogation
        err = activations[self.totalLayers-1] - self.actual_op
        errors.append(err)
        for i in range(self.totalLayers-2,0,-1):
            newerr = activations[i][:,1:] * (1 - activations[i][:,1:]) * np.dot(err,self.weights[i].T[:,1:])
            errors.append(newerr)
            err = newerr
        
        # Calculate the partial derivatives
        for i in range(self.totalLayers-1):
            delta.append(activations[0][:,:,np.newaxis] * errors[self.totalLayers-2-i][:,np.newaxis,:])
        
        # Take the average of the partial derivatives
        for i in range(self.totalLayers-1):
            delta[i] = np.average(delta[i],axis=0)
        
        # Update the weights
        for i in range(self.totalLayers-1):
            self.weights[i] += -self.alpha * delta[i]
            
        return activations[self.totalLayers-1]
    
    def forwardPropogate(self,activations:list,inparr:list):
                
        curinp = inparr
        for i in range(self.totalLayers-1):
                curinp = self.weights
                activations.append(np.dot(inparr))
                
        
    def sigmoid(self,x:float, derivative=False) -> float:
        
        if (derivative == True):
            return x * (1 - x)
        else:
            return 1 / (1 + np.exp(-x))
            