<h1 style = "font-size:3rem;color:darkcyan"> Training a Neural Network </h1>

implementing backward propagation from scratch


In [10]:
# import libraries
import numpy as np
import math
from random import random
import matplotlib.pyplot as plt

In [60]:
class ANN:
    
    def __init__(self,n_inputs = 3, hidden_layer_sizes = [3,4], n_targets = 1):
        self.n_inputs = n_inputs
        self.hidden_layer_sizes = hidden_layer_sizes
        self.n_targets = n_targets
        
        layers = [self.n_inputs] + self.hidden_layer_sizes + [self.n_targets]
        print(layers)
       
        self.weights = [np.random.rand(layers[i], layers[i+1]) for i in range(len(layers)-1)]
        self.activations = [np.zeros(layers[i]) for i in range(len(layers))]
        self.derivatives = [np.zeros((layers[i], layers[i+1])) for i in range(len(layers)-1)]
        
    def _sigmoid(self, h):
        a = 1 / (1 + math.e**(-h))
        return a
    
    def _sigmoid_dh(self, x):
        da = x - (1.0 - x)
        return da
    
    def _activate(self, inputs, W):
        h = np.dot(inputs, W) # summation

        activation = self._sigmoid(h) # modulation
        return activation
    
    def _forward_propagate(self, inputs):
        activations = inputs
        self.activations[0] = activations
        for i in range(len(self.weights)):
            activations = self._activate(activations, self.weights[i])
            self.activations[i+1] = activations
        return activations
    
    def _back_propagate(self, error):
        # calculate error gradient
        #error_gradient = self.a[len(weights)] - targets[len(weights)]
        
        for i in reversed(range(len(self.weights))):
            activations = self.activations[i+1]
            delta = error * self._sigmoid_dh(activations)
            
            delta_reshaped =  delta.reshape(delta.shape[0], -1).T
            current_activations = self.activations[i]
            current_activations_reshaped = current_activations.reshape(current_activations.shape[0], -1)
            
            #print(current_activations.shape)
            self.derivatives[i] = np.dot(current_activations_reshaped, delta_reshaped)
            error = np.dot(delta, self.weights[i].T)
            
            # print(f'W[{i}] = {self.derivatives[i]}')
        return error
         
    def _gradient_descent(self, learning_rate):
        for i in range(len(self.weights)):
            self.weights[i] += self.derivatives[i] * learning_rate
   
    def _mse(self, target, output):
        return np.average((target - output)**2)
    
    def train_neural_network(self, inputs, targets, learning_rate, max_epochs):
        #if np.array(inputs).size != self.n_inputs:
         #   print("Number of inputs does not match the input layer!")
          #  return None
        
        #if np.array(targets).size != self.n_targets:
       #     print("Number of targets does not match the output layer!")
         #   return None
        
        # repeat for max_epochs or when target error rate is achieved
        for i in range(max_epochs):
            sum_error = 0
            for input,target in zip(inputs, targets):
                # propagate forward
                output = self._forward_propagate(input)

                # calculate new error
                error = target - output

                # propagate backward
                self._back_propagate(error)

                # gradient descent
                self._gradient_descent(learning_rate)

                # calc sum error & print
                sum_error += self._mse(target, output)
            
            # print error after each epoch
            print(f'Error = {sum_error / len(inputs)} at epoch {i}')
        
        print(f'number of epochs exceeded ({i})')
        return self._forward_propagate(inputs)

In [61]:
ann = ANN(2, [5], 1)

[2, 5, 1]


In [62]:
learning_rate = 0.1
epochs = 50

inputs = np.array([[random() / 2 for _ in range(2)] for _ in range(1000)])
targets = np.array([[i[0] + i[1]] for i in inputs])

In [63]:
out = ann.train_neural_network(inputs, targets, learning_rate, epochs)

Error = 0.04519860937928883 at epoch 0
Error = 0.04138618381192187 at epoch 1
Error = 0.041320438196332306 at epoch 2
Error = 0.04126599728264042 at epoch 3
Error = 0.04120711833276096 at epoch 4
Error = 0.041142325244119655 at epoch 5
Error = 0.041070579038928266 at epoch 6
Error = 0.0409906563794502 at epoch 7
Error = 0.04090108427655711 at epoch 8
Error = 0.04080008659274347 at epoch 9
Error = 0.0406855213594643 at epoch 10
Error = 0.04055480881795848 at epoch 11
Error = 0.04040485322780501 at epoch 12
Error = 0.040231966369009636 at epoch 13
Error = 0.040031809354209406 at epoch 14
Error = 0.03979938376797497 at epoch 15
Error = 0.039529124809992565 at epoch 16
Error = 0.039215176964025523 at epoch 17
Error = 0.038851957953433894 at epoch 18
Error = 0.03843511484198416 at epoch 19
Error = 0.037962901555808566 at epoch 20
Error = 0.03743780936749035 at epoch 21
Error = 0.03686796936556807 at epoch 22
Error = 0.036267582424958526 at epoch 23
Error = 0.03565573330835924 at epoch 24
Er

In [59]:
print(targets[10])
print(out[10])

[0.59277291]
[0.]


In [None]:
# predict data
inputs = [0.1, 0.3]
output = [0.4]
y_pred = ann._forward_propagate(inputs)
print(f'predicted value = {y_pred}')