In [58]:
import random, math
import numpy as np

def sigmoid(x):
    return 1/(1+np.exp(-x))

def swish(x):
    return x*sigmoid(x)

def relu(x):
    return np.maximum(0,x)

def activation_function(z,act):
    if act=="sigmoid":
        return sigmoid(z)
    elif act == "swish":
        return swish(z)
    elif act == "relu":
        return relu(z)
    elif act=="tanh":
        return np.tanh(z)
    
class Layer():
    def __init__(self,input_size,output_size,act=None):
        self.size = input_size
        self.output_size = output_size
        self.activation = act
        self.weights = np.random.rand(self.output_size,self.size)*2-1      
        self.bias = np.random.rand(output_size,1)*2-1
        self.A = np.random.rand(output_size,1)*2-1
        self.Z = np.random.rand(output_size,1)*2-1
        
    def forward_propagation(self, input_data):
        self.Z = np.dot(self.weights,input_data)+self.bias
        self.A = activation_function(self.Z, self.activation)
        
    def derMSE(self, target):
        return 2*(self.A - target)
    
    def descent(self, input_data, gradient, learningRate):
        
        if(self.activation == "tanh"):
            derZ = 1 - np.power(self.A, 2)
        elif(self.activation == "swish"):
            derZ = swish(self.Z) + sigmoid(self.Z) * (1 - swish(self.Z))
        reps = (self.weights.shape[0], 1)
        derWeights = np.tile(input_data.transpose(), reps)
        """derWeights is a matrix with derivatives of Z WRT weights, which is transposed inputs, 
        repeated in rows n-times, where n is number of neurons.
        
    Example:
    
        input_data = [
            [2],
            [1],
            [0]
        ]
        
        derWeights = [
            [2, 1, 0],
            [2, 1, 0],
            [2, 1, 0],
            ... n-rows
        ]
        
        """

        
        """ The below part adds the gradient to the derivative of A WRT input_data and passes this new
        gradient through return, to be used as the gradient for next layer's descent.
            dA/dX is made with 2 steps: adding the backprop gradient to the derivative of A WRT Z and then adding
        the derivative of Z WRT input_data (chain rule).
        
        1.
        Since A is a matrix shaped Nx1, where N is the number of outputs, the receiving gradient from the upper layer
        must be the same shape. Therefore we can multiply the gradient and the derivative together element-wise.
        
    Example:
        
         derZ = [3x1]
         gradient = [3x1]
         firstGrad = [3x1] *(elementwise) [3x1] = [3x1]
        
        """
        firstGrad = np.multiply(gradient, derZ)
        
        """
        2.
        What's left is adding the gradient of Z WRT input_data. 
        
        This turns out to be the weights matrix. Now we have to multiply the firstGrad gradient to these weights 
        element-wise but since the firstGrad is Nx1 shape and the weights are NxM, where M are the features, 
        we need to reshape the gradient matrix to match the weights matrix by cloning gradient's columns:
        """
        secondGrad = np.tile(firstGrad, (1, self.weights.shape[1]))
        
        """
        Finally we multiply (E-W) secondGrad to the weights matrix:
        """
        finalGrad = np.multiply(firstGrad, secondGrad)
        
        """But because same inputs are multiplied with many weights, we can sum those weights together. It turns out
        that we can sum columns to do that"""
        
        finalGrad = np.sum(finalGrad, axis=0, keepdims=True)
        derBias = 1
        
        weightGrad = np.multiply(derWeights, np.tile(firstGrad, (1, self.weights.shape[1])))
        self.weights = self.weights - learningRate * weightGrad
        self.bias = self.bias - learningRate * firstGrad
        
        """We return transposed matrix, because we desire inputs with a shape of Nx1 and right now finalGrad is 
        transposed"""
        return finalGrad.transpose()
    
        
class NeuralNetwork():
    def __init__(self):
        self.layers=[]
        self.epochs=10
        self.learning_rate = 0.008
    
    def add_layer(self,input_size,output_size,activation=None):
        new_layer = Layer(input_size,output_size,activation)
        self.layers.append(new_layer)
        
    def forward_propagation(self,layer_no):
        current_layer = self.layers[layer_no-1]
        prev_layer = self.layers[layer_no-2]
        act = current_layer.activation
        input_data = prev_layer.A
        self.Z = np.dot(weights,input_data)+self.bias
        result = activation_function(self.Z,act)
        current_layer.A = result
        return result
    
    def full_forward_propagation(self, input_data):
        print("layer 0 forward_propagation")
        
        self.layers[0].forward_propagation(input_data)
        for i in range(1, len(self.layers)):
            print("layer " + str(i) + " forward_propagation")
            
            self.layers[i].forward_propagation(self.layers[i-1].A)
        return self.layers[len(self.layers)-1].A
    
    def back_propagation(self, target):
        gradient = self.layers[len(self.layers)-1].derMSE(target)
        for i in range(0, len(self.layers)):
            index = len(self.layers)-1 - i
            print("Layer " + str(index) + " backpropagation")
            gradient = self.layers[index].descent(self.layers[index-1].A, gradient, self.learning_rate)


test_data = np.array([
    [3],
    [2],
    [1]
])

target_data = np.array([
    [5],
    [5],
    [5]
])

network = NeuralNetwork()
network.add_layer(3, 5, "tanh")
network.add_layer(5, 5, "tanh")
network.add_layer(5, 3, "swish")

for i in range(0, 800):
    print(str(network.full_forward_propagation(test_data)))
    network.back_propagation(target_data)
    

layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[1.76207983]
 [0.31156574]
 [1.33979142]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 1.06116937]
 [-0.27712197]
 [ 0.64502706]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 0.8499701 ]
 [-0.21430768]
 [ 0.04108096]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 1.20416421]
 [-0.20731584]
 [ 0.16126331]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 1.6000078 ]
 [-0.20208856]
 [ 0.36537679]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropa

Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 4.99739005]
 [-0.04913089]
 [ 4.99621142]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 4.9976533 ]
 [-0.04844861]
 [ 4.99659326]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 4.99789002]
 [-0.04778331]
 [ 4.99693663]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 4.9981029 ]
 [-0.04713442]
 [ 4.9972454 ]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 4.99829432]
 [-0.04650137]
 [ 4.99752307]]
Layer 2 backpropagation
Layer 1 backpr

Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000072]
 [-0.01962105]
 [ 5.00000041]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000072]
 [-0.01949712]
 [ 5.00000042]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000071]
 [-0.01937467]
 [ 5.00000042]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000071]
 [-0.01925368]
 [ 5.00000042]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.0000007 ]
 [-0.01913412]
 [ 5.00000042]]
Layer 2 backpropagation
Layer 1 backpr

 [ 5.00000025]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000037]
 [-0.01356712]
 [ 5.00000024]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000037]
 [-0.01350583]
 [ 5.00000024]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000037]
 [-0.01344507]
 [ 5.00000024]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000036]
 [-0.01338483]
 [ 5.00000024]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000036]
 [-0.

layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000018]
 [-0.00940932]
 [ 5.00000012]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000018]
 [-0.00937901]
 [ 5.00000012]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000018]
 [-0.00934889]
 [ 5.00000012]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000018]
 [-0.00931896]
 [ 5.00000012]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000017]
 [-0.00928922]
 [ 5.00000011]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_pr

Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000012]
 [-0.00780982]
 [ 5.00000008]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000012]
 [-0.00778868]
 [ 5.00000008]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000012]
 [-0.00776766]
 [ 5.00000008]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000012]
 [-0.00774674]
 [ 5.00000008]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000012]
 [-0.00772593]
 [ 5.00000008]]
Layer 2 backpropagation
Layer 1 backpr

layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000006]
 [-0.00554449]
 [ 5.00000004]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000006]
 [-0.00553362]
 [ 5.00000004]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000006]
 [-0.00552279]
 [ 5.00000004]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000006]
 [-0.00551201]
 [ 5.00000004]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000006]
 [-0.00550126]
 [ 5.00000004]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_pr

 [ 5.00000003e+00]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000005e+00]
 [-4.79723067e-03]
 [ 5.00000003e+00]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000005e+00]
 [-4.78903070e-03]
 [ 5.00000003e+00]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000005e+00]
 [-4.78085802e-03]
 [ 5.00000003e+00]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000005e+00]
 [-4.77271249e-03]
 [ 5.00000003e+00]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagat

Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000003e+00]
 [-4.12390057e-03]
 [ 5.00000002e+00]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000003e+00]
 [-4.11779487e-03]
 [ 5.00000002e+00]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000003e+00]
 [-4.11170679e-03]
 [ 5.00000002e+00]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000003e+00]
 [-4.10563627e-03]
 [ 5.00000002e+00]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.0000000

 [ 5.00000002e+00]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000003e+00]
 [-3.82822059e-03]
 [ 5.00000002e+00]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000003e+00]
 [-3.82294024e-03]
 [ 5.00000002e+00]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000003e+00]
 [-3.81767410e-03]
 [ 5.00000002e+00]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[ 5.00000003e+00]
 [-3.81242212e-03]
 [ 5.00000002e+00]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagat