In [1]:
import random, math
import numpy as np

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``exp(-|x|)``, which always lies in (0, 1],
    so large negative inputs no longer overflow ``np.exp`` (the naive form
    emits a RuntimeWarning and goes through an ``inf`` intermediate).

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    # For x >= 0 use 1 / (1 + e^-x); for x < 0 use the algebraically equal
    # e^x / (1 + e^x).  Both branches only ever see decay in (0, 1].
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves n-d arrays untouched.
    return result[()]

def swish(x):
    """Return the swish activation: ``x`` multiplied by ``sigmoid(x)``."""
    gate = sigmoid(x)
    return x * gate

def relu(x):
    """Return the rectified linear unit of ``x``: negatives are clamped to 0."""
    floor = 0
    return np.maximum(floor, x)

def activation_function(z,act):
    """Apply the activation named ``act`` to the pre-activation ``z``.

    Args:
        z: scalar or array of pre-activation values.
        act: one of ``"sigmoid"``, ``"swish"``, ``"relu"``, ``"tanh"``, or
            ``None`` for no activation (identity).

    Returns:
        The activated values, shaped like ``z``.

    Raises:
        ValueError: if ``act`` is not a supported activation name.  Without
            this the function fell through and returned ``None``, which only
            surfaced later as an unrelated error.
    """
    if act is None:
        # A layer built without an activation is a plain linear layer.
        return z
    if act == "sigmoid":
        return sigmoid(z)
    elif act == "swish":
        return swish(z)
    elif act == "relu":
        return relu(z)
    elif act == "tanh":
        return np.tanh(z)
    raise ValueError("Unknown activation function: {!r}".format(act))
    
class Layer():
    """Fully connected layer trained with per-sample gradient descent.

    The layer computes ``Z = weights . input + bias`` and ``A = act(Z)``.
    Inputs, ``Z`` and ``A`` are treated as column vectors; one-dimensional
    inputs are reshaped to a single column.
    """

    def __init__(self,input_size,output_size,act=None):
        """Create a layer with uniformly random parameters in [-1, 1).

        Args:
            input_size: number of input features.
            output_size: number of neurons (outputs).
            act: activation name understood by ``activation_function``, or
                ``None`` for a linear layer.
        """
        self.size = input_size
        self.output_size = output_size
        self.activation = act
        # weights: (output_size, input_size); bias: (output_size, 1)
        self.weights = np.random.uniform(-1,1,(self.output_size,self.size))
        self.bias = np.random.uniform(-1,1,(output_size,1))
        # A (activations) and Z (pre-activations) are placeholders that the
        # first forward pass overwrites.
        self.A = np.random.uniform(-1,1,(output_size,1))
        self.Z = np.random.uniform(-1,1,(output_size,1))

    @staticmethod
    def _as_column(values):
        """Return ``values`` as an array, reshaping 1-D input to a column."""
        values = np.asarray(values)
        if values.ndim == 1:
            values = values.reshape(-1, 1)
        return values

    def forward_propagation(self, input_data):
        """Compute and store ``Z`` and ``A`` for ``input_data``.

        Args:
            input_data: column vector (or 1-D sequence) of input features.
        """
        input_data = self._as_column(input_data)
        self.Z = np.dot(self.weights,input_data)+self.bias
        if self.activation is None:
            # Linear layer: the activation is the identity.
            self.A = self.Z
        else:
            self.A = activation_function(self.Z, self.activation)

    def derMSE(self, target):
        """Return the gradient of the squared error ``(A - target)**2`` w.r.t. ``A``."""
        return 2*(self.A - target)

    def _activation_derivative(self):
        """Return dA/dZ for the stored ``Z``/``A`` of the last forward pass.

        Raises:
            ValueError: if the layer's activation name is not supported.
        """
        act = self.activation
        if act is None:
            return np.ones_like(self.Z)
        if act == "tanh":
            return 1 - np.power(self.A, 2)
        if act == "sigmoid":
            return self.A * (1 - self.A)
        if act == "relu":
            # Sub-gradient 0 is used at Z == 0.
            return (self.Z > 0).astype(float)
        if act == "swish":
            swished = swish(self.Z)
            return swished + sigmoid(self.Z) * (1 - swished)
        raise ValueError("Unknown activation function: {!r}".format(act))

    def descent(self, input_data, gradient, learningRate):
        """Run one gradient-descent step and return the gradient for the inputs.

        With N neurons and M input features the chain rule gives:

        * ``delta = gradient * dA/dZ`` (element-wise, shape N x 1);
        * ``dC/dW = delta . input^T`` (N x M): row i is the transposed input
          scaled by ``delta[i]``.  E.g. for input ``[[2], [1], [0]]`` every
          row of dZ/dW is ``[2, 1, 0]``;
        * ``dC/db = delta``;
        * ``dC/dinput = W^T . delta`` (M x 1): each input feeds every neuron,
          so its contributions through all weights are summed.

        Args:
            input_data: the input this layer received in the forward pass.
            gradient: dC/dA for this layer, shape N x 1.
            learningRate: step size applied to the weight and bias updates.

        Returns:
            dC/dinput as an M x 1 column, to be used as ``gradient`` by the
            preceding layer.

        Raises:
            ValueError: if the layer's activation name is not supported.
        """
        input_data = self._as_column(input_data)
        gradient = self._as_column(gradient)

        delta = np.multiply(gradient, self._activation_derivative())

        # Must be computed with the weights used in the forward pass, i.e.
        # before they are updated below.
        input_gradient = np.dot(self.weights.T, delta)

        self.weights = self.weights - learningRate * np.dot(delta, input_data.T)
        self.bias = self.bias - learningRate * delta

        return input_gradient
    
        
class NeuralNetwork():
    """Sequential stack of layers trained one sample at a time.

    Each layer object must provide ``forward_propagation(input)``, an ``A``
    attribute holding its latest activations, ``derMSE(target)`` and
    ``descent(input, gradient, learning_rate)``.
    """

    def __init__(self):
        self.layers=[]
        self.epochs=10
        self.learning_rate = 0.008
        # Input of the most recent forward pass; back-propagation needs it to
        # update the first layer.
        self._last_input = None

    def add_layer(self,input_size,output_size,activation=None):
        """Append a new ``Layer(input_size, output_size, activation)``."""
        new_layer = Layer(input_size,output_size,activation)
        self.layers.append(new_layer)

    def forward_propagation(self,layer_no):
        """Recompute one layer from the activations of the layer before it.

        Args:
            layer_no: 1-based position of the layer to recompute.  It must be
                at least 2, because the first layer has no preceding layer to
                read its input from.

        Returns:
            The recomputed activations of that layer.

        Raises:
            ValueError: if ``layer_no`` is outside ``2..len(self.layers)``.
        """
        if not 2 <= layer_no <= len(self.layers):
            raise ValueError(
                "layer_no must be between 2 and {}, got {!r}".format(
                    len(self.layers), layer_no))
        current_layer = self.layers[layer_no-1]
        prev_layer = self.layers[layer_no-2]
        # Delegate to the layer so its weights and bias are used and its own
        # stored state stays consistent.
        current_layer.forward_propagation(prev_layer.A)
        return current_layer.A

    def _run_forward(self, input_data, verbose):
        """Feed ``input_data`` through every layer and return the last output."""
        if not self.layers:
            raise IndexError("the network has no layers")
        signal = np.asarray(input_data)
        if signal.ndim == 1:
            # Layers work on column vectors.
            signal = signal.reshape(-1, 1)
        self._last_input = signal
        for i, layer in enumerate(self.layers):
            if verbose:
                print("layer " + str(i) + " forward_propagation")
            layer.forward_propagation(signal)
            signal = layer.A
        return signal

    def full_forward_propagation(self, input_data):
        """Run a forward pass, printing a progress line per layer.

        Returns:
            The activations of the last layer.
        """
        return self._run_forward(input_data, True)

    def back_propagation(self, target):
        """Back-propagate the squared error against ``target`` and update all layers.

        Uses the activations stored by the most recent forward pass.

        Raises:
            RuntimeError: if no forward pass has been run yet.
        """
        layer_input_for_first = getattr(self, "_last_input", None)
        if layer_input_for_first is None:
            raise RuntimeError("run a forward pass before back_propagation")
        gradient = self.layers[-1].derMSE(target)     # dC/dA of the output layer
        for index in reversed(range(len(self.layers))):
            print("Layer " + str(index) + " backpropagation")
            # The first layer was fed the network input.  Indexing
            # layers[index-1] there would wrap around to the output layer.
            if index > 0:
                layer_input = self.layers[index-1].A
            else:
                layer_input = layer_input_for_first
            gradient = self.layers[index].descent(layer_input, gradient, self.learning_rate)

    def predict(self,test_data):
        """Run a silent forward pass and return the last layer's activations."""
        return self._run_forward(test_data, False)

# Training inputs: one row of three features per sample, reshaped so each
# sample is a 3x1 column vector (overall shape: samples x 3 x 1).
train_data = np.array([
    [3, 2, 1],
    [5, 1, 1],
    [5, 0, 0],
    [3, 0, 1],
    [4, -1, 2],
    [3, -1, 5],
    [3, -1, 11],
    [3, 1, 3],
    [-1, 1, 1],
    [2, 0, 1],
    [0, 0, 1],
    [3, 0, 5],
    [2, -1, 6],
    [1, 1, -4],
    [0, 0, -1],
    [2, 1, -4],
    [2, 1, -1],
    [1, 0, -1],
    [10, 4, -2],
]).reshape(-1, 3, 1)

# Binary target for each training sample, one per row (shape: samples x 1).
target_data = np.array([
    0, 1, 1, 1, 1, 1, 0, 0, 0, 1,
    0, 1, 0, 1, 1, 1, 0, 1, 1,
]).reshape(-1, 1)

# Build a 3 -> 1 -> 1 -> 1 network in which every layer uses tanh.
network = NeuralNetwork()
for n_inputs, n_outputs in ((3, 1), (1, 1), (1, 1)):
    network.add_layer(n_inputs, n_outputs, "tanh")

# 80 passes over the data set, updating the weights after every sample.
for _ in range(80):
    for sample, label in zip(train_data, target_data):
        output = network.full_forward_propagation(sample)
        print(str(output))
        network.back_propagation(label)
print("Prediction : ", network.predict([0,0,2]))



layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.47194046]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.46664395]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.47236831]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.46785018]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.4822832]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.49191322]]
Layer 2 backpropagation
Layer 1 backpropagation

layer 1 forward_propagation
layer 2 forward_propagation
[[0.66686787]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.67249582]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.6761219]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.68098873]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.67507284]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.50666059]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
lay

[[0.55265799]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.66077089]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.56145603]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.67311843]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.67294382]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.50525572]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
la

layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.67916297]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.5687913]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.65851032]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.57592104]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.66824109]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.66872907]]
Layer 2 backpropagation
Layer 1 backpropagation

[[0.67260095]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.67560225]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.67893157]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.67376225]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.57773342]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.6547918]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
lay

[[0.61808881]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.65589318]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.59168191]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.66321862]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.66533424]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.66069783]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
la

[[0.66155424]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.65687793]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.65910875]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.65966533]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.66334411]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.66629325]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
la

Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.66520957]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.6601872]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.59605991]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.64483065]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_propagation
[[0.59947658]]
Layer 2 backpropagation
Layer 1 backpropagation
Layer 0 backpropagation
layer 0 forward_propagation
layer 1 forward_propagation
layer 2 forward_pr