In [1]:
import random, math
import numpy as np

def sigmoid(x):
    return 1/(1+np.exp(-x))

def swish(x):
    return x*sigmoid(x)

def relu(x):
    return np.maximum(0,x)

def activation_function(z,act):
    if act=="sigmoid":
        return sigmoid(z)
    elif act == "swish":
        return swish(z)
    elif act == "relu":
        return relu(z)
    elif act=="tanh":
        return np.tanh(z)
    
class Layer():
    def __init__(self,input_size,output_size,act=None):
        self.size = input_size
        self.output_size = output_size
        self.activation = act
        self.weights = np.random.rand(self.output_size,self.size)*2-1      
        self.bias = np.random.rand(output_size,1)*2-1
        self.A = np.random.rand(output_size,1)*2-1
        self.Z = np.random.rand(output_size,1)*2-1
        
    def forward_propagation(self, input_data):
        self.Z = np.dot(self.weights,input_data)+self.bias
        self.A = activation_function(self.Z, self.activation)
        
    def derMSE(self, target):
        return 2*(self.A - target)
    
    def descent(self, input_data, gradient):
        
        derZ = 1 - np.power(self.A, 2)
        
        reps = (self.weights.shape[0], 1)
        derWeights = np.tile(input_data.transpose(), reps)
        print(input_data)
        print(derWeights)
        """derWeights is a matrix with derivatives of Z WRT weights, which is transposed inputs, 
        repeated in rows n-times, where n is number of neurons.
        
    Example:
    
        input_data = [
            [2],
            [1],
            [0]
        ]
        
        derWeights = [
            [2, 1, 0],
            [2, 1, 0],
            [2, 1, 0],
            ... n-rows
        ]
        
        """
        print("A shape: " + str(self.A.shape))
        print("gradient shape: " + str(gradient.shape))
        derBias = 1
        wgrad = np.tile(np.multiply(gradient, derZ), (1, self.weights.shape[1]))
        print(str(self.weights.shape))
        derInputs = np.sum(wgrad, axis=0, keepdims=True)
        print(str(derInputs.shape))
        return derInputs.transpose()
    
    """ The above part adds the gradient to the derivative of Z WRT input_data and passes this new
    gradient through return, to be used as the gradient for next layer's descent."""
        
        
class NeuralNetwork():
    def __init__(self):
        self.layers=[]
        self.epochs=10
        self.learning_rate = 0.01
    
    def add_layer(self,input_size,output_size,activation=None):
        new_layer = Layer(input_size,output_size,activation)
        self.layers.append(new_layer)
        
    def forward_propagation(self,layer_no):
        current_layer = self.layers[layer_no-1]
        prev_layer = self.layers[layer_no-2]
        act = current_layer.activation
        input_data = prev_layer.A
        self.Z = np.dot(weights,input_data)+self.bias
        result = activation_function(self.Z,act)
        current_layer.A = result
        return result
    
    def full_forward_propagation(self, input_data):
        self.layers[0].forward_propagation(input_data)
        for i in range(1, len(self.layers)):
            self.layers[i].forward_propagation(self.layers[i-1].A)
        return self.layers[len(self.layers)-1].A
    
    def back_propagation(self, target):
        gradient = self.layers[len(self.layers)-1].derMSE(target)
        for i in range(0, len(self.layers)):
            index = len(self.layers)-1 - i
            print("Layer " + str(index) + " backpropagation")
            gradient = self.layers[index].descent(self.layers[index-1].A, gradient)


test_data = np.array([
    [3],
    [2],
    [1]
])

target_data = np.array([
    [5],
    [5],
    [5]
])

network = NeuralNetwork()
network.add_layer(3, 2, "tanh")
network.add_layer(2, 3, "swish")

network.full_forward_propagation(test_data)
network.back_propagation(target_data)

Layer 1 backpropagation
[[0.08238065]
 [0.99190686]]
[[0.08238065 0.99190686]
 [0.08238065 0.99190686]
 [0.08238065 0.99190686]]
A shape: (3, 1)
gradient shape: (3, 1)
(3, 2)
(1, 2)
Layer 0 backpropagation
[[ 0.20363658]
 [-0.12446946]
 [-0.12119262]]
[[ 0.20363658 -0.12446946 -0.12119262]
 [ 0.20363658 -0.12446946 -0.12119262]]
A shape: (2, 1)
gradient shape: (2, 1)
(2, 3)
(1, 3)
