# Manual Backpropagation

In [1]:
# Import required modules
import numpy as np
import matplotlib.pyplot as plt

## Define a number of different activation functions

In [7]:
class SigmoidActivation:
    @staticmethod
    def forward(input) -> np.array:
        ex = np.exp(input)
        return ex / (ex + 1)
    
    @staticmethod
    def backward(input) -> np.array:
        sigmoid = SigmoidActivation.forward(input)
        return sigmoid * (1 - sigmoid)
    
class TanhActivation:
    @staticmethod
    def forward(input) -> np.array:
        ex = np.exp(input)
        ex_min = np.exp(-1*input)
        return (ex - ex_min) / (ex + ex_min)
    
    @staticmethod
    def backward(input) -> np.array:
        return None

### Define some other functions which are handy

In [20]:
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

def checkActivation(activation_func) -> object:
    if activation_func == "sigmoid":
            activation = SigmoidActivation
    elif activation_func == "tanh":
        activation = TanhActivation
    else:
        raise AttributeError("Chosen activation function does not exist.")
    return activation

### Implement the class for a layer
Note a layer consists of a vector of `Nodes` which are represented by a `float` (i.e. weight of that specific node)

In [29]:
class layer:
    def __init__(self, numberOfInputs: int, numberOfNodes: int):
        self.weigths = np.random.rand(numberOfInputs, numberOfNodes)
        self.bias = np.random.rand(numberOfNodes)

    def getLayer(self):
        return(self.weigths)
    
    def compute(self, inputs: np.array, activation: object) -> np.array:
        W = np.dot(inputs, self.weigths) + self.bias
        return  activation.forward(W)


### Implement the logic to setup a simple ANN

In [None]:
class FFN:
    def __init__(self, dimentions: list[int], activation="sigmoid", alpha=1):
        if len(dimentions) < 2:
            raise AssertionError("Network must have at least two layers (input and output).")
        
        self.activation = checkActivation(activation_func=activation)

        self.learningRate = alpha
        self.inputLayer = layer(dimentions[0], dimentions[0])
        self.outputLayer = layer(dimentions[-2], dimentions[-1])

        self.hidden_layers = [] 
        input_dim = dimentions[0]
        for i in range(1, len(dimentions) - 1):
            layer_dim = dimentions[i]
            self.hidden_layers.append(layer(input_dim, layer_dim))
            input_dim = layer_dim

    def forward(self, input):
        activation = self.activation.forward(input)
        
        output = self.inputLayer.compute(input, self.activation)
        for hidden_layer in self.hidden_layers: 
            output = hidden_layer.compute(output, self.activation)
        output = self.outputLayer.compute(output, self.activation)

        return output   
    
    def backward(self, input, y_true):
        activation = self.activation.forward(input)

        store_impulse = [input]
        current_output = input
        
        for layer in self.hidden_layers:
            current_output = layer.compute(current_output, activation=self.activation)
            store_impulse.append(current_output)
        
        y_pred = self.outputLayer.compute(current_output, activation=self.activation)
        store_impulse.append(y_pred)

        Error = y_pred - y_true
        delta = Error * self.activation.backward(y_pred)

        print(store_impulse[-2].shape())
        print(delta.shape())
        self.outputLayer.weigths -= self.learningRate * np.dot(store_impulse[-2].T, delta)
        self.outputLayer.bias -= self.learningRate * np.sum(delta, axis=0)
        
        for i in reversed(range(len(self.hidden_layers))):
            layer = self.hidden_layers[i]
            activation_layer = store_impulse[i]

            delta = np.dot(delta, layer.weigths.T) * self.activation.backward(activation_layer)
            layer.weights -= self.learningRate * np.dot(activation_layer.T, delta)
            layer.bias -= self.learningRate * np.sum(delta, axis=0)

    # Note that currently I can only train the model on one example (this should be fixed in the future)
    def train(self, input, y_true, itterations=1000, debug_mode=0, get_value=50):
        prediction = []
        loss = []
        for i in range(itterations):
            self.backward(input=input, y_true=y_true)

            if i % get_value:
                prediction.append(self.forward(input))
                loss.append(self.forward(input) -  y_true)
                print(f"Training Itteration: {i}; Loss: {loss[-1]}") 

        if debug_mode == 1 or debug_mode == 2:
            plt.plot(range(itterations), loss)
            plt.xlabel("Itterations")
            plt.ylabel("Loss")
            plt.title("Training Loss over Itterations")
            plt.grid()
            plt.show()

            if debug_mode == 2:
                return prediction, loss        

### Forward Propagation

In [40]:
# Input
input_value = [0, 0, 0, 1, 0, 0, 0, 0]
correct_output_value = input_value

Network = FFN([8, 3, 8])
#print(Network.forward(input_value))

Network.train(input=input_value, y_true=correct_output_value, debug_mode=2, get_value=10)

3
8


ValueError: shapes (3,) and (8,) not aligned: 3 (dim 0) != 8 (dim 0)

### Backpropagation