# <center> **Implementation of Neural Network** </center>
#### <center> ***With Stochastic Gradient Descent*** </center>
>- ### Activation Functions
>> 1. Sigmoid Activation
>> 1. ReLU Activation
>> 1. Softmax Activation
>- ### Loss Functions
>> 1. Mean Squared Error Loss
>> 1. Cross Entropy Loss

In [1]:
import numpy as np
from abc import ABC, abstractmethod

In [2]:
class ActivationFunction(ABC):
    """Interface every activation function has to implement.

    Concrete subclasses provide ``activate`` (the forward computation) and
    ``gradient`` (its derivative with respect to the input).
    """

    def __init__(self) -> None:
        """Nothing to set up; activations declared here carry no state."""

    @abstractmethod
    def activate(self, input):
        """Apply the activation to ``input``; subclasses must override."""

    @abstractmethod
    def gradient(self, input):
        """Differentiate the activation at ``input``; subclasses must override."""

In [13]:
class SigmoidActivation(ActivationFunction):
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``."""

    def activate(self, input):
        """Return the element-wise sigmoid of ``input``.

        The value is computed through the identity
        ``sigmoid(x) = 0.5 * (1 + tanh(x / 2))`` so that large negative
        inputs do not overflow ``exp`` (which only produced a correct result
        by way of ``1 / inf`` and a RuntimeWarning).
        """
        z = np.asarray(input, dtype=np.float64)
        return 0.5 * (1.0 + np.tanh(0.5 * z))

    def gradient(self, input):
        """Return the Jacobian of the sigmoid at ``input`` as a diagonal matrix.

        For a column vector of ``n`` pre-activations the result has shape
        ``(n, n)`` with ``s * (1 - s)`` on the diagonal.
        """
        sig = self.activate(input)
        # atleast_1d keeps a single-node layer, whose (1, 1) input squeezes
        # to a 0-d array, usable by np.diag (which rejects 0-d input).
        slope = np.atleast_1d(np.squeeze((1.0 - sig) * sig))
        return np.diag(slope)

In [4]:
class ReLUActivation(ActivationFunction):
    """Rectified linear unit activation, ``max(0, x)``."""

    def activate(self, input):
        """Return ``input`` with every negative entry replaced by zero."""
        return np.maximum(0, input)

    def gradient(self, input):
        """Return the Jacobian of ReLU at ``input`` as a diagonal matrix.

        The diagonal holds 1 where the input is strictly positive and 0
        elsewhere (the derivative at exactly 0 is taken to be 0).
        """
        # atleast_1d keeps a single-node layer, whose (1, 1) input squeezes
        # to a 0-d array, from breaking len()/np.diag.
        activated = np.atleast_1d(np.squeeze(self.activate(input)))
        # Vectorised replacement for the per-element loop; the dtype of the
        # activated values is preserved.
        mask = (activated > 0).astype(activated.dtype)
        return np.diag(mask)

In [5]:
class SoftmaxActivation(ActivationFunction):
    """Softmax activation, normalised over all entries of the input."""

    def activate(self, input):
        """Return ``exp(input) / sum(exp(input))``.

        The maximum entry is subtracted before exponentiating. This leaves
        the result mathematically unchanged but prevents ``exp`` from
        overflowing to ``inf`` (and the output from becoming ``nan``) for
        large inputs.
        """
        z = np.asarray(input, dtype=np.float64)
        if z.size == 0:
            # np.max rejects empty arrays; an empty input has an empty softmax.
            return z.copy()
        exps = np.exp(z - np.max(z))
        return exps / np.sum(exps)

    def gradient(self, input):
        """Return the derivative of softmax at ``input``.

        For a vector of ``n`` entries (any shape that squeezes to at most one
        dimension) the full ``(n, n)`` Jacobian ``diag(a) - a a^T`` is
        returned, where ``a`` is the softmax output.

        For input that is still multi-dimensional after squeezing, the
        element-wise value ``a * (1 - a)`` is returned instead; this is the
        result such input already produced through the former catch-all
        exception handler.
        """
        z = np.atleast_1d(np.squeeze(np.asarray(input, dtype=np.float64)))
        a = self.activate(z)
        if z.ndim > 1:
            return a * (1 - a)
        # diag(a) - outer(a, a) gives a_i * (1 - a_i) on the diagonal and
        # -a_i * a_j off the diagonal.
        return np.diag(a) - np.outer(a, a)

In [6]:
class LossFunction(ABC):
    """Interface every loss function has to implement.

    Concrete subclasses provide ``loss`` (the scalar error between a target
    and an output) and ``gradient`` (its derivative with respect to the
    output).
    """

    def __init__(self) -> None:
        """Nothing to set up; losses declared here carry no state."""

    @abstractmethod
    def loss(self, target, output):
        """Measure the error of ``output`` against ``target``; must be overridden."""

    @abstractmethod
    def gradient(self, target, output):
        """Differentiate the loss with respect to ``output``; must be overridden."""

In [7]:
class MeanSqureLoss(LossFunction):
    """Half of the summed squared error between target and output."""

    def loss(self, target, output):
        """Return ``0.5 * sum((target - output) ** 2)``."""
        residual = target - output
        squared_error = np.sum(residual * residual)
        return 0.5 * squared_error

    def gradient(self, target, output):
        """Return the derivative of the loss with respect to ``output``."""
        derivative = output - target
        return derivative

In [8]:
class CrossEntropyLoss(LossFunction):
    """Cross-entropy loss, ``-sum(target * ln(output))``."""

    # Lower bound applied to the outputs so that log(0) and division by zero
    # cannot produce inf/nan.
    _EPSILON = 1e-12

    def loss(self, target, output):
        """Return the cross-entropy between ``target`` and ``output``.

        The natural logarithm is used so that this value agrees with
        ``gradient``, which is the derivative of ``-target * ln(output)``
        (a base-10 logarithm would differ from it by a factor of ``ln 10``).
        """
        clipped = np.clip(output, self._EPSILON, None)
        return -np.sum(target * np.log(clipped))

    def gradient(self, target, output):
        """Return the derivative of the loss with respect to ``output``."""
        clipped = np.clip(output, self._EPSILON, None)
        return -target / clipped

In [10]:
class Layer(ABC):
    """Description of one network layer: its width and optional activation.

    A layer without an activation function behaves as the identity.
    """

    def __init__(self, number_of_nodes: int, activation_function: "ActivationFunction | None" = None):
        """Store the layer width and the activation applied to its nodes.

        Args:
            number_of_nodes: Number of nodes in the layer.
            activation_function: Object providing ``activate`` and
                ``gradient``; ``None`` makes the layer linear.
        """
        self.__number_of_nodes = number_of_nodes
        self.__activation_function = activation_function

    @property
    def number_of_nodes(self):
        """Number of nodes in this layer."""
        return self.__number_of_nodes

    def activate(self, input):
        """Return the activation of ``input``, or ``input`` itself if linear."""
        if self.__activation_function is None:
            return input
        return self.__activation_function.activate(input)

    def gradient(self, input):
        """Return the derivative of the layer's activation at ``input``.

        With an activation function its ``gradient`` is returned unchanged.
        Without one, a message is printed and the identity matrix of size
        ``input.size`` is returned: the Jacobian of the identity map, in the
        same square shape that the activation functions' ``gradient`` methods
        produce, so the result can be left-multiplied onto an error column
        vector. (A column of ones, as returned previously, cannot be
        matrix-multiplied with an ``(n, 1)`` error vector for ``n > 1``.)
        """
        if self.__activation_function is not None:
            return self.__activation_function.gradient(input)

        print("No Activation Function is added...")
        return np.eye(np.size(input), dtype=np.float64)

In [11]:
class NeuralNetwork:
    """Fully connected feed-forward network trained one observation at a time.

    Hidden layers are added with ``addHiddenLayer``; the output layer is
    created from ``number_of_outputs`` and the optional output activation.
    Every observation is processed as a column vector, and parameters are
    updated after each observation (stochastic gradient descent).
    """

    def __init__(self, number_of_inputs: int, number_of_outputs: int, loss_function: "LossFunction", output_activation_function: "ActivationFunction | None" = None) -> None:
        """Create an empty network.

        Args:
            number_of_inputs: Number of features per observation.
            number_of_outputs: Number of nodes in the output layer.
            loss_function: Object providing ``loss(target, output)`` and
                ``gradient(target, output)``.
            output_activation_function: Activation applied to the output
                layer; when omitted the output layer is linear.
        """
        self.__number_of_inputs = number_of_inputs
        self.__number_of_outputs = number_of_outputs
        self.__hidden_layers = []
        self.__loss_function = loss_function
        self.__output_layer = None
        if output_activation_function:
            self.__output_layer = Layer(number_of_outputs, output_activation_function)
        self.__is_initialized = False
        # Start with empty parameter lists so the read-only properties work
        # (returning empty lists) before initialize() has been called.
        self.__weights = []
        self.__biases = []
        self.__weighted_sum = []
        self.__activated_output = []
        self.__delta_error = []

    def addHiddenLayer(self, layer: "Layer"):
        """Append a hidden layer; parameters are re-created before next use."""
        self.__is_initialized = False
        self.__hidden_layers.append(layer)

    def initialize(self):
        """(Re)create random weights and biases and zeroed per-layer buffers.

        Weights and biases are drawn uniformly from ``[-0.5, 0.5)``. The
        weight matrix of a layer has shape ``(previous size, layer size)``;
        biases and all buffers are column vectors of the layer size.
        """
        self.__is_initialized = True
        self.__weights = []
        self.__biases = []
        self.__weighted_sum = []
        self.__activated_output = []  # output after applying activation function in each layer
        self.__delta_error = []  # error in each layer

        layer_sizes = [layer.number_of_nodes for layer in self.__hidden_layers]
        layer_sizes.append(self.__number_of_outputs)

        prev_layer_size = self.__number_of_inputs
        for size in layer_sizes:
            self.__weights.append(np.random.rand(prev_layer_size, size) - 0.5)
            self.__biases.append(np.random.rand(size, 1) - 0.5)

            self.__weighted_sum.append(np.zeros((size, 1), dtype=np.float64))
            self.__activated_output.append(np.zeros((size, 1), dtype=np.float64))
            self.__delta_error.append(np.zeros((size, 1), dtype=np.float64))
            prev_layer_size = size

    @property
    def weights(self):
        """List of weight matrices, one per layer (hidden layers, then output)."""
        return self.__weights

    @property
    def biases(self):
        """List of bias column vectors, one per layer."""
        return self.__biases

    @property
    def activated_output(self):
        """List of each layer's output from the most recent forward pass."""
        return self.__activated_output

    def initializeWeights(self, weights):
        """Replace the weight matrices with ``weights``.

        If the network has not been initialised yet it is initialised first;
        otherwise the first forward pass would call ``initialize`` and
        silently overwrite the supplied values.
        """
        if not self.__is_initialized:
            self.initialize()
        self.__weights = weights

    def initializeBiases(self, biases):
        """Replace the bias vectors with ``biases``.

        If the network has not been initialised yet it is initialised first;
        otherwise the first forward pass would call ``initialize`` and
        silently overwrite the supplied values.
        """
        if not self.__is_initialized:
            self.initialize()
        self.__biases = biases

    def __forwardPass(self, input):
        """Propagate one column-vector observation through every layer.

        Stores each layer's weighted sum and activated output. Works with
        zero hidden layers (the input then feeds the output layer directly).
        """
        if not self.__is_initialized:
            self.initialize()

        output_index = len(self.__hidden_layers)
        signal = input
        for i in range(output_index + 1):
            weighted = np.matmul(self.__weights[i].T, signal) + self.__biases[i]
            self.__weighted_sum[i] = weighted
            if i < output_index:
                signal = self.__hidden_layers[i].activate(weighted)
            elif self.__output_layer is not None:
                signal = self.__output_layer.activate(weighted)
            else:
                # Linear output layer: the weighted sum is the output.
                signal = weighted
            self.__activated_output[i] = signal

    def __backpropagate(self, target):
        """Compute the error term of every layer, from the output backwards.

        Each layer's ``gradient`` is used as a square matrix that is
        left-multiplied onto the error column vector.
        """
        loss_gradient = self.__loss_function.gradient(target, self.__activated_output[-1])

        output_index = len(self.__hidden_layers)
        if self.__output_layer is not None:
            output_jacobian = self.__output_layer.gradient(self.__weighted_sum[output_index])
            self.__delta_error[output_index] = np.matmul(output_jacobian, loss_gradient)
        else:
            # A linear output layer has an identity Jacobian.
            self.__delta_error[output_index] = loss_gradient

        for i in range(output_index - 1, -1, -1):
            propagated = np.matmul(self.__weights[i + 1], self.__delta_error[i + 1])
            layer_jacobian = self.__hidden_layers[i].gradient(self.__weighted_sum[i])
            self.__delta_error[i] = np.matmul(layer_jacobian, propagated)

    def __updateWeightsAndBiases(self, input, learning_rate):
        """Apply one gradient-descent step using the stored error terms."""
        upstream = input  # activations feeding the layer being updated
        for i in range(len(self.__hidden_layers) + 1):
            delta = self.__delta_error[i]
            self.__weights[i] = self.__weights[i] - learning_rate * np.matmul(upstream, delta.T)
            self.__biases[i] = self.__biases[i] - learning_rate * delta
            upstream = self.__activated_output[i]

    def __process(self, input, target, learning_rate):
        """Run forward pass, backpropagation and update; return the loss.

        The loss is evaluated on the output computed before the update.
        """
        self.__forwardPass(input)
        loss = self.__loss_function.loss(target, self.__activated_output[-1])
        self.__backpropagate(target)
        self.__updateWeightsAndBiases(input, learning_rate)
        return loss

    def fit(self, train_dataset, target_dataset, learning_rate: float = 0.05, epochs: int = 100):
        """Train the network with per-observation updates.

        Args:
            train_dataset: Sequence of observations; each is expanded to a
                column vector.
            target_dataset: Sequence whose ``i``-th entry holds, at index 0,
                the class index of observation ``i``; it is one-hot encoded
                over ``number_of_outputs`` classes.
            learning_rate: Step size of each update.
            epochs: Number of passes over the dataset.

        Raises:
            ValueError: If ``train_dataset`` is empty (the per-epoch average
                loss would otherwise divide by zero).
        """
        n = len(train_dataset)
        if n == 0:
            raise ValueError("train_dataset must contain at least one observation")

        for e in range(epochs):
            total_loss = 0.0
            for i in range(n):
                p = np.array(np.expand_dims(train_dataset[i], 1), dtype=np.float64)
                o = np.zeros((self.__number_of_outputs, 1), dtype=np.float64)
                o[int(target_dataset[i][0])] = 1.0
                loss = self.__process(p, o, learning_rate)
                total_loss += loss
                print(f"Epoch {e + 1}, oveservation {i + 1} : loss = {loss}", end="\r", flush=True)
            print(f"Epoch {e + 1}, oveservation {n} : loss = {total_loss / n}", flush=True)

    def predict(self, test_dataset):
        """Return the network output (a column vector) for each observation."""
        pred = []
        for observation in test_dataset:
            sample = np.array(np.expand_dims(observation, 1), dtype=np.float64)
            self.__forwardPass(sample)
            pred.append(self.__activated_output[-1])

        return pred

In [12]:
def example():
    """Train a small 2-2-2 network on a single observation as a smoke test.

    The network has two inputs, one sigmoid hidden layer with two nodes and
    a softmax output layer with two nodes, trained with the squared-error
    loss.
    """
    nn = NeuralNetwork(2, 2, MeanSqureLoss(), SoftmaxActivation())

    nn.addHiddenLayer(Layer(2, SigmoidActivation()))

    nn.initialize()

    # Optional fixed starting parameters for a reproducible run:
    # weights = [np.array([[0.15, 0.25],
    #             [0.20, 0.30]]),
    #         np.array([[0.40, 0.50],
    #             [0.45, 0.55]])]

    # biases = [np.array([[0.35],
    #             [0.35]]),
    #         np.array([[0.60],
    #                     [0.60]])]

    # nn.initializeWeights(weights)
    # nn.initializeBiases(biases)

    # fit() treats each row as one observation, so a single observation with
    # two features is a (1, 2) array. (A (2, 1) array would be read as two
    # one-feature observations and fail against the two-input network.)
    x_input = np.array([[0.05, 0.10]], dtype=np.float64)
    # fit() expects a class index per observation and one-hot encodes it;
    # class 1 corresponds to the intended target of roughly [0, 1].
    y_output = np.array([[1]], dtype=np.float64)
    nn.fit(x_input, y_output, 0.5)