In [None]:
# Multi-Layer Perceptron Neural Network

import numpy as np

class NeuralNetwork:
    """Fully connected multi-layer perceptron with sigmoid activations.

    Every layer (hidden and output) uses the sigmoid activation and the
    network is trained with full-batch gradient descent on the binary
    cross-entropy loss.

    Attributes:
        x_dataset: Training inputs, shape (samples, features).
        y_value: Training targets, shape (samples, outputs).
        x_weights / x_bias: Per-hidden-layer weight matrices of shape
            (nodes_out, nodes_in) and bias vectors of shape (nodes_out,).
        output_weights / output_bias: Parameters of the output layer.
        learning_rate: Gradient descent step size.
        tolerance: Minimum loss change between epochs before training stops.
        prev_loss: Loss recorded after the most recent completed epoch.
    """

    # Keeps predictions away from exactly 0 or 1 so log() stays finite.
    _EPS = 1e-12

    def __init__(self, layers=None, *, x_dataset=None, y_value=None,
                 learning_rate=0.1, tolerance=1e-6):
        """Build the network and attach the training data.

        Args:
            layers: Layer sizes as
                [input_size, hidden1_size, ..., output_size].
                Defaults to [2, 2, 2, 1].
            x_dataset: Optional training inputs, shape (samples, features).
                Defaults to the built-in 3-sample, 2-feature dataset.
            y_value: Optional training targets. A 1-D sequence is treated as
                one target per sample. Defaults to the built-in targets.
            learning_rate: Gradient descent step size.
            tolerance: Minimum change in loss required to keep training.

        Raises:
            ValueError: If fewer than two layer sizes are given, a size is
                not positive, or inputs and targets disagree on the number
                of samples.
        """
        # A None default avoids sharing one mutable list between instances.
        layers = [2, 2, 2, 1] if layers is None else list(layers)
        if len(layers) < 2:
            raise ValueError("layers needs at least an input and an output size")
        if any(size <= 0 for size in layers):
            raise ValueError("every layer size must be a positive integer")

        # Dataset (default: Inputs: 2, Samples: 3)
        if x_dataset is None:
            x_dataset = [[2, 6], [8, 5], [1, 2]]
        if y_value is None:
            y_value = [[1], [1], [0]]
        self.x_dataset = np.atleast_2d(np.asarray(x_dataset))
        targets = np.atleast_1d(np.asarray(y_value))
        # Column-shape 1-D targets so `prediction - y_value` cannot broadcast
        # into a (samples, samples) matrix.
        self.y_value = targets.reshape(-1, 1) if targets.ndim == 1 else targets
        if self.x_dataset.shape[0] != self.y_value.shape[0]:
            raise ValueError("x_dataset and y_value must have the same number of samples")

        self.x_weights = []
        self.x_bias = []

        for nodes_in, nodes_out in zip(layers[:-1], layers[1:]):
            # Xavier-style initialization: uniform in +/- sqrt(1 / nodes_in)
            limit = np.sqrt(1 / nodes_in)
            self.x_weights.append(
                np.random.uniform(-limit, limit, (nodes_out, nodes_in))
            )
            self.x_bias.append(np.zeros(nodes_out))

        # The last weight/bias pair belongs to the output layer; everything
        # left in x_weights / x_bias is a hidden layer.
        self.output_weights = self.x_weights.pop()
        self.output_bias = self.x_bias.pop()

        self.learning_rate = learning_rate
        self.tolerance = tolerance  # minimum change in loss
        self.prev_loss = float('inf')

    @staticmethod
    def sigmoid(x):
        """Return the element-wise logistic function 1 / (1 + exp(-x)).

        The input is clipped to [-500, 500] so np.exp cannot overflow; inside
        that range the result is identical to the unclipped formula.
        """
        clipped = np.clip(x, -500, 500)
        return 1 / (1 + np.exp(-clipped))

    @staticmethod
    def derivative_sigmoid(x):
        """Return the sigmoid derivative given *x*, an already-activated value."""
        return x * (1 - x)

    # Original (misspelled) name kept so existing callers keep working.
    derivative_sgimoid = derivative_sigmoid

    def _forward(self, dataset):
        """Run *dataset* through every layer.

        Returns:
            (activations, prediction): the list of hidden-layer activations
            in layer order, and the output-layer activation.
        """
        activations = []
        for w, b in zip(self.x_weights, self.x_bias):
            # Sigmoid( (inputs * hidden layer weights) + bias ); the result
            # becomes the input of the next layer.
            dataset = self.sigmoid((dataset @ w.T) + b)
            activations.append(dataset)

        # Sigmoid( (last layer activation * output_weights) + output_bias )
        prediction = self.sigmoid(dataset @ self.output_weights.T + self.output_bias)
        return activations, prediction

    def predict(self, new_data):
        """Return the network output for one sample or a batch of samples.

        Args:
            new_data: A single sample (1-D) or a batch (2-D) of inputs.

        Returns:
            Array of shape (samples, output_size).
        """
        _, prediction = self._forward(np.atleast_2d(new_data))
        return prediction

    def train(self, epochs=5):
        """Train on the stored dataset with full-batch gradient descent.

        Training stops early when the loss changes by less than
        `self.tolerance` between two consecutive epochs.

        Args:
            epochs: Maximum number of passes over the dataset.

        Returns:
            The predictions of the last forward pass, i.e. computed before
            that epoch's weight update. With `epochs <= 0` no training happens
            and the current predictions are returned.
        """
        prediction = None

        for i in range(epochs):
            activations, prediction = self.forward_propagation()

            # Binary cross-entropy:
            # -(true value * log(predicted value)
            #   + (1 - true value) * log(1 - predicted value))
            # Clipping guards against log(0) once the sigmoid saturates.
            safe = np.clip(prediction, self._EPS, 1 - self._EPS)
            loss = -np.mean(self.y_value * np.log(safe) + (1 - self.y_value) * np.log(1 - safe))

            if abs(self.prev_loss - loss) < self.tolerance:
                print(f"Stopping at iteration {i + 1}, loss change < {self.tolerance} : {abs(self.prev_loss - loss)}")
                break

            self.back_propagation(activations, prediction)
            self.prev_loss = loss

        if prediction is None:
            # No epoch ran; still return a well-defined result.
            _, prediction = self.forward_propagation()
        return prediction

    # FORWARD PROPAGATION
    def forward_propagation(self):
        """Run the stored training inputs through the network.

        Returns:
            (activations, prediction): hidden-layer activations in layer
            order, and the output-layer prediction.
        """
        return self._forward(self.x_dataset)

    # BACKPROPAGATION
    def back_propagation(self, activations, prediction):
        """Update all weights and biases with one gradient descent step.

        Args:
            activations: Hidden-layer activations from `forward_propagation`.
            prediction: Output-layer activation from the same forward pass.

        Returns:
            True once the parameters have been updated.
        """
        batch_size = self.x_dataset.shape[0]
        hidden_count = len(self.x_weights)

        # layer_inputs[i] is what fed hidden layer i; layer_inputs[-1] is what
        # fed the output layer (the raw dataset when there is no hidden layer).
        layer_inputs = [self.x_dataset, *activations]

        # output delta = predicted value - true value
        output_delta = prediction - self.y_value

        # output delta * last activation value / batch_size
        output_dw = (output_delta.T @ layer_inputs[-1]) / batch_size
        output_db = np.sum(output_delta, axis=0) / batch_size  # output gradient bias

        hidden_dw = [None] * hidden_count
        hidden_db = [None] * hidden_count
        delta = output_delta
        downstream_weights = self.output_weights

        # Backpropagated error of each hidden layer =
        # (delta @ downstream weights) * derivative of sigmoid(activation).
        # All gradients use the pre-update weights; updates happen afterwards.
        for i in reversed(range(hidden_count)):
            delta = (delta @ downstream_weights) * self.derivative_sigmoid(activations[i])
            hidden_dw[i] = (delta.T @ layer_inputs[i]) / batch_size
            hidden_db[i] = np.sum(delta, axis=0) / batch_size
            downstream_weights = self.x_weights[i]

        # update the output weight and bias = old weight - (learning_rate * gradient)
        self.output_weights -= self.learning_rate * output_dw
        self.output_bias -= self.learning_rate * output_db

        # update the hidden layer weight and bias = old weight - (learning_rate * gradient)
        for i in range(hidden_count):
            self.x_weights[i] -= self.learning_rate * hidden_dw[i]
            self.x_bias[i] -= self.learning_rate * hidden_db[i]
        return True



# Build a network with the default layer sizes and its built-in dataset.
train = NeuralNetwork()
# Run a single training epoch (one forward pass and one weight update).
train.train(epochs=1)

# Query the network with one sample; predict() returns a 2-D array, so
# [0][0] selects the single output value of the single sample.
test_pass = train.predict([8, 6])
print(f"Pass Probability: {test_pass[0][0]:.4f}")


Pass Probability: 0.5056
