In [73]:
#Writing a Neural Network Class

import numpy as np
        
class NeuralNetwork:
    """Fully connected feed-forward network with sigmoid activations.

    Every layer (including the output layer) applies the sigmoid function and
    the network is trained by gradient descent on a half sum-of-squares cost.
    Samples are stored as columns: an input has shape
    ``(neuronsperlayer[0], n_samples)``; the demo usage passes one column at a
    time.

    Attributes:
        noflayers: Number of layers, counting the input layer.
        w: List of weight matrices; ``w[i]`` has shape
            ``(neuronsperlayer[i+1], neuronsperlayer[i])``.
        b: List of bias column vectors; ``b[i]`` has shape
            ``(neuronsperlayer[i+1], 1)``.
        a, z: Activations and pre-activations cached by the last ``forward``.
        grad_w, grad_b: Gradients cached by the last ``backward``.
    """

    def __init__(self, neuronsperlayer):
        """Sets up the weights and biases of the network.

        Args:
            neuronsperlayer: Sequence with the number of neurons in each
                layer, input layer first.
        """
        self.noflayers = len(neuronsperlayer)
        #Xavier Initialisation: std = sqrt(2 / (fan_in + fan_out))
        self.w = [
            np.random.normal(0.0, (2.0 / (fan_in + fan_out)) ** 0.5, size=(fan_out, fan_in))
            for fan_in, fan_out in zip(neuronsperlayer[:-1], neuronsperlayer[1:])
        ]
        self.b = [np.zeros((fan_out, 1)) for fan_out in neuronsperlayer[1:]]

        #Caches filled in by forward() and backward()
        self.a = []
        self.z = []
        self.grad_w = []
        self.grad_b = []

    @staticmethod
    def sigmoid(arr):
        """Activation function of a neuron: ``1 / (1 + exp(-arr))``.

        Evaluated in an overflow-safe way: only ``exp(-|arr|)`` is computed,
        which always lies in (0, 1], so large-magnitude inputs never overflow.
        """
        arr = np.asarray(arr, dtype=float)
        exp_neg_abs = np.exp(-np.abs(arr))
        #arr >= 0: 1 / (1 + e^-x);  arr < 0: e^x / (1 + e^x)  (same value, no overflow)
        return np.where(arr >= 0, 1.0 / (1.0 + exp_neg_abs), exp_neg_abs / (1.0 + exp_neg_abs))

    @staticmethod
    def derivative_sigmoid(arr):
        """Derivative of the activation function, ``s(arr) * (1 - s(arr))``."""
        sigmoid_value = NeuralNetwork.sigmoid(arr)
        return sigmoid_value * (1 - sigmoid_value)

    def forward(self, input_layer):
        """Forward pass through the network.

        Caches every layer's pre-activation in ``self.z`` and activation in
        ``self.a`` (``self.a[0]`` is a copy of the input) for use by
        ``backward``.

        Args:
            input_layer: Array of shape ``(n_inputs, n_samples)``.

        Returns:
            The activation of the last layer.
        """
        output_layer = np.copy(input_layer)
        self.a = [output_layer]
        self.z = []

        for w, b in zip(self.w, self.b):
            z = w @ output_layer + b
            self.z.append(z)

            output_layer = NeuralNetwork.sigmoid(z)
            self.a.append(output_layer)

        return output_layer

    @staticmethod
    def cost(output_layer, target):
        """Cost function: half the sum of squared errors."""
        return 0.5 * np.sum((target - output_layer) ** 2)

    def backward(self, target):
        """Backpropagation: gradient of the cost w.r.t. every weight and bias.

        Uses the values cached by the most recent ``forward`` call, so
        ``forward`` must be called first. When the cached input held several
        sample columns, the gradients are summed over those columns, matching
        ``cost`` (which sums over all entries).

        Args:
            target: Desired output, broadcastable against the last activation.

        Returns:
            Tuple ``(grad_w, grad_b)`` of lists ordered like ``self.w`` and
            ``self.b``; each gradient has the shape of its parameter.
        """
        delta = (self.a[-1] - target) * NeuralNetwork.derivative_sigmoid(self.z[-1])
        #Summing over the sample axis keeps grad_b the same shape as b
        self.grad_b = [delta.sum(axis=1, keepdims=True)]
        self.grad_w = [delta @ self.a[-2].T]

        #Walk from the last hidden layer back to the first one
        for i in range(self.noflayers-2, 0, -1):
            delta = (self.w[i].T @ delta) * NeuralNetwork.derivative_sigmoid(self.z[i-1])
            self.grad_b.append(delta.sum(axis=1, keepdims=True))
            self.grad_w.append(delta @ self.a[i-1].T)

        #Gradients were collected output-first; put them in layer order
        self.grad_w.reverse()
        self.grad_b.reverse()

        return self.grad_w, self.grad_b

    def update(self, learning_rate):
        """Gradient-descent step using the gradients cached by ``backward``."""
        for i in range(self.noflayers-1):
            self.w[i] -= learning_rate * self.grad_w[i]
            self.b[i] -= learning_rate * self.grad_b[i]

    def learn(self, training_data, validation_data, epochs, learning_rate, report_every=100):
        """Stochastic gradient descent, one parameter update per sample.

        Args:
            training_data: Sequence of input arrays.
            validation_data: Sequence of target arrays, paired element-wise
                with ``training_data`` (despite the name, these are the
                training targets).
            epochs: Number of passes over the data.
            learning_rate: Step size passed to ``update``.
            report_every: Print the average loss on the first epoch and on
                every ``report_every``-th epoch; a falsy value disables
                printing.

        Raises:
            ValueError: If the data is empty or the two sequences differ in
                length.
        """
        inputs = list(training_data)
        targets = list(validation_data)
        if len(inputs) != len(targets):
            raise ValueError(
                f"training_data has {len(inputs)} samples but validation_data has {len(targets)}"
            )
        if not inputs:
            raise ValueError("training_data must contain at least one sample")

        data_size = len(inputs)
        pairs = list(zip(inputs, targets))

        for epoch in range(1, epochs + 1):
            total_loss = 0.0
            #Shuffle a fresh copy so every epoch starts from the original order
            data = list(pairs)
            np.random.shuffle(data)

            for x, y in data:
                out = self.forward(x)
                total_loss += self.cost(out, y)
                self.backward(y)
                self.update(learning_rate)

            if report_every and (epoch % report_every == 0 or epoch == 1):
                average_loss = total_loss / data_size
                print(f"Epoch {epoch:4d}/{epochs} — Average Loss: {average_loss:.4f}")


In [78]:
#Testing Neural Network Class by Training it to be an AND Gate

#Seeds NumPy's global generator, which the class uses for weight initialisation
#and for shuffling, so that a fresh Restart & Run All gives the same result
np.random.seed(42)

#Data: the four rows of the AND truth table and the expected output of each row
training_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
validation_data = np.array([0, 0, 0, 1])

#Reshapes Data into Vector Shape (one column vector per sample)
training_data = [row.reshape((2,1)) for row in training_data] 
validation_data = [row.reshape((1,1)) for row in validation_data]

#Training Neural Network
nn = NeuralNetwork([2, 2, 1])
epochs = 500
learning_rate = 0.5
nn.learn(training_data, validation_data, epochs, learning_rate)
print()

#Checking that the Neural Network works as an AND Gate
for x, y in zip(training_data, validation_data):
    output_layer = nn.forward(x)
    output_value = output_layer.ravel()[0]          #The single output neuron's activation
    prediction = int(np.round(output_value))        #Thresholds the activation at 0.5
    input_layer = x.ravel().astype(int).tolist()
    target = int(y.ravel()[0])

    print(f"Input: {input_layer}, Output: {output_value:.3f} --> Rounds to: {prediction}, Expected: {target}")

Epoch    1/500 — Average Loss: 0.1058
Epoch  100/500 — Average Loss: 0.0769
Epoch  200/500 — Average Loss: 0.0375
Epoch  300/500 — Average Loss: 0.0134
Epoch  400/500 — Average Loss: 0.0064
Epoch  500/500 — Average Loss: 0.0039

Input: [0, 0], Ouput: 0.010 --> Rounds to: 0, Expected: 0
Input: [0, 1], Ouput: 0.087 --> Rounds to: 0, Expected: 0
Input: [1, 0], Ouput: 0.080 --> Rounds to: 0, Expected: 0
Input: [1, 1], Ouput: 0.872 --> Rounds to: 1, Expected: 1
