# In [21]:
import numpy as np
        
class NeuralNetwork:
    """Fully connected feed-forward network with sigmoid activations.

    Training is per-sample gradient descent on the half sum-of-squares cost.
    A sample is a column vector of shape ``(n, 1)``; inputs and targets with
    fewer than two dimensions are reshaped to columns automatically.

    Attributes
    ----------
    noflayers
        Number of layers, including the input layer.
    w, b
        Per-layer weight matrices ``(npl[i+1], npl[i])`` and bias columns
        ``(npl[i+1], 1)``.
    a, z
        Activations and pre-activations cached by the last ``forward`` call.
    grad_w, grad_b
        Gradients cached by the last ``backward`` call.
    """

    def __init__(self, npl):
        """Create a network with ``npl[i]`` neurons in layer ``i``.

        Weights are drawn from a zero-mean normal distribution with standard
        deviation ``sqrt(2 / (fan_in + fan_out))`` (Glorot-style scaling);
        biases start at zero.
        """
        self.noflayers = len(npl)
        layer_pairs = list(zip(npl[:-1], npl[1:]))
        self.w = [
            np.random.normal(0.0, (2.0 / (n_in + n_out)) ** 0.5, size=(n_out, n_in))
            for n_in, n_out in layer_pairs
        ]
        self.b = [np.zeros((n_out, 1)) for _, n_out in layer_pairs]

    @staticmethod
    def _as_column(arr):
        """Return *arr* as an array, reshaping scalars/1-D vectors to ``(n, 1)``.

        Without this, a 1-D sample combined with the ``(n, 1)`` biases or
        activations would silently broadcast to an ``(n, n)`` matrix.
        """
        arr = np.asarray(arr)
        return arr.reshape(-1, 1) if arr.ndim < 2 else arr

    @staticmethod
    def sigmoid(arr):
        """Return the element-wise logistic function ``1 / (1 + exp(-arr))``.

        Computed as ``exp(-log(1 + exp(-arr)))`` via ``np.logaddexp`` so that
        large negative inputs do not overflow ``exp``.
        """
        return np.exp(-np.logaddexp(0.0, -arr))

    @staticmethod
    def derivative_sigmoid(arr):
        """Return the element-wise derivative of the sigmoid evaluated at *arr*."""
        sigmoid_value = NeuralNetwork.sigmoid(arr)
        return sigmoid_value * (1 - sigmoid_value)

    def forward(self, input_layer):
        """Propagate *input_layer* through the network and return the output.

        The activations (``self.a``, starting with a copy of the input) and
        pre-activations (``self.z``) of every layer are cached for a
        subsequent ``backward`` call.
        """
        activation = np.copy(self._as_column(input_layer))
        self.a = [activation]
        self.z = []

        for w, b in zip(self.w, self.b):
            z = w @ activation + b
            self.z.append(z)

            activation = NeuralNetwork.sigmoid(z)
            self.a.append(activation)

        return activation

    @staticmethod
    def cost(output_layer, target):
        """Return half the sum of squared differences between target and output."""
        output_layer = NeuralNetwork._as_column(output_layer)
        target = NeuralNetwork._as_column(target)
        return 0.5 * np.sum((target - output_layer) ** 2)

    def backward(self, target):
        """Back-propagate the cost gradient for the last ``forward`` call.

        Must be called after ``forward``, whose cached ``self.a`` / ``self.z``
        it reads.  Stores and returns ``(grad_w, grad_b)``, each a list ordered
        like ``self.w`` / ``self.b``.
        """
        target = self._as_column(target)

        # Error signal at the output layer.
        delta = (self.a[-1] - target) * NeuralNetwork.derivative_sigmoid(self.z[-1])
        grad_w = [delta @ self.a[-2].T]
        # Summing over the sample axis keeps the bias gradient a column; for a
        # single sample it is just delta itself.
        grad_b = [delta.sum(axis=1, keepdims=True)]

        # Walk from the last hidden layer back to the first; ``delta`` entering
        # iteration ``layer`` belongs to z[layer] and leaves as that of z[layer-1].
        for layer in range(self.noflayers - 2, 0, -1):
            delta = (self.w[layer].T @ delta) * NeuralNetwork.derivative_sigmoid(self.z[layer - 1])
            grad_w.append(delta @ self.a[layer - 1].T)
            grad_b.append(delta.sum(axis=1, keepdims=True))

        # Gradients were collected output-first; flip to match self.w / self.b.
        self.grad_w = grad_w[::-1]
        self.grad_b = grad_b[::-1]

        return self.grad_w, self.grad_b

    def update(self, learning_rate):
        """Take one gradient-descent step in place using the cached gradients."""
        for w, b, grad_w, grad_b in zip(self.w, self.b, self.grad_w, self.grad_b):
            w -= learning_rate * grad_w
            b -= learning_rate * grad_b

    def learn(self, training_data, validation_data, epochs, learning_rate):
        """Train with per-sample gradient descent for *epochs* passes.

        Parameters
        ----------
        training_data
            Indexable sequence of input samples.
        validation_data
            Indexable sequence of targets; ``validation_data[i]`` is the
            target for ``training_data[i]`` (despite the parameter name, it is
            used as the training targets).
        epochs
            Number of passes over the data.
        learning_rate
            Step size passed to ``update``.

        Raises
        ------
        ValueError
            If the two sequences differ in length or are empty.

        The average loss is printed on the first and every 100th epoch.
        """
        data_size = len(training_data)
        if data_size != len(validation_data):
            raise ValueError(
                "training_data and validation_data must have the same length "
                f"(got {data_size} and {len(validation_data)})"
            )
        if data_size == 0:
            raise ValueError("training_data must not be empty")

        for epoch in range(1, epochs + 1):
            total_loss = 0.0

            # Visit samples in a fresh random order each epoch.  Indexing both
            # sequences with the same permutation keeps every input paired with
            # its own target and leaves the caller's data untouched.
            for idx in np.random.permutation(data_size):
                x = self._as_column(training_data[idx])
                y = self._as_column(validation_data[idx])

                out = self.forward(x)
                total_loss += self.cost(out, y)
                self.backward(y)
                self.update(learning_rate)

            if epoch % 100 == 0 or epoch == 1:
                average_loss = total_loss / data_size
                print(f"Epoch {epoch:4d}/{epochs} — Average Loss: {average_loss:.4f}")
