In [7]:
# Import necessary libraries
import random
import numpy as np

# Define the neural network class
class Network:
    """Fully connected feedforward network with sigmoid activations.

    The network is trained by mini-batch stochastic gradient descent on the
    quadratic cost, with gradients obtained by backpropagation.

    Attributes are plain Python lists so they can be inspected directly:
        num_layers: Number of layers, including the input layer.
        sizes: The layer widths passed to the constructor.
        biases: One ``(n, 1)`` column vector per non-input layer.
        weights: One ``(n_out, n_in)`` matrix per pair of adjacent layers.
    """

    def __init__(self, sizes):
        """Create a network with randomly initialized parameters.

        Args:
            sizes: Sequence of layer widths, input layer first. For example,
                ``[2, 3, 1]`` gives a three-layer network with 2 input
                neurons, 3 hidden neurons and 1 output neuron.

        Biases are drawn from a standard Gaussian; the input layer has none.
        Weights are drawn from a standard Gaussian and divided by the square
        root of the number of inputs feeding the layer.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # Biases are drawn before weights; keep this order so that a seeded
        # NumPy RNG reproduces the same initial parameters.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [
            np.random.randn(y, x) / np.sqrt(x)
            for x, y in zip(sizes[:-1], sizes[1:])
        ]

    def feedforward(self, a):
        """Return the output of the network when ``a`` is the input.

        Args:
            a: Input activations as a column vector of shape ``(sizes[0], 1)``.

        Returns:
            The activations of the final layer.
        """
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """Train the network using mini-batch stochastic gradient descent.

        Progress is printed after every epoch: the evaluation score when
        ``test_data`` is supplied, otherwise a completion notice, followed by
        the cost over the training data.

        Args:
            training_data: Iterable of ``(x, y)`` pairs of input and target
                column vectors. It is copied, so the caller's object is never
                reordered and one-shot iterables such as ``zip`` are accepted.
            epochs: Number of passes over the training data.
            mini_batch_size: Number of samples per gradient step (>= 1).
            eta: Learning rate.
            test_data: Optional iterable of ``(x, y)`` pairs handed to
                ``evaluate`` after each epoch.

        Raises:
            ValueError: If ``mini_batch_size`` is less than 1.
        """
        if mini_batch_size < 1:
            # A negative step would produce an empty range and silently skip
            # all training; a zero step already raised ValueError in range().
            raise ValueError(
                f"mini_batch_size must be at least 1, got {mini_batch_size}"
            )

        # Shuffle a private copy rather than the caller's list.
        training_data = list(training_data)
        if test_data is not None:
            test_data = list(test_data)

        n = len(training_data)
        n_test = len(test_data) if test_data else 0
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k + mini_batch_size]
                for k in range(0, n, mini_batch_size)
            ]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print(f"Epoch {j}: {self.evaluate(test_data)} / {n_test}")
            else:
                print(f"Epoch {j} complete")
            # Monitor cost function
            cost = self.calculate_cost(training_data)
            print(f"Cost after epoch {j}: {cost}")

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step computed from ``mini_batch``.

        The per-sample gradients from ``backprop`` are summed and the
        parameters are moved by ``eta / len(mini_batch)`` times that sum,
        i.e. by the learning rate times the batch-averaged gradient.

        Args:
            mini_batch: Non-empty list of ``(x, y)`` pairs.
            eta: Learning rate.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        step = eta / len(mini_batch)
        self.weights = [w - step * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - step * nb for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return the gradient of the cost for a single sample.

        Args:
            x: Input column vector.
            y: Target output column vector.

        Returns:
            A tuple ``(nabla_b, nabla_w)`` of layer-by-layer lists of arrays
            with the same shapes as ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass, remembering every weighted input z and activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Error at the output layer.
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # Walk backwards; ``layer`` counts from the end, so layer=2 is the
        # second-to-last layer and negative indexing selects its arrays.
        for layer in range(2, self.num_layers):
            sp = sigmoid_prime(zs[-layer])
            delta = np.dot(self.weights[-layer + 1].transpose(), delta) * sp
            nabla_b[-layer] = delta
            nabla_w[-layer] = np.dot(delta, activations[-layer - 1].transpose())
        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Return how many test inputs the network classifies correctly.

        The prediction is the index of the largest output activation, which
        is compared with ``y``; ``y`` is therefore expected to be a class
        index rather than a target vector. For a network with a single
        output neuron the predicted index is always 0.
        """
        test_results = [(np.argmax(self.feedforward(x)), y) for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def calculate_cost(self, data):
        """Return the total quadratic cost over ``data``.

        Each sample contributes ``0.5 * ||a - y||**2`` where ``a`` is the
        network output for ``x``. The contributions are summed, not averaged,
        so the value grows with the number of samples.
        """
        cost = 0.0
        for x, y in data:
            a = self.feedforward(x)
            cost += np.linalg.norm(a - y)**2 / 2.0
        return cost

    def cost_derivative(self, output_activations, y):
        """Return the partial derivatives of the quadratic cost with respect
        to the output activations, i.e. ``output_activations - y``."""
        return (output_activations - y)

# Sigmoid and its derivative
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, applied elementwise to ``z``."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

def sigmoid_prime(z):
    """Derivative of the sigmoid at ``z``: ``sigmoid(z) * (1 - sigmoid(z))``."""
    activation = sigmoid(z)
    return activation * (1 - activation)

In [11]:
# XOR problem data: each sample is an (input column vector, target) pair
training_data = [
    (np.array([[0], [0]]), np.array([[0]])),
    (np.array([[0], [1]]), np.array([[1]])),
    (np.array([[1], [0]]), np.array([[1]])),
    (np.array([[1], [1]]), np.array([[0]]))
]

# Seed both RNGs so the run is repeatable: NumPy draws the initial weights and
# biases, and the `random` module shuffles the samples during training.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Initialize the network: input size = 2, hidden layer size = 3, output size = 1
net = Network([2, 3, 1])

# Train the network with 150 epochs, mini-batch size of 4, and learning rate (eta) of 1.0.
# A copy is passed so that training can reorder samples without disturbing
# `training_data`, which is reported in its declared order below.
net.SGD(list(training_data), epochs=150, mini_batch_size=4, eta=1.0)

# Test the network after training
for x, y in training_data:
    predicted = net.feedforward(x)
    print(f"Input: {x.T} -> Predicted: {predicted}, Actual: {y}")

Epoch 0 complete
Cost after epoch 0: 0.7524949484489346
Epoch 1 complete
Cost after epoch 1: 0.7372818364935586
Epoch 2 complete
Cost after epoch 2: 0.721292774659092
Epoch 3 complete
Cost after epoch 3: 0.7046430875942592
Epoch 4 complete
Cost after epoch 4: 0.687496419588183
Epoch 5 complete
Cost after epoch 5: 0.6700652546476946
Epoch 6 complete
Cost after epoch 6: 0.6526055751859434
Epoch 7 complete
Cost after epoch 7: 0.6354045218052948
Epoch 8 complete
Cost after epoch 8: 0.6187612710980146
Epoch 9 complete
Cost after epoch 9: 0.6029631887949076
Epoch 10 complete
Cost after epoch 10: 0.58826104852317
Epoch 11 complete
Cost after epoch 11: 0.5748479536224251
Epoch 12 complete
Cost after epoch 12: 0.5628460170288541
Epoch 13 complete
Cost after epoch 13: 0.5523029262846423
Epoch 14 complete
Cost after epoch 14: 0.5431979951421683
Epoch 15 complete
Cost after epoch 15: 0.535455183757475
Epoch 16 complete
Cost after epoch 16: 0.5289595553907965
Epoch 17 complete
Cost after epoch 17: 