In [12]:
import numpy as np
import random

The main neural network class: a small fully connected feed-forward network, trained one sample at a time with gradient descent.


In [13]:
class neural_net:
    def __init__(self, layers: list[int], activation: str, loss: str, lr: float = 0.1):
        self.layers = layers
        self.lr = lr
        self.hidden_activation = activation
        self.loss_name = loss

        self.act_fn, self.act_deriv = self._get_activation_fn(activation)
        self.loss_fn, self.loss_derivative = self._get_loss_fn(loss)

        self.weights = np.array([
            [[random.uniform(-1, 1) for _ in range(layers[i + 1])] for _ in range(layers[i])]
            for i in range(len(layers) - 1)
        ], dtype=object)
        self.biases = np.array([
            [[random.uniform(-1, 1) for _ in range(layers[i + 1])] for _ in range(layers[i])]
            for i in range(len(layers) - 1)
        ], dtype=object)

    def _get_activation_fn(self, name):
        match name:
            case "sigmoid":
                fn = lambda x: 1 / (1 + np.exp(-x))
                dfn = lambda x: fn(x) * (1 - fn(x))
            case "relu":
                fn = lambda x: np.maximum(0, x)
                dfn = lambda x: np.where(x > 0, 1, 0)
            case "tanh":
                fn = lambda x: np.tanh(x)
                dfn = lambda x: 1 - np.tanh(x) ** 2
            case _:
                raise ValueError("Unsupported activation")
        return fn, dfn

    def _get_loss_fn(self, name):
        match name:
            case "mse":
                return (
                    lambda y_pred, y_true: np.mean((y_pred - y_true) ** 2),
                    lambda y_pred, y_true: 2 * (y_pred - y_true) / len(y_true)
                )
            case _:
                raise ValueError("Unsupported loss")

    def _forward(self, x):
        self.a_values = [x]
        self.z_values = []

        a = x
        for l in range(len(self.layers) - 1):
            z = np.zeros(self.layers[l + 1])
            for i in range(self.layers[l]):
                w = np.array(self.weights[l][i], dtype=float)
                b = np.array(self.biases[l][i], dtype=float)
                z += a[i] * w + b
            self.z_values.append(z)
            a = self.act_fn(z)
            self.a_values.append(a)
        return a

    def _backward(self, y_true):
        y_true = np.array(y_true)
        deltas = [None] * (len(self.layers) - 1)

        error = self.loss_derivative(self.a_values[-1], y_true)
        deltas[-1] = error * self.act_deriv(self.z_values[-1])

        for l in reversed(range(len(self.layers) - 2)):
            delta = np.zeros(self.layers[l + 1])
            for i in range(self.layers[l + 1]):
                for j in range(self.layers[l + 2]):
                    delta[i] += deltas[l + 1][j] * self.weights[l + 1][i][j]
            deltas[l] = delta * self.act_deriv(self.z_values[l])

        for l in range(len(self.layers) - 1):
            for i in range(self.layers[l]):
                for j in range(self.layers[l + 1]):
                    grad = deltas[l][j] * self.a_values[l][i]
                    self.weights[l][i][j] -= self.lr * grad
                    self.biases[l][i][j] -= self.lr * deltas[l][j]

    def fit(self, x_data, y_data, epochs=1000, verbose=True):
        for e in range(epochs):
            total_loss = 0
            for x, y in zip(x_data, y_data):
                pred = self._forward(x)
                total_loss += self.loss_fn(pred, y)
                self._backward(y)
            if verbose and (e + 1) % 100 == 0:
                print(f"Epoch {e+1} – Loss: {total_loss / len(x_data):.6f}")

    def predict(self, x):
        return self._forward(x)

    def evaluate(self, x_data, y_data):
        correct = 0
        for x, y in zip(x_data, y_data):
            pred = self.predict(x)
            pred_class = np.round(pred)
            if np.array_equal(pred_class, y):
                correct += 1
        acc = correct / len(x_data) * 100
        print(f"Accuracy: {acc:.2f}%")

Training on the binary XOR problem

In [15]:
# neural_net draws its initial parameters from the `random` module, so seed it
# to make the training run (and the printed losses) reproducible on re-run.
SEED = 42
random.seed(SEED)

# XOR truth table: inputs and their expected single-value outputs.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [0]])

# 2 inputs -> 4 hidden units -> 1 output.
nn = neural_net([2, 4, 1], activation="tanh", loss="mse", lr=0.1)

print("Training on XOR problem...")
nn.fit(X, Y, epochs=2000, verbose=True)

print("\nEvaluation:")
nn.evaluate(X, Y)

print("\nPredictions:")
for x in X:
    print(f"{x} => {nn.predict(x)}")

Training on XOR problem...
Epoch 100 – Loss: 0.060965
Epoch 200 – Loss: 0.012182
Epoch 300 – Loss: 0.004213
Epoch 400 – Loss: 0.002625
Epoch 500 – Loss: 0.002181
Epoch 600 – Loss: 0.001596
Epoch 700 – Loss: 0.001277
Epoch 800 – Loss: 0.001007
Epoch 900 – Loss: 0.001133
Epoch 1000 – Loss: 0.000800
Epoch 1100 – Loss: 0.000885
Epoch 1200 – Loss: 0.000740
Epoch 1300 – Loss: 0.000601
Epoch 1400 – Loss: 0.000536
Epoch 1500 – Loss: 0.000516
Epoch 1600 – Loss: 0.000532
Epoch 1700 – Loss: 0.000517
Epoch 1800 – Loss: 0.000412
Epoch 1900 – Loss: 0.000381
Epoch 2000 – Loss: 0.000445

Evaluation:
Accuracy: 100.00%

Predictions:
[0 0] => [-0.02347271]
[0 1] => [0.98478386]
[1 0] => [0.98848227]
[1 1] => [0.00555959]
