[Reference](https://medium.com/@thisislong/building-a-neural-network-from-scratch-with-backpropagation-a789bec70b29)

# Building A Neural Network Without Using External Libraries

In [1]:
from math import exp

def sigmoid(x: float) -> float:
    return 1.0 / (1.0 + exp(-x))


def sigmoid_derivative(z: float) -> float:
    return z * (1.0 - z)

In [2]:
from dataclasses import dataclass
from typing import List, Optional

@dataclass
class Neuron:
    weights: List[float]
    bias: float
    delta: Optional[float] = 0.0
    output: Optional[float] = 0.0

    def _set_output(self, output: float) -> None:
        self.output = output

    def set_delta(self, error: float) -> None:
        self.delta = error * sigmoid_derivative(self.output)

    def weighted_sum(self, inputs: List[float]) -> float:
        """
        Usually results in a big number, but we tend to use a value [0, 1] for activation
        Hence, after calculating this, we use the sigmoid function to normalize the result
        """
        ws = self.bias
        for i in range(len(self.weights)):
            ws += self.weights[i] * inputs[i]
        return ws

    def activate(self, inputs: List[float]) -> float:
        """
        Calculates the output of the neuron using a non-linear activation function
        In this case we use the sigmoid function
        """
        output = sigmoid(self.weighted_sum(inputs))
        self._set_output(output)
        return output


In [3]:
from dataclasses import dataclass
from typing import List, Optional

@dataclass
class Layer:
    neurons: List[Neuron]

    @property
    def all_outputs(self) -> List[float]:
        return [neuron.output for neuron in self.neurons]

    def activate_neurons(self, inputs: List[float]) -> List[float]:
        return [neuron.activate(inputs) for neuron in self.neurons]

    def total_delta(self, previous_layer_neuron_idx: int) -> float:
        return sum(
            neuron.weights[previous_layer_neuron_idx] * neuron.delta
            for neuron in self.neurons
        )

In [7]:
from dataclasses import dataclass
from typing import List, Optional
import random

@dataclass
class Network:
    hidden_layers: List[Layer]
    output_layer: Layer
    learning_rate: float

    @property
    def layers(self) -> List[Layer]:
        return self.hidden_layers + [self.output_layer]

    def feed_forward(self, inputs: List[float]) -> List[float]:
        for layer in self.hidden_layers:
            # update inputs as outputs of previous layers as we go
            inputs = layer.activate_neurons(inputs)
        return self.output_layer.activate_neurons(inputs)

    def derivative_error_to_output(
        self, actual: List[float], expected: List[float]
    ) -> List[float]:
        """
        Derivative of error function with respect to the output
        """
        return [actual[i] - expected[i] for i in range(len(actual))]

    def back_propagate(self, inputs: List[float], errors: List[float]) -> None:
        """
        Compute the gradient and then update the weights
        """

        # Delta of output layer = derivative of the error functions times the derivative of output layer activation function
        # We calculate deltas of output layer first
        # So when we get to hidden layers, the output deltas are ready to be used in calculation (work backwards)
        for index, neuron in enumerate(self.output_layer.neurons):
            neuron.set_delta(errors[index])

        # Calculate deltas of hidden layer
        for layer_idx in reversed(range(len(self.hidden_layers))):
            layer = self.hidden_layers[layer_idx]
            next_layer = (
                self.output_layer
                if layer_idx == len(self.hidden_layers) - 1
                else self.hidden_layers[layer_idx + 1]
            )
            for neuron_idx, neuron in enumerate(layer.neurons):
                error_from_next_layer = next_layer.total_delta(neuron_idx)
                neuron.set_delta(error_from_next_layer)

        # Only update the weights after you've calculated the deltas
        # If you update the weights as you move through the network, it will affect the deltas of other layers
        self.update_weights_for_all_layers(inputs)

    def update_weights_for_all_layers(self, inputs: List[float]):
        """
        Update weights for all layers
        """
        # Update weights for hidden layers
        for layer_idx in range(len(self.hidden_layers)):
            layer = self.hidden_layers[layer_idx]
            previous_layer_outputs: List[float] = (
                inputs
                if layer_idx == 0
                else self.hidden_layers[layer_idx - 1].all_outputs
            )
            for neuron in layer.neurons:
                self.update_weights_in_a_layer(previous_layer_outputs, neuron)

        # Update weights for output layer
        for index, neuron in enumerate(self.output_layer.neurons):
            self.update_weights_in_a_layer(self.hidden_layers[-1].all_outputs, neuron)

    def update_weights_in_a_layer(
        self, previous_layer_outputs: List[float], neuron: Neuron
    ) -> None:
        """
        Update weights in all neurons in a layer
        """
        for idx in range(len(previous_layer_outputs)):
            neuron.weights[idx] -= (
                self.learning_rate * neuron.delta * previous_layer_outputs[idx]
            )
            neuron.bias -= self.learning_rate * neuron.delta

    def train(
        self,
        num_epoch: int,
        num_outputs: int,
        training_set: List[List[float]],
        training_output: List[float],
    ) -> None:
        for epoch in range(num_epoch):
            sum_error = 0.0
            for idx, row in enumerate(training_set):
                expected = [0 for _ in range(num_outputs)]
                expected[training_output[idx]] = 1  # one-hot encoding
                actual = self.feed_forward(row)
                errors = self.derivative_error_to_output(actual, expected)
                self.back_propagate(row, errors)
                sum_error += self.mse(actual, training_output)
            print(f"Mean squared error: {sum_error}")
            print(f"epoch={epoch}")

    def predict(self, inputs: List[float]) -> int:
        outputs = self.feed_forward(inputs)
        return outputs.index(max(outputs))

    def mse(self, actual: List[float], expected: List[float]) -> float:
        """
        Mean Squared Error formula
        """
        return sum((actual[i] - expected[i]) ** 2 for i in range(len(actual))) / len(
            actual
        )

In [9]:
def test_make_prediction_with_network():
    # Test making predictions with the network
    # Mock data is from https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/
    dataset = [
        [2.7810836, 2.550537003],
        [1.465489372, 2.362125076],
        [3.396561688, 4.400293529],
        [1.38807019, 1.850220317],
        [3.06407232, 3.005305973],
        [7.627531214, 2.759262235],
        [5.332441248, 2.088626775],
        [6.922596716, 1.77106367],
        [8.675418651, -0.242068655],
        [7.673756466, 3.508563011],
    ]
    expected = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
    n_inputs = len(dataset[0])
    n_outputs = len(set(expected))
    hidden_layers = [
        Layer(
            neurons=[
                Neuron(weights=[random.random() for _ in range(n_inputs)], bias=random.random()),
                Neuron(weights=[random.random() for _ in range(n_inputs)], bias=random.random()),
            ],
        )
    ]
    output_layer = Layer(
        neurons=[
            Neuron(weights=[random.random() for _ in range(n_outputs)], bias=random.random()),
            Neuron(weights=[random.random() for _ in range(n_outputs)], bias=random.random()),
        ],
    )
    network = Network(
        hidden_layers=hidden_layers, output_layer=output_layer, learning_rate=0.5
    )
    network.train(40, n_outputs, dataset, expected)
    print(f"Hidden layer: {network.layers[0].neurons}")
    print(f"Output layer: {network.layers[1].neurons}")

    # This is just for demonstration only
    for i in range(len(dataset)):
        prediction = network.predict(dataset[i])
        print("Expected=%d, Got=%d" % (expected[i], prediction))


if __name__ == "__main__":
    test_make_prediction_with_network()

Mean squared error: 4.246857416591776
epoch=0
Mean squared error: 3.157097886930553
epoch=1
Mean squared error: 2.776520268796843
epoch=2
Mean squared error: 2.6494176465949275
epoch=3
Mean squared error: 2.6063303771640127
epoch=4
Mean squared error: 2.591541010399624
epoch=5
Mean squared error: 2.586370107079758
epoch=6
Mean squared error: 2.584464752967969
epoch=7
Mean squared error: 2.583648578883447
epoch=8
Mean squared error: 2.583168598425941
epoch=9
Mean squared error: 2.582755079784383
epoch=10
Mean squared error: 2.58229775935919
epoch=11
Mean squared error: 2.581731080819543
epoch=12
Mean squared error: 2.5809875148105283
epoch=13
Mean squared error: 2.579966734358143
epoch=14
Mean squared error: 2.578493041250705
epoch=15
Mean squared error: 2.576226357294675
epoch=16
Mean squared error: 2.572451023802584
epoch=17
Mean squared error: 2.5656427016275236
epoch=18
Mean squared error: 2.5539378778035595
epoch=19
Mean squared error: 2.5436556542262614
epoch=20
Mean squared error

# Building A Neural Network With Numpy

In [10]:
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional, List

import numpy as np


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_derivative(z: float) -> float:
    return z * (1.0 - z)


@dataclass
class Layer:
    weights: np.array
    bias: np.array
    outputs: np.array
    deltas: np.array


@dataclass
class Network:
    layers: List[Layer]
    learning_rate: Optional[int] = 0.5

    @property
    def length(self) -> int:
        return len(self.layers)

    @property
    def outputs(self) -> np.array:
        return self.layers[-1].outputs

    @staticmethod
    def create(
        layers: List[int],
    ) -> Network:
        """
        Create a network with random weights and biases given a list of layers
        The "layers" is a list of the number of neurons in each layer
        """
        layers = [
            Layer(
                # layers[i] is the number of neurons in layer i (row), layers[i - 1] is the number of weights, matching with number of neurons in layer i - 1 (column)
                weights=np.random.rand(layers[i], layers[i - 1]),
                bias=np.random.rand(layers[i]),
                outputs=np.zeros(layers[i]),
                deltas=np.zeros(layers[i]),
            )
            for i in range(1, len(layers))
        ]

        return Network(layers=layers)

    def feed_forward(self, inputs: np.array) -> np.array:
        for layer in self.layers:
            # layer.outputs is a (3,1) - dimension we expect
            # layer.weights is a (3,2), inputs is a (2,1) - multiply to get (3,1)
            layer.outputs = sigmoid(layer.weights @ inputs + layer.bias)  # == np.matmul, https://stackoverflow.com/a/34142617
            inputs = layer.outputs
        return self.layers[-1].outputs

    def back_propagate(self, inputs: np.array, expected: np.array) -> None:
        for idx in reversed(range(self.length)):
            layer = self.layers[idx]
            if idx == len(self.layers) - 1:  # if last layer (output layer)
                layer.deltas = (layer.outputs - expected) * sigmoid_derivative(
                    layer.outputs
                )
            else:
                next_layer = self.layers[idx + 1]
                # layer.deltas is a (3,1) - the dimension we expect
                # next_layer.weights is a (2,3), next_layer.deltas is a (2,1)
                # need to transpose next_layer.weights to get (3,2) then multiply by next_layer.deltas (2,1) to get (3,1)
                layer.deltas = (
                    next_layer.weights.T @ next_layer.deltas
                    * sigmoid_derivative(layer.outputs)
                ) * sigmoid_derivative(layer.outputs)

        self.update_weights(inputs)

    def update_weights(self, inputs: np.array) -> None:
        for idx in range(self.length):
            layer = self.layers[idx]
            previous_layer_outputs = self.layers[idx - 1].outputs if idx > 0 else inputs
            # deltas (3,) -> deltas[np.newaxis] (1, 3) -> .T (3, 1)
            # previous_layer_outputs (2,) -> previous_layer_outputs[np.newaxis] (1, 2)
            # (3,1) @ (1,2) = (3,2) for weights
            layer.weights -= (
                layer.deltas[np.newaxis].T
                @ previous_layer_outputs[np.newaxis]
                * self.learning_rate
            )
            layer.bias -= layer.deltas * self.learning_rate

    def train(self, inputs: np.array, expected: np.array, epochs: int) -> None:
        for epoch in range(epochs):
            sum_error = 0.0
            for idx, row in enumerate(inputs):
                actual = self.feed_forward(row)
                self.back_propagate(row, expected[idx])
                sum_error += self.mse(actual, expected[idx])
            print(f"Mean squared error: {sum_error}")
            print(f"epoch={epoch}")

    def mse(self, actual: np.array, expected: np.array) -> float:
        return np.power(actual - expected, 2).mean()

    def predict(self, inputs: np.array) -> int:
        outputs = self.feed_forward(inputs)
        return np.where(outputs == outputs.max())[0][0]

In [11]:
def test_make_prediction_with_network():
    # Test making predictions with the network
    # Mock data is from https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/
    dataset = np.array(
        [
            [2.7810836, 2.550537003],
            [1.465489372, 2.362125076],
            [3.396561688, 4.400293529],
            [1.38807019, 1.850220317],
            [3.06407232, 3.005305973],
            [7.627531214, 2.759262235],
            [5.332441248, 2.088626775],
            [6.922596716, 1.77106367],
            [8.675418651, -0.242068655],
            [7.673756466, 3.508563011],
        ]
    )
    expected = np.array(
        [
            [1, 0],
            [1, 0],
            [1, 0],
            [1, 0],
            [1, 0],
            [0, 1],
            [0, 1],
            [0, 1],
            [0, 1],
            [0, 1],
        ]
    )
    # 2 input neurons, 3 hidden neurons, 2 output neurons
    network = Network.create([len(dataset[0]), 3, len(expected[0])])
    network.train(dataset, expected, 40)
    for i in range(len(dataset)):
        prediction = network.predict(dataset[i])
        print(
            f"{i} - Expected={np.where(expected[i] == expected[i].max())[0][0]}, Got={prediction}"
        )


if __name__ == "__main__":
    test_make_prediction_with_network()

Mean squared error: 3.578942020721095
epoch=0
Mean squared error: 3.0158318584631347
epoch=1
Mean squared error: 2.6781737181859366
epoch=2
Mean squared error: 2.6823987470746813
epoch=3
Mean squared error: 2.7161663842292865
epoch=4
Mean squared error: 2.7306571688562817
epoch=5
Mean squared error: 2.7357596877382626
epoch=6
Mean squared error: 2.737360976486021
epoch=7
Mean squared error: 2.737724005938362
epoch=8
Mean squared error: 2.737654816827882
epoch=9
Mean squared error: 2.7374345275320864
epoch=10
Mean squared error: 2.737160491228439
epoch=11
Mean squared error: 2.7368663096174157
epoch=12
Mean squared error: 2.736563535492673
epoch=13
Mean squared error: 2.7362561034099953
epoch=14
Mean squared error: 2.7359453157234777
epoch=15
Mean squared error: 2.7356315642600824
epoch=16
Mean squared error: 2.7353149245792587
epoch=17
Mean squared error: 2.734995360985862
epoch=18
Mean squared error: 2.734672797176156
epoch=19
Mean squared error: 2.7343471405034716
epoch=20
Mean squar

# Building A Neural Network With Tensorflow

In [12]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


def build_model() -> tf.keras.Sequential:
    model = tf.keras.Sequential(
        [
            layers.Dense(units=3, activation="sigmoid", input_shape=(2,)),
            layers.Dense(units=2),
        ]
    )
    model.summary()
    loss_fn = keras.losses.MeanSquaredError()
    model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"])
    return model


if __name__ == "__main__":
    dataset = np.array(
        [
            [2.7810836, 2.550537003],
            [1.465489372, 2.362125076],
            [3.396561688, 4.400293529],
            [1.38807019, 1.850220317],
            [3.06407232, 3.005305973],
            [7.627531214, 2.759262235],
            [5.332441248, 2.088626775],
            [6.922596716, 1.77106367],
            [8.675418651, -0.242068655],
            [7.673756466, 3.508563011],
        ]
    )
    expected = np.array(
        [
            [1, 0],
            [1, 0],
            [1, 0],
            [1, 0],
            [1, 0],
            [0, 1],
            [0, 1],
            [0, 1],
            [0, 1],
            [0, 1],
        ]
    )
    model = build_model()
    # Convert the data to TensorFlow format
    dataset_tf = tf.constant(dataset, dtype=tf.float32)
    expected_tf = tf.constant(expected, dtype=tf.float32)

    # Train the model
    model.fit(dataset_tf, expected_tf, epochs=200)
    predictions = model.predict(dataset_tf)
    print(predictions)

    # Convert continuous predictions to class labels (0 or 1)
    class_predictions = np.argmax(predictions, axis=1)
    print(class_predictions)

    # Print the comparison
    for i, (expected_row, prediction) in enumerate(zip(expected, class_predictions)):
        print(f"{i} - Expected={expected_row.argmax()}, Got={prediction}")

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 3)                 9         
                                                                 
 dense_1 (Dense)             (None, 2)                 8         
                                                                 
Total params: 17 (68.00 Byte)
Trainable params: 17 (68.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/20