In [1]:
import sys
import numpy as np
from random import random

# Code from: https://www.youtube.com/watch?v=0oWnheK-gGk and https://www.youtube.com/watch?v=Z97XGNUUx9o

In [2]:
class MLP:
    """A small multilayer perceptron with sigmoid activations.

    The network is trained one sample at a time: a forward pass, a backward
    pass that fills ``derivatives``, and a weight update.

    Attributes:
        num_inputs: Number of input features.
        num_hidden: List with the number of units of each hidden layer.
        num_outputs: Number of output units.
        weights: One ``(layers[i], layers[i+1])`` array per pair of
            consecutive layers, drawn with ``np.random.rand``.
        activations: One vector per layer, overwritten by every forward pass.
        derivatives: One array per weight matrix (same shape), overwritten by
            every backward pass.
    """

    def __init__(self, num_inputs=3, num_hidden=None, num_outputs=2):
        """Build the layer structure and allocate weights and buffers.

        Args:
            num_inputs: Number of input features.
            num_hidden: Sequence with the size of each hidden layer.
                ``None`` (the default) means ``[3, 5]``.
            num_outputs: Number of output units.
        """
        # A `None` sentinel replaces the former mutable `[3, 5]` default,
        # which was shared by every instance created without the argument.
        if num_hidden is None:
            num_hidden = [3, 5]

        self.num_inputs = num_inputs
        # Copy so that neither the caller's sequence nor another instance is
        # aliased by this attribute (also lets callers pass a tuple).
        self.num_hidden = list(num_hidden)
        self.num_outputs = num_outputs

        layers = [self.num_inputs] + self.num_hidden + [self.num_outputs]
        # (fan_in, fan_out) for every pair of consecutive layers.
        layer_pairs = list(zip(layers[:-1], layers[1:]))

        # Random initial weights, one matrix per pair of consecutive layers.
        self.weights = [np.random.rand(n_in, n_out) for n_in, n_out in layer_pairs]

        # One activation vector per layer (input layer included).
        self.activations = [np.zeros(size) for size in layers]

        # One derivative matrix per weight matrix.
        self.derivatives = [np.zeros((n_in, n_out)) for n_in, n_out in layer_pairs]

    def forward_propagate(self, inputs):
        """Run one sample through the network and cache every layer's output.

        Args:
            inputs: 1-D array-like with ``num_inputs`` values.

        Returns:
            The activations of the output layer.
        """
        # Coerce to an array so plain lists work here and in the `.reshape`
        # calls that `back_propagate` performs on the cached activations.
        activations = np.asarray(inputs, dtype=float)
        self.activations[0] = activations

        for i, w in enumerate(self.weights):
            # Calculate net inputs.
            net_inputs = np.dot(activations, w)

            # Calculate the activations.
            activations = self._sigmoid(net_inputs)
            self.activations[i + 1] = activations

        return activations

    def back_propagate(self, error, verbose=False):
        """Propagate ``error`` backwards and fill ``self.derivatives``.

        Uses the activations cached by the latest ``forward_propagate`` call,
        so a forward pass must have been run first.

        Args:
            error: Error at the output layer (``train`` passes
                ``target - output``).
            verbose: When true, print each derivative matrix as it is computed.

        Returns:
            The error propagated all the way back to the input layer.
        """
        for i in reversed(range(len(self.derivatives))):
            layer_output = self.activations[i + 1]
            delta = error * self._sigmoid_derivative(layer_output)

            # Row vector (1, n_out) times column vector (n_in, 1) gives the
            # (n_in, n_out) matrix that matches the shape of weights[i].
            delta_row = delta.reshape(delta.shape[0], -1).T
            layer_input = self.activations[i]
            layer_input_col = layer_input.reshape(layer_input.shape[0], -1)

            self.derivatives[i] = np.dot(layer_input_col, delta_row)

            # Error seen by the previous layer.
            error = np.dot(delta, self.weights[i].T)

            if verbose:
                print(f"Derivatives for W{i}: {self.derivatives[i]}")

        return error

    def gradient_descent(self, learning_rate):
        """Update every weight matrix in place from ``self.derivatives``.

        The update is additive: ``train`` feeds ``target - output`` into
        ``back_propagate``, so the stored derivatives already point in the
        direction that reduces the error.

        Args:
            learning_rate: Step size applied to the derivatives.
        """
        for i in range(len(self.weights)):
            self.weights[i] += self.derivatives[i] * learning_rate

    def train(self, inputs, targets, epochs, learning_rate):
        """Train sample by sample and print the mean error of each epoch.

        Args:
            inputs: Sized sequence of input samples.
            targets: Sequence of targets, paired with ``inputs`` by position.
            epochs: Number of passes over the dataset.
            learning_rate: Step size forwarded to ``gradient_descent``.

        Raises:
            ValueError: If ``inputs`` is empty (the per-epoch mean error would
                otherwise divide by zero).
        """
        if len(inputs) == 0:
            raise ValueError("inputs must contain at least one training sample")

        for epoch in range(epochs):
            sum_error = 0
            for sample, target in zip(inputs, targets):
                # Perform forward prop.
                output = self.forward_propagate(sample)

                # Calculate error.
                error = target - output

                self.back_propagate(error)

                # Apply Gradient Descent
                self.gradient_descent(learning_rate)

                # Measured on the output produced before this sample's update.
                sum_error += self._mse(target, output)

            # Report error.
            print(f"Error {sum_error/len(inputs)} at epoch {epoch}")

    def _mse(self, target, output):
        """Return the mean of the squared differences between target and output."""
        return np.average((target - output)**2)

    def _sigmoid_derivative(self, x):
        """Return ``x * (1 - x)``.

        This is the sigmoid's derivative expressed in terms of the sigmoid's
        output, so ``x`` must already be an activation (as passed by
        ``back_propagate``), not a pre-activation value.
        """
        return x * (1.0 - x)

    def _sigmoid(self, x):
        """Return the logistic sigmoid ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

In [12]:
# Seed NumPy's global generator so the random initial weights drawn inside
# MLP(...) -- and therefore the training log -- repeat on every run.
SEED = 0
np.random.seed(SEED)

# Create an MLP with 2 inputs, one hidden layer of 5 units and 1 output.
mlp = MLP(2, [5], 1)

# Alternative dataset (disabled): train the network on the sum operation.
#inputs = np.array([[random()/2 for _ in range(2)] for _ in range(1000)])
#targets = np.array([[i[0] + i[1]] for i in inputs])

# Active dataset: the truth table of logical AND.
inputs = np.array([[0,0], [0,1], [1,0], [1,1]])
targets = np.array([0,0,0,1])

# Train our model.
mlp.train(inputs, targets, 30, 0.01)
result = mlp.forward_propagate(np.array([1,1]))
print(result)
# `result` is a one-element 1-D array; extract the scalar explicitly because
# calling int() on an array with ndim > 0 is deprecated in recent NumPy.
print(int(np.round(result.item())))

Error 0.4592583894673798 at epoch 0
Error 0.45777400027178955 at epoch 1
Error 0.4562852525265416 at epoch 2
Error 0.4547922537147543 at epoch 3
Error 0.45329511365883834 at epoch 4
Error 0.45179394450277505 at epoch 5
Error 0.4502888606922135 at epoch 6
Error 0.4487799789523597 at epoch 7
Error 0.4472674182636368 at epoch 8
Error 0.4457512998350922 at epoch 9
Error 0.44423174707553476 at epoch 10
Error 0.4427088855623881 at epoch 11
Error 0.4411828430082481 at epoch 12
Error 0.4396537492251373 at epoch 13
Error 0.4381217360864514 at epoch 14
Error 0.4365869374865975 at epoch 15
Error 0.4350494892983291 at epoch 16
Error 0.43350952932778164 at epoch 17
Error 0.43196719726722355 at epoch 18
Error 0.43042263464553454 at epoch 19
Error 0.42887598477643435 at epoch 20
Error 0.4273273927044812 at epoch 21
Error 0.4257770051488714 at epoch 22
Error 0.42422497044506935 at epoch 23
Error 0.422671438484307 at epoch 24
Error 0.4211165606509892 at epoch 25
Error 0.41956048975805554 at epoch 26
Er