In [None]:
import numpy as np  

class MLP(object):
    """A small fully connected multilayer perceptron with sigmoid units.

    Every layer, including the output layer, applies the sigmoid, so network
    outputs always lie strictly between 0 and 1. Training is plain
    per-sample (stochastic) gradient descent on the squared error.

    The forward and backward passes operate on a single 1-D sample at a
    time; `train` iterates over the samples one by one.
    """

    def __init__(self, num_inputs=3, hidden_layers=None, num_outputs=4):
        """Build the network and allocate its per-layer buffers.

        Args:
            num_inputs: Number of input features.
            hidden_layers: Sizes of the hidden layers, in order. Defaults to
                ``[4, 3, 6, 3]`` when omitted.
            num_outputs: Number of output units.
        """
        # A list literal used directly as the default would be one object
        # shared by every instance, so the default is materialised per call.
        if hidden_layers is None:
            hidden_layers = [4, 3, 6, 3]
        else:
            # Copy so later edits to the caller's sequence do not leak in,
            # and so tuples are accepted as well as lists.
            hidden_layers = list(hidden_layers)

        self.num_inputs = num_inputs
        self.hidden_layers = hidden_layers
        self.num_outputs = num_outputs

        layers = [num_inputs] + hidden_layers + [num_outputs]
        # One (fan_in, fan_out) pair per pair of consecutive layers.
        shapes = list(zip(layers[:-1], layers[1:]))

        # Small random connection weights between consecutive layers.
        self.weights = [np.random.randn(rows, cols) * 0.01 for rows, cols in shapes]

        # Gradient buffer for each weight matrix, filled by backward_propagation.
        self.derivatives = [np.zeros(shape) for shape in shapes]

        # Activation buffer for each layer, filled by forward_propagation.
        self.activations = [np.zeros(size) for size in layers]

    def forward_propagation(self, inputs):
        """Run one sample through the network.

        The activations of every layer (the inputs included) are stored in
        ``self.activations`` so that `backward_propagation` can use them.

        Args:
            inputs: 1-D array holding one input sample.

        Returns:
            The activations of the output layer.
        """
        activations = inputs
        # The input layer's "activation" is the raw input itself.
        self.activations[0] = activations

        for i, w in enumerate(self.weights):
            # Weighted sum of the previous layer's activations.
            net_inputs = np.dot(activations, w)
            activations = self._sigmoid(net_inputs)
            # Remember this layer's output for backpropagation.
            self.activations[i + 1] = activations

        return activations

    def backward_propagation(self, error):
        """Propagate an output error backwards and store weight gradients.

        Must be called after `forward_propagation`, because it reads the
        activations saved there. The result is written to
        ``self.derivatives``; nothing is returned.

        Args:
            error: 1-D array, error signal at the output layer (``train``
                passes ``target - output``).
        """
        for i in reversed(range(len(self.derivatives))):
            # Output of the layer that weights[i] feeds into.
            activations = self.activations[i + 1]

            # Scale the error by the sigmoid slope at that output.
            delta = error * self._sigmoid_derivative(activations)

            # View delta as a 1 x n row matrix.
            delta_re = delta.reshape(delta.shape[0], -1).T

            # Input of weights[i], viewed as an m x 1 column matrix.
            current_activations = self.activations[i]
            current_activations = current_activations.reshape(current_activations.shape[0], -1)

            # (m x 1) . (1 x n) gives a gradient shaped like weights[i].
            self.derivatives[i] = np.dot(current_activations, delta_re)

            # Error signal seen by the layer one step closer to the input.
            error = np.dot(delta, self.weights[i].T)

    def train(self, inputs, targets, epochs, learning_rate):
        """Train the network with per-sample gradient descent.

        Prints the mean squared error, averaged over all samples, after every
        epoch.

        Args:
            inputs: Sequence of 1-D input samples.
            targets: Sequence of 1-D target arrays, aligned with ``inputs``.
            epochs: Number of passes over the whole training set.
            learning_rate: Step size handed to `gradient_descent`.
        """
        for i in range(epochs):
            sum_errors = 0

            # Iterate through all the training data.
            for j, sample in enumerate(inputs):
                target = targets[j]

                output = self.forward_propagation(sample)
                error = target - output
                self.backward_propagation(error)

                # Update the weights from the freshly computed derivatives.
                self.gradient_descent(learning_rate)

                # Keep track of the MSE for reporting later.
                sum_errors += self._mse(target, output)

            # Average over the data actually passed in, not a module global.
            print("Error: {} at epoch {}".format(sum_errors / len(inputs), i + 1))

        print("Training complete!")
        print("=====")

    def gradient_descent(self, learning_rate=1):
        """Apply one weight update in place using the stored derivatives.

        ``train`` computes the error as ``target - output``, so the stored
        derivatives already point in the direction that reduces the squared
        error; that is why they are added rather than subtracted.

        Args:
            learning_rate: Multiplier applied to each derivative.
        """
        for weights, derivatives in zip(self.weights, self.derivatives):
            # In-place add mutates the array held in self.weights.
            weights += derivatives * learning_rate

    def _sigmoid(self, z):
        """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise."""
        return 1 / (1 + np.exp(-z))

    def sigmoid(self, z):
        """Public alias of `_sigmoid`, kept for existing callers."""
        return self._sigmoid(z)

    def _sigmoid_derivative(self, x):
        """Return the sigmoid slope, given the sigmoid's *output* ``x``."""
        return x * (1.0 - x)

    def _mse(self, target, output):
        """Return the mean squared difference between target and output."""
        return np.average((target - output) ** 2)

if __name__ == "__main__":

    # Toy dataset for learning addition: 1000 pairs of operands drawn from
    # [0, 0.5), so every target sum stays below 1 and is reachable by the
    # network's sigmoid output.
    items = np.random.random((1000, 2)) / 2
    # Column vector of row sums: one single-element target per sample.
    targets = items.sum(axis=1, keepdims=True)

    # create a Multilayer Perceptron with one hidden layer of 5 units
    mlp = MLP(2, [5], 1)

    # train network: 50 epochs, learning rate 0.1
    mlp.train(items, targets, 50, 0.1)

    # Unseen sample to query the trained network with (expected sum: 0.4).
    # Named `sample` rather than `input` so the builtin is not shadowed.
    sample = np.array([0.3, 0.1])

    # get a prediction
    output = mlp.forward_propagation(sample)

    print()
    print("Our network believes that {} + {} is equal to {}".format(sample[0], sample[1], output[0]))

Error: 0.04064519861341145 at epoch 1
Error: 0.040704429839658524 at epoch 2
Error: 0.04070370818589449 at epoch 3
Error: 0.04070284808519036 at epoch 4
Error: 0.04070178905728237 at epoch 5
Error: 0.04070041406349232 at epoch 6
Error: 0.04069856554503865 at epoch 7
Error: 0.040696025200426006 at epoch 8
Error: 0.04069248600651488 at epoch 9
Error: 0.04068751296482714 at epoch 10
Error: 0.040680487663764434 at epoch 11
Error: 0.04067052981054716 at epoch 12
Error: 0.04065638621735752 at epoch 13
Error: 0.040636274132903515 at epoch 14
Error: 0.04060766113691006 at epoch 15
Error: 0.040566958070604675 at epoch 16
Error: 0.04050909512963884 at epoch 17
Error: 0.0404269458267893 at epoch 18
Error: 0.04031056278346121 at epoch 19
Error: 0.04014620119802263 at epoch 20
Error: 0.03991514557035348 at epoch 21
Error: 0.03959244846250913 at epoch 22
Error: 0.039145872168651605 at epoch 23
Error: 0.038535624816065935 at epoch 24
Error: 0.03771587380949776 at epoch 25
Error: 0.036639310349947454 