# 1 Importing Necessary Libraries
Before we begin, we import NumPy, which provides the matrix operations and mathematical functions used throughout this notebook.

In [134]:
import numpy as np

# 2 Defining the Layers of the Neural Network


## 2.1 Base Layer Class
Every layer in our neural network will have a forward and backward propagation method. The base layer serves as a template for these methods.

In [135]:
# Base class
class Layer:
    """Template that every network layer follows.

    A concrete layer overrides both propagation methods; the versions
    defined here only raise ``NotImplementedError``.
    """

    def __init__(self):
        # Placeholders for the most recent input and output of the layer.
        self.input = self.output = None

    def forward_propagation(self, input):
        """Compute the output Y of the layer for a given input X."""
        raise NotImplementedError

    def backward_propagation(self, output_error, learning_rate):
        """Compute dE/dX for a given dE/dY, updating parameters if any."""
        raise NotImplementedError

## 2.2 Fully Connected Layer
A fully connected layer connects every neuron from the previous layer to every neuron in the current layer. It's the most basic type of layer in a neural network.

In [137]:
# inherit from base class Layer
class FCLayer(Layer):
    """Fully connected layer computing ``Y = X . W + B``.

    ``weights`` has shape ``(input_size, output_size)`` and ``bias`` has
    shape ``(1, output_size)``; both are initialised from
    ``np.random.rand(...) - 0.5``.
    """

    def __init__(self, input_size, output_size):
        """Create the layer.

        input_size: number of input neurons.
        output_size: number of output neurons.
        """
        # Initialise the input/output placeholders defined by the base class.
        super().__init__()
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        """Return the layer output for ``input_data`` and cache both."""
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Update weights and bias from ``output_error`` (dE/dY).

        Returns ``input_error`` (dE/dX), computed with the weights as they
        were before this update.
        """
        # dE/dX must use the pre-update weights, so compute it first.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # dE/dB is the error summed over the sample axis.  For a single-row
        # error this equals the error itself; for several rows it collapses
        # them to the bias shape instead of failing on the in-place update.
        bias_error = np.sum(np.atleast_2d(output_error), axis=0, keepdims=True)

        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error

## 2.3 Activation Layer
Neural networks require non-linearities to capture complex patterns. Activation functions introduce these non-linearities.

In [138]:
# inherit from base class Layer
class ActivationLayer(Layer):
    """Layer that applies an activation function and has no parameters."""

    def __init__(self, activation, activation_prime):
        """Create the layer.

        activation: callable applied to the input in the forward pass.
        activation_prime: callable giving the derivative of ``activation``,
            evaluated at the cached input in the backward pass.
        """
        # Initialise the input/output placeholders defined by the base class
        # so the attributes exist before the first forward pass.
        super().__init__()
        self.activation = activation
        self.activation_prime = activation_prime

    def forward_propagation(self, input_data):
        """Return ``activation(input_data)`` and cache input and output."""
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Return input_error (dE/dX) for a given output_error (dE/dY).

        ``learning_rate`` is accepted for interface compatibility but is not
        used, because this layer has no learnable parameters.
        """
        return self.activation_prime(self.input) * output_error

# 3 Activation and Loss Functions Definitions
Activation functions introduce non-linearity into the network. In this implementation, the hyperbolic tangent (tanh) function is utilized due to its zero-centered output, which can help speed up convergence during training. Additionally, the Mean Squared Error (MSE) is employed as the loss function. It measures the average squared difference between the actual and predicted values, making it suitable for regression problems.


In [139]:
# activation function and its derivative
def tanh(x):
    """Hyperbolic tangent activation, computed with ``np.tanh``."""
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Derivative of tanh: ``1 - tanh(x)**2``."""
    squashed = np.tanh(x)
    return 1 - squashed ** 2

# loss function and its derivative
def mse(y_true, y_pred):
    """Mean squared error: the mean of ``(y_true - y_pred)**2``."""
    squared_residuals = np.power(y_true - y_pred, 2)
    return np.mean(squared_residuals)

def mse_prime(y_true, y_pred):
    """Derivative of the mean squared error with respect to ``y_pred``.

    Returns ``2 * (y_pred - y_true) / N``, where ``N`` is the number of
    elements in the difference.  Both arguments may be NumPy arrays,
    Python scalars or sequences.
    """
    # Convert up front so scalar or list targets work; reading ``.size``
    # straight off ``y_true`` fails for anything that is not an array.
    diff = np.asanyarray(y_pred) - np.asanyarray(y_true)
    return 2 * diff / diff.size

# 4 Constructing the Neural Network
The `Network` class represents the structure and functionality of a neural network. Upon initialization, it sets up an empty list for layers and placeholders for the loss function and its derivative. The `add` method allows for the addition of layers to the network, while the `use` method sets the loss function and its derivative to be used during training. The `predict` method performs forward propagation through the network for a given input, returning the network's output. The `fit` method trains the network using the provided training data. During training, the method iterates over the specified number of epochs, performing forward propagation to compute the network's output, calculating the loss, and then executing backward propagation to update the weights and biases of the layers. After each epoch, the average error across all samples is displayed.



In [140]:
class Network:
    """Sequential stack of layers trained one sample at a time.

    Layers are applied in insertion order during forward propagation and
    in reverse order during backward propagation.  A layer is any object
    providing ``forward_propagation(input)`` and
    ``backward_propagation(output_error, learning_rate)``.
    """

    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Set the loss function and its derivative used by ``fit``."""
        self.loss = loss
        self.loss_prime = loss_prime

    def _forward(self, sample):
        """Run one sample through every layer and return the final output."""
        output = sample
        for layer in self.layers:
            output = layer.forward_propagation(output)
        return output

    def predict(self, input_data):
        """Return a list holding the network output for each sample.

        ``input_data`` is iterated along its first (sample) dimension.
        """
        return [self._forward(sample) for sample in input_data]

    def fit(self, x_train, y_train, epochs, learning_rate):
        """Train the network and return the average error of each epoch.

        x_train, y_train: training inputs and targets, sample dimension first.
        epochs: number of passes over the training data.
        learning_rate: step size handed to every layer's backward pass.

        Returns a list with one average-error entry per epoch; the same
        value is printed after each epoch.

        Raises:
            RuntimeError: if ``use`` has not been called to set the loss.
            ValueError: if ``x_train`` is empty or ``y_train`` has fewer
                entries than ``x_train``.
        """
        # Validate before touching any layer so a bad call cannot leave the
        # network partially trained.
        if self.loss is None or self.loss_prime is None:
            raise RuntimeError(
                'no loss configured: call use(loss, loss_prime) before fit()'
            )
        samples = len(x_train)
        if samples == 0:
            raise ValueError('x_train must contain at least one sample')
        if len(y_train) < samples:
            raise ValueError('y_train has fewer samples than x_train')

        history = []
        for epoch in range(epochs):
            err = 0
            # zip stops after the last x_train sample; extra targets are ignored.
            for sample, target in zip(x_train, y_train):
                output = self._forward(sample)

                # compute loss (for display and the returned history only)
                err += self.loss(target, output)

                # backward propagation, last layer first
                error = self.loss_prime(target, output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # average error over all samples of this epoch
            err /= samples
            print('epoch %d/%d   error=%f' % (epoch + 1, epochs, err))
            history.append(err)

        return history

# 5 Training Data for XOR
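The XOR function is a classic test case for neural networks: it is not linearly separable, so a network needs at least one hidden layer to learn it. The cell below defines the four XOR samples, builds a 2-3-1 network with tanh activations, and trains it.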

In [141]:
import numpy as np

# training data: the four XOR input pairs and targets, one (1, n) row per sample
x_train = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 1, 2)
y_train = np.array([0, 1, 1, 0]).reshape(4, 1, 1)

# network: 2 inputs -> 3 hidden units (tanh) -> 1 output (tanh)
net = Network()
net.use(mse, mse_prime)
net.add(FCLayer(2, 3))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(3, 1))
net.add(ActivationLayer(tanh, tanh_prime))

# train
net.fit(x_train, y_train, epochs=1000, learning_rate=0.1)

epoch 1/1000   error=0.404805
epoch 2/1000   error=0.318238
epoch 3/1000   error=0.303132
epoch 4/1000   error=0.298310
epoch 5/1000   error=0.296080
epoch 6/1000   error=0.294760
epoch 7/1000   error=0.293835
epoch 8/1000   error=0.293113
epoch 9/1000   error=0.292507
epoch 10/1000   error=0.291977
epoch 11/1000   error=0.291501
epoch 12/1000   error=0.291068
epoch 13/1000   error=0.290670
epoch 14/1000   error=0.290302
epoch 15/1000   error=0.289962
epoch 16/1000   error=0.289645
epoch 17/1000   error=0.289352
epoch 18/1000   error=0.289078
epoch 19/1000   error=0.288824
epoch 20/1000   error=0.288587
epoch 21/1000   error=0.288365
epoch 22/1000   error=0.288158
epoch 23/1000   error=0.287964
epoch 24/1000   error=0.287782
epoch 25/1000   error=0.287610
epoch 26/1000   error=0.287447
epoch 27/1000   error=0.287293
epoch 28/1000   error=0.287145
epoch 29/1000   error=0.287004
epoch 30/1000   error=0.286867
epoch 31/1000   error=0.286734
epoch 32/1000   error=0.286604
epoch 33/1000   e… (remaining training output truncated)

# 6 Evaluating the Results
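After training, it's essential to evaluate how well our network has learned. Here we run the trained network on the four XOR inputs and threshold each continuous output at 0.5 to obtain a binary prediction.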

In [142]:
# test: run the trained network on the training inputs
out = net.predict(x_train)

# Threshold each continuous output at 0.5 to get a 0/1 label
binary_out = [int(bool(o[0] > 0.5)) for o in out]

print(binary_out)


[0, 1, 1, 0]
