In [3]:
import numpy as np
import pandas as pd
import itertools as its
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt

def tanh(x):
    """Return the hyperbolic tangent of *x*, element-wise.

    Delegates to ``np.tanh`` rather than evaluating
    ``(1 - exp(-2x)) / (1 + exp(-2x))`` by hand: for large negative *x* the
    hand-written form overflows ``exp(-2x)`` to ``inf`` and yields ``nan``
    (``inf / inf``) instead of ``-1``.

    Args:
        x: A scalar or NumPy array.

    Returns:
        ``tanh(x)`` with the same shape as *x*; values lie in ``[-1, 1]``.
    """
    return np.tanh(x)

def tanh_derivative(x):
    """Return the derivative of tanh, given the tanh *output*.

    For ``a = tanh(z)`` the derivative with respect to ``z`` is
    ``1 - a**2 = (1 + a) * (1 - a)``.  ``MLP.back_propagation`` passes the
    already-activated layer outputs to the derivative functions (the same
    convention ``sigmoid_derivative`` uses), so *x* here is ``a``; applying
    ``tanh`` to it a second time would evaluate the slope at the wrong point.

    Args:
        x: Scalar or NumPy array of tanh activations (values in ``[-1, 1]``).

    Returns:
        ``(1 + x) * (1 - x)``, element-wise.
    """
    return (1 + x) * (1 - x)

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, element-wise.

    Args:
        x: A scalar or NumPy array.

    Returns:
        The logistic function of *x*, with the same shape as *x*.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, element-wise.

    This equals the slope of the logistic function when *x* is already the
    sigmoid's output, i.e. ``x = sigmoid(z)``; no sigmoid is applied here.

    Args:
        x: A scalar or NumPy array.

    Returns:
        The product of *x* and its complement ``1 - x``.
    """
    complement = 1 - x
    return complement * x

def relu(x):
    """Return the rectified linear unit ``max(0, x)``, element-wise.

    Args:
        x: A scalar or NumPy array.

    Returns:
        *x* with every negative entry replaced by zero.
    """
    floor = 0
    return np.maximum(floor, x)

def relu_derivative(x):
    """Return 1 where *x* is strictly positive and 0 elsewhere.

    Args:
        x: A scalar or NumPy array.

    Returns:
        An integer (or integer array) indicator of ``x > 0``.
    """
    is_positive = x > 0
    # Multiplying by 1 turns the boolean mask into integers.
    return is_positive * 1

# Maps an activation name, as given in MLP's ``functions`` argument, to the
# callable that computes it.
activation_functions = dict(
    tanh=tanh,
    sigmoid=sigmoid,
    relu=relu,
)

# Maps an activation name to the callable that computes its derivative; uses
# the same keys as ``activation_functions``.
activation_functions_derivatives = dict(
    tanh=tanh_derivative,
    sigmoid=sigmoid_derivative,
    relu=relu_derivative,
)

class MLP:
    """Fully connected feed-forward network trained one sample at a time.

    Every layer's input is prefixed with a constant ``1`` so that row 0 of
    each weight matrix acts as the bias.

    Attributes set by ``__init__``:
        layers: Number of layers, including the input layer.
        steps_per_epoch: Always 1; not read by any method of this class.
        network_config: Layer sizes, e.g. ``[2, 2, 1]``.
        activity: One activation callable per weight matrix.
        activity_derivative: Matching derivative callables.  They are called
            with layer *outputs* (already-activated values).
        weights: One ``(n_in + 1, n_out)`` array per pair of adjacent layers.
    """

    def __init__(self, network_config, functions):
        """Create the weight matrices and look up the activation functions.

        Args:
            network_config: Sequence of layer sizes, input layer first.
            functions: Activation names (keys of ``activation_functions``).
                Only the first ``len(network_config) - 1`` entries are used,
                one per weight matrix.
        """
        self.layers = len(network_config)
        self.steps_per_epoch = 1
        self.network_config = network_config
        self.activity = []
        self.activity_derivative = []
        self.weights = []

        for layer in range(self.layers - 1):
            name = functions[layer]
            self.activity.append(activation_functions[name])
            self.activity_derivative.append(activation_functions_derivatives[name])

            fan_in = network_config[layer]
            fan_out = network_config[layer + 1]
            # NOTE(review): ``rand(...) - 1`` draws from [-1, 0), so every
            # initial weight is negative, and ``1 / fan_in + fan_out`` parses
            # as ``(1 / fan_in) + fan_out``.  A symmetric range and
            # ``1 / (fan_in + fan_out)`` may have been intended -- TODO
            # confirm.  Left numerically unchanged to preserve existing
            # training behaviour.
            w = np.random.rand(fan_in + 1, fan_out) - 1
            w *= np.sqrt(1 / fan_in + fan_out)
            self.weights.append(w)

    def feed_forward(self, x):
        """Propagate one sample through the network.

        Args:
            x: A list whose element(s) are the bias-augmented input vector,
                e.g. ``[np.array([1, x1, x2])]``.  The list is copied, not
                modified.

        Returns:
            A new list: the entries of *x* followed by each hidden layer's
            output (with a leading 1 for the bias) and finally the output
            layer's activations (without a bias entry).
        """
        # Copy so the appends below do not leak into the caller's list.
        y = list(x)

        for i in range(len(self.weights) - 1):
            activation = np.dot(y[i], self.weights[i])
            activity = self.activity[i](activation)
            # Prefix the bias input expected by the next weight matrix.
            activity = np.concatenate((np.ones(1), np.array(activity)))
            y.append(activity)

        # The output layer gets no bias entry.
        activation = np.dot(y[-1], self.weights[-1])
        activity = self.activity[-1](activation)
        y.append(activity)
        return y

    def back_propagation(self, y, target, learning_rate):
        """Update all weights in place from one sample's forward pass.

        Args:
            y: Layer outputs as returned by ``feed_forward``.
            target: Desired network output for this sample.
            learning_rate: Step size applied to each weight update.
        """
        error = target - y[-1]
        delta_vector = [error * self.activity_derivative[-1](y[-1])]

        # Walk backwards over the hidden layers.  ``y[i]`` was produced by
        # ``activity[i - 1]``, so its slope comes from
        # ``activity_derivative[i - 1]``; ``[1:]`` skips the bias entry/row.
        for i in range(self.layers - 2, 0, -1):
            error = np.dot(delta_vector[-1], self.weights[i][1:].T)
            error = error * self.activity_derivative[i - 1](y[i][1:])
            delta_vector.append(error)

        # Deltas were collected output-first; put them in weight order.
        delta_vector.reverse()
        for i, delta in enumerate(delta_vector):
            # Outer product: (layer input incl. bias) x (layer delta).
            self.weights[i] += learning_rate * np.outer(y[i], delta)

    def training(self, data, labels, learning_rate=0.1, epochs=100):
        """Train with stochastic single-sample updates.

        Each of the *epochs* iterations draws one random row of *data*, runs
        a forward pass and applies one back-propagation update.  Progress is
        printed every 10000 iterations.

        Args:
            data: 2-D array, one sample per row.
            labels: Targets indexed like the rows of *data*.
            learning_rate: Step size passed to ``back_propagation``.
            epochs: Number of single-sample update steps.

        Raises:
            ValueError: If *data* has no rows.
        """
        data = np.asarray(data)
        n_samples = data.shape[0]
        if n_samples == 0:
            raise ValueError('training data must contain at least one sample')

        # Prefix every sample with the constant bias input.
        Z = np.concatenate((np.ones((n_samples, 1)), data), axis=1)

        for k in range(epochs):
            if (k + 1) % 10000 == 0:
                print('epochs: {}'.format(k + 1))

            # Sample from the data passed in, not from a module-level global.
            sample = np.random.randint(n_samples)
            y = self.feed_forward([Z[sample]])
            self.back_propagation(y, labels[sample], learning_rate)

    def _forward_output(self, x):
        """Return the output-layer activation vector for one raw input."""
        val = np.asarray(x)
        for activate, w in zip(self.activity, self.weights):
            with_bias = np.concatenate((np.ones(1), val))
            val = activate(np.dot(with_bias, w))
        return val

    def predict_single_input(self, x):
        """Return the first output unit's activation for one input vector.

        Args:
            x: Input features without the bias entry.

        Returns:
            The activation of output unit 0 as a scalar.
        """
        return self._forward_output(x)[0]

    def predict(self, X):
        """Return the network outputs for every row of *X*.

        Args:
            X: Iterable of input vectors (without the bias entry).

        Returns:
            Array of shape ``(n_samples, n_outputs)``; ``(0, n_outputs)`` when
            *X* is empty.
        """
        rows = [self._forward_output(x) for x in X]
        if not rows:
            return np.empty((0, self.network_config[-1]))
        return np.vstack(rows)

    
    

# Build a network with
#   2 input neurons,
#   2 hidden neurons,
#   1 output neuron.
nn = MLP(
    network_config=[2, 2, 1],
    functions=['sigmoid', 'sigmoid', 'sigmoid'],
)

# Input patterns: every combination of two binary values.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

# Target labels: the XOR of each input pair.
y = np.array([0, 1, 1, 0])

# Train the network for the chosen number of single-sample update steps.
nn.training(data=X, labels=y, epochs=100000)

# Report the trained network's output for each input pattern.
print("Final prediction")
for s in X:
    print(s, nn.predict_single_input(x=s))



epochs: 10000
epochs: 20000
epochs: 30000
epochs: 40000
epochs: 50000
epochs: 60000
epochs: 70000
epochs: 80000
epochs: 90000
epochs: 100000
Final prediction
[0 0] 0.023708287000529543
[0 1] 0.9742659390065683
[1 0] 0.9743686125675753
[1 1] 0.03207848300337047
