In [0]:
import matplotlib.pyplot as plt 
import numpy as np 
np.set_printoptions(suppress=True)
import math 

### Plotting parameters for the looks - Not neural network related ###
# All settings are applied in one call; the white ('w') label and title colours
# are presumably chosen for a dark notebook theme.
plt.rcParams.update({
    'figure.figsize': [18, 9],
    'axes.labelsize': 18,
    'axes.labelcolor': 'w',
    'axes.titlecolor': 'w',
    'axes.titlesize': 20,
    'legend.fontsize': 18,
})

In [0]:
### Activation Function ###
# Used to introduce nonlinearity in the outputs
# Without it the network could only learn linear functions (i.e. plain linear regression)
# Applied in each layer of the network to the weighted sum of the inputs computed by that layer
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated element-wise.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        Values in [0, 1] with the same shape as ``x``.
    """
    # log(1 + exp(-x)) is obtained through np.logaddexp(0, -x), which never
    # evaluates exp() of a large positive number. The naive form overflows
    # (RuntimeWarning) once -x exceeds roughly 709.
    return np.exp(-np.logaddexp(0.0, np.negative(x)))

def sigmoid_derivative(x):
    """Derivative of the sigmoid with respect to its input, evaluated at ``x``.

    ``x`` is the value fed *into* the sigmoid (the pre-activation). If you
    already hold the activation a = sigmoid(x), the same quantity is a * (1 - a).
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

In [0]:
# Sample both curves on 100 evenly spaced points of the interval [-6, 6]
x = np.linspace(-6, 6, 100)
sigmoid_plot_data = sigmoid(x)                        # sigmoid applied element-wise over x
sigmoid_derivative_plot_data = sigmoid_derivative(x)  # same, with the derivative of the sigmoid

### Plotting section ###
fig, ax = plt.subplots()

# Each curve carries its own label, so the legend is built from the plotted lines
ax.plot(x, sigmoid_plot_data, label='Sigmoid')
ax.plot(x, sigmoid_derivative_plot_data, label='Sigmoid derivative')
ax.set(xlabel='x', ylabel='f (x)')
ax.grid()
ax.legend()
ax.tick_params(axis='both', colors='w', labelsize='large')

plt.show()

In [0]:
class NeuralNetwork:
    """Fully connected network with one hidden layer, sigmoid activations and no biases.

    The hidden layer has as many units as there are training samples: weights1
    has x.shape[0] columns and weights2 has y.shape[0] rows, so x and y must
    have the same number of rows.

    Attributes:
        input:    training inputs, one sample per row.
        y:        desired outputs, one row per sample.
        weights1: input -> hidden weights, shape (x.shape[1], x.shape[0]).
        weights2: hidden -> output weights, shape (y.shape[0], y.shape[1]).
        layer1:   hidden activations (only exists after feedforward()).
        output:   latest prediction, all zeros until feedforward() is called.
        history:  loss recorded by every feedforward() call, for plotting.
    """

    def __init__(self, x, y):

        # Used to reproduce predictable results (note: this reseeds numpy's global generator)
        np.random.seed(6)

        self.input      = x     # Input layer - Placeholder
        self.y          = y     # Desired output - Placeholder
        self.weights1   = np.random.rand(self.input.shape[1], self.input.shape[0])      # Weights for the hidden layer - Randomly initialized
        self.weights2   = np.random.rand(self.y.shape[0], self.y.shape[1])              # Weights for the output layer - Randomly initialized
                                                                                        # Tip: look at the shape of the weights - see how they match considering
                                                                                        # matrix multiplication (done in feedforward)
        self.output     = np.zeros(y.shape)     # Network predicted output - Initialized with 0

        # List used to save loss values for plotting
        self.history    = list()

    def feedforward(self):
        """Propagate the inputs through both layers and record the resulting loss."""
        self.layer1 = sigmoid(np.dot(self.input, self.weights1))    # Computing the output of the hidden layer
        self.output = sigmoid(np.dot(self.layer1, self.weights2))   # Computing the output of the output layer
                                                                    # This is the network output, which should be as close to the real one (y) as possible

        self.history.append(self.loss())    # Append loss value to the history list for plotting purposes

    def backprop(self, learning_rate=1.0):
        """Take one gradient-descent step on both weight matrices.

        Must be called after feedforward(), which provides layer1 and output.

        Args:
            learning_rate: factor applied to the gradient step. The default of
                1.0 applies the raw gradient, as the method did before the
                parameter existed.
        """

        # We can't directly calculate the derivative of the loss function with respect to the weights because the equation of the loss function
        # does not contain the weights. Therefore, we need the chain rule to help us calculate it.

        # For a = sigmoid(z), the derivative of the sigmoid at z equals a * (1 - a).
        # self.output and self.layer1 already hold activations, so the derivative is taken
        # directly from them; running them through the sigmoid again would give the wrong slope.
        output_delta = 2 * (self.y - self.output) * self.output * (1 - self.output)
        hidden_delta = np.dot(output_delta, self.weights2.T) * self.layer1 * (1 - self.layer1)

        # Both steps are computed before either weight matrix is modified
        d_weights2 = np.dot(self.layer1.T, output_delta)
        d_weights1 = np.dot(self.input.T, hidden_delta)

        # update the weights with the derivative (slope) of the loss function - Gradient Descent
        # The deltas are built from (y - output), i.e. the negative gradient, hence the '+='
        # This is the step where the network actually learns
        self.weights1 += learning_rate * d_weights1
        self.weights2 += learning_rate * d_weights2

    # Sum of squares error
    # The function which shows us how well the network performs: the lower the loss score the better the network predicts
    def loss(self):
        """Return the sum of squared errors between y and output as a float.

        Every element is included, so targets with several output columns are
        fully accounted for and empty data yields 0.0.
        """
        return float(np.sum((self.y - self.output) ** 2))

In [0]:
### Dataset ###

### Function to learn: XOR 3 inputs

# Train Data - one row for each of the 8 combinations of the three binary inputs
input_array = np.array([
    [0, 0, 0],
    [0, 0, 1],
    [0, 1, 0],
    [0, 1, 1],
    [1, 0, 0],
    [1, 0, 1],
    [1, 1, 0],
    [1, 1, 1],
])

# Train Labels - the desired output for each row above, stored as a single column
# (the label is 1 exactly when an odd number of the three inputs is 1)
output_array = np.array([0, 1, 1, 0, 1, 0, 0, 1]).reshape(-1, 1)

In [0]:
network = NeuralNetwork(input_array, output_array)
# Loss before any training: the prediction is still the all-zero placeholder set in __init__
print(network.loss())

### Training the network ###

epochs = 10000  # Number of learning cycles -> 10000 * (feedforward + backpropagation)
threshold_value = 0.001 # Stop the learning when the loss score is under this threshold
                        # Not a necessary parameter to the training, just a feature

# Index of the last epoch that ran. Defined up front so it exists even when the
# threshold is never reached and the loop runs through every epoch.
last_epoch = epochs - 1

# Training loop
for epoch in range(epochs):
    network.feedforward()
    network.backprop()
    # loss() reads the prediction made by this epoch's feedforward,
    # i.e. the one computed before the weight update that was just applied
    if network.loss() <= threshold_value:
        last_epoch = epoch
        break

print(network.output)
print(network.loss())