In [1]:
import numpy as np
import pandas as pd

In [2]:
# Neural Network Implementation with backpropagation
class NeuralNetwork:
    def __init__(self, layers, alpha=0.1):
        self.W = []
        self.alpha = alpha
        self.layers = layers
        
        # Weights initialization
        # Before last two layers
        for i in np.arange(0, len(layers) - 2):
            # initialize weights by including a bias trick
            w = np.random.randn(layers[i] + 1, layers[i + 1] + 1)
            self.W.append(w / np.sqrt(layers[i]))
        
        # Between last two layers
        w = np.random.randn(layers[-2] + 1, layers[-1])
        self.W.append(w / np.sqrt(layers[-2]))
        
    # This method allow us to print the network architecture
    def __repr__(self):
        return "Neural Network: {} ".format("-".join(str(l) for l in self.layers))
    
    # Define your activation function and its derivative
    # Sigmoid activation function
    def sigmoid(self, x):
        return 1.0 / (1 + np.exp(x))
    
    # Derivative of sigmoid function
    def sigmoid_deriv(self, x):
        return x * (1 - x)
    
    # Train a network using fit method
    def fit(self, X, y, epochs = 1000, displayUpdate=100):
        # Format the input features to comply with a bias trick by inserting a column of 1
        X = np.c_[X, np.ones((X.shape[0]))]
        
        # Loop over the desired number of epochs and insert data into the network for learning
        for epoch in np.arange(0, epochs):
            for (x, target) in zip(X, y):
                self.partial_fit(x, target)
            
            # Display a training update by looking at loss and epochs
            if epoch == 0 or (epoch + 1) % displayUpdate == 0:
                loss = self.calculate_loss(X, y)
                print("[INFO] epoch={}, loss = {}". format(epoch + 1, loss))
    
    # This method clculates the activation output of each layer and store them in activation list
    def partial_fit(self, x, y):
        #FORWARD PASS
        # Initialize activation list with the first entry which is the same as the input feature vector it self
        A = [np.atleast_2d(x)]
        
        # Loop over the network and find the activations
        for layer in np.arange(0, len(self.W)):
            # calculate the net input by taking activation of the current layer times weight matrix
            net = A[layer].dot(self.W[layer])
            
            # calculate net output by passing net input through sigmoid function
            out = self.sigmoid(net)
            
            # Insert net out put to activation list
            A.append(out)
        
        # BACKPROPAGATION
        # First compute an error (The diference between our prediction and true target)
        # Remember activation list hold predictions for each neuron and the last neuron (A[-1]) is the general prediction a network provide
        net_pred = A[-1]
        error = A[-1] - y
        
        # Apply a chain rule and build a list of deltas D
        #The first entry of our delta is simply an error times the derivative of activation function for the output value
        D = [ error * self.sigmoid_deriv(net_pred)]
#         print(D)
        
        # To find other deltas simply loop over the layers in reverse order
        for layer in np.arange(len(A) - 2, 0, -1):
            # The delta for the currnt network is equal to the delta of the previous layer dotted with the weight matrix of the current layer
            # Followed by multiplying delta by the derivative of our activation function for the activation of the current layer
            delta = D[-1].dot(self.W[layer].T)
            delta = delta * self.sigmoid_deriv(A[layer])
            D.append(delta)
        
        # Reverse list of Deltas
        reversed_deltas = D[::-1]
        
        # WEIGHT UPDATE
        # loop over the layers and perform the update
        for layer in np.arange(0, len(self.W)):
            self.W[layer] += -self.alpha * A[layer].T.dot(reversed_deltas[layer])
#         print('Activation List from feedforward')
#         print('')
#         print(A)
#         print('')
#         print('List of deltas')        
#         print(D)
#         print('')
#         print('Revesed Deltas')
#         print('')
#         print(reversed_deltas)
#         print('')
#         print('Updated weights')
#         print('')
#         print(self.W)

      # Predict method to give out predictions
    def predict(self, X, addBias=True):
        # While prediction initialize the output prediction as the input features, to help with forward pass to obtain the final prediction
        p = np.atleast_2d(X)
        
        # Check to see if we should add a bias column
        if addBias:
            # insert a columns of 1's to the input feature to reflect the bias trick
            p = np.c_[p, np.ones((p.shape[0]))]
            
            # Loop over the network layers
            for layer in np.arange(0, len(self.W)):
                # compute the prediction by taking the dot product btn the current activation value p and the weight matrix associated 
                # with the current layer, then pass this value through a nonlinear activation function (sigmoid function)
                p = self.sigmoid(np.dot(p, self.W[layer]))
        
        # return the predicted value
        return p
        
    def calculate_loss(self, X, targets):
        # Make predictions for the input data points then compute the loss
        targets = np.atleast_2d(targets)
        predictions = self.predict(X, addBias=False)
        loss = 0.5 * np.sum((predictions - targets) ** 2)
        
        return loss

In [3]:

# Construct the 'XOR' dataset
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Train the NN
print('[INFO]: Training....')
nn = NeuralNetwork([2, 2, 1], alpha=0.5)
nn.fit(X, y, epochs=20000)

# Test the NN
print('[INFO]: Testing....')

# Loop over the data points
for (x, target) in zip(X, y):
    # Make a prediction and display the result
    pred = nn.predict(x)[0][0]
    step = 1 if pred > 0.5 else 0
    print('[INFO]: Data={}, Ground Truth={}, Prediction={:.4f}, Step={}'.format(x, target[0], pred, step))

[INFO]: Training....
[INFO] epoch=1, loss = 3.0
[INFO] epoch=100, loss = 3.0
[INFO] epoch=200, loss = 3.0
[INFO] epoch=300, loss = 3.0
[INFO] epoch=400, loss = 3.0
[INFO] epoch=500, loss = 3.0
[INFO] epoch=600, loss = 3.0
[INFO] epoch=700, loss = 3.0
[INFO] epoch=800, loss = 3.0
[INFO] epoch=900, loss = 3.0
[INFO] epoch=1000, loss = 3.0
[INFO] epoch=1100, loss = 3.0
[INFO] epoch=1200, loss = 3.0
[INFO] epoch=1300, loss = 3.0
[INFO] epoch=1400, loss = 3.0
[INFO] epoch=1500, loss = 3.0
[INFO] epoch=1600, loss = 3.0
[INFO] epoch=1700, loss = 3.0
[INFO] epoch=1800, loss = 3.0
[INFO] epoch=1900, loss = 3.0
[INFO] epoch=2000, loss = 3.0
[INFO] epoch=2100, loss = 3.0
[INFO] epoch=2200, loss = 3.0
[INFO] epoch=2300, loss = 3.0
[INFO] epoch=2400, loss = 3.0
[INFO] epoch=2500, loss = 3.0
[INFO] epoch=2600, loss = 3.0
[INFO] epoch=2700, loss = 3.0
[INFO] epoch=2800, loss = 3.0
[INFO] epoch=2900, loss = 3.0
[INFO] epoch=3000, loss = 3.0
[INFO] epoch=3100, loss = 3.0
[INFO] epoch=3200, loss = 3.0
[