In [1]:
import random
import math


class Neuron():
    '''
        A conceptual neuron that can be trained using a
        fit and predict methodology, without any library.

        The neuron computes a weighted sum of the values it receives and
        passes it through its activation (sigmoid or linear). New weights are
        staged in updated_weights by calculate_update and only applied by
        update_neuron, so the old weights stay readable while the deltas of
        the other neurons are still being computed.
    '''

    def __init__(self, position_in_layer, is_output_neuron=False, is_sigmoid=True):
        '''
            position_in_layer: index of this neuron inside its layer
            is_output_neuron:  True when the delta must be computed from the target
            is_sigmoid:        True for a sigmoid activation, False for a linear one
        '''
        self.weights = []
        # Values seen during the last predict call (needed for the gradient)
        self.inputs = []
        # Activation produced by the last predict call
        self.output = None

        # Weights computed by calculate_update, waiting for update_neuron
        self.updated_weights = []
        # Selects which delta formula calculate_update uses
        self.is_output_neuron = is_output_neuron
        # Error term of this neuron, read by the neurons feeding into it
        self.delta = None
        # Index of the weight that downstream neurons hold for this neuron
        self.position_in_layer = position_in_layer
        self.is_sigmoid = is_sigmoid

    def attach_to_output(self, neurons):
        '''
            Helper function to store the reference of the neurons that
            consume this neuron's output (used for backpropagation)
        '''
        self.output_neurons = neurons

    def activation(self, x):
        '''
            Apply the configured activation (sigmoid or linear) to x
        '''
        if self.is_sigmoid:
            return self.sigmoid(x)
        return self.Linear(x)

    def activationDerive(self, x):
        '''
            Derivative of the configured activation. x is the activation
            OUTPUT (see sigmoidDerive), not the weighted sum.
        '''
        if self.is_sigmoid:
            return self.sigmoidDerive(x)
        return self.LinearDerive(x)

    def sigmoid(self, x):
        '''
            Logistic function 1 / (1 + e^-x) used for the activation.

            math.exp raises OverflowError once -x exceeds roughly 709. The
            true value of the sigmoid there is smaller than about 1e-308,
            so 0.0 is returned instead of letting the error abort predict.
        '''
        try:
            return 1 / (1 + math.exp(-x))
        except OverflowError:
            return 0.0

    def sigmoidDerive(self, x):
        '''
            Derivative of the sigmoid expressed through its output:
            x must already be sigmoid(z), the result is x * (1 - x)
        '''
        return x*(1-x)

    def Linear(self, x):
        '''
            Identity activation: returns x unchanged
        '''
        return x

    def LinearDerive(self, x):
        '''
            Derivative of the identity activation: always 1
        '''
        return 1

    def init_weights(self, num_input):
        '''
            This is used to setup the weights when we know how many inputs
            there are for a given neuron. One extra weight is created for
            the bias.

            NOTE: the new weights are appended to self.weights; the list is
            not cleared first.
        '''
        # Randomly initialize num_input weights plus one bias weight
        for _ in range(num_input+1):
            self.weights.append(random.uniform(0,1))

    def predict(self, row):
        '''
            Given a row of data it will predict what the output should be for
            this given neuron. We can have many inputs, but only one output
            for a neuron. The row is remembered in self.inputs and the result
            in self.output for the later weight update.
        '''
        # Reset the inputs
        self.inputs = []

        # Explicit left-to-right accumulation keeps the floating point
        # result independent of how the built-in sum() is implemented
        weighted_sum = 0
        for weight, feature in zip(self.weights, row):
            self.inputs.append(feature)
            weighted_sum = weighted_sum + weight*feature

        self.output = self.activation(weighted_sum)
        return self.output

    def update_neuron(self):
        '''
            Will update a given neuron's weights by replacing the current
            weights with those staged during the backpropagation. This needs
            to be done at the end of the backpropagation.
        '''
        self.weights = list(self.updated_weights)

    def calculate_update(self, learning_rate, target):
        '''
            This function will calculate the updated weights for this neuron.
            It will first calculate the right delta (depending on whether
            this neuron is an output or a hidden neuron), then it will
            calculate the updated_weights. It will not overwrite the weights
            yet as they are needed for other updates in the backpropagation
            algorithm.

            learning_rate: step size of the gradient descent update
            target:        expected value; only read by output neurons
        '''
        derive = self.activationDerive(self.output)
        if self.is_output_neuron:
            # Output neuron: error against the target times the derivative
            self.delta = (self.output - target)*derive
        else:
            # Hidden neuron: gather the deltas of the neurons it feeds,
            # each scaled by the weight they hold for this neuron
            delta_sum = 0
            cur_weight_index = self.position_in_layer
            for output_neuron in self.output_neurons:
                delta_sum = delta_sum + (output_neuron.delta * output_neuron.weights[cur_weight_index])

            self.delta = delta_sum*derive

        # Stage the new weights: w - learning_rate * (delta * input)
        self.updated_weights = []
        for cur_weight, cur_input in zip(self.weights, self.inputs):
            gradient = self.delta*cur_input
            self.updated_weights.append(cur_weight - learning_rate*gradient)

    def toString(self):
        '''
            Print the current weights of this neuron
        '''
        print(self.weights)
         
class Layer():
    '''
        Layer models one layer of the fully-connected feed-forward neural
        network architecture. It creates its neurons, wires them to the next
        layer for the backpropagation and forwards rows of data to them.
    '''

    def __init__(self, num_neuron, is_output_layer = False, is_sigmoid=True):
        '''
            num_neuron:      how many neurons this layer holds
            is_output_layer: forwarded to every neuron as is_output_neuron
            is_sigmoid:      forwarded to every neuron (sigmoid vs. linear)
        '''
        self.is_output_layer = is_output_layer
        # The index of each neuron is its position in the layer
        self.neurons = [
            Neuron(i, is_output_neuron=is_output_layer, is_sigmoid=is_sigmoid)
            for i in range(num_neuron)
        ]

    def attach(self, layer):
        '''
            This function attaches the neurons of this layer to the neurons
            of the given (next) layer. This is needed for the backpropagation
            algorithm.
        '''
        for in_neuron in self.neurons:
            in_neuron.attach_to_output(layer.neurons)

    def init_layer(self, num_input):
        '''
            This will initialize the weights of each neuron in the layer.
            num_input is the number of values coming from the previous layer
            (the bias weight is added by the neuron itself).
        '''
        for neuron in self.neurons:
            neuron.init_weights(num_input)

    def predict(self, row):
        '''
            This will calculate the activations for the full layer given the
            row of data streaming in. Returns one activation per neuron.

            row may be any iterable of numbers (list, tuple, ...); it is
            copied into a new list, so the caller's object is never modified.
        '''
        rowClone = list(row)
        rowClone.append(1) # constant input that multiplies the bias weight
        return [neuron.predict(rowClone) for neuron in self.neurons]

    def toString(self):
        '''
            Print the weights of every neuron in the layer
        '''
        for neuron in self.neurons:
            neuron.toString()
        
class MultiLayerPerceptron():
    '''
        Fully-connected feed-forward network trained with backpropagation.

        The architecture is declared backward: call add_output_layer first,
        then add_hidden_layer once per hidden layer (every call puts the new
        layer in front of the ones already added). predict only returns the
        value of the first output neuron.
    '''
    def __init__(self, learning_rate = 0.01, num_iteration = 100, is_Classifier=True,
                 report_interval=10000):
        '''
            learning_rate:   step size used for every weight update
            num_iteration:   number of single-row training steps done by fit
            is_Classifier:   True -> predict thresholds the output at 0.5,
                             False -> predict returns the raw activation
            report_interval: fit prints the mean squared error on the whole
                             training set every report_interval iterations;
                             0 or None disables the report
        '''
        # NOTE: this reseeds the module-level random generator, so every
        # instance starts from the same weights and sampling sequence
        random.seed(5)
        # Layers, ordered from the first hidden layer to the output layer
        self.layers = []

        # Training parameters
        self.learning_rate = learning_rate
        self.num_iteration = num_iteration
        self.is_Classifier = is_Classifier
        self.report_interval = report_interval

    def add_output_layer(self, num_neuron, is_sigmoid=True):
        '''
            This helper function will create a new output layer and add it
            to the front of the architecture
        '''
        self.layers.insert(0, Layer(num_neuron, is_output_layer = True, is_sigmoid=is_sigmoid))

    def add_hidden_layer(self, num_neuron, is_sigmoid=True):
        '''
            This helper function will create a new hidden layer, attach it
            to the layer currently at the front of the architecture and
            finally insert it in front of that layer.

            is_sigmoid selects the activation of the new layer, in the same
            way as for add_output_layer.
        '''
        hidden_layer = Layer(num_neuron, is_sigmoid=is_sigmoid)
        # The layer added last is the one this new layer feeds into
        hidden_layer.attach(self.layers[0])
        self.layers.insert(0, hidden_layer)

    def update_layers(self, target):
        '''
            Will update all the layers by calculating the updated weights and
            then updating the weights all at once when the new weights are
            found.
        '''
        # Output layer first: every hidden neuron needs the deltas of the
        # layer it feeds, computed from the not-yet-replaced weights
        for layer in reversed(self.layers):
            for neuron in layer.neurons:
                neuron.calculate_update(self.learning_rate, target)

        # Only now swap in the staged weights
        for layer in self.layers:
            for neuron in layer.neurons:
                neuron.update_neuron()

    def _mean_squared_error(self, X, y):
        '''
            Mean of (y - predict(row))**2 over every row of X
        '''
        num_row = len(X)
        total_error = 0
        for r_i in range(num_row):
            error = (y[r_i] - self.predict(X[r_i]))
            total_error = total_error + error**2
        return total_error/num_row

    def fit(self, X, y):
        '''
            Main training function of the neural network algorithm. This will
            make use of backpropagation. It uses stochastic gradient descent:
            one row is selected at random from the dataset, predict is run on
            it, the error is backpropagated and new weights are calculated.
            Once all the new weights are calculated, the whole network is
            updated.

            The weights are printed before and after the training.
        '''
        num_row = len(X)
        num_feature = len(X[0]) # Here we assume that we have a rectangular matrix

        # Init the weights throughout each of the layers: the first layer
        # sees the features, every other one the outputs of its predecessor
        self.layers[0].init_layer(num_feature)
        for i in range(1, len(self.layers)):
            num_input = len(self.layers[i-1].neurons)
            self.layers[i].init_layer(num_input)

        self.toString()

        # Launch the training algorithm
        for i in range(self.num_iteration):

            # Stochastic Gradient Descent: one random sample per step
            r_i = random.randint(0,num_row-1)
            # The return value is not needed here; the call stores the
            # inputs and outputs inside the neurons for the update below
            self.predict(X[r_i])

            # Update the layers using backpropagation
            self.update_layers(y[r_i])

            # Every report_interval iterations we calculate the error
            # on the whole training set
            if self.report_interval and i % self.report_interval == 0:
                mean_error = self._mean_squared_error(X, y)
                print(f"Iteration {i} with error = {mean_error}")

        self.toString()

    def predict(self, row):
        '''
            Prediction function that will take a row of input and give back
            the output of the whole neural network.
        '''
        # Feed the row through the layers, front to back
        activations = self.layers[0].predict(row)
        for i in range(1, len(self.layers)):
            activations = self.layers[i].predict(activations)

        outputs = []
        for activation in activations:
            if self.is_Classifier:
                # Decide if we output a 1 or 0
                outputs.append(1.0 if activation >= 0.5 else 0.0)
            else:
                outputs.append(activation)

        # We currently have only one output allowed
        return outputs[0]

    def toString(self):
        '''
            Print the weights of every layer, preceded by the layer index
        '''
        for i, layer in enumerate(self.layers):
            print("Layer"+str(i))
            layer.toString()


In [2]:
# XOR truth table: the label is 1 when exactly one of the two inputs is 1
X = [
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
]
y = [0, 1, 1, 0]

# Classifier with the training hyper-parameters
clf = MultiLayerPerceptron(
    learning_rate=0.5,
    num_iteration=100000,
)
# The architecture is declared from the output back to the input:
# one output neuron, fed by a hidden layer of two neurons
clf.add_output_layer(num_neuron=1)
clf.add_hidden_layer(num_neuron=2)
# Run the training
clf.fit(X, y)

Layer0
[0.6229016948897019, 0.7417869892607294, 0.7951935655656966]
[0.9424502837770503, 0.7398985747399307, 0.922324996665417]
Layer1
[0.029005228283614737, 0.46562265437810535, 0.9433567169983137]
Iteration 0 with error = 0.5
Iteration 10000 with error = 0.0
Iteration 20000 with error = 0.0
Iteration 30000 with error = 0.0
Iteration 40000 with error = 0.0
Iteration 50000 with error = 0.0
Iteration 60000 with error = 0.0
Iteration 70000 with error = 0.0
Iteration 80000 with error = 0.0
Iteration 90000 with error = 0.0
Layer0
[4.894069078311957, 4.902513295574223, -7.51599992979117]
[6.804556223570668, 6.832792275230364, -3.0754123531554205]
Layer1
[-11.244196642530676, 10.52821968186535, -4.909593041931376]


In [3]:
# Compare the trained classifier with the XOR truth table
for label, sample in (
    ("Expected 0.0, got: ", [0, 0]),
    ("Expected 1.0, got: ", [0, 1]),
    ("Expected 1.0, got: ", [1, 0]),
    ("Expected 0.0, got: ", [1, 1]),
):
    print(label, clf.predict(sample))


Expected 0.0, got:  0.0
Expected 1.0, got:  1.0
Expected 1.0, got:  1.0
Expected 0.0, got:  0.0


In [4]:
import numpy as np
# 1. Test data set: three samples with one feature each and their targets
X = [[1], [2], [3]]
Y = [2, 3, 2.5]

# 2. Regressor (raw output instead of a 0/1 decision)
clf = MultiLayerPerceptron(
    learning_rate=0.05,
    num_iteration=100000,
    is_Classifier=False,
)
# The architecture is declared from the output back to the input:
# one linear output neuron, fed by a hidden layer of five neurons
clf.add_output_layer(num_neuron=1, is_sigmoid=False)
clf.add_hidden_layer(num_neuron=5)
# 3. Run the training
clf.fit(X, Y)


Layer0
[0.6229016948897019, 0.7417869892607294]
[0.7951935655656966, 0.9424502837770503]
[0.7398985747399307, 0.922324996665417]
[0.029005228283614737, 0.46562265437810535]
[0.9433567169983137, 0.6489745531369242]
Layer1
[0.9009004917506227, 0.11320596465314436, 0.46906904778216374, 0.24657283261983032, 0.5437608592359304, 0.5739411879281008]
Iteration 0 with error = 0.12512867255686413
Iteration 10000 with error = 0.0508024048067219
Iteration 20000 with error = 0.0005740217020324812
Iteration 30000 with error = 7.019788673630293e-08
Iteration 40000 with error = 5.616885089795582e-12
Iteration 50000 with error = 6.311150612722709e-16
Iteration 60000 with error = 7.215746722103003e-20
Iteration 70000 with error = 8.416529298172356e-24
Iteration 80000 with error = 7.192439303352575e-28
Iteration 90000 with error = 1.0518145402946823e-30
Layer0
[2.8245779778904674, -2.3878341052405223]
[0.47721395054360394, 0.2380056219798162]
[1.7017119033945256, 0.42518754329611186]
[0.9136638742670695,

In [5]:
# Compare the trained regressor with the three training targets
for label, sample in (
    ("Expected 2, got: ", [1]),
    ("Expected 3, got: ", [2]),
    ("Expected 2.5, got: ", [3]),
):
    print(label, clf.predict(sample))

Expected 2, got:  1.9999999999999998
Expected 3, got:  2.999999999999999
Expected 2.5, got:  2.500000000000001


In [15]:
# 1. Test data set: three samples with two features each and their targets
X = [[1, 1], [2, 2], [3, 1]]
Y = [2, 3, 2.5]

# 2. Regressor (raw output instead of a 0/1 decision)
clf = MultiLayerPerceptron(
    learning_rate=0.1,
    num_iteration=100000,
    is_Classifier=False,
)
# The architecture is declared from the output back to the input:
# one linear output neuron, fed by a hidden layer of ten neurons
clf.add_output_layer(num_neuron=1, is_sigmoid=False)
clf.add_hidden_layer(num_neuron=10)
# 3. Run the training
clf.fit(X, Y)

Layer0
[0.6229016948897019, 0.7417869892607294, 0.7951935655656966]
[0.9424502837770503, 0.7398985747399307, 0.922324996665417]
[0.029005228283614737, 0.46562265437810535, 0.9433567169983137]
[0.6489745531369242, 0.9009004917506227, 0.11320596465314436]
[0.46906904778216374, 0.24657283261983032, 0.5437608592359304]
[0.5739411879281008, 0.013114189588902203, 0.21672980046384815]
[0.2794823660111103, 0.9163453718085519, 0.7657254516291417]
[0.15960421235803823, 0.7971469914312045, 0.13876741839890316]
[0.6174525204661166, 0.1266992325502697, 0.0017748622025346439]
[0.8714047447242821, 0.2094563824951179, 0.21548116922473226]
Layer1
[0.9824211088259253, 0.8724077654368019, 0.2893051677469265, 0.9614779889500835, 0.5392234688708106, 0.6778304772505923, 0.20477951453379284, 0.9409760010879991, 0.6906419411069082, 0.9665643123171954, 0.8937416775764785]
Iteration 0 with error = 0.4957854564830943
Iteration 10000 with error = 2.3458856350463867e-25
Iteration 20000 with error = 0.0
Iteration 3

In [16]:
# Compare the trained regressor with the three training targets
for label, sample in (
    ("Expected 2, got: ", [1, 1]),
    ("Expected 3, got: ", [2, 2]),
    ("Expected 2.5, got: ", [3, 1]),
):
    print(label, clf.predict(sample))

Expected 2, got:  2.0
Expected 3, got:  3.0
Expected 2.5, got:  2.5
