In [5]:
import numpy as np

In [51]:
# functions to use

def sigmoid(x):
    """
    Logistic activation function.

    Works on scalars and numpy arrays alike (element-wise for arrays) and maps
    every input into the open interval (0, 1).
    """
    exp_of_negated = np.exp(-1 * x)
    return 1 / (exp_of_negated + 1)

def sigmoid_prime(x):
    """
    Derivative of the sigmoid, expressed through the sigmoid's *output*.

    x is expected to be an activation value, i.e. x = sigmoid(drive), not the drive
    itself. This lets callers reuse a stored activation instead of recomputing the
    exponential.
    """
    complement = 1 - x
    return x * complement

def error_func(t, y_hat):
    """
    Halved squared error between a target and a prediction.

    t     : target value (scalar or numpy array)
    y_hat : predicted value (scalar or numpy array)

    Returns 0.5 * (t - y_hat)**2, element-wise for arrays. The factor 0.5 makes the
    derivative with respect to y_hat simply -(t - y_hat).
    """
    # the multiplication sign was missing, which made python try to *call* 0.5
    return 0.5 * (t - y_hat)**2

def error_func_prime(t,y_hat):
    """
    Derivative of the halved squared error 0.5*(t - y_hat)**2 with respect to y_hat.

    t     : target value
    y_hat : predicted value
    """
    residual = t - y_hat
    return -residual


def accuracy_func(t, y_hat):
    """
    Check whether a prediction counts as correct for a binary target.

    The prediction y_hat is thresholded at 0.5: values >= 0.5 count as class 1,
    values below as class 0. Any target other than 0 or 1 is never counted as correct.
    """
    if t == 1:
        return bool(y_hat >= 0.5)
    if t == 0:
        return bool(y_hat < 0.5)
    return False

In [52]:
class Perceptron:
    """
    A single unit: weighted sum of its inputs (plus bias) passed through an activation function.

    The bias is handled as an ordinary weight stored at index 0 of self.weights; the
    matching input is a constant 1 that forward_step prepends to whatever it receives.
    """

    def __init__(self, input_units, act_func=sigmoid):
        # how many inputs feed into this unit and which non-linearity it applies
        self.input_units = input_units
        self.act_func = act_func

        # fixed step size used by update()
        self.lr = 0.01

        # one standard-normal weight per input, plus one extra slot (index 0) for the bias
        self.weights = np.random.randn(input_units + 1)

        # bookkeeping of the most recent forward pass: bias-extended inputs,
        # weighted sum (drive) and resulting activation
        self.inputs = 0
        self.drive = 0
        self.activation = 0

        # error signal of this unit; stays None until someone assigns it
        self.delta = None

    def forward_step(self, inputs):
        """
        Compute and return this unit's activation for the given inputs.

        The bias-extended inputs, the drive and the activation are all stored on
        the instance so that a later weight update can reuse them.
        """
        # constant 1 in front of the inputs lines up with the bias weight at index 0
        self.inputs = np.insert(inputs, 0, 1)

        # weighted sum over bias and inputs
        self.drive = self.weights @ self.inputs

        # squash the drive with the activation function
        activation = self.act_func(self.drive)
        self.activation = activation
        return activation

    def update(self, delta):
        """
        Take one gradient-descent step on this unit's weights (bias included).

        delta is the unit's error signal; the gradient for each weight is delta times
        the input that weight was multiplied with in the last forward step.
        """
        step = self.lr * (delta * self.inputs)
        self.weights -= step

In [80]:
class MLP:
    """
    Fully connected multi-layer perceptron assembled from individual Perceptron units.

    The architecture is given as ONE list, total_dim: its first entry is the number of
    inputs, its last entry the number of output units, and every entry in between the
    number of units of one hidden layer (e.g. [2, 4, 1]). A list with only two entries
    builds a network without hidden layers.
    """
    def __init__(self, total_dim):
        if len(total_dim) < 2:
            raise ValueError('total_dim needs at least an input and an output dimension')

        hidden_dim = total_dim[1:-1]
        output_dim = total_dim[-1]
        # remembered so forward_step can validate its inputs even without hidden layers
        self.input_dim = total_dim[0]
        print(total_dim)

        # create a nested list of Perceptrons where each perceptron has the input_units set to
        # the previous number of units (including input units at n=0)
        self.hidden_layers = [
                [Perceptron(input_units = total_dim[n]) for _ in range(layer_units)]
                 for n, layer_units in enumerate(hidden_dim)
        ]

        # initialize perceptrons for the output units (in case we have multiple outputs).
        # total_dim[-2] is the last hidden layer's size, or the input size if there is no hidden layer.
        self.output_units = [Perceptron(input_units = total_dim[-2]) for _ in range(output_dim)]

        # initialize MLP output to 0 and initialize an empty list for individual layer's perceptron's activations
        self.output = [0 for _ in range(output_dim)]
        self.layer_activations = []


    def forward_step(self, inputs):
        """
        Perform a forward step given inputs.

        The result is stored in self.output (one activation per output unit); the inputs
        and every hidden layer's activations are kept in self.layer_activations.
        """
        # make sure inputs match the input dimension the network was built with.
        assert len(inputs) == self.input_dim,'input dimension should match the initalisation'

        self.layer_activations = [inputs]

        # each layer is fed with the activations of the layer before it (the raw inputs for the first one)
        for layer in self.hidden_layers:
            activations = [unit.forward_step(self.layer_activations[-1]) for unit in layer]
            self.layer_activations.append(activations)

        # calculate output
        self.output = [out_unit.forward_step(self.layer_activations[-1]) for out_unit in self.output_units]

    def backprop_step(self, targets):
        """
        Perform one backpropagation step for the sample of the most recent forward_step.

        targets holds one target value per output unit. All error signals (deltas) are
        computed first, from the weights as they were during the forward step; only
        afterwards every unit's weights are updated exactly once.
        """
        assert len(targets) == len(self.output_units),'number of targets should match the number of output units'

        # error signals for the output units:
        # delta_i = -(target_i - output_i) * sigmoid_prime(activation of output unit_i)
        # (sigmoid_prime works on the stored activation, so the sigmoid is not recomputed)
        for out_unit, target, output in zip(self.output_units, targets, self.output):
            out_unit.delta = -(target - output) * sigmoid_prime(out_unit.activation)

        # error signals for the hidden layers, walking from the output towards the input.
        # 'downstream' is always the layer that receives the current layer's activations.
        downstream = self.output_units
        for layer in reversed(self.hidden_layers):
            sums_for_layer = np.zeros(len(layer))

            # entry i collects delta_k * w_ki over all downstream units k
            # (weights[1:] skips the bias weight, which has no upstream unit)
            for unit_k in downstream:
                sums_for_layer += unit_k.delta * np.array(unit_k.weights[1:])

            # multiply the sum for unit i by the sigmoid derivative to obtain it's delta
            for unit_i, sum_i in zip(layer, sums_for_layer):
                unit_i.delta = sum_i * sigmoid_prime(unit_i.activation)

            downstream = layer

        # once we have all the deltas computed without having updated weights in between
        # (this would distort the process), we update the weights.
        for unit in self.output_units:
            unit.update(unit.delta)

        for layer in self.hidden_layers:
            for unit in layer:
                unit.update(unit.delta)
        

                    

In [81]:
# all four input combinations of two binary variables, one combination per row
x = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

# target vectors for the logic gates, row-aligned with x and
# derived directly from the two input columns
target_and = x[:, 0] & x[:, 1]
target_or = x[:, 0] | x[:, 1]
target_nand = 1 - (x[:, 0] & x[:, 1])
target_xor = x[:, 0] ^ x[:, 1]
target_nor = 1 - (x[:, 0] | x[:, 1])

In [92]:
# Build a network with 2 inputs, one hidden layer of 4 units and 1 output unit
# (the constructor prints the layer sizes), then display the initial weights of the
# first hidden unit: bias weight first, followed by one weight per input.
or_mlp = MLP([2,4,1])
or_mlp.hidden_layers[0][0].weights

[2, 4, 1]


array([-0.82042274, -0.78460147,  0.43058687])

In [93]:
# Print the network object, then make a single pass over the four input patterns:
# each pattern gets one forward step followed by one backprop step (which updates
# the weights) against its OR target. The target is wrapped in a list because
# backprop_step iterates over one target per output unit.
print(or_mlp)
for x_i,t_i in zip(x,target_or):
    or_mlp.forward_step(x_i)
    or_mlp.backprop_step([t_i])
    

<__main__.MLP object at 0x000001A378DA9848>


In [94]:
# Display the first hidden unit's weights again so they can be compared with the
# values shown before the training pass.
or_mlp.hidden_layers[0][0].weights

array([-0.8204904 , -0.78464652,  0.43052614])