In [1]:
import numpy as np

# save activations derivatives
# implement backpropagation
# implement gradient descent
# implement train
# train our net with some dummy dataset
# make some predictions

class MLP:
    """A fully connected multilayer perceptron with sigmoid activations.

    The network is trained with plain stochastic gradient descent: one
    forward pass, one backward pass and one weight update per sample.

    Attributes:
        num_inputs (int): Number of input units.
        num_hidden (list): Number of units in each hidden layer.
        num_outputs (int): Number of output units.
        weights (list): ``weights[i]`` is the matrix connecting layer ``i`` to
            layer ``i + 1`` and has shape ``(layers[i], layers[i + 1])``.
        activations (list): ``activations[i]`` holds the most recent output of
            layer ``i`` (index 0 is the input itself).
        derivatives (list): ``derivatives[i]`` has the same shape as
            ``weights[i]`` and holds the most recent update direction
            computed by :meth:`back_propagate`.
    """

    def __init__(self, num_inputs=3, num_hidden=(3, 5), num_outputs=2):
        """Build the network and initialise its weights randomly.

        Args:
            num_inputs (int): Number of input units.
            num_hidden (sequence of int): Units per hidden layer. Any
                sequence (list, tuple, ...) is accepted; it is copied.
            num_outputs (int): Number of output units.
        """
        self.num_inputs = num_inputs
        # Copy into a fresh list so the network never aliases the caller's
        # sequence (or a shared default) and so tuples are accepted too.
        self.num_hidden = list(num_hidden)
        self.num_outputs = num_outputs

        layers = [self.num_inputs] + self.num_hidden + [self.num_outputs]

        # One weight matrix per pair of consecutive layers, drawn from the
        # global NumPy random state (seed it with np.random.seed for
        # reproducible initial weights).
        self.weights = [
            np.random.rand(n_in, n_out)
            for n_in, n_out in zip(layers[:-1], layers[1:])
        ]

        # Placeholders that forward_propagate / back_propagate overwrite.
        self.activations = [np.zeros(size) for size in layers]
        self.derivatives = [np.zeros(w.shape) for w in self.weights]

    def forward_propagate(self, inputs):
        """Compute the network output and cache every layer's activations.

        Args:
            inputs (array-like): A single sample of shape ``(num_inputs,)``
                or a batch of shape ``(batch, num_inputs)``.

        Returns:
            ndarray: Output activations, shape ``(num_outputs,)`` or
            ``(batch, num_outputs)``.
        """
        # Convert up front so lists/tuples work here and in back_propagate,
        # which reads the cached activations as arrays.
        activations = np.asarray(inputs, dtype=float)
        self.activations[0] = activations

        for i, w in enumerate(self.weights):
            # h_{i+1} = a_i . W_i
            net_inputs = np.dot(activations, w)

            # a_{i+1} = s(h_{i+1})
            activations = self._sigmoid(net_inputs)
            self.activations[i + 1] = activations

        return activations

    def back_propagate(self, error, verbose=False):
        """Propagate an output error backwards and fill ``self.derivatives``.

        Must be called after :meth:`forward_propagate`, whose cached
        activations it reads. For the last layer::

            derivatives[i] = a_i^T . ((y - a_{i+1}) * s'(h_{i+1}))
            s'(h_{i+1})    = a_{i+1} * (1 - a_{i+1})

        and the error handed to the previous layer is ``delta . W_i^T``.

        When ``error`` is ``target - output``, ``derivatives[i]`` is the
        negative gradient of ``0.5 * sum((target - output) ** 2)`` with
        respect to ``weights[i]``, i.e. the direction that reduces the error.
        For a batch the per-sample contributions are summed.

        Args:
            error (array-like): Error at the output layer, same shape as the
                last forward pass output.
            verbose (bool): Print each derivative matrix as it is computed.

        Returns:
            ndarray: The error propagated back to the input layer.
        """
        error = np.asarray(error, dtype=float)

        for i in reversed(range(len(self.derivatives))):
            layer_output = self.activations[i + 1]
            delta = error * self._sigmoid_derivative(layer_output)
            layer_input = self.activations[i]

            # Column of inputs times row of deltas. atleast_2d turns a single
            # sample into a one-row batch, so this is the outer product for
            # 1-D data and the sum over samples for 2-D data.
            self.derivatives[i] = np.dot(
                np.atleast_2d(layer_input).T, np.atleast_2d(delta)
            )

            # Error seen by the previous layer.
            error = np.dot(delta, self.weights[i].T)

            if verbose:
                print("Derivatives for W{}:\n{}".format(i, self.derivatives[i]))

        return error

    def gradient_descent(self, learning_rate, verbose=False):
        """Update every weight matrix in place using ``self.derivatives``.

        The step is *added* because back_propagate stores the descent
        direction (negative gradient) when fed ``target - output``.

        Args:
            learning_rate (float): Step size.
            verbose (bool): Print each matrix before and after the update.
        """
        for i, (weights, derivatives) in enumerate(
            zip(self.weights, self.derivatives)
        ):
            if verbose:
                print("Original W{}:\n{}".format(i, weights))
            # In-place so self.weights[i] itself is modified.
            weights += derivatives * learning_rate
            if verbose:
                print("Updated W{}:\n{}".format(i, weights))

    def train(self, inputs, targets, epochs, learning_rate, verbose=False):
        """Train with per-sample (stochastic) gradient descent.

        Args:
            inputs (iterable): Training samples, each of shape
                ``(num_inputs,)``.
            targets (iterable): Matching targets, each of shape
                ``(num_outputs,)``. Pairs are formed with ``zip``.
            epochs (int): Number of passes over the data.
            learning_rate (float): Step size for each update.
            verbose (bool): Print the mean squared error after each epoch.
        """
        for epoch in range(epochs):
            sum_error = 0.0
            num_samples = 0

            for inp, tar in zip(inputs, targets):
                # forward propagation
                outputs = self.forward_propagate(inp)

                # error at the output layer
                error = np.asarray(tar, dtype=float) - outputs

                # back propagation followed by one weight update
                self.back_propagate(error)
                self.gradient_descent(learning_rate)

                # accumulate the error measured before this update
                sum_error += self._mse(tar, outputs)
                num_samples += 1

            if verbose:
                # Average over the samples actually processed; avoids a
                # ZeroDivisionError on an empty dataset.
                if num_samples:
                    mean_error = sum_error / num_samples
                else:
                    mean_error = float("nan")
                print("Error: {} at epoch {}".format(mean_error, epoch))

    def _sigmoid(self, x):
        """Logistic sigmoid ``1 / (1 + exp(-x))``, applied element-wise."""
        return 1.0 / (1 + np.exp(-x))

    def _sigmoid_derivative(self, x):
        """Sigmoid derivative expressed via the sigmoid *output*.

        Args:
            x: Values that are already sigmoid outputs, ``x = s(h)``.

        Returns:
            ``s'(h) = x * (1 - x)``.
        """
        return x * (1 - x)

    def _mse(self, target, outputs):
        """Mean squared error between ``target`` and ``outputs``."""
        diff = np.asarray(target, dtype=float) - np.asarray(outputs, dtype=float)
        return np.average(diff ** 2)
        

In [2]:
if __name__ == "__main__":
    # Smoke test: one forward pass, one backward pass and one weight update
    # on a single hand-made sample, printing the intermediate matrices.

    # network with 2 inputs, two hidden layers of 5 units, and 1 output
    mlp = MLP(2, [5, 5], 1)

    # a single dummy sample and its target
    inputs = np.array([.1, .2])
    target = np.array([.3])

    # forward propagation
    outputs = mlp.forward_propagate(inputs)

    # error at the output layer
    error = target - outputs

    # back propagation (verbose: print the derivative matrices)
    mlp.back_propagate(error, True)

    # Apply exactly one gradient-descent step. The derivatives are only valid
    # for the weights they were computed from, so a second step would need a
    # fresh forward/backward pass first.
    mlp.gradient_descent(0.01, True)
    

Derivatives for W2:
[[-0.07102142]
 [-0.06999514]
 [-0.05377381]
 [-0.06346849]
 [-0.06927592]]
Derivatives for W1:
[[-0.00142864 -0.0008026  -0.00409886 -0.00581119 -0.00045723]
 [-0.00147105 -0.00082643 -0.00422052 -0.00598368 -0.00047081]
 [-0.0014674  -0.00082438 -0.00421005 -0.00596884 -0.00046964]
 [-0.0014267  -0.00080151 -0.00409329 -0.0058033  -0.00045661]
 [-0.00143396 -0.00080559 -0.00411412 -0.00583283 -0.00045894]]
Derivatives for W0:
[[-3.64774517e-04 -9.24329540e-05 -1.30066940e-04 -2.32659677e-04
  -4.31600919e-04]
 [-7.29549034e-04 -1.84865908e-04 -2.60133880e-04 -4.65319354e-04
  -8.63201839e-04]]
Original W0:
[[0.14557939 0.77749819 0.49347512 0.11932386 0.45712787]
 [0.67032956 0.67585158 0.7901218  0.6687976  0.55478191]]
Updated W0:
[[0.14557574 0.77749726 0.49347382 0.11932153 0.45712356]
 [0.67032226 0.67584973 0.7901192  0.66879295 0.55477327]]
Original W1:
[[0.63985565 0.89933603 0.13699123 0.95240083 0.32361689]
 [0.63914487 0.14776707 0.08376032 0.06293039 0

In [3]:
import random

# Seed both random sources used below (the stdlib generator for the dataset,
# NumPy's global state for the initial weights) so the run is reproducible.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

NUM_SAMPLES = 1000
EPOCHS = 500
LEARNING_RATE = 0.05

# create a dataset to train a network for the sum operation
# Each operand is below 0.5, so every sum stays below 1 and is reachable by
# the network's sigmoid output.
items = np.array([[random.random() / 2 for _ in range(2)] for _ in range(NUM_SAMPLES)])
targets = items.sum(axis=1, keepdims=True)

# create an mlp: 2 inputs, one hidden layer of 5 units, 1 output
mlp2 = MLP(2, [5], 1)

# train our mlp
mlp2.train(items, targets, EPOCHS, LEARNING_RATE, True)

# create dummy data
inp_test = np.array([.1, .2])
tar_test = np.array([.3])  # expected answer, kept for comparison

output_test = mlp2.forward_propagate(inp_test)
print("Our network believes that {} + {} is equal to {}".format(inp_test[0], inp_test[1], output_test[0]))

Error: 0.04475841543317108 at epoch 0
Error: 0.04137272418394314 at epoch 1
Error: 0.04119058499695978 at epoch 2
Error: 0.04099579309724114 at epoch 3
Error: 0.04078639880488115 at epoch 4
Error: 0.040558502017975756 at epoch 5
Error: 0.040307956021318 at epoch 6
Error: 0.04003032883886175 at epoch 7
Error: 0.03972086716263121 at epoch 8
Error: 0.039374474682704656 at epoch 9
Error: 0.0389857107763632 at epoch 10
Error: 0.03854881738771088 at epoch 11
Error: 0.03805778383951942 at epoch 12
Error: 0.03750646086643285 at epoch 13
Error: 0.03688873572558739 at epoch 14
Error: 0.03619877892741834 at epoch 15
Error: 0.0354313688073423 at epoch 16
Error: 0.034582291692862185 at epoch 17
Error: 0.03364880213500321 at epoch 18
Error: 0.03263011006494885 at epoch 19
Error: 0.03152784225992672 at epoch 20
Error: 0.03034640899023686 at epoch 21
Error: 0.029093199829667602 at epoch 22
Error: 0.027778541832955323 at epoch 23
Error: 0.026415381766478306 at epoch 24
Error: 0.02501869865001361 at epo

Error: 0.0005309174021108649 at epoch 201
Error: 0.0005303826586179156 at epoch 202
Error: 0.0005298508036532435 at epoch 203
Error: 0.0005293218055167873 at epoch 204
Error: 0.0005287956336817697 at epoch 205
Error: 0.0005282722587167033 at epoch 206
Error: 0.0005277516522129412 at epoch 207
Error: 0.000527233786717349 at epoch 208
Error: 0.0005267186356697438 at epoch 209
Error: 0.0005262061733447787 at epoch 210
Error: 0.0005256963747979637 at epoch 211
Error: 0.0005251892158155152 at epoch 212
Error: 0.0005246846728677849 at epoch 213
Error: 0.0005241827230659967 at epoch 214
Error: 0.0005236833441220923 at epoch 215
Error: 0.0005231865143114245 at epoch 216
Error: 0.0005226922124381257 at epoch 217
Error: 0.0005222004178029599 at epoch 218
Error: 0.000521711110173471 at epoch 219
Error: 0.0005212242697562698 at epoch 220
Error: 0.0005207398771713219 at epoch 221
Error: 0.0005202579134280669 at epoch 222
Error: 0.0005197783599032642 at epoch 223
Error: 0.0005193011983204296 at epoc

Error: 0.00046186847845365754 at epoch 398
Error: 0.00046164114409187926 at epoch 399
Error: 0.00046141461992446674 at epoch 400
Error: 0.000461188901548745 at epoch 401
Error: 0.00046096398459192806 at epoch 402
Error: 0.00046073986471087815 at epoch 403
Error: 0.00046051653759187624 at epoch 404
Error: 0.00046029399895039866 at epoch 405
Error: 0.00046007224453087695 at epoch 406
Error: 0.0004598512701064814 at epoch 407
Error: 0.00045963107147889746 at epoch 408
Error: 0.0004594116444780957 at epoch 409
Error: 0.00045919298496211776 at epoch 410
Error: 0.00045897508881685716 at epoch 411
Error: 0.00045875795195584365 at epoch 412
Error: 0.00045854157032002577 at epoch 413
Error: 0.00045832593987755896 at epoch 414
Error: 0.0004581110566235949 at epoch 415
Error: 0.00045789691658007454 at epoch 416
Error: 0.00045768351579551444 at epoch 417
Error: 0.0004574708503448095 at epoch 418
Error: 0.00045725891632902155 at epoch 419
Error: 0.00045704770987518053 at epoch 420
Error: 0.00045683