In [1]:
import numpy as np
from random import random

# save activations and derivatives
# implement backpropagation
# implement gradient descent
# implement train
# train our net with some dummy dataset
# make some predictions

class MLP(object):
    """A Multilayer Perceptron (MLP) with sigmoid activations.

    The network is fully connected and has no bias terms. Besides the
    weights it keeps, for every layer, the activations of the most recent
    forward pass and, for every weight matrix, the derivatives computed by
    the most recent backward pass, so that it can be trained one sample at
    a time with plain gradient descent.
    """

    def __init__(self, num_inputs=3, hidden_layers=None, num_outputs=2):
        """Constructor for the MLP. Takes the number of inputs,
           a variable number of hidden layers, and number of outputs

        Args:
            num_inputs (int): Number of inputs
            hidden_layers (list): A list of ints for the hidden layers;
                defaults to two hidden layers of 3 neurons each
            num_outputs (int): Number of outputs
        """
        # A list literal as the default value would be shared by every
        # instance; build a fresh one per call instead.
        if hidden_layers is None:
            hidden_layers = [3, 3]

        self.num_inputs = num_inputs
        # copy, so later changes to the caller's list cannot alter this network
        self.hidden_layers = list(hidden_layers)
        self.num_outputs = num_outputs

        # generic representation of the layers: each number is the
        # neuron count of one layer, from input to output
        layers = [num_inputs] + self.hidden_layers + [num_outputs]
        layer_pairs = list(zip(layers[:-1], layers[1:]))

        # random connection weights, one (n_in, n_out) matrix per pair of
        # consecutive layers; np.random.rand draws uniformly from [0, 1)
        self.weights = [np.random.rand(n_in, n_out) for n_in, n_out in layer_pairs]

        # per-layer activations saved by forward_propagate
        self.activations = [np.zeros(size) for size in layers]

        # per-weight-matrix derivatives saved by back_propagate
        self.derivatives = [np.zeros((n_in, n_out)) for n_in, n_out in layer_pairs]

    def forward_propagate(self, inputs):
        """Computes forward propagation of the network based on input signals.

        The activation of every layer is stored in ``self.activations`` so
        that ``back_propagate`` can use it afterwards.

        Args:
            inputs (ndarray): Input signals, a vector of ``num_inputs`` values
        Return:
            activations (ndarray): Output values
        """
        # the input layer activation is just the input itself; asarray lets
        # plain sequences through while leaving ndarrays untouched
        activations = np.asarray(inputs)
        self.activations[0] = activations

        # a_[i+1] = s(h_[i+1]),  h_[i+1] = a_i . W_i
        for i, w in enumerate(self.weights):
            # matrix multiplication between previous activation and weights
            net_inputs = np.dot(activations, w)

            # apply sigmoid activation function and save it for backprop
            activations = self._sigmoid(net_inputs)
            self.activations[i + 1] = activations

        # return output layer activation
        return activations

    def back_propagate(self, error, verbose=False):
        """Backpropagates an error signal and stores the weight derivatives.

        Must be called after ``forward_propagate``, whose saved activations
        it reads. For every weight matrix, from last to first:

            delta_[i+1]   = error_[i+1] * s'(h_[i+1])
            derivative_i  = outer(a_i, delta_[i+1])
            error_i       = delta_[i+1] . W_i^T

        where s'(h) = s(h) * (1 - s(h)) and s(h_[i+1]) = a_[i+1].

        Args:
            error (ndarray): Error at the output layer (target - output)
            verbose (bool): If True, print the derivatives of every layer
        Returns:
            error (ndarray): The error propagated back to the input layer
        """
        for i in reversed(range(len(self.derivatives))):
            # activation of the layer that W_i feeds into
            layer_output = self.activations[i + 1]
            delta = error * self._sigmoid_derivative(layer_output)

            # delta as a row: ndarray([0.1, 0.2]) --> ndarray([[0.1, 0.2]]), shape (1, n)
            delta_row = delta.reshape(1, -1)

            # activation of the layer that feeds W_i, as a column:
            # ndarray([0.1, 0.2]) --> ndarray([[0.1], [0.2]]), shape (m, 1)
            layer_input_column = self.activations[i].reshape(-1, 1)

            # (m, 1) . (1, n) --> (m, n), the same shape as W_i
            self.derivatives[i] = np.dot(layer_input_column, delta_row)

            # error seen by the previous layer
            error = np.dot(delta, self.weights[i].T)

            if verbose:
                print("Derivatives for W{}: {}".format(i, self.derivatives[i]))

        return error

    def gradient_descent(self, learning_rate):
        """Updates the weights in place from the stored derivatives.

        The derivatives are added rather than subtracted because ``train``
        feeds ``target - output`` into ``back_propagate``, so the stored
        values already point in the direction that reduces the error.

        Args:
            learning_rate (float): Step size applied to the derivatives
        """
        for weights, derivatives in zip(self.weights, self.derivatives):
            # in-place update of the array held in self.weights
            weights += derivatives * learning_rate

    def train(self, inputs, targets, epochs, learning_rate):
        """Trains the network one sample at a time and reports the error.

        Args:
            inputs (ndarray): Training samples, one input vector per row
            targets (ndarray): Expected outputs, one target vector per row
            epochs (int): Number of passes over the whole dataset
            learning_rate (float): Step size for gradient descent
        """
        for epoch in range(epochs):
            sum_error = 0
            # `sample` rather than `input`, which would shadow the builtin
            for sample, target in zip(inputs, targets):

                # forward propagation
                output = self.forward_propagate(sample)

                # calculate error
                error = target - output

                # back propagation
                self.back_propagate(error)

                # apply gradient descent
                self.gradient_descent(learning_rate)

                sum_error += self._mse(target, output)

            # report the mean error over the dataset for this epoch
            print("Error: {} at epoch {}".format(sum_error / len(inputs), epoch))

    def _mse(self, target, output):
        """Mean squared error between a target and an output vector."""
        return np.average((target - output)**2)

    def _sigmoid_derivative(self, x):
        """Sigmoid derivative expressed through the sigmoid's output.

        Args:
            x (ndarray): Values that are already sigmoid outputs, s(h)
        Returns:
            ndarray: s(h) * (1 - s(h))
        """
        return x * (1.0 - x)

    def _sigmoid(self, x):
        """Sigmoid activation function
        Args:
            x (float): Value to be processed
        Returns:
            y (float): Output
        """
        y = 1.0 / (1 + np.exp(-x))
        return y
    
    
if __name__ == "__main__":

    # create a dataset to train a network for the sum operation;
    # each operand is random() / 2, so it lies in [0, 0.5) and every sum stays below 1
    inputs = np.array([[random() / 2 for _ in range(2)] for _ in range(1000)])  # array([[0.1, 0.2], [0.3, 0.4]])
    targets = np.array([[i[0] + i[1]] for i in inputs])  # array([[0.3], [0.7]])

    # create an mlp: 2 inputs, one hidden layer of 5 neurons, 1 output
    mlp = MLP(2, [5], 1)

    # train our mlp for 50 epochs with a learning rate of 0.1
    mlp.train(inputs, targets, 50, 0.1)

    # create dummy data; named `sample` so the builtin input() is not
    # rebound for the rest of the session
    sample = np.array([0.3, 0.1])

    # make a prediction for the dummy sample
    output = mlp.forward_propagate(sample)
    print()
    print()
    print("Out network believes that {} + {} is equal to {}".format(sample[0], sample[1], output[0]))


Error: 0.04956481422258355 at epoch 0
Error: 0.04437248542318584 at epoch 1
Error: 0.04407979518652315 at epoch 2
Error: 0.04372764575635074 at epoch 3
Error: 0.04328006184726788 at epoch 4
Error: 0.042694431139935415 at epoch 5
Error: 0.04191995542799014 at epoch 6
Error: 0.04089777784941229 at epoch 7
Error: 0.03956418610134619 at epoch 8
Error: 0.03785878696880684 at epoch 9
Error: 0.03573904784381622 at epoch 10
Error: 0.03319991945440775 at epoch 11
Error: 0.030291995652949363 at epoch 12
Error: 0.02712691348847592 at epoch 13
Error: 0.023861122305617505 at epoch 14
Error: 0.020661420757054866 at epoch 15
Error: 0.01766843689672538 at epoch 16
Error: 0.014974577124235725 at epoch 17
Error: 0.012621417509847344 at epoch 18
Error: 0.01061037291711361 at epoch 19
Error: 0.00891752083023237 at epoch 20
Error: 0.0075064999088463825 at epoch 21
Error: 0.006337373300699629 at epoch 22
Error: 0.005371737823895301 at epoch 23
Error: 0.004575162459178629 at epoch 24
Error: 0.003918004722171

In [2]:
import numpy as np

class MLP:
    """A Multilayer Perceptron (MLP) that only supports forward propagation.

    The network is fully connected, uses sigmoid activations and has no
    bias terms.
    """

    def __init__(self, num_inputs=3, num_hidden=None, num_outputs=2):
        """Constructor for the MLP. Takes the number of inputs,
           a variable number of hidden layers, and number of outputs

        Args:
            num_inputs (int): Number of inputs
            num_hidden (list): A list of ints for the hidden layers;
                defaults to two hidden layers of 3 and 5 neurons
            num_outputs (int): Number of outputs
        """
        # A list literal as the default value would be shared by every
        # instance; build a fresh one per call instead.
        if num_hidden is None:
            num_hidden = [3, 5]

        self.num_inputs = num_inputs
        # copy, so later changes to the caller's list cannot alter this network
        self.num_hidden = list(num_hidden)
        self.num_outputs = num_outputs

        # generic representation of the layers: each number is the
        # neuron count of one layer, from input to output
        layers = [self.num_inputs] + self.num_hidden + [self.num_outputs]

        # random connection weights, one (n_in, n_out) matrix per pair of
        # consecutive layers; np.random.rand draws uniformly from [0, 1)
        self.weights = [
            np.random.rand(n_in, n_out)
            for n_in, n_out in zip(layers[:-1], layers[1:])
        ]

    def forward_propagate(self, inputs):
        """Computes forward propagation of the network based on input signals.

        Args:
            inputs (ndarray): Input signals, a vector of ``num_inputs`` values
        Return:
            activations (ndarray): Output values
        """
        # the input layer activation is just the input itself
        activations = inputs

        # iterate through the network layers
        for w in self.weights:
            # matrix multiplication between previous activation and weights
            net_inputs = np.dot(activations, w)

            # apply sigmoid activation function
            activations = self._sigmoid(net_inputs)

        # return output layer activation
        return activations

    def _sigmoid(self, x):
        """Sigmoid activation function, 1 / (1 + e^-x)."""
        return 1 / (1 + np.exp(-x))
    
    
if __name__ == "__main__":

    # build a network with the default layer sizes
    mlp = MLP()

    # draw a random input vector with one value per input neuron
    inputs = np.random.rand(mlp.num_inputs)

    # run the vector through the network
    outputs = mlp.forward_propagate(inputs)

    # show what went in and what came out, one line each
    print(
        "The network input is: {}".format(inputs),
        "The network output is: {}".format(outputs),
        sep="\n",
    )

The network input is: [0.06378894 0.39837112 0.82979261]
The network output is: [0.81498821 0.87741717]
