In [1]:
import numpy as np

## Funcții de activare necesare

In [48]:
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    The textbook formula computes ``exp(-x)``, which overflows for large
    negative ``x`` and triggers a NumPy RuntimeWarning. This version only
    ever exponentiates a non-positive number, so no intermediate value can
    overflow, while the result is mathematically the same.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        The element-wise sigmoid of ``x``; a NumPy scalar for scalar input,
        otherwise an array with the same shape as ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) is always in [0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + exp(-x)).  For x < 0: exp(x) / (1 + exp(x)).
    stable = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d array so scalar input yields a scalar.
    return stable[()]

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``.

    Args:
        x: scalar or array of numbers.

    Returns:
        ``x`` with every negative entry replaced by 0.
    """
    lower_bound = 0
    rectified = np.maximum(lower_bound, x)
    return rectified

## Clasa cu care definim rețeaua

In [107]:
class MLP():
    """
    Multi-layer perceptron built from an explicit per-layer specification.

    ``architecture`` is a sequence of dicts, one per layer, with the keys
    ``"input_dim"`` (number of inputs), ``"units"`` (number of outputs) and
    ``"activation"`` (a key of ``self.activation_functions``).

    Attributes set by the constructor:
        activation_functions: maps an activation name to its callable.
        params: dict with ``"W<k>"`` of shape (units, input_dim) and
            ``"b<k>"`` of shape (units, 1), where k is the 1-based layer index.
        architecture: the layer specification that was passed in.
    """
    def __init__(self, architecture, random_seed=42):
        """Validate the specification and draw the initial parameters.

        Args:
            architecture: sequence of layer dicts (see the class docstring).
            random_seed: seed passed to ``np.random.seed`` before the
                parameters are drawn.

        Raises:
            ValueError: if a layer's ``input_dim`` differs from the previous
                layer's ``units``.
        """
        # Fail early on an inconsistent specification, before any side effect.
        self._validate_architecture(architecture)

        # NOTE: this seeds NumPy's *global* generator, so it also affects
        # np.random calls made after the network is constructed.
        np.random.seed(random_seed)

        self.activation_functions = {
            'sigmoid': sigmoid,
            'relu': relu
        }

        params_values = {}

        for idx, layer in enumerate(architecture):
            layer_idx = idx + 1
            layer_input_size = layer["input_dim"]
            layer_output_size = layer["units"]

            # Small random initial values; weights are drawn before the bias.
            params_values['W' + str(layer_idx)] = np.random.randn(layer_output_size, layer_input_size) * 0.1
            params_values['b' + str(layer_idx)] = np.random.randn(layer_output_size, 1) * 0.1

        self.params = params_values
        self.architecture = architecture

    @staticmethod
    def _validate_architecture(architecture):
        """Check that every layer consumes exactly what the previous one emits."""
        previous_units = None
        for idx, layer in enumerate(architecture):
            input_dim = layer["input_dim"]
            if previous_units is not None and input_dim != previous_units:
                raise ValueError(
                    "Layer {} expects input_dim={} but layer {} produces {} units".format(
                        idx + 1, input_dim, idx, previous_units
                    )
                )
            previous_units = layer["units"]

    def summary(self):
        """Print a table with the shapes, activation and parameter count of each layer."""
        print("{:^15s} {:^15s} {:^15s} {:^15s} {:^15s} {:^15s} \n".format(
            "Input shape",
            "Output shape",
            "Weights shape",
            "Bias shape",
            "Activation",
            "Params"
        ))
        print("{:_<100s}".format(''))
        total_params = 0
        for idx, layer in enumerate(self.architecture):
            layer_idx = idx + 1
            weights = self.params['W' + str(layer_idx)]
            bias = self.params['b' + str(layer_idx)]

            # ndarray.size is the product of the shape, i.e. the entry count.
            num_params = weights.size + bias.size
            total_params += num_params

            print("{:^15d} {:^15d} {:^15s} {:^15s} {:^15s} {:^15d} \n".format(
                layer["input_dim"],
                layer["units"],
                str(weights.shape),
                str(bias.shape),
                layer["activation"],
                num_params
            ))
            print("-"*100)

        print("Total number of parameters: {}".format(total_params))

    def forward(self, w, b, x, activation='relu'):
        """Apply a single layer: ``activation(w @ x + b)``.

        Args:
            w: weight matrix of the layer.
            b: bias of the layer, broadcast against ``w @ x``.
            x: input of the layer.
            activation: key of ``self.activation_functions``; an unknown
                name raises ``KeyError``.

        Returns:
            Tuple ``(activated output, pre-activation z)``.
        """
        z = np.dot(w, x) + b

        return self.activation_functions[activation](z), z

    def predict(self, x):
        """Run forward propagation through every layer.

        Args:
            x: input of shape (input_dim, n_samples). A 1-D array of length
                input_dim is treated as a single sample. Without this
                promotion the (units, 1) bias would broadcast the 1-D product
                into a (units, units) matrix and silently give a wrong result.

        Returns:
            Tuple ``(output, memory)``. ``memory["x<k>"]`` is the input fed
            to layer k+1 (so ``x0`` is the network input) and
            ``memory["z<k>"]`` is the pre-activation of layer k.

        Raises:
            ValueError: if the input is not 1-D/2-D or its first dimension
                does not match the first layer's weights.
        """
        memory = {}
        current_activation = np.asarray(x)

        if self.architecture:
            if current_activation.ndim == 1:
                # Single sample: turn it into a column vector.
                current_activation = current_activation.reshape(-1, 1)
            expected_features = self.params["W1"].shape[1]
            if current_activation.ndim != 2 or current_activation.shape[0] != expected_features:
                raise ValueError(
                    "Expected input of shape ({}, n_samples), got {}".format(
                        expected_features, current_activation.shape
                    )
                )

        for idx, layer in enumerate(self.architecture):
            layer_idx = idx + 1
            previous_activation = current_activation
            activation_function = layer["activation"]
            w = self.params["W" + str(layer_idx)]
            b = self.params["b" + str(layer_idx)]

            current_activation, z = self.forward(w, b, current_activation, activation_function)

            # Inputs are keyed 0-based, pre-activations 1-based.
            memory["x" + str(idx)] = previous_activation
            memory["z" + str(layer_idx)] = z

        return current_activation, memory

In [108]:
# Layer specification as (input_dim, units, activation): four ReLU hidden
# layers followed by a single sigmoid output unit.
architecture = [
    {"input_dim": n_inputs, "units": n_units, "activation": activation_name}
    for n_inputs, n_units, activation_name in [
        (2, 4, "relu"),
        (4, 6, "relu"),
        (6, 6, "relu"),
        (6, 4, "relu"),
        (4, 1, "sigmoid"),
    ]
]

In [109]:
# Build the network from the layer specification above; random_seed is left
# at its default of 42.
nn = MLP(architecture)

In [110]:
# Print the per-layer shapes, activations and parameter counts.
nn.summary()

  Input shape    Output shape    Weights shape    Bias shape      Activation        Params      

____________________________________________________________________________________________________
       2               4            (4, 2)          (4, 1)           relu             12        

----------------------------------------------------------------------------------------------------
       4               6            (6, 4)          (6, 1)           relu             30        

----------------------------------------------------------------------------------------------------
       6               6            (6, 6)          (6, 1)           relu             42        

----------------------------------------------------------------------------------------------------
       6               4            (4, 6)          (4, 1)           relu             28        

----------------------------------------------------------------------------------------------------
     

Realizăm procesul de propagare înainte pentru date sintetice

In [116]:
# Synthetic batch: 2 rows to match the first layer's input_dim, 10 columns
# (one per sample).
x = np.random.randn(2, 10)

In [117]:
# Check the layout of the synthetic batch.
x.shape

(2, 10)

In [118]:
# Forward pass: `y` is the network output, `mem` holds each layer's input
# ("x<k>") and pre-activation ("z<k>").
y, mem = nn.predict(x)

In [119]:
# Check the layout of the network output.
y.shape

(1, 10)

In [120]:
# Display the network output for the synthetic batch.
y

array([[0.49936576, 0.49936671, 0.49936659, 0.49936564, 0.49936593,
        0.49936672, 0.49936757, 0.49936672, 0.49936672, 0.49936323]])

Putem observa că output-ul rețelei are dimensiunea dorită. În partea următoare vom folosi clasa MLP de mai sus pentru a antrena rețeaua.