In [184]:
import numpy as np
import abc


# 1


In [185]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

# Initialization techniques of weights here defined
class WeightInitialization(metaclass=abc.ABCMeta):

    @abc.abstractmethod
    def init(self, lower, upper, shape):
        pass

class UniformDistributionWeight(WeightInitialization):

    def init(self, lower, upper, shape):
        return np.random.uniform(lower, upper, size=shape)

In [186]:
weights = UniformDistributionWeight().init(1, 1, 9)
weights 

array([1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [187]:
# constant
nodes_per_layer = [2,2,1]
bias_value = 1


def get_nr_weights(nodes_per_layer):

    nr_weights = 0

    for idx in range(len(nodes_per_layer) - 1):
        current_layer = nodes_per_layer[idx]
        next_layer = nodes_per_layer[idx + 1]

        nr_weights = nr_weights + (current_layer + 1) * next_layer

    return nr_weights


weights = UniformDistributionWeight().init(-1, 1, get_nr_weights(nodes_per_layer))
print(weights)

    

[-0.20701001 -0.0545706  -0.66013669  0.32184591  0.55363062 -0.97741152
 -0.30342364 -0.75639842 -0.03300825]


In [188]:
def divide_in_layers_matrix(weights, nodes_per_layer):

    layers = []
    last = 0
    for idx in range(len(nodes_per_layer) - 1):
        
        current_layer = nodes_per_layer[idx]
        next_layer = nodes_per_layer[idx + 1]
        temp = (current_layer + 1) * next_layer
        layers.append(weights[last : last + temp].reshape(current_layer + 1, next_layer))
        #print("{}:{}".format(last, last + temp))
        last = temp

    return layers

divide_in_layers_matrix(weights, nodes_per_layer)


[array([[-0.20701001, -0.0545706 ],
        [-0.66013669,  0.32184591],
        [ 0.55363062, -0.97741152]]),
 array([[-0.30342364],
        [-0.75639842],
        [-0.03300825]])]

In [189]:

def forward_pass(x1, x2 , weights):
    input_layer = np.array([x1, x2])
    values_output = [[x1, x2, bias_value]]
    for idx in range(len(weights)):
        weight = weights[idx]
        input_layer = np.append(input_layer, [bias_value])
        net = np.dot(input_layer, weight)
        output = sigmoid(net)
        values_output.append(output)
        input_layer = output

    #values_input.append(input_layer)
    return values_output 


forward_pass(1, 0, divide_in_layers_matrix(weights, nodes_per_layer))

[[1, 0, 1], array([0.58579785, 0.26270001]), array([0.39904184])]

# 2


In [193]:
values_output = []
values_input = []

def mse(weights):

    inputs = np.array([[0,0],[0,1],[1,0],[1,1]])
    expected_output = np.array([[0],[1],[1],[0]])
    predicted = []
    values_output = []
    for possible_inputs in inputs:
        val= forward_pass(possible_inputs[0], possible_inputs[1], divide_in_layers_matrix(weights, nodes_per_layer))
        predicted.append(val[-1][0])
        values_output.append(val)
        #print(val)
    
    predicted = np.array(predicted)

    error = expected_output - predicted
    error = error * error
    #print(error)
    return np.mean(error), expected_output , values_output

print(mse(weights))

(0.26080918996175373, array([[0],
       [1],
       [1],
       [0]]), [[[0, 0, 1], array([0.63497751, 0.2734057 ]), array([0.39353451])], [[0, 1, 1], array([0.47339862, 0.34173644]), array([0.39290022])], [[1, 0, 1], array([0.58579785, 0.26270001]), array([0.39904184])], [[1, 1, 1], array([0.42225674, 0.32956874]), array([0.39881233])]])


# 3

In [191]:
def sigmoid_derivative(x):
    return x * (1 - x)

In [192]:
def grdmse(weights):
    mse_value, y, outputs =  mse(weights) # returns for the four different inputs
    w = divide_in_layers_matrix(weights, nodes_per_layer)


    # for the first example
    output = outputs[0] # output first layer, output second layer, ..., output last layer
    target = y[0]

    layerK = len(nodes_per_layer)-1

    ######## gradient for the weights between layer n-1 and n #################
    delta_j =  (output[layerK] - target) * sigmoid_derivative(output[layerK]) # (y - d) * derivative
    print(np.array(output[layerK]).T.dot(delta_j))
    
    layerK = layerK - 1
    ############### get other gradient through back propagations ###############
    while layerK > 0:
        
        #delta_k = delta_j
        delta_j = np.dot(delta_j, w[layerK].T)
        delta_j =  sigmoid_derivative(np.array(output[layerK])) * delta_j
        print(np.array(output[layerK]).T.dot(delta_j))
        layerK = layerK - 1
    
    
    #print(gradients)



grdmse(weights)
    

0.03696192250786115


ValueError: operands could not be broadcast together with shapes (2,) (3,) 

In [197]:
def grdmse(weights):
    #print(weights)
    mse_value, y, outputs =  mse(weights) # returns for the four different inputs
    w = divide_in_layers_matrix(weights, nodes_per_layer)


    # for the first example
    output = outputs[0] # output first layer, output second layer, ..., output last layer
    target = y[0]
    print(output[2],target)

    layerK = len(nodes_per_layer)-1
    ######## gradient for the weights between layer n-1 and n #################
    #print(sigmoid_derivative(w[layerK-1]))
    delta_j =  (target - output[layerK]) * sigmoid_derivative(output[layerK]) # (y - d) * derivative)
    delta_j = delta_j * [*output[layerK - 1],1]

    #print(output[layerK - 1])
    delta_i = np.empty([2,3])
    for i in range(2):
        for j in range(3):
            delta_i[i,j] = delta_j[j] * sigmoid_derivative(output[layerK - 1][i]) * output[layerK - 2][i]
    delta_weights = np.append(delta_i.transpose().flatten(),delta_j)
    #print(delta_j)
    #print(delta_i)
    weights += delta_weights


for i in range(5):
    grdmse(weights)

[0.28642374] [0]
[0.26968459] [0]
[0.25504185] [0]
[0.24214789] [0]
[0.23071972] [0]
