In [417]:
import numpy as np
import abc


# 1


In [418]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

# Initialization techniques of weights here defined
class WeightInitialization(metaclass=abc.ABCMeta):

    @abc.abstractmethod
    def init(self, lower, upper, shape):
        pass

class UniformDistributionWeight(WeightInitialization):

    def init(self, lower, upper, shape):
        return np.random.uniform(lower, upper, size=shape)

In [419]:
weights = UniformDistributionWeight().init(-1, 1, 9)
weights

array([ 0.96805139,  0.58536658,  0.23981411,  0.18636149, -0.69045174,
       -0.307527  ,  0.36091206,  0.4239523 ,  0.07894475])

In [420]:
# constant
nodes_per_layer = [2,2,1]
bias_value = 1


def get_nr_weights(nodes_per_layer):

    nr_weights = 0

    for idx in range(len(nodes_per_layer) - 1):
        current_layer = nodes_per_layer[idx]
        next_layer = nodes_per_layer[idx + 1]

        nr_weights = nr_weights + (current_layer + 1) * next_layer

    return nr_weights


weights = UniformDistributionWeight().init(-1, 1, get_nr_weights(nodes_per_layer))
print(weights)

    

[ 0.4373332   0.06204287  0.41012146 -0.71179974 -0.63281368  0.0892423
 -0.95646875 -0.17346942 -0.31750509]


In [421]:
def divide_in_layers_matrix(weights, nodes_per_layer):

    layers = []
    last = 0
    for idx in range(len(nodes_per_layer) - 1):
        
        current_layer = nodes_per_layer[idx]
        next_layer = nodes_per_layer[idx + 1]
        temp = (current_layer + 1) * next_layer
        layers.append(weights[last : last + temp].reshape(current_layer + 1, next_layer))
        #print("{}:{}".format(last, last + temp))
        last = temp

    return layers

divide_in_layers_matrix(weights, nodes_per_layer)


[array([[ 0.4373332 ,  0.06204287],
        [ 0.41012146, -0.71179974],
        [-0.63281368,  0.0892423 ]]),
 array([[-0.95646875],
        [-0.17346942],
        [-0.31750509]])]

In [462]:

def forward_pass(x1, x2 , weights):
    input_layer = np.array([x1, x2])
    values_output = [[x1, x2, bias_value]]
    for idx in range(len(weights)):
        weight = weights[idx]
        input_layer = np.append(input_layer, [bias_value])
        net = np.dot(input_layer, weight)
        output = sigmoid(net)
        values_output.append(output)
        input_layer = output

    #values_input.append(input_layer)
    return values_output 


forward_pass(1, 0, divide_in_layers_matrix(weights, nodes_per_layer))

[[1, 0, 1], array([0.45128491, 0.53774932]), array([0.30102365])]

# 2


In [463]:
values_output = []
values_input = []

def mse(weights):

    inputs = np.array([[0,0],[0,1],[1,0],[1,1]])
    expected_output = np.array([[0],[1],[1],[0]])

    predicted = []

    for possible_inputs in inputs:
        val= forward_pass(possible_inputs[0], possible_inputs[1], divide_in_layers_matrix(weights, nodes_per_layer))
        predicted.append(val[-1][0])
        values_output.append(val)
        #print(val)
    
    predicted = np.array(predicted)

    error = expected_output - predicted
    error = error * error
    #print(error)
    return np.mean(error), expected_output , values_output

print(mse(weights))

(0.2881571955798743, array([[0],
       [1],
       [1],
       [0]]), [[[0, 0, 1], array([0.34687282, 0.52229578]), array([0.32303029])], [[0, 1, 1], array([0.44455589, 0.34920003]), array([0.30932339])], [[1, 0, 1], array([0.45128491, 0.53774932]), array([0.30102365])], [[1, 1, 1], array([0.55345518, 0.36342841]), array([0.28701797])]])


# 3

In [424]:
def sigmoid_derivative(x):
    return x * (1 - x)

In [515]:
def grdmse(weights):
    mse_value, y, outputs =  mse(weights) # returns for the four different inputs
    w = divide_in_layers_matrix(weights, nodes_per_layer)



    # for the first example
    output = outputs[0] # output first layer, output second layer, ..., output last layer
    target = y[0]

    layerK = len(nodes_per_layer)-1

    ######## gradient for the weights between layer n-1 and n #################
    delta_j =  (output[layerK] - target) * sigmoid_derivative(output[layerK]) # (y - d) * derivative
    print(np.array(output[layerK]).T.dot(delta_j))

    layerK = layerK - 1
    ############### get other gradient through back propagations ###############
    while layerK > 0:
        
        #delta_k = delta_j
        delta_j = np.dot(delta_j, w[layerK].T)
        delta_j =  sigmoid_derivative(np.array(output[layerK])) * delta_j
        print(np.array(output[layerK]).T.dot(delta_j))
        layerK = layerK - 1
    
    
    #print(gradients)



grdmse(weights)
    

0.022819124072430638


ValueError: operands could not be broadcast together with shapes (2,) (3,) 