In [3]:
import numpy as np
import abc


# 1


In [4]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

# Initialization techniques of weights here defined
class WeightInitialization(metaclass=abc.ABCMeta):

    @abc.abstractmethod
    def init(self, lower, upper, shape):
        pass

class UniformDistributionWeight(WeightInitialization):

    def init(self, lower, upper, shape):
        return np.random.uniform(lower, upper, size=shape)

In [5]:
weights = UniformDistributionWeight().init(-1, 1, 9)
weights

array([-0.83141498,  0.41602419, -0.66019125,  0.25985005,  0.42095398,
        0.31099385,  0.93167715,  0.09505453, -0.91957213])

In [6]:
# constant
nodes_per_layer = [2,2,1]
bias_value = 1


def get_nr_weights(nodes_per_layer):

    nr_weights = 0

    for idx in range(len(nodes_per_layer) - 1):
        current_layer = nodes_per_layer[idx]
        next_layer = nodes_per_layer[idx + 1]

        nr_weights = nr_weights + (current_layer + 1) * next_layer

    return nr_weights


weights = UniformDistributionWeight().init(-1, 1, get_nr_weights(nodes_per_layer))
print(weights)

    

[-0.56884324 -0.87109001  0.7295165  -0.45189619 -0.74623203 -0.43330786
 -0.2749001  -0.16168629 -0.83897505]


In [7]:
def divide_in_layers_matrix(weights, nodes_per_layer):

    layers = []
    last = 0
    for idx in range(len(nodes_per_layer) - 1):
        
        current_layer = nodes_per_layer[idx]
        next_layer = nodes_per_layer[idx + 1]
        temp = (current_layer + 1) * next_layer
        layers.append(weights[last : last + temp].reshape(current_layer + 1, next_layer))
        #print("{}:{}".format(last, last + temp))
        last = temp

    return layers

divide_in_layers_matrix(weights, nodes_per_layer)


[array([[-0.56884324, -0.87109001],
        [ 0.7295165 , -0.45189619],
        [-0.74623203, -0.43330786]]),
 array([[-0.2749001 ],
        [-0.16168629],
        [-0.83897505]])]

In [8]:

def forward_pass(x1, x2 , weights):
    input_layer = np.array([x1, x2])
    values_output = [[x1, x2, bias_value]]
    for idx in range(len(weights)):
        weight = weights[idx]
        input_layer = np.append(input_layer, [bias_value])
        net = np.dot(input_layer, weight)
        output = sigmoid(net)
        values_output.append(output)
        input_layer = output

    #values_input.append(input_layer)
    return values_output 


forward_pass(1, 0, divide_in_layers_matrix(weights, nodes_per_layer))

[[1, 0, 1], array([0.21163881, 0.21342579]), array([0.28258753])]

# 2


In [9]:
values_output = []
values_input = []

def mse(weights):

    inputs = np.array([[0,0],[0,1],[1,0],[1,1]])
    expected_output = np.array([[0],[1],[1],[0]])

    predicted = []

    for possible_inputs in inputs:
        val= forward_pass(possible_inputs[0], possible_inputs[1], divide_in_layers_matrix(weights, nodes_per_layer))
        predicted.append(val[-1][0])
        values_output.append(val)
        #print(val)
    
    predicted = np.array(predicted)

    error = expected_output - predicted
    error = error * error
    #print(error)
    return np.mean(error), expected_output , values_output

print(mse(weights))

(0.30129068051041197, array([[0],
       [1],
       [1],
       [0]]), [[[0, 0, 1], array([0.32164288, 0.39333672]), array([0.27071621])], [[0, 1, 1], array([0.49582122, 0.29210053]), array([0.26453974])], [[1, 0, 1], array([0.21163881, 0.21342579]), array([0.28258753])], [[1, 1, 1], array([0.35765453, 0.14725509]), array([0.27665732])]])


# 3

In [10]:
def sigmoid_derivative(x):
    return x * (1 - x)

In [11]:
def grdmse(weights):
    mse_value, y, outputs =  mse(weights) # returns for the four different inputs
    w = divide_in_layers_matrix(weights, nodes_per_layer)



    # for the first example
    output = outputs[0] # output first layer, output second layer, ..., output last layer
    target = y[0]

    layerK = len(nodes_per_layer)-1

    ######## gradient for the weights between layer n-1 and n #################
    delta_j =  (output[layerK] - target) * sigmoid_derivative(output[layerK]) # (y - d) * derivative
    print(np.array(output[layerK]).T.dot(delta_j))

    layerK = layerK - 1
    ############### get other gradient through back propagations ###############
    while layerK > 0:
        
        #delta_k = delta_j
        delta_j = np.dot(delta_j, w[layerK].T)
        delta_j =  sigmoid_derivative(np.array(output[layerK])) * delta_j
        print(np.array(output[layerK]).T.dot(delta_j))
        layerK = layerK - 1
    
    
    #print(gradients)



grdmse(weights)
    

0.014469027710764188


ValueError: operands could not be broadcast together with shapes (2,) (3,) 

In [104]:
def magic(x1, x2 , weights):

    input_layer = np.array([x1, x2])
    weights =  divide_in_layers_matrix(weights, nodes_per_layer)


    inputs = []
    
    #forward pass
    for idx in range(len(weights)):
        input_layer = np.append(input_layer, [bias_value])
        inputs.append(input_layer)
        weight = weights[idx]
        net = np.dot(input_layer, weight)
        output = sigmoid(net)
        input_layer = output # the input of the next layer is the output of the current

    inputs = np.array(inputs)
    length = len(inputs) - 1

    
    #backward pass
    delta =np.array([np.array(output - (x1 != x2)) * sigmoid_derivative(output)])
    print("DW for second weights:")

    print(delta * inputs[length].reshape(-1,1))
    length -=1

    #print(weights[1])
    a = sigmoid_derivative(inputs[length]).reshape(1, len(inputs[length]))
    some = delta *  weights[1].dot(a) 
    print("DW for first weights:")
    print(np.multiply(some, inputs[0][:, np.newaxis]))



    

    

magic(0,1,weights)



DW for second weights:
[[-0.07094707]
 [-0.04179667]
 [-0.14309002]]
DW for first weights:
[[-0. -0. -0.]
 [-0. -0. -0.]
 [-0. -0. -0.]]


In [10]:
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_prime(x):
    return x * (1.0 - x)

epochs = 5000
input_size, hidden_size, output_size = 2, 2, 1
learning_rate = 0.1

# Truth table
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [0]])

# Fill hidden and output layers with random values.
w_hidden = np.random.uniform(size=(input_size, hidden_size))
w_output = np.random.uniform(size=(hidden_size, output_size))

# Learning iteration
for epoch in range(epochs):
    # Forward propagation
    actual_hidden = sigmoid(np.dot(X, w_hidden))
    output = np.dot(actual_hidden, w_output)

    # Calculate error (expected output - calculated output)
    error = Y - output

    # Backward Propagation
    dZ = error
    w_output += learning_rate* actual_hidden.T.dot(dZ)

    dH = dZ.dot(w_output.T) * sigmoid_prime(actual_hidden)
    w_hidden += X.T.dot(dH)
    #print(w_output)
    #print(w_hidden)
    #print("---")

    
actual_hidden = sigmoid(np.dot([0, 0], w_hidden))
actual_output = np.dot(actual_hidden, w_output)
print('[0, 0]', actual_output)

actual_hidden = sigmoid(np.dot([0, 1], w_hidden))
actual_output = np.dot(actual_hidden, w_output)
print('[0, 1]', actual_output)

actual_hidden = sigmoid(np.dot([1, 0], w_hidden))
actual_output = np.dot(actual_hidden, w_output)
print('[1, 0]', actual_output)

actual_hidden = sigmoid(np.dot([1, 1], w_hidden))
actual_output = np.dot(actual_hidden, w_output)
print('[1, 1]', actual_output)

[0, 0] [-0.09373024]
[0, 1] [0.96915948]
[1, 0] [0.9691595]
[1, 1] [0.10536446]
