In [6]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np

In [7]:
# Funcion de activacion 
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [8]:
# Inicializacion de los parametros 
def initialize_parameters(n_x, n_h, n_y):
    W1 = np.random.randn(n_h, n_x)
    b1 = np.random.randn(n_h, 1)
    W2 = np.random.randn(n_y, n_h)
    b2 = np.random.randn(n_y, 1)
    
    parameters = {
        "W1": W1,
        "b1" : b1,
        "W2": W2,
        "b2" : b2
    }
    return parameters


In [9]:
def forward_prop(X, parameters):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    cache = {
        "A1": A1,
        "A2": A2
    }
    return A2, cache

In [10]:
# Funcion de perdida
def loss_function(A2, Y):
    cost = -np.sum(np.multiply(Y, np.log(A2)) +  np.multiply(1-Y, np.log(1-A2)))/m
    cost = np.squeeze(cost)

    return cost

In [11]:
def backward_prop(X, Y, cache, parameters):
    A1 = cache["A1"]
    A2 = cache["A2"]

    W2 = parameters["W2"]

    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T)/m
    db2 = np.sum(dZ2, axis=1, keepdims=True)/m
    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1-np.power(A1, 2))
    dW1 = np.dot(dZ1, X.T)/m
    db1 = np.sum(dZ1, axis=1, keepdims=True)/m

    grads = {
        "dW1": dW1,
        "db1": db1,
        "dW2": dW2,
        "db2": db2
    }

    return grads

In [12]:
def update_parameters(parameters, grads, learning_rate):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]

    W1 = W1 - learning_rate*dW1
    b1 = b1 - learning_rate*db1
    W2 = W2 - learning_rate*dW2
    b2 = b2 - learning_rate*db2
    
    new_parameters = {
        "W1": W1,
        "W2": W2,
        "b1" : b1,
        "b2" : b2
    }

    return new_parameters

In [13]:
def model(X, Y, n_x, n_h, n_y, num_of_iters, learning_rate):
    parameters = initialize_parameters(n_x, n_h, n_y)

    for i in range(0, num_of_iters+1):
        a2, cache = forward_prop(X, parameters)

        cost = loss_function(a2, Y)

        grads = backward_prop(X, Y, cache, parameters)

        parameters = update_parameters(parameters, grads, learning_rate)

        if(i%100 == 0):
            print('Cost after iteration# {:d}: {:f}'.format(i, cost))

    return parameters

In [14]:
def predict(X, parameters):
    a2, cache = forward_prop(X, parameters)
    yhat = a2
    yhat = np.squeeze(yhat)
    if(yhat >= 0.5):
        y_predict = 1
    else:
        y_predict = 0

    return y_predict

In [15]:
np.random.seed(2)

# The 4 training examples by columns
X = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])

# The outputs of the XOR for every example in X
Y = np.array([[0, 1, 1, 0]])

# No. of training examples
m = X.shape[1]

# Set the hyperparameters
n_x = 2     #No. of neurons in first layer
n_h = 2     #No. of neurons in hidden layer
n_y = 1     #No. of neurons in output layer
num_of_iters = 1000
learning_rate = 0.3

trained_parameters = model(X, Y, n_x, n_h, n_y, num_of_iters, learning_rate)

# Test 2X1 vector to calculate the XOR of its elements. 
# Try (0, 0), (0, 1), (1, 0), (1, 1)
X_test = np.array([[1], [1]])

print(trained_parameters)

y_predict = predict(X_test, trained_parameters)

print('Neural Network prediction for example ({:d}, {:d}) is {:d}'.format(
    X_test[0][0], X_test[1][0], y_predict))

Cost after iteration# 0: 1.052558
Cost after iteration# 100: 0.695402
Cost after iteration# 200: 0.693668
Cost after iteration# 300: 0.693206
Cost after iteration# 400: 0.692966
Cost after iteration# 500: 0.692779
Cost after iteration# 600: 0.692587
Cost after iteration# 700: 0.692352
Cost after iteration# 800: 0.692030
Cost after iteration# 900: 0.691539
Cost after iteration# 1000: 0.690679
{'W1': array([[-0.59247105, -0.47282144],
       [-2.06763357, -0.23592616]]), 'W2': array([[-0.43995116, -0.16049007]]), 'b1': array([[-1.50109455],
       [-1.77729809]]), 'b2': array([[-0.56827845]])}
Neural Network prediction for example (1, 1) is 1


In [16]:
def initialize_parameters_zeros(n_x, n_h, n_y):
    W1 = np.zeros((n_h, n_x))
    b1 = np.zeros((n_h, 1))
    W2 = np.zeros((n_y, n_h))
    b2 = np.zeros((n_y, 1))
    
    parameters = {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2
    }
    return parameters

# Probar inicialización en 0
parameters_zeros = initialize_parameters_zeros(n_x, n_h, n_y)
trained_parameters_zeros = model(X, Y, n_x, n_h, n_y, num_of_iters, learning_rate)


Cost after iteration# 0: 1.013592
Cost after iteration# 100: 0.590854
Cost after iteration# 200: 0.525336
Cost after iteration# 300: 0.464032
Cost after iteration# 400: 0.132533
Cost after iteration# 500: 0.062978
Cost after iteration# 600: 0.040478
Cost after iteration# 700: 0.029604
Cost after iteration# 800: 0.023256
Cost after iteration# 900: 0.019114
Cost after iteration# 1000: 0.016207


In [17]:
# Comparar la convergencia
print("Parámetros entrenados con inicialización en 0:")
print(trained_parameters_zeros)
print("\nParámetros entrenados con inicialización aleatoria:")
print(trained_parameters)


Parámetros entrenados con inicialización en 0:
{'W1': array([[-2.92684964,  3.09007586],
       [ 3.65689327, -3.64943032]]), 'W2': array([[-4.72800176, -4.657755  ]]), 'b1': array([[1.37578737],
       [1.80261412]]), 'b2': array([[4.16176135]])}

Parámetros entrenados con inicialización aleatoria:
{'W1': array([[-0.59247105, -0.47282144],
       [-2.06763357, -0.23592616]]), 'W2': array([[-0.43995116, -0.16049007]]), 'b1': array([[-1.50109455],
       [-1.77729809]]), 'b2': array([[-0.56827845]])}


In [18]:
# Definir diferentes learning rates
learning_rates = [0.01, 0.1, 0.5]

for lr in learning_rates:
    print(f"\nEntrenando con learning rate = {lr}")
    try:
        trained_parameters = model(X, Y, n_x, n_h, n_y, num_of_iters, lr)
        print("Parámetros entrenados:")
        print(trained_parameters)
    except Exception as e:
        print(f"Error al entrenar con learning rate = {lr}: {e}")


Entrenando con learning rate = 0.01
Cost after iteration# 0: 0.713866
Cost after iteration# 100: 0.701341
Cost after iteration# 200: 0.696097
Cost after iteration# 300: 0.693756
Cost after iteration# 400: 0.692557
Cost after iteration# 500: 0.691809
Cost after iteration# 600: 0.691242
Cost after iteration# 700: 0.690750
Cost after iteration# 800: 0.690291
Cost after iteration# 900: 0.689846
Cost after iteration# 1000: 0.689407
Parámetros entrenados:
{'W1': array([[-0.75129342, -0.12552864],
       [-0.89116051, -0.28699891]]), 'W2': array([[-0.26632084, -0.45057698]]), 'b1': array([[ 0.16617547],
       [-0.97463227]]), 'b2': array([[-0.44931124]])}

Entrenando con learning rate = 0.1
Cost after iteration# 0: 0.814060
Cost after iteration# 100: 0.637562
Cost after iteration# 200: 0.602264
Cost after iteration# 300: 0.577495
Cost after iteration# 400: 0.559278
Cost after iteration# 500: 0.545698
Cost after iteration# 600: 0.535411
Cost after iteration# 700: 0.527460
Cost after iteratio