Programa el algoritmo de retropropagación usando NumPy para una tarea de clasificación binaria, presuponiendo una red densa con dos capas ocultas y la función de pérdida de entropía cruzada binaria. Describe las fórmulas y reglas de actualización de los pesos y sesgos de cada capa, y entrena y evalúa la red en algún conjunto de datos.

In [None]:
#1.-Añadir librerías:
import numpy as np

In [None]:
#2.- Función de activación
def sigmoide(z):
    """Logistic sigmoid activation, evaluated element-wise on ``z``.

    Backpropagation relies on gradients, so the activation has to be
    differentiable; the sigmoid meets that requirement.
    """
    denominador = 1 + np.exp(-z)
    return 1 / denominador

In [None]:
def sigmoide_dos(z):
    """Logistic sigmoid of ``z`` shifted up by one (values range from 1 to 2)."""
    sigmoide_base = 1 / (1 + np.exp(-z))
    return sigmoide_base + 1

In [None]:
# Derivada de la función de activación para trabajar un backprop
def derivada_sigmoide(x):
    """Derivative of the sigmoid at ``x``, i.e. ``s(x) * (1 - s(x))``.

    This is the factor backpropagation multiplies by when passing an error
    through a sigmoid unit.
    """
    activacion = sigmoide(x)
    complemento = 1.0 - activacion
    return np.multiply(activacion, complemento)

Note:
The widespread saturation of sigmoidal units can make gradient-based learning very difficult. For this reason, their use as hidden units in feedforward networks is now discouraged. Their use as output units is compatible with the use of gradient-based learning when an appropriate cost function can undo the saturation of the sigmoid in the output layer.

The main reason why we use the sigmoid function is that its output lies in the interval (0, 1). Therefore, it is especially used for models where we have to predict a probability as the output. Since a probability only takes values between 0 and 1, the sigmoid is a natural choice.

The function is differentiable. That means we can find the slope of the sigmoid curve at any point.

The function is monotonic, but its derivative is not.

The logistic sigmoid function can cause a neural network to get stuck during training.

In [None]:
# 3.- Función de pérdida:
def entropia_cruzada_binaria(y, p):
    """Binary cross-entropy summed over all samples.

    Parameters
    ----------
    y : array-like
        True labels; entries equal to 1 are treated as the positive class
        and entries equal to 0 as the negative class.
    p : array-like
        Predicted probability of the positive class, same shape as ``y``.

    Returns
    -------
    float
        ``-(sum(log p[y == 1]) + sum(log(1 - p[y == 0])))``. The result is a
        sum, not a mean, so it grows with the number of samples.

    Notes
    -----
    The caller's ``p`` is left untouched: the computation runs on a float
    copy. Probabilities that are exactly 0 or 1 are replaced by the nearest
    representable float inside (0, 1) so that ``log(0)`` is never evaluated.
    """
    y = np.asarray(y)
    # Float copy: avoids mutating the argument and makes the replacement
    # below effective even when ``p`` arrives as integers or a list.
    p = np.array(p, dtype=float)
    p[p == 0] = np.nextafter(0., 1.)
    p[p == 1] = np.nextafter(1., 0.)
    perdida_positivos = np.log(p[y == 1]).sum()
    perdida_negativos = np.log(1 - p[y == 0]).sum()
    return -(perdida_positivos + perdida_negativos)


La función de pérdida por entropía cruzada determina qué tan diferente es la distribución aproximada de la verdadera.

Nota: nextafter(a, b) es una función que regresa el siguiente valor flotante después de a en dirección a b. En el primer caso (dirección de 0 a 1) devuelve el flotante positivo más pequeño, que sustituye a las probabilidades exactamente iguales a 0; en el segundo caso (dirección de 1 a 0, el número flotante va disminuyendo) devuelve el flotante más cercano por debajo de 1, que sustituye a las probabilidades exactamente iguales a 1. Así se evita evaluar log(0) al calcular la probabilidad de que nuestra entrada corresponda a la clase 0 o de que no sea de la clase 0 (en nuestro caso, que pertenezca a la clase 1).

In [None]:
# 4.- Exactitud:
def exactitud(y, y_predicha):
    """Percentage of entries where the prediction equals the true label."""
    # Boolean mask of correct predictions; its mean is correct / total.
    aciertos = y == y_predicha
    return aciertos.mean() * 100

In [None]:
def exactitud2(y, y_predicha):
    """Percentage of samples whose prediction matches the label.

    Loop-based counterpart of the vectorised accuracy: samples are compared
    one by one along the first axis of ``y``.
    """
    total = y.shape[0]
    aciertos = np.zeros(total)
    for indice in range(total):
        coincide = y[indice] == y_predicha[indice]
        aciertos[indice] = 1 if coincide else 0
    return aciertos.mean() * 100

In [None]:
# 5.- Feedforward:

def Estimar_Entradas(x, W1, b1, W2, b2, W3, b3):
    """Forward pass that also returns the intermediate values.

    Returns
    -------
    tuple
        ``(y_hat, H2, a2, a1)``: the sigmoid output, the sigmoid hidden
        activation, its pre-activation, and the first affine map of ``x``.
    """
    # First affine map; no activation is applied to this result.
    primera = np.dot(x, W1) + b1
    # NOTE(review): this is an element-wise product with W2, not np.dot —
    # confirm that is intended for a dense layer.
    segunda = primera * W2 + b2
    oculta = sigmoide(segunda)
    salida = sigmoide(np.dot(oculta, W3) + b3)
    return salida, oculta, segunda, primera

In [None]:
def Segunda_adelante(x, W4, b4):
    """Second-stage forward pass: affine map of ``x`` followed by ``sigmoide_dos``."""
    preactivacion = np.dot(x, W4) + b4
    return sigmoide_dos(preactivacion)

In [116]:
import numpy as np

def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated element-wise."""
    exp_neg = np.exp(-x)
    return 1.0 / (1.0 + exp_neg)

def sigmoid_derivative(x):
    """Derivative of the sigmoid at pre-activation ``x``: ``s * (1 - s)``."""
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement

def binary_crossentropy(y_true, y_pred):
    """Mean binary cross-entropy between labels and predicted probabilities.

    A small constant is added inside each logarithm so that predictions of
    exactly 0 or 1 do not produce ``log(0)``.
    """
    eps = 1e-10
    positive_term = y_true * np.log(y_pred + eps)
    negative_term = (1 - y_true) * np.log(1 - y_pred + eps)
    return -np.mean(positive_term + negative_term)

def accuracy(y_true, y_pred):
    """Percentage of entries whose rounded prediction equals the label.

    Predictions are turned into labels with ``np.round`` before comparing.
    """
    predicted_labels = np.round(y_pred)
    matches = y_true == predicted_labels
    return np.mean(matches) * 100

def forward_layer(X, W, b):
    """Dense sigmoid layer.

    Returns
    -------
    tuple
        ``(activation, pre_activation)`` where ``pre_activation`` is
        ``X @ W + b`` and ``activation`` is its sigmoid.
    """
    pre_activation = np.dot(X, W) + b
    activation = sigmoid(pre_activation)
    return activation, pre_activation

def forward_stage1(x, params):
    """Forward pass through the first stage (layers 1 to 3).

    Layers 1 and 2 are sigmoid layers; layer 3 is a plain affine map, so the
    first returned value is NOT passed through a sigmoid.

    Returns
    -------
    tuple
        ``(phi, a1, z1, a2, z2)``: the layer-3 output followed by the
        activation and pre-activation of each hidden layer.
    """
    hidden1, pre1 = forward_layer(x, params['W1'], params['b1'])
    hidden2, pre2 = forward_layer(hidden1, params['W2'], params['b2'])
    output = np.dot(hidden2, params['W3']) + params['b3']
    return output, hidden1, pre1, hidden2, pre2

def forward_classifier(x, W, b):
    """Single sigmoid unit/layer: sigmoid of the affine map ``x @ W + b``."""
    logits = np.dot(x, W) + b
    return sigmoid(logits)

def init_params(input_dim, hidden_dim, phi_dim, out_dim):
    """Create the weight and bias arrays for all five layers.

    Weights are drawn from a standard normal and scaled by
    ``sqrt(1 / fan_in)``; biases start at zero with shape ``(1, fan_out)``.

    Returns
    -------
    dict
        Keys ``'W1'..'W5'`` and ``'b1'..'b5'``. Layers 1-3 form the first
        stage (input -> hidden -> hidden -> phi); layers 4 and 5 both map
        phi -> out.
    """
    # (fan_in, fan_out) per layer, in the order the weights are drawn.
    layer_shapes = [
        (input_dim, hidden_dim),
        (hidden_dim, hidden_dim),
        (hidden_dim, phi_dim),
        (phi_dim, out_dim),
        (phi_dim, out_dim),
    ]

    params = {}
    for index, (fan_in, fan_out) in enumerate(layer_shapes, start=1):
        scale = np.sqrt(1 / fan_in)
        params[f'W{index}'] = np.random.randn(fan_in, fan_out) * scale
        params[f'b{index}'] = np.zeros((1, fan_out))
    return params



def train_model(X, Phi_target, y1_target, y2_target, lr, epochs, hidden_dim):
    """Train the two-stage network one sample at a time.

    Stage 1 fits layers 1-3 (two sigmoid hidden layers plus a sigmoid
    output) so that each row of ``X`` is mapped to the matching row of
    ``Phi_target``. Stage 2 then fits two independent sigmoid classifiers
    (``W4``/``b4`` and ``W5``/``b5``) that take the rounded stage-1
    predictions as input and are trained against ``y1_target`` and
    ``y2_target`` respectively.

    Every output layer uses the error ``prediction - target``, which is the
    gradient of binary cross-entropy with respect to the pre-activation of a
    sigmoid unit. ``forward_stage1`` returns the layer-3 pre-activation, so
    the sigmoid is applied here; apply it as well when using the trained
    ``W1..W3`` for inference.

    Parameters
    ----------
    X : ndarray, shape (n_samples, input_dim)
        Input samples.
    Phi_target : ndarray, shape (n_samples, phi_dim)
        Binary targets for stage 1.
    y1_target, y2_target : ndarray, shape (n_samples, out_dim)
        Targets for the two stage-2 classifiers.
    lr : float
        Learning rate.
    epochs : int
        Maximum number of passes over the data for each stage. A stage
        stops early once its accuracy reaches 100%.
    hidden_dim : int
        Width of both hidden layers.

    Returns
    -------
    dict
        The trained parameters, keyed ``'W1'..'W5'`` and ``'b1'..'b5'``.
    """
    n_samples, input_dim = X.shape
    phi_dim = Phi_target.shape[1]
    out_dim = y1_target.shape[1]

    params = init_params(input_dim, hidden_dim, phi_dim, out_dim)

    # Prediction buffers are always float: np.zeros_like would inherit an
    # integer dtype from integer targets and truncate every prediction.
    Phi_pred = np.zeros(Phi_target.shape, dtype=float)
    Y1_pred = np.zeros(y1_target.shape, dtype=float)
    Y2_pred = np.zeros(y2_target.shape, dtype=float)

    # ---- Stage 1: X -> Phi ------------------------------------------------
    for epoch in range(epochs):
        for i in range(n_samples):
            logits, a1, z1, a2, z2 = forward_stage1(X[i], params)
            # Probabilities in (0, 1), as binary cross-entropy requires.
            phi = sigmoid(logits)

            # Output error (gradient w.r.t. the layer-3 pre-activation).
            dphi = phi - Phi_target[i]
            dW3 = np.dot(a2.reshape(-1, 1), dphi)
            db3 = dphi

            # Backpropagate through the two sigmoid hidden layers. All
            # gradients are computed before any parameter is updated.
            dH2 = np.dot(dphi, params['W3'].T) * sigmoid_derivative(z2)
            dW2 = np.dot(a1.reshape(-1, 1), dH2)
            db2 = dH2

            dH1 = np.dot(dH2, params['W2'].T) * sigmoid_derivative(z1)
            dW1 = np.dot(X[i].reshape(-1, 1), dH1)
            db1 = dH1

            for key, grad in (('W1', dW1), ('W2', dW2), ('W3', dW3),
                              ('b1', db1), ('b2', db2), ('b3', db3)):
                params[key] -= lr * grad

            Phi_pred[i] = phi

        loss = binary_crossentropy(Phi_target, Phi_pred)
        acc = accuracy(Phi_target, Phi_pred)

        print(f"[Stage 1] Epoch {epoch} - Loss: {loss:.4f} Acc: {acc:.1f}%")

        if acc == 100:
            break

    # Stage-2 inputs are the rounded stage-1 predictions. They are defined
    # unconditionally so stage 2 can run even if stage 1 never reached 100%.
    Entradas = np.round(Phi_pred)

    # ---- Stage 2: Phi -> y1, y2 -------------------------------------------
    for epoch in range(epochs):
        for i in range(n_samples):
            entrada = Entradas[i]
            entrada_col = entrada.reshape(-1, 1)

            y1 = forward_classifier(entrada, params['W4'], params['b4'])
            y2 = forward_classifier(entrada, params['W5'], params['b5'])

            # Output errors of the two independent classifiers.
            dy1 = y1 - y1_target[i]
            dy2 = y2 - y2_target[i]

            params['W4'] -= lr * np.dot(entrada_col, dy1)
            params['b4'] -= lr * dy1

            params['W5'] -= lr * np.dot(entrada_col, dy2)
            params['b5'] -= lr * dy2

            Y1_pred[i] = y1
            Y2_pred[i] = y2

        loss1 = binary_crossentropy(y1_target, Y1_pred)
        acc1 = accuracy(y1_target, Y1_pred)

        loss2 = binary_crossentropy(y2_target, Y2_pred)
        acc2 = accuracy(y2_target, Y2_pred)

        print(f"[Stage 2] Epoch {epoch} - Loss1: {loss1:.4f} Acc1: {acc1:.1f}%" )
        print(f"[Stage 2] Epoch {epoch} - Loss2: {loss2:.4f} Acc2: {acc2:.1f}%" )

        if acc1 == 100 and acc2 == 100:
            print("\nPrediction Y1:\n", np.round(Y1_pred))
            print("\nPrediction Y2:\n", np.round(Y2_pred))
            break

    return params


In [None]:
# One row per sample: every combination of two input bits.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
# Stage-1 targets, one row per sample. For the inputs above the first column
# equals AND of the two bits and the second column equals XOR.
Phi_target = np.array([
    [0, 0],
    [0, 1],
    [0, 1],
    [1, 0],
])
# Column-vector targets for the second stage.
y_target = np.array([[1], [1], [1], [2]])
y_target2 = np.array([[0], [1], [1], [1]])

In [117]:
# Two-bit adder with the result in decimal:
# Trains on X / Phi_target / y_target and unpacks the eight returned arrays
# (weights and biases of four layers).
# NOTE(review): `retropropagacion` is not defined in the visible part of this
# file; the positional arguments .5, 100, 10 are presumably the learning
# rate, the number of epochs and the hidden width — confirm against its
# definition.
W1, b1, W2, b2, W3, b3, W4, b4 = retropropagacion(X, Phi_target, y_target, .5, 100, 10)



Epoca 0: Error = 6.948298279448793 Exactitud: 50.0
Epoca 1: Error = 6.870571020781195 Exactitud: 62.5
Epoca 2: Error = 6.7336478639267545 Exactitud: 62.5
Epoca 3: Error = 6.589001924637884 Exactitud: 62.5
Epoca 4: Error = 6.455620050539748 Exactitud: 62.5
Epoca 5: Error = 6.334047127600248 Exactitud: 62.5
Epoca 6: Error = 6.221887334299726 Exactitud: 62.5
Epoca 7: Error = 6.116729735177984 Exactitud: 62.5
Epoca 8: Error = 6.01661858475369 Exactitud: 62.5
Epoca 9: Error = 5.9200531860309145 Exactitud: 62.5
Epoca 10: Error = 5.825913444380527 Exactitud: 62.5
Epoca 11: Error = 5.733398039766715 Exactitud: 62.5
Epoca 12: Error = 5.641989534905204 Exactitud: 50.0
Epoca 13: Error = 5.5514390423382824 Exactitud: 50.0
Epoca 14: Error = 5.461753779837109 Exactitud: 50.0
Epoca 15: Error = 5.373168343971043 Exactitud: 50.0
Epoca 16: Error = 5.286087338402624 Exactitud: 50.0
Epoca 17: Error = 5.20100225140507 Exactitud: 50.0
Epoca 18: Error = 5.1184001386695535 Exactitud: 50.0
Epoca 19: Error = 5.

In [118]:
def phi(x, W1, b1, W2, b2, W3, b3):
    """Rounded first-stage prediction for ``x``.

    Runs the first-stage forward pass and rounds the sigmoid output with
    ``np.round``.
    """
    capa1 = np.dot(x, W1) + b1
    # NOTE(review): element-wise product with W2 (not np.dot) — confirm intended.
    capa2 = sigmoide(capa1 * W2 + b2)
    salida = sigmoide(np.dot(capa2, W3) + b3)
    return np.round(salida)

In [131]:
# Input bits to add.
bit_1, bit_2 = 1, 1

# Rounded first-stage output for this pair of bits.
phi_x = phi([bit_1, bit_2], W1, b1, W2, b2, W3, b3)

# Second-stage affine output, shared by the three read-outs below.
salida_lineal = np.dot(phi_x, W4) + b4

suma = np.round(sigmoide_dos(salida_lineal))   # shifted-sigmoid read-out
suma2 = np.round(sigmoide(salida_lineal))      # plain sigmoid read-out
suma3 = np.round(salida_lineal)                # no activation

print(suma3)

[[2.]]


In [None]:
# Rounded first-stage prediction for the input [1, 0]; `phi` evaluates the
# same forward pass and rounding.
y = phi([1, 0], W1, b1, W2, b2, W3, b3)
print(y)

[[1. 0.]]


In [None]:
# Show the second-stage weights and bias, one after the other.
print(W4, b4, sep="\n")

[[-4.65219534]
 [ 5.21475299]]
[[0.06344762]]


In [None]:
# Rounded sigmoid response of the W4/b4 layer to the input [1, 1].
activacion = sigmoide(np.dot([1, 1], W4) + b4)
y = np.round(activacion)
print(y)

[[1.]]


In [None]:
# Second-stage training loop: updates W4 and b4 for `Epocas` passes.
# NOTE(review): Epocas, alpha, y_out, y_predicha and Activacion are not
# defined in the visible part of this file — confirm where they come from.
for i in range(Epocas):
        # k = 0, 1, 2 selects samples X[1], X[2], X[3]; X[0] is never visited.
        for k in range(0, 3, 1):
            # Rounded first-stage output for sample k+1 (same forward pass as `phi`).
            y_entrada = np.round(sigmoide(np.dot(sigmoide(((np.dot(X[k+1], W1)+b1)*W2)+b2),W3)+b3))
            y_hat2 = Segunda_adelante(y_entrada, W4, b4)

            # Output error; it is used directly as the bias gradient.
            # NOTE(review): y_out is indexed with k while X and y_predicha use
            # k+1 — presumably y_out only holds targets for samples 1..3; verify.
            db4 = y_hat2 - y_out[k]
            dw4 = np.dot(y_entrada.T, db4)

            # Gradient-descent step with learning rate alpha.
            W4 = W4 - alpha * dw4
            b4 = b4 - alpha * db4
            # NOTE(review): the return value of Activacion is discarded, so this
            # call only matters if it has side effects — confirm.
            Activacion(y_hat2)
            y_predicha[k+1] = y_hat2
            print(y_predicha[k+1])