## Implementación de una compuerta XOR con redes neuronales

In [1]:
import numpy as np

### Primera prueba - Realizar un ajuste lineal para resolver el problema

In [52]:
# Design matrix: the two XOR inputs plus a constant 1 column acting as the bias.
X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])
# XOR truth-table outputs, one per row of X.
y = np.array([0, 1, 1, 0])

# Least-squares weights via the normal equations: W = (X^T X)^-1 X^T y.
W = (
    np.linalg.inv(X.T.dot(X))
    .dot(X.T)
    .dot(y.T)
)

# Print arrays with two decimals and without scientific notation.
np.set_printoptions(precision=2, suppress=True)
print('W: {}'.format(W))

W: [0.  0.  0.5]


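Esto no funciona: XOR no es linealmente separable, así que el mejor ajuste lineal por mínimos cuadrados asigna peso 0 a ambas entradas y 0.5 al término independiente. El resultado de estos pesos aplicados sobre la entrada nos da siempre como salida 0.5.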

### Probamos con una red neuronal de dos neuronas en una capa oculta y una neurona en la capa de salida

In [2]:
def sigmoid(z):
    """Logistic function ``1 / (1 + e^(-z))``, applied element-wise.

    Works on a scalar or a NumPy array; array inputs keep their shape.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def trainNetwork(X_train, y_train, lr=0.01, amt_epochs=100, seed=None):
    """Train a network with 2 sigmoid hidden neurons and 1 linear output neuron.

    Parameters are updated after every single sample (stochastic gradient
    descent) to reduce the squared error ``(y - y_hat) ** 2``.

    Parameters
    ----------
    X_train : np.ndarray, shape (n_samples, n_features)
        Training inputs, one sample per row.
    y_train : np.ndarray, shape (n_samples,)
        Target value for each sample.
    lr : float
        Learning rate applied to every parameter update.
    amt_epochs : int
        Number of full passes over the training set.
    seed : int or None
        If given, the initial parameters are drawn from a private
        ``np.random.RandomState(seed)`` so the run is reproducible. If None,
        NumPy's global random state is used.

    Returns
    -------
    W1 : np.ndarray, shape (2, n_features)
        Hidden-layer weights; row k belongs to hidden neuron k.
    W2 : np.ndarray, shape (2, 1)
        Output-layer weights, one per hidden neuron.
    b1 : np.ndarray, shape (2, 1)
        Hidden-layer biases.
    b2 : float or np.ndarray of shape (1,)
        Output bias. It is a plain float only when no update was performed
        (zero epochs or zero samples).
    """
    # Random parameter initialisation, uniform in [0, 1).
    rng = np.random if seed is None else np.random.RandomState(seed)
    n = X_train.shape[0]
    n_features = X_train.shape[1]
    W1 = rng.rand(2, n_features)
    W2 = rng.rand(2, 1)
    b1 = rng.rand(2, 1)
    b2 = rng.rand()

    for _ in range(amt_epochs):
        for j in range(n):
            x = X_train[j, :]

            # Forward pass
            a1 = sigmoid(x @ W1[0, :] + b1[0])
            a2 = sigmoid(x @ W1[1, :] + b1[1])
            y_hat = a1 * W2[0] + a2 * W2[1] + b2

            # Error of this sample's forward pass.
            err = y_train[j] - y_hat

            # Backpropagation.
            # Every descent direction is computed BEFORE any parameter is
            # modified, so the hidden-layer terms use the same W2 that produced
            # y_hat rather than the freshly updated one.
            out_step = 2 * err  # equals -dL/dy_hat for L = (y - y_hat)**2
            hidden_step_1 = out_step * W2[0] * a1 * (1 - a1)
            hidden_step_2 = out_step * W2[1] * a2 * (1 - a2)

            # Output layer update
            W2[0] = W2[0] + lr * out_step * a1
            W2[1] = W2[1] + lr * out_step * a2
            b2 = b2 + lr * out_step

            # Hidden layer update. Whole rows are assigned at once; writing a
            # shape-(1,) array into a single element W1[r, c] relies on a
            # size-1-array-to-scalar conversion that NumPy has deprecated.
            W1[0, :] = W1[0, :] + lr * hidden_step_1 * x
            W1[1, :] = W1[1, :] + lr * hidden_step_2 * x
            b1[0] = b1[0] + lr * hidden_step_1
            b1[1] = b1[1] + lr * hidden_step_2
    return W1, W2, b1, b2

def testNetwork(W1,W2,b1,b2,X_test):
    n = X_test.shape[0]
    y_hat = np.zeros((n,1), dtype=float)
    for i in range(n):
        z1 = X_test[i,:] @ W1[0,:] + b1[0]
        z2 = X_test[i,:] @ W1[1,:] + b1[1]
        a1 = sigmoid(z1)
        a2 = sigmoid(z2)
        y_hat[i] = a1 * W2[0] + a2 * W2 [1] + b2
    
    return y_hat

In [3]:
# Train the network on the full XOR truth table.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
y = np.array([0, 1, 1, 0])
W1, W2, b1, b2 = trainNetwork(X, y, amt_epochs=3000, lr=0.05)

In [4]:
# Evaluate the trained network on the same four XOR inputs and show its outputs.
# Note: this rebinds `y` from the target labels to the network predictions.
y = testNetwork(W1=W1, W2=W2, b1=b1, b2=b2, X_test=X)
print(y)
print('Exito!!')

[[6.34683461e-09]
 [9.99999994e-01]
 [9.99999992e-01]
 [7.94919819e-09]]
Exito!!
