# Backprop y entrenamiento para XOR

In [1]:
import numpy as np
import matplotlib.pyplot as plt

El objetivo es contruir por medio de una red neuronal con back propagation un ajuste a la función XOR, la cual tiene la siguiente forma y resultados:

In [2]:
def operacion_XOR(x1,x2):
    _and = x1+x2-1>0
    _or = x1+x2>0
    return int(_or-_and)

print(operacion_XOR(0,0),operacion_XOR(0,1),operacion_XOR(1,0),operacion_XOR(1,1))

0 1 1 0


Para estimarla probamos primero con la función de error MSE. Y definimos la siguiente clase:

In [29]:
class NeuralNetwork(object):
    def __init__(self, layers = [3, 2, 2, 1], activations=['relu', 'relu','linear'],mu= 0, sigma = 0.1,loos = 'MSE',seed = 0): # Definimos la forma que creemos nos ayudará
        assert(len(layers) == len(activations)+1)
        self.layers = layers
        self.activations = activations
        self.weights = []
        self.biases = []
        self.mu = mu
        self.sigma = sigma
        self.loos = loos
        self.seed = seed
        for i in range(len(layers)-1): # Iniciamos los pesos y sesgos de manera random según una distribución normal.
            np.random.seed(seed=self.seed+10*i)
            self.weights.append(np.random.normal(self.mu, self.sigma, (layers[i+1], layers[i])))
            #print('capa ', i, self.weights[-1])
            np.random.seed(seed=self.seed+10*i+1)
            self.biases.append(np.random.normal(self.mu, self.sigma, (layers[i+1], 1)))
            #print('capa ', i, self.biases[-1])
                
    @staticmethod
    def getActivationFunction(name): # Método que nos guarda las funciones de activación necesarias
        if(name == 'sigmoid'):
            return lambda x : 1/(1+np.exp(-x))
        elif(name == 'linear'):
            return lambda x : x
        elif(name == 'relu'):
            def relu(x):
                y = np.copy(x)
                y[y<0] = 0
                return y
            return relu
        else:
            print('Unknown activation function. linear is used')
            return lambda x: x
        
    @staticmethod
    def getDerivitiveActivationFunction(name): # Método que nos guarda las derivadas de las funciones de activación necesarias.
        if(name == 'sigmoid'):
            sig = lambda x : np.exp(x)/(1+np.exp(x))
            return lambda x :sig(x)*(1-sig(x)) 
        elif(name == 'linear'):
            return lambda x: 1
        elif(name == 'relu'):
            def relu_diff(x):
                y = np.copy(x)
                y[y>=0] = 1
                y[y<0] = 0
                return y
            return relu_diff
        else:
            print('Unknown activation function. linear is used')
            return lambda x: 1
        
    def error(self,y,y_hat,loos):
        if loos == 'MSE':
            return y-y_hat
        if loos == 'Cross-entropy':
            N = y_hat.shape[0]
            ce = -(y * np.log(y_hat))
            return ce
    
    def feedforward(self, x):
        a = np.copy(x)
        z_s = []
        a_s = [a]
        for i in range(len(self.weights)):
            #print('Vamos en la capa ', i)
            activation_function = self.getActivationFunction(self.activations[i])
            #print(self.weights[i],self.weights[i].shape)
            #print(a,a.shape)
            z_s.append(self.weights[i].dot(a) + self.biases[i]) 
            a = activation_function(z_s[-1])
            a_s.append(a)
        return (z_s, a_s)
    
    def backpropagation(self,y, z_s, a_s):
        dw = []  # dC/dw
        db = []  # dC/db
        deltas = [None] * len(self.weights)  # delta = dC/dZa
        deltas[-1] = ((self.error(y,a_s[-1],self.loos))*(self.getDerivitiveActivationFunction(self.activations[-1]))(z_s[-1])) # Delta L
        for i in reversed(range(len(deltas)-1)): # Calculamos los deltas para atrás
            deltas[i] = self.weights[i+1].T.dot(deltas[i+1])*(self.getDerivitiveActivationFunction(self.activations[i])(z_s[i]))        
        batch_size = y.shape[0] 
        db = [d.dot(np.ones((batch_size,1)))/float(batch_size) for d in deltas] # guardamos las derivadas respecto a los sesgos
        dw = [d.dot(a_s[i].T)/float(batch_size) for i,d in enumerate(deltas)] # guardamos las derivadas respecto a los pesos
        return dw, db          
    
    def train(self, x, y, epochs=100, lr = 0.01):
        for e in range(epochs): 
            z_s, a_s = self.feedforward(x)
            dw, db = self.backpropagation(y, z_s, a_s)
            self.weights = [w+lr*dweight for w,dweight in  zip(self.weights, dw)]
            self.biases = [w+lr*dbias for w,dbias in  zip(self.biases, db)]
            if (e+1)%np.floor(epochs/10) == 0:
                print("Epoch {} with loss = {}".format( e,np.linalg.norm(a_s[-1]-y)) )
        print("Final epoch with loss = {}".format(np.linalg.norm(a_s[-1]-y) )) 
                
    


Definimos los datasets a utilizar en el entrenamiento.

In [4]:
x1 = [0,1]
x2 = [0,1]
dataset_x = np.array([[a,b,1] for a in x1 for b in x2]).reshape(3, -1,order='F')
dataset_y_xor = np.zeros(len(x1)*len(x2))
for i in range(len(dataset_y_xor)):
    a = dataset_x[0][i]
    b = dataset_x[1][i]
    dataset_y_xor[i] = operacion_XOR(int(a),int(b))
dataset_x,dataset_y_xor

(array([[0, 1, 0, 1],
        [0, 0, 1, 1],
        [1, 1, 1, 1]]),
 array([0., 1., 1., 0.]))

In [30]:
nn1 = NeuralNetwork(loos = 'Cross-entropy')

nn1.train(dataset_x, dataset_y_xor, epochs=100,  lr = 0.01)
_, a_s = nn1.feedforward(dataset_x)

Epoch 9 with loss = 1.2277822111553804
Epoch 19 with loss = 1.1320224927552718
Epoch 29 with loss = 1.0723555226990684
Epoch 39 with loss = 1.0351642852826382
Epoch 49 with loss = 1.0136027420347316
Epoch 59 with loss = 1.003334180367543
Epoch 69 with loss = 1.0013156177993405
Epoch 79 with loss = 1.0053096634729246
Epoch 89 with loss = 1.013637645403523
Epoch 99 with loss = 1.0250304951551408
Final epoch with loss = 1.0250304951551408


In [31]:
a_s[-1]

array([[0.60625811, 0.61475495, 0.60882006, 0.61992377]])

In [33]:
nn1.weights

[array([[ 0.18261837,  0.04630825,  0.11037946],
        [ 0.22716406,  0.18986958, -0.09153927]]),
 array([[ 0.16533424,  0.07606243],
        [-0.15454003, -0.00083838]]),
 array([[0.23390158, 0.0195865 ]])]