# XOR

In [16]:
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import json, matplotlib


from IPython.core.pylabtools import figsize
figsize(11, 5)


from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

import numpy as np

## Función de activación

In [17]:
def logistica(z):
    """Return the logistic (sigmoid) function evaluated element-wise on ``z``."""
    denominador = 1 + np.exp(-z)
    return 1 / denominador

def derivadaLogistica(z):
    """Return the derivative of the logistic function for every entry of ``z``.

    Uses the identity g'(z) = g(z) * (1 - g(z)), where g is ``logistica``.
    """
    sigma = logistica(z)
    complemento = 1 - sigma
    return sigma * complemento


## Función de pérdida


In [39]:
def cross_entropy(predictions, targets, epsilon=1e-12):
    """Return the categorical cross-entropy averaged over the first axis.

    Parameters
    ----------
    predictions : ndarray
        Predicted probabilities; the first axis indexes the samples.
    targets : ndarray
        Target values, broadcastable against ``predictions``.
    epsilon : float
        Predictions are clipped to ``[epsilon, 1 - epsilon]`` so that the
        logarithm never receives 0.

    Returns
    -------
    float
        ``-sum(targets * log(predictions)) / predictions.shape[0]``.
    """
    # Clipping alone keeps log() finite.  No further offset is added: pushing
    # the argument above 1 would make the loss of a perfect prediction negative.
    predictions = np.clip(predictions, epsilon, 1. - epsilon)
    N = predictions.shape[0]
    return -np.sum(targets * np.log(predictions)) / N

def bin_cross_entropy(predictions, targets, epsilon=1e-12):
    """Return the mean binary cross-entropy as a single scalar.

    The loss of each element is
    ``-(t * log(p) + (1 - t) * log(1 - p))`` and the result is the average
    over every element, so row vectors, column vectors and 1-D arrays all
    yield the same scalar.

    Parameters
    ----------
    predictions : array_like
        Predicted probabilities of the positive class.
    targets : array_like
        Expected labels (0 or 1), same shape as ``predictions``.
    epsilon : float
        Predictions are clipped to ``[epsilon, 1 - epsilon]`` so that neither
        ``log(p)`` nor ``log(1 - p)`` is evaluated at 0.

    Returns
    -------
    float
        Mean binary cross-entropy.
    """
    predictions = np.clip(np.asarray(predictions, dtype=float), epsilon, 1. - epsilon)
    targets = np.asarray(targets, dtype=float)
    per_element = targets * np.log(predictions) + (1 - targets) * np.log(1 - predictions)
    return -np.mean(per_element)







## Red neuronal
La red implementa encadenamiento hacia adelante (para evaluar) y hacia atrás (para entrenarse).

In [19]:
# Seed NumPy's global random generator so that later np.random calls are reproducible.
np.random.seed(0)

In [40]:
class XOR:
    """Small feed-forward network with logistic units, sized for the XOR problem.

    The weight shapes fix the architecture: ``Theta_0`` is (2, 3) and maps the
    two inputs plus a bias to two hidden units; ``Theta_1`` is (1, 3) and maps
    the two hidden activations plus a bias to one output.  Column 0 of each
    matrix multiplies the constant bias activation.

    The network is trained by gradient descent on the mean binary
    cross-entropy between its output and the expected values.
    """

    def __init__(self):
        # Weights start as uniform random values in [0, 1).
        self.Theta_0 = np.random.random((2,3))
        self.Theta_1 = np.random.random((1,3))

    def feedForward(self, X, vector = None):
        """Compute the outputs for the input data.

        Parameters
        ----------
        X : ndarray, shape (m, 2)
            One sample per row.
        vector : ndarray, optional
            Flattened weights (layout of ``vectorWeights``).  When given, these
            weights are used instead of the stored ones; the stored weights are
            not modified.

        The intermediate results are stored on the instance with one column
        per sample: ``A0`` (inputs with bias row), ``Z1``, ``A1`` (hidden
        activations with bias row), ``Z2`` and ``A2`` (network output).
        """
        if vector is None:
            Theta_0 = self.Theta_0
            Theta_1 = self.Theta_1
        else:
            Theta_0, Theta_1 = self.reconstructMatrices(vector)

        # Prepend a row of ones so column 0 of each weight matrix acts as bias.
        self.A0 = np.vstack((np.ones((1, X.shape[0])), X.T))
        self.Z1 = np.dot(Theta_0, self.A0)
        self.A1 = np.vstack((np.ones((1, self.Z1.shape[1])), logistica(self.Z1)))
        self.Z2 = np.dot(Theta_1, self.A1)
        self.A2 = logistica(self.Z2)

    def backPropagate(self, X, Y):
        """Compute the error and its gradient for the current weights.

        Parameters
        ----------
        X : ndarray, shape (m, 2)
            Input samples, one per row.
        Y : ndarray, shape (m, 1)
            Expected output for each sample.

        Stores the mean binary cross-entropy in ``self.error`` and its
        gradient with respect to ``Theta_1`` / ``Theta_0`` in ``self.Grad_1`` /
        ``self.Grad_0``.
        """
        self.feedForward(X)

        m = X.shape[0]
        # Flatten both arrays so the loss is a scalar averaged over the m
        # samples, independently of the (1, m) layout of the activations.
        self.error = bin_cross_entropy(self.A2.ravel(), Y.ravel())

        # With a logistic output and cross-entropy loss the derivative of the
        # logistic function cancels, leaving dLoss/dZ2 = A2 - Y.
        Delta_2 = self.A2 - Y.T
        self.Grad_1 = np.dot(Delta_2, self.A1.T) / m

        # Propagate through the non-bias weights of the output layer.
        Delta_1 = np.dot(self.Theta_1[:,1:].T, Delta_2) * derivadaLogistica(self.Z1)
        self.Grad_0 = np.dot(Delta_1, self.A0.T) / m

    def calcError(self, X, Y, vector):
        """Return the error obtained when using the weights in ``vector``.

        Uses the same loss as ``backPropagate`` so that numerical and analytic
        gradients are comparable.  Note that the stored activations
        (``A0`` ... ``A2``) are overwritten by this evaluation.
        """
        self.feedForward(X, vector)
        return bin_cross_entropy(self.A2.ravel(), Y.ravel())

    def vectorWeights(self):
        """Return all weights stacked into a single column vector.

        The entries of ``Theta_0`` come first (row by row), followed by those
        of ``Theta_1``.
        """
        vector = np.vstack((self.Theta_0.reshape((self.Theta_0.size, 1)),
                          self.Theta_1.reshape((self.Theta_1.size, 1))))
        return vector

    def reconstructMatrices(self, vector):
        """Split ``vector`` into matrices shaped like ``Theta_0`` and ``Theta_1``.

        Inverse of ``vectorWeights``.
        """
        M0 = vector[0:self.Theta_0.size].reshape(self.Theta_0.shape)
        M1 = vector[self.Theta_0.size:].reshape(self.Theta_1.shape)
        return M0, M1

    def approxGradient(self, X, Y):
        """Approximate the gradient around the current weights.

        Each weight is perturbed in turn by ``epsilon`` in both directions and
        the central difference of the error is taken.  Returns the
        approximation as two matrices shaped like ``Theta_0`` and ``Theta_1``.
        The stored activations are left at the last perturbed evaluation.
        """
        vector = self.vectorWeights().copy()
        approx = np.zeros(vector.shape)
        perturb = np.zeros(vector.shape)
        epsilon = 0.0001

        for i in range(len(vector)):
            perturb[i] = epsilon
            loss1 = self.calcError(X, Y, vector - perturb)
            loss2 = self.calcError(X, Y, vector + perturb)
            perturb[i] = 0
            approx[i] = (loss2 - loss1) / (2 * epsilon)
        return self.reconstructMatrices(approx)

    def gradientDescent(self, X, Y, alpha, ciclos=10, checkGradient = False):
        """Evaluate and adjust the weights of the network.

        Parameters
        ----------
        X, Y : ndarray
            Training inputs and expected outputs (see ``backPropagate``).
        alpha : float
            Learning rate.
        ciclos : int
            Number of update steps.
        checkGradient : bool
            When true, print the analytic gradient, its numerical
            approximation and their difference at every step.

        When ``ciclos`` is greater than 1 the error of every step is plotted.
        """
        errores = np.zeros(ciclos)
        for i in range(ciclos):
            self.backPropagate(X, Y)
            Grad_1 = self.Grad_1
            Grad_0 = self.Grad_0
            if checkGradient:
                ApproxT0, ApproxT1 = self.approxGradient(X, Y)

                print("Grad 0 = ", Grad_0, end="\n\n")
                print("Approx = ", ApproxT0, end="\n\n")
                print("Diff = ", Grad_0 - ApproxT0, end="\n\n")
                print()
                print("Grad 1 = ", Grad_1, end="\n\n")
                print("Approx = ", ApproxT1, end="\n\n")
                print("Diff = ", Grad_1 - ApproxT1, end="\n\n")

            self.Theta_1 -= alpha * Grad_1
            self.Theta_0 -= alpha * Grad_0
            errores[i] = self.error
        if ciclos > 1:
            plt.plot(np.arange(ciclos), errores)

    def printOutput(self, do_print = True):
        """Return (and optionally print) the last inputs next to their outputs.

        Each row holds the inputs of one sample (bias column removed) followed
        by the network output from the most recent ``feedForward`` call.
        """
        data = np.hstack((self.A0.T[:,1:], self.A2.T))
        if do_print: print(data)

        return data

In [36]:
# XOR truth table: each row of X is an input pair, the matching row of Y its expected output.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [0]])
# Network with freshly drawn random weights.
xor = XOR()
# Evaluate the untrained network on every input pair.
xor.feedForward(X)
# do_print=False: nothing is printed; the returned table (inputs followed by
# the network output) is what the notebook shows as the cell result.
xor.printOutput(False)

array([[0.        , 0.        , 0.71735807],
       [0.        , 1.        , 0.73084344],
       [1.        , 0.        , 0.7388301 ],
       [1.        , 1.        , 0.74823761]])

In [37]:
# Show the flattened weight vector followed by the two weight matrices it was built from.
print(xor.vectorWeights(), end='\n\n')
print(xor.Theta_0, end='\n\n')
print(xor.Theta_1, end='\n\n')

# Round trip: rebuilding the matrices from the flat vector should reproduce Theta_0 and Theta_1.
T0, T1 = xor.reconstructMatrices(xor.vectorWeights())
print(T0, end='\n\n')
print(T1, end='\n\n')

[[0.94466892]
 [0.52184832]
 [0.41466194]
 [0.26455561]
 [0.77423369]
 [0.45615033]
 [0.56843395]
 [0.0187898 ]
 [0.6176355 ]]

[[0.94466892 0.52184832 0.41466194]
 [0.26455561 0.77423369 0.45615033]]

[[0.56843395 0.0187898  0.6176355 ]]

[[0.94466892 0.52184832 0.41466194]
 [0.26455561 0.77423369 0.45615033]]

[[0.56843395 0.0187898  0.6176355 ]]



In [41]:
# One gradient-descent step (alpha=0.3, ciclos=1) with checkGradient enabled, so the
# backpropagated gradients are printed next to their numerical approximations.
xor.gradientDescent(X, Y, 0.3, 1, checkGradient = True)

NameError: name 'double' is not defined

In [None]:
@interact_manual(ciclos = (5000, 80000))
def trainXOR(ciclos):
    """Train the global ``xor`` network on ``X`` / ``Y`` for ``ciclos`` steps.

    The number of steps is chosen through the widget created by
    ``interact_manual`` (range 5000 to 80000); the learning rate is fixed at 0.5.
    """
    tasa_aprendizaje = 0.5
    xor.gradientDescent(X, Y, tasa_aprendizaje, ciclos)

In [None]:
# Re-evaluate the network with its current weights and print inputs next to outputs.
xor.feedForward(X)
xor.printOutput()
# Same table with every entry rounded to the nearest integer, for comparison with the XOR truth table.
print(np.round(xor.printOutput(False)), end='\n\n')

# Current weight matrices.
print("Theta_0 = ", xor.Theta_0, end="\n\n")
print("Theta_1 = ", xor.Theta_1, end="\n\n")