# XOR

In [33]:
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import json, matplotlib
#s = json.load( open("styles/bmh_matplotlibrc.json") )
#matplotlib.rcParams.update(s)
from IPython.core.pylabtools import figsize
figsize(11, 5)
colores = ["#348ABD", "#A60628","#06A628"]

In [34]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

In [35]:
import numpy as np

## Función de activación

In [36]:
# Devuelve la función logística evaluada
# componente por componente
def logistica(z):
    return 1 / (1 + np.exp(-z))

In [37]:
## Función que, dado un arreglo de valores z
## calcula el valor de la derivada para cada entrada.

def derivadaLogistica(z):
    g = logistica(z)
    return g * (1 - g)

## Función de Error

In [38]:
# input: (val_obtenido, val_esperado)
def entropia_cruzada(hip,y):
    return -y*np.log(hip) - (1-y)*np.log(1-hip)

def entropia_derivada1(hip,y):
    return 1

## Red neuronal
La red implementa encadenamiento hacia adelante (para evaluar) y hacia atrás (para entrenarse).

In [39]:
np.random.seed(10)

In [40]:
class XOR:
    def __init__(self):
        self.Theta_0 = np.random.random((2,3))
        self.Theta_1 = np.random.random((1,3))
    
    def feedForward(self, X, vector = None):
        """ Calcula las salidas, dados los datos de entrada. """
        if vector is None:
            Theta_0 = self.Theta_0
            Theta_1 = self.Theta_1
        else:
            Theta_0, Theta_1 = self.reconstructMatrices(vector)
        
        self.A0 = np.vstack((np.ones((1, X.shape[0])), X.T))
        self.Z1 = np.dot(Theta_0, self.A0)
        self.A1 = np.vstack((np.ones((1, self.Z1.shape[1])), logistica(self.Z1)))
        self.Z2 = np.dot(Theta_1, self.A1)
        self.A2 = logistica(self.Z2)
        
    def backPropagate(self, X, Y):
        """ Calcula el error y su gradiente,
        dados los pesos actuales de la red y los resultados
        esperados.
        """
        self.feedForward(X)
        
        m = X.shape[0]
        Delta_2 = (self.A2.T - Y)
        self.error = np.sum(entropia_cruzada(self.A2,Y.T)) / m # Suma renglones (clases) y columnas (ejemplares)
    
        #Delta_1 = Delta_2 * self.Theta_1[:,1:] *  derivadaLogistica(self.Z2)
        Delta_1 = self.Theta_1[:,1:] * Delta_2 * derivadaLogistica(self.Z2.T)
        self.Grad_1 = np.dot(Delta_2.T,self.A1.T) / m
        
        #Delta_0 = self.Theta_0[:,1:].T * Delta_1 * derivadaLogistica(self.Z1.T)
        self.Grad_0 = np.dot(Delta_1.T, self.A0.T) / m
        
    def calcError(self, X, Y, vector):
        """
        Calcula el error que se cometería utilizando los pesos en 'vector'.
        """
        self.feedForward(X, vector)
        m = X.shape[0]
        error = np.sum(entropia_cruzada(self.A2,Y.T)) / m
        return error
    
    def vectorWeights(self):
        """
        Acomoda a todos los parámetros en las matrices de pesos, en un solo vector.
        """
        vector = np.vstack((self.Theta_0.reshape((self.Theta_0.size, 1)),
                          self.Theta_1.reshape((self.Theta_1.size, 1))))
        #print(self.Theta_0, self.Theta_1, vector)
        return vector
    
    def reconstructMatrices(self, vector):
        """
        Dado un vector, rearma matrices del tamaño de las matrices de pesos.
        """
        M0 = vector[0:self.Theta_0.size].reshape(self.Theta_0.shape)
        M1 = vector[self.Theta_0.size:].reshape(self.Theta_1.shape)
        return M0, M1
        
    def approxGradient(self, X, Y):
        """
        Aproxima el valor del gradiente alrededor de los pesos actuales,
        perturbando cada valor, uno por uno.
        """
        vector = self.vectorWeights().copy()
        approx = np.zeros(vector.shape)
        perturb = np.zeros(vector.shape)
        epsilon = 0.0001
        
        for i in range(len(vector)):
            perturb[i] = epsilon
            loss1 = self.calcError(X, Y, vector - perturb)
            loss2 = self.calcError(X, Y, vector + perturb)
            perturb[i] = 0
            approx[i] = (loss2 - loss1) / (2 * epsilon)
        return self.reconstructMatrices(approx)
        
    def gradientDescent(self, X, Y, alpha, ciclos=10, checkGradient = False):
        """ Evalúa y ajusta los pesos de la red,
        de acuerdo a los datos en X y los resultados
        esperados, en Y.
        """
        errores = np.zeros(ciclos)
        for i in range(ciclos):
            self.backPropagate(X, Y)
            Grad_1 = self.Grad_1
            Grad_0 = self.Grad_0
            if checkGradient:
                ApproxT0, ApproxT1 = self.approxGradient(X, Y)
                print("Grad 0 = ", Grad_0, "\nApprox = ", ApproxT0, "\nDiff = ", Grad_0 - ApproxT0,
                     "\nGrad 1 = ", Grad_1, "\nApprox = ", ApproxT1, "\nDiff = ", Grad_1 - ApproxT1)
            self.Theta_1 -= alpha * Grad_1
            self.Theta_0 -= alpha * Grad_0
            errores[i] = self.error
        if ciclos > 1:
            plt.plot(np.arange(ciclos), errores)
        
    def printOutput(self):
        print(np.hstack((self.A0.T[:,1:], self.A2.T)))

In [41]:
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [0]])
xor = XOR()
xor.feedForward(X)
xor.printOutput()

[[0.         0.         0.69697434]
 [0.         1.         0.71737828]
 [1.         0.         0.70117284]
 [1.         1.         0.72086962]]


In [42]:
print(xor.vectorWeights())
print(xor.Theta_0, '\n', xor.Theta_1)
T0, T1 = xor.reconstructMatrices(xor.vectorWeights())
print(T0, T1)

[[0.77132064]
 [0.02075195]
 [0.63364823]
 [0.74880388]
 [0.49850701]
 [0.22479665]
 [0.19806286]
 [0.76053071]
 [0.16911084]]
[[0.77132064 0.02075195 0.63364823]
 [0.74880388 0.49850701 0.22479665]] 
 [[0.19806286 0.76053071 0.16911084]]
[[0.77132064 0.02075195 0.63364823]
 [0.74880388 0.49850701 0.22479665]] [[0.19806286 0.76053071 0.16911084]]


In [43]:
xor.gradientDescent(X, Y, 0.3, 1, checkGradient = True)

Grad 0 =  [[0.03276727 0.01567413 0.0166842 ]
 [0.0072861  0.00348528 0.00370988]] 
Approx =  [[0.0293725  0.00922161 0.01291035]
 [0.00648158 0.00243598 0.00224845]] 
Diff =  [[0.00339478 0.00645252 0.00377386]
 [0.00080451 0.0010493  0.00146143]] 
Grad 1 =  [[0.20909877 0.15629306 0.15556282]] 
Approx =  [[0.20909877 0.15629306 0.15556282]] 
Diff =  [[1.43075829e-10 6.12830897e-11 6.26605989e-11]]


In [32]:
@interact_manual(ciclos = (50, 2000))
def trainXOR(ciclos):
    xor.gradientDescent(X, Y, 0.3, ciclos)

interactive(children=(IntSlider(value=1025, description='ciclos', max=2000, min=50), Button(description='Run I…

In [248]:
xor.feedForward(X)
xor.printOutput()
print("Theta_0 = ", xor.Theta_0, "\nTheta_1", xor.Theta_1)

[[0.         0.         0.75686341]
 [0.         1.         0.79387685]
 [1.         0.         0.78401582]
 [1.         1.         0.81093438]]
Theta_0 =  [[0.08833981 0.68535982 0.95339335]
 [0.00394827 0.51219226 0.81262096]] 
Theta_1 [[0.61252607 0.72175532 0.29187607]]


In [51]:
from IPython.core.display import HTML
def css_styling():
    styles = open("../P1-HodgkinHuxley/styles/custom.css", "r").read() #or edit path to custom.css
    return HTML(styles)
css_styling()