## Tutorial de implementación de Backpropagation

In [None]:
import numpy as np
import time

### Backprop con compuertas simples

In [None]:
# Edge (link) of the computation graph.
# Stores a value that is propagated forward and a gradient that is
# accumulated while propagating backward.
class Link:
    """Edge of the computation graph holding a value and its gradient.

    Attributes:
        value: NumPy array with the forward value. Integer and boolean
            inputs are promoted to float so in-place updates work.
    """

    def __init__(self, value, parent=None):
        """Create a link.

        Args:
            value: Scalar or array-like forward value.
            parent: Gate that produced this link; its ``backward()`` is
                called whenever a gradient arrives. ``None`` for leaf links.
        """
        data = np.array(value)
        # Integer/bool data (e.g. ``Link([2, -3])``) would make the in-place
        # ``+=`` in update_value() fail with a casting error, so promote it.
        if data.dtype.kind in 'biu':
            data = data.astype(float)
        self.value = data
        self.__grad = np.zeros(self.value.shape)
        self.__parent = parent

    def get_grad(self):
        """Return a copy of the accumulated gradient."""
        return np.copy(self.__grad)

    def zero_grad(self):
        """Reset the accumulated gradient to zeros.

        The gradient is otherwise never cleared, so successive backward
        passes keep adding to it.
        """
        self.__grad = np.zeros(self.value.shape)

    def propagate_grad(self, grad):
        """Accumulate ``grad`` and trigger the producing gate's backward pass.

        Args:
            grad: Gradient contribution; must broadcast to ``value``'s shape.

        Note:
            Every call invokes ``parent.backward()`` (when a parent exists),
            so a link that receives several contributions triggers it once
            per contribution.
        """
        self.__grad += grad
        if self.__parent is not None:
            self.__parent.backward()

    def update_value(self, step_size):
        """Move ``value`` in place by ``step_size`` times the accumulated gradient.

        A positive ``step_size`` performs gradient ascent, a negative one
        gradient descent.
        """
        self.value += step_size * self.__grad

In [None]:
# Gate/node implementing an arbitrary operation/function.
# Gates are also responsible for propagating values and gradients.
class AddGate:
    """Addition gate: ``z = x + y``."""

    def __init__(self):
        # Input links and output link; populated by forward().
        self.x = self.y = self.z = None

    def forward(self, x, y):
        """Store both input links and return a new link holding their sum."""
        self.x, self.y = x, y
        total = x.value + y.value
        self.z = Link(total, self)
        return self.z

    def backward(self):
        """Pass the output gradient unchanged to each input (local derivative is 1)."""
        for operand in (self.x, self.y):
            operand.propagate_grad(1. * self.z.get_grad())

In [None]:
class MulGate:
    """Element-wise multiplication gate: ``z = x * y``."""

    def __init__(self):
        # Input links and output link; populated by forward().
        self.x = self.y = self.z = None

    def forward(self, x, y):
        """Store both input links and return a new link holding their product."""
        self.x, self.y = x, y
        product = x.value * y.value
        self.z = Link(product, self)
        return self.z

    def backward(self):
        """Send each input the output gradient scaled by the other input's value."""
        x_val, y_val = self.x.value, self.y.value
        self.x.propagate_grad(y_val * self.z.get_grad())
        self.y.propagate_grad(x_val * self.z.get_grad())

In [None]:
class SigmoidGate:
    """Sigmoid gate: ``z = 1 / (1 + exp(-x))``."""

    def __init__(self):
        # Input link and output link; populated by forward().
        self.x = None
        self.z = None

    # Declared static so it can be called on the class and on instances alike;
    # without the decorator an instance call would pass ``self`` as ``x``.
    @staticmethod
    def sigmoid(x):
        """Return the logistic sigmoid of ``x`` (scalar or NumPy array)."""
        return 1/(1+np.exp(-x))

    def forward(self, x):
        """Store the input link and return a new link holding sigmoid(x)."""
        self.x = x
        self.z = Link(SigmoidGate.sigmoid(self.x.value), self)
        return self.z

    def backward(self):
        """Propagate the output gradient scaled by the sigmoid derivative s * (1 - s)."""
        s = self.z.value  # sigmoid output computed in forward()
        self.x.propagate_grad(s * (1-s) * self.z.get_grad())

In [None]:
# Definition of the computation graph (gates/nodes and links).
# Leaf links: two weights and two inputs, each a one-element vector.
w0, w1 = Link([2.0]), Link([-3.0])
x0, x1 = Link([-1.0]), Link([-2.0])

# Gates: two products feeding an addition, followed by a sigmoid.
mulg0, mulg1 = MulGate(), MulGate()
addg0 = AddGate()
sg0 = SigmoidGate()

In [None]:
# Forward pass
def forwardNetwork():
    """Evaluate sigmoid(w0*x0 + w1*x1) through the gates and return the output link."""
    weighted0 = mulg0.forward(w0, x0)
    weighted1 = mulg1.forward(w1, x1)
    return sg0.forward(addg0.forward(weighted0, weighted1))

s = forwardNetwork()
print(f'network output: {s.value!s}')

In [None]:
# Backward pass: seed the output link with a gradient of 1.0. Each link adds
# the incoming gradient to its own and then calls backward() on the gate that
# produced it, so the gradient flows down to the leaf links.
s.propagate_grad(1.0)

In [None]:
# One step of gradient ascent: move every leaf link along its own gradient.
step_size = 0.1
w0.update_value(step_size)
w1.update_value(step_size)
x0.update_value(step_size)
x1.update_value(step_size)

# Re-evaluate the network with the updated values.
s = forwardNetwork()
print(f'network output after one step of gradient ascent: {s.value!s}')

In [None]:
# Show the gradient accumulated on each leaf link, one per line.
print(w0.get_grad(), x0.get_grad(), w1.get_grad(), x1.get_grad(), sep='\n')

In [None]:
# Inspect the current value stored in the w0 link.
print(w0.value)

### Backprop con compuertas combinadas

In [None]:
class DotProdGate:
    """Dot-product gate: ``z = x . y``."""

    def __init__(self):
        # Input links and output link; populated by forward().
        self.x = self.y = self.z = None

    def forward(self, x, y):
        """Store both input links and return a new link holding their dot product."""
        self.x, self.y = x, y
        product = x.value.dot(y.value)
        self.z = Link(product, self)
        return self.z

    def backward(self):
        """Send each input the other input's value scaled by the output gradient.

        NOTE(review): this is the dot-product gradient for 1-D vectors; other
        shapes are presumably not intended -- confirm with callers.
        """
        x_val, y_val = self.x.value, self.y.value
        self.x.propagate_grad(y_val * self.z.get_grad())
        self.y.propagate_grad(x_val * self.z.get_grad())

In [None]:
# Values and gates: weights and inputs are now two-element vectors, so a
# single dot-product gate replaces the two multiplications and the addition.
w, x = Link([2.0, -3.0]), Link([-1.0, -2.0])

dp, sg = DotProdGate(), SigmoidGate()

In [None]:
def forwardNetwork():
    """Evaluate sigmoid(w . x) through the gates and return the output link."""
    return sg.forward(dp.forward(w, x))

s = forwardNetwork()
print(f'network output: {s.value!s}')

In [None]:
# Backward pass: seed the output link with a gradient of 1.0; it is then
# propagated back through the gates to the w and x links.
s.propagate_grad(1.0)

In [None]:
# One step of gradient ascent on both the weights and the inputs.
step_size = 0.1
w.update_value(step_size)
x.update_value(step_size)

# Re-evaluate the network with the updated values.
s = forwardNetwork()
print(f'network output after one step of gradient ascent: {s.value!s}')

In [None]:
# Show the gradient accumulated on the weight and input links, one per line.
print(w.get_grad(), x.get_grad(), sep='\n')

### Backprop con función de pérdida

In [None]:
class PerceptronGate:
    """Perceptron gate: ``z = sigmoid(x . y)`` computed as a single node."""

    def __init__(self):
        # Input links and output link; populated by forward().
        self.x = None
        self.y = None
        self.z = None

    # Declared static so it can be called on the class and on instances alike;
    # without the decorator an instance call would pass ``self`` as ``x``.
    @staticmethod
    def sigmoid(x):
        """Return the logistic sigmoid of ``x`` (scalar or NumPy array)."""
        return 1/(1+np.exp(-x))

    def forward(self, x, y):
        """Store both input links and return a new link holding sigmoid(x . y)."""
        self.x = x
        self.y = y
        dotProd = self.x.value.dot(self.y.value)
        s = PerceptronGate.sigmoid(dotProd)
        self.z = Link(s, self)
        return self.z

    def backward(self):
        """Propagate the output gradient through the sigmoid and the dot product.

        Each input receives the other input's value times the sigmoid
        derivative ``s * (1 - s)`` times the output gradient.
        """
        s = self.z.value  # sigmoid output computed in forward()
        self.x.propagate_grad(self.y.value * s * (1 - s) * self.z.get_grad())
        self.y.propagate_grad(self.x.value * s * (1 - s) * self.z.get_grad())

In [None]:
class LossGate:
    """Squared-error loss gate: ``z = (x - y) ** 2``."""

    def __init__(self):
        # Input links and output link; populated by forward().
        self.x = None
        self.y = None
        self.z = None

    def forward(self, x, y):
        """Store both input links and return a new link holding (x - y)**2."""
        self.x = x
        self.y = y
        self.z = Link((self.x.value-self.y.value)**2, self)
        return self.z

    def backward(self):
        """Propagate the output gradient to both inputs.

        The local derivatives of ``(x - y)**2`` are ``2 * (x - y)`` with
        respect to ``x`` and ``-2 * (x - y)`` with respect to ``y``; the
        factor 2 keeps the gradient consistent with the value computed in
        forward().
        """
        diff = self.x.value - self.y.value
        upstream = self.z.get_grad()
        self.x.propagate_grad(2.0 * diff * upstream)
        self.y.propagate_grad(-2.0 * diff * upstream)

In [None]:
# Values and gates: weights, inputs and the scalar target the loss compares
# the perceptron output against.
w, x = Link([2.0, -3.0]), Link([-1.0, -2.0])
y = Link(0.12)

perceptron, loss = PerceptronGate(), LossGate()

In [None]:
# Forward pass
def forwardNetwork():
    """Run the perceptron on (w, x) and return the link holding its loss against y."""
    return loss.forward(perceptron.forward(w, x), y)

In [None]:
# Gradient descent on the weights until the loss drops to 1e-4 or below.
# NOTE(review): Link adds every incoming gradient to its stored gradient and
# nothing here resets it, so each update uses the running sum of the gradients
# from all iterations so far, not just the current one -- confirm this is
# intended.
step_size = 0.01
i = 0
s = forwardNetwork()
while s.value > 1e-4:
    print(f'Iteration {i!s}: current loss: {s.value!s}')
    s.propagate_grad(1.0)
    # Negative step: move the weights against the gradient.
    w.update_value(-step_size)
    i += 1
    s = forwardNetwork()
print(f'Iteration {i!s}: current loss: {s.value!s}')

In [None]:
# Target value that the loss compares the perceptron output against.
print(y.value)

In [None]:
# Run the perceptron once more with the current weights and show its output,
# for comparison with the target printed above.
p = perceptron.forward(w,x)
print(p.value)
