# Programando una red neuronal desde 0

In [1]:
from keras.datasets import mnist
import numpy as np
from math import e

In [26]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

## Clase RedNeuronal

Sea $C(a)$ la funcion del coste definida como...
$$C=\frac{1}{2}\sum_i (y_i - a_i)^2$$

Con la funcion de activacion sigmoidea $a(Z^L)$...
$$a(z) = \frac{1}{a+e^{-z}}$$

Para la sumatoria $Z$ de la capa $L$...
$$Z^L = \sum w_i^L x_i^{L-1} + B^L$$

### Forward Propagation:

El output de la L-esima capa es:

$$ out^{L} = a(Z^{L}) = a(W^{L}a^{L-1} + b^{L}) = a(\sum w_i^{L} x_i + b^{L}) $$

Donde $a^{L-1}$ es el output de la capa anterior o, en su defecto, el input.

### Backward Propagation:

Sea $\delta^L$ el error imputado de la neurona:

$$\delta^L = \frac{\partial C}{\partial a} * \frac{\partial a}{\partial Z^L}$$

Vemos el caso de la ultima capa, por regla de la cadena:

$$\frac{\partial C}{\partial W^L} = \delta^L * \frac{\partial Z^L}{\partial W^L}$$
$$\frac{\partial C}{\partial B^L} = \delta^L * \frac{\partial Z^L}{\partial B}$$

En particular, valen las derivadas:

$$\frac{\partial Z}{\partial B} = 1$$
$$\frac{\partial Z}{\partial W^L} = a^{L-1}$$

Entonces nos queda:

$$\frac{\partial C}{\partial W^L} = \delta^L * a^{L-1}$$
$$\frac{\partial C}{\partial B^L} = \delta^L $$

Y si L es la primer capa $\rightarrow a^{L-1}$ es el input.

Luego, para las capas anteriores:

$$\frac{\partial C}{\partial W^{L-1}} = \delta^L * \frac{\partial Z^L}{\partial a^{L-1}}
 * \frac{\partial a^{L-1}}{\partial Z^{L-1}} * \frac{\partial Z^{L-1}}{\partial W^{L-1}} $$

$$\frac{\partial C}{\partial B^{L-1}} = \frac{\partial Z^L}{\partial a^{L-1}}
 * \frac{\partial a^{L-1}}{\partial Z^{L-1}} * \frac{\partial Z^{L-1}}{\partial B^{L-1}}$$

Y por regla de la cadena queda:

$$\frac{\partial C}{\partial W^{L-1}} = \delta^L * W^L * \frac{\partial a^{L-1}}{\partial z^{L-1}} * a^{L-2} $$

$$\frac{\partial C}{\partial W^{L-1}} = \delta^L * W^L * \frac{\partial a^{L-1}}{\partial z^{L-1}} $$


In [13]:
class Layer:

    def __init__(self, neurons, input_shape, activation):
        self.neurons = neurons
        self.input_shape = input_shape
        self.weights = np.random.rand(self.input_shape, self.neurons)
        self.bias = np.random.rand(self.neurons)
        self.activation = activation

    def forward(self, inputs):
        return self.activation(np.dot(inputs, self.weights) + self.bias)

In [25]:
class NeuralNetwork:

    def __init__(self, input_size, layers, l_rate, output_size):
        self.input_size = input_size
        self.layers = layers
        self.output_size = output_size  
        self.l_rate = l_rate

    def forward(self, x, retLayers=False):
        # Forward pass del modelo
        t = x
        layer_outputs = []
        for layer in self.layers:
            t = layer.forward(t)
            layer_outputs.append(t)

        if retLayers:
            return t, layer_outputs

        return t          

    def train(self, X, Y):        

        # Entreno el modelo
        
        for i, x_ in enumerate(X):

            x = x.reshape(784,1)

            # Forwardpropagation:
            out, a = self.forward(x, retLayers=True) # output_layers

            # Backpropagation:
            
            # Calculo el gradiente de la ultima capa
            L = len(self.layers)
            error_L = -(Y[i] - a[L]) * e**(-a[L])/(1+e**(-a[L]))**2
            W_grad_L = error_L * a[L-1]
            B_grad_L = error_L

            # Actualizo los pesos de la ultima capa
            self.layers[-1].weights -= self.l_rate * W_grad_L
            self.layers[-1].bias -= self.l_rate * B_grad_L

            # Repito el procedimiento en las capas anteriores 
            for j in range(L-1, 0, -1):
                l = L - j # capa actual 
                da = e**(-a[l])/(1+e**(-a[l]))**2 # derivada de la activacion
                # Calculo el gradiente de la capa actual
                W_grad_j = error_L * self.layers[l+1].weights * da * a[l-1]
                B_grad_j = error_L * self.layers[l+1].weights * da
                # Actualizo los pesos de la capa actual
                self.layers[j].weights -= self.l_rate * W_grad_j
                self.layers[j].bias -= self.l_rate * B_grad_j
            
            pass
        pass



In [16]:
model = NeuralNetwork(
                784,
                [Layer(16, 784, lambda x: 1/(1+e**(-x))),
                 Layer(16, 16, lambda x: 1/(1+e**(-x))),
                 Layer(10, 16, lambda x: 1/(1+e**(-x)))],
                0.1,
                10)

model.forward(np.random.rand(784))

array([0.99994666, 0.99989188, 0.99942884, 0.99998529, 0.99986706,
       0.99998856, 0.99991913, 0.9999316 , 0.99945283, 0.999824  ])

In [47]:
sample_x = X_train[0].reshape(784,)
sample_x.shape

(784,)

In [48]:
np.dot(model.layers[0].weights, sample_x) + model.layers[0].bias 

ValueError: shapes (784,16) and (784,) not aligned: 16 (dim 1) != 784 (dim 0)