<a href="https://colab.research.google.com/github/DavidJimenez10/Red-Neuronal-desde-0/blob/main/RedNeuronalDesdeCero.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Red neuronal desde 0
Este notebook muestra el proceso de creación de una red neuronal densa desde 0, utilizando únicamente NumPy y programación orientada a objetos en Python.

In [None]:
import numpy as np
# Fixed seed used to build the random generator below.
seed = 12345
# NumPy Generator seeded with `seed`.
# NOTE(review): no visible cell reads `rng`; LayerDense draws its weights via
# np.random.randn (global legacy state) instead - confirm whether this is still needed.
rng = np.random.default_rng(seed)

In [None]:
!pip install nnfs

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting nnfs
  Downloading nnfs-0.5.1-py3-none-any.whl (9.1 kB)
Installing collected packages: nnfs
Successfully installed nnfs-0.5.1


In [None]:
import nnfs
from nnfs.datasets import spiral_data
nnfs.init()

## Capas

In [None]:
class LayerDense:
    """Fully connected layer: ``output = inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer parameters.

        Args:
            n_inputs: Number of features of each incoming sample.
            n_neurons: Number of neurons, i.e. output features.
        """
        # Small random starting weights of shape (n_inputs, n_neurons);
        # one zero bias per neuron, stored as a row vector.
        self.weights = np.random.randn(n_inputs, n_neurons) * 0.01
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute the layer output for a batch and store it in ``self.output``."""
        # Keep the batch around: backward() needs it for the weight gradient.
        self.inputs = inputs
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases

    def backward(self, dvalues):
        """Compute gradients from the gradient of the following step.

        Stores ``dinputs`` (gradient w.r.t. the inputs), ``dbiases`` and
        ``dweights`` on the instance. Requires a previous ``forward`` call.
        """
        # Gradient handed to the previous layer.
        self.dinputs = np.dot(dvalues, self.weights.T)
        # Each bias receives the sum of the gradients over the batch axis.
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        # Gradient w.r.t. the weights uses the cached forward inputs.
        self.dweights = np.dot(self.inputs.T, dvalues)

## Funciones de activación

In [None]:
class ActivationReLU:
    """Rectified linear activation: element-wise ``max(0, x)``."""

    def forward(self, inputs):
        """Store the inputs and write the rectified values to ``self.output``."""
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Pass the gradient through where the forward input was positive.

        Works on a copy, so ``dvalues`` itself is left untouched. The result
        is stored in ``self.dinputs``.
        """
        blocked = self.inputs <= 0
        gradient = dvalues.copy()
        # Positions whose forward input was <= 0 contribute no gradient.
        gradient[blocked] = 0
        self.dinputs = gradient

class ActivationSoftmax:
    """Softmax activation applied row by row to a 2-D batch."""

    def forward(self, inputs):
        """Write the row-wise softmax of ``inputs`` to ``self.output``."""
        # Subtract each row's maximum so the largest exponent is exp(0).
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(shifted)
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values / row_totals

    def backward(self, dvalues):
        """Compute the gradient w.r.t. the softmax inputs, one sample at a time.

        Uses ``self.output`` and stores the result in ``self.dinputs``.
        """
        # Allocated but not initialised; every row is filled in the loop below.
        self.dinputs = np.empty_like(dvalues)

        for row, (probabilities, upstream) in enumerate(zip(self.output, dvalues)):
            column = probabilities.reshape(-1, 1)
            # Jacobian of the softmax for this sample: diag(s) - s . s^T
            jacobian = np.diagflat(column) - np.dot(column, column.T)
            self.dinputs[row] = np.dot(jacobian, upstream)



## Pérdida

In [None]:
class Loss:
    """Base class for losses; subclasses provide ``forward(output, y)``."""

    def calculate(self, output, y):
        """Return the mean of the per-sample losses computed by ``forward``."""
        sample_losses = self.forward(output, y)
        return np.mean(sample_losses)


class LossCategoricalCrossEntropy(Loss):
    """Categorical cross-entropy for class-index or one-hot targets."""

    def forward(self, y_pred, y_real):
        """Return the negative log-likelihood of the true class per sample.

        Args:
            y_pred: 2-D array of predicted probabilities, one row per sample.
            y_real: Array of class indices (1-D) or one-hot rows (2-D).

        Returns:
            1-D array with one non-negative loss value per sample.

        Raises:
            ValueError: If ``y_real`` is neither 1-D nor 2-D.
        """
        samples = len(y_pred)
        # Clip away exact 0 and 1 so log() never receives 0.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        n_dims = len(y_real.shape)
        if n_dims == 1:
            # Sparse labels: pick the predicted probability of the true class.
            correct_confidence = y_pred_clipped[range(samples), y_real]
        elif n_dims == 2:
            # One-hot labels: the mask keeps only the true-class probability.
            correct_confidence = np.sum(y_pred_clipped * y_real, axis=1)
        else:
            raise ValueError(
                f"y_real must be 1-D (class indices) or 2-D (one-hot), got {n_dims}-D"
            )

        # The minus sign turns log-likelihoods (<= 0) into losses (>= 0).
        return -np.log(correct_confidence)

    def backward(self, dvalues, y_true):
        """Compute the gradient of the loss w.r.t. the predictions.

        Args:
            dvalues: Predicted values, one row per sample.
            y_true: True labels, as class indices (1-D) or one-hot rows (2-D).

        The result is stored in ``self.dinputs``.
        """
        samples = len(dvalues)
        labels = len(dvalues[0])

        # Convert class indices to one-hot rows.
        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]

        # Gradient of -log(p) for the true class, zero elsewhere.
        self.dinputs = -y_true / dvalues
        # Normalise by the batch size.
        self.dinputs = self.dinputs / samples

    

In [None]:
#Implementando la derivada simplificada
class ActivationSoftmaxLossCategoricalCrossEntropy:
    """Softmax activation and categorical cross-entropy loss in one step.

    Combining both allows the simplified gradient used in ``backward``.
    """

    def __init__(self):
        self.activation = ActivationSoftmax()
        self.loss = LossCategoricalCrossEntropy()

    def forward(self, inputs, y_true):
        """Apply softmax to ``inputs``, keep it in ``self.output``, return the loss."""
        self.activation.forward(inputs)
        probabilities = self.activation.output
        self.output = probabilities
        return self.loss.calculate(probabilities, y_true)

    def backward(self, dvalues, y_true):
        """Compute the gradient w.r.t. the softmax inputs.

        Args:
            dvalues: Softmax outputs, one row per sample.
            y_true: True labels, as class indices (1-D) or one-hot rows (2-D).

        The result is stored in ``self.dinputs``; ``dvalues`` is not modified.
        """
        n_samples = len(dvalues)

        # Undo one-hot encoding: the index of the largest entry of each row.
        is_one_hot = len(y_true.shape) == 2
        class_indices = np.argmax(y_true, axis=1) if is_one_hot else y_true

        gradient = dvalues.copy()
        # Simplified gradient: subtract 1 at the true class of each sample.
        gradient[range(n_samples), class_indices] -= 1
        # Normalise by the batch size.
        self.dinputs = gradient / n_samples

In [None]:
class OptimizerSGD:
    """Stochastic gradient descent with optional decay and momentum."""

    def __init__(self, learning_rate=1.0, decay=0., momentum=0.):
        """Store the hyper-parameters.

        Args:
            learning_rate: Initial step size.
            decay: Learning-rate decay per iteration; 0 disables it.
            momentum: Fraction of the previous update that is retained;
                0 disables momentum.
        """
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.momentum = momentum
        self.iterations = 0

    def pre_update_params(self):
        """Refresh the current learning rate; call once before the updates."""
        if not self.decay:
            return
        decay_factor = 1.0 / (1.0 + self.decay * self.iterations)
        self.current_learning_rate = self.learning_rate * decay_factor

    def update_params(self, layer):
        """Update ``layer.weights`` / ``layer.biases`` in place from its gradients."""
        step_size = self.current_learning_rate

        if self.momentum:
            # Create the momentum arrays the first time this layer is seen.
            if not hasattr(layer, 'weight_momentums'):
                layer.weight_momentums = np.zeros_like(layer.weights)
                layer.bias_momentums = np.zeros_like(layer.biases)

            weight_step = self.momentum * layer.weight_momentums - step_size * layer.dweights
            bias_step = self.momentum * layer.bias_momentums - step_size * layer.dbiases

            # The update just taken becomes the momentum for the next call.
            layer.weight_momentums = weight_step
            layer.bias_momentums = bias_step
        else:
            weight_step = -step_size * layer.dweights
            bias_step = -step_size * layer.dbiases

        layer.weights += weight_step
        layer.biases += bias_step

    def post_update_params(self):
        """Count one finished iteration; call once after the updates."""
        self.iterations += 1

class OptimizerAdaGrad:
    """AdaGrad: per-parameter steps scaled by the accumulated squared gradients."""

    def __init__(self, learning_rate=1.0, decay=0., epsilon=1e-7):
        """Store the hyper-parameters.

        Args:
            learning_rate: Initial step size.
            decay: Learning-rate decay per iteration; 0 disables it.
            epsilon: Small constant added to the denominator of the update.
        """
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.epsilon = epsilon
        self.iterations = 0

    def pre_update_params(self):
        """Refresh the current learning rate; call once before the updates."""
        if not self.decay:
            return
        decay_factor = 1.0 / (1.0 + self.decay * self.iterations)
        self.current_learning_rate = self.learning_rate * decay_factor

    def update_params(self, layer):
        """Update ``layer.weights`` / ``layer.biases`` in place from its gradients."""
        # Create the caches of squared gradients the first time this layer is seen.
        if not hasattr(layer, 'weight_cache'):
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_cache = np.zeros_like(layer.biases)

        # The caches only ever grow: they accumulate every squared gradient.
        layer.weight_cache += layer.dweights ** 2
        layer.bias_cache += layer.dbiases ** 2

        step_size = self.current_learning_rate
        weight_scale = np.sqrt(layer.weight_cache) + self.epsilon
        bias_scale = np.sqrt(layer.bias_cache) + self.epsilon

        layer.weights += -step_size * layer.dweights / weight_scale
        layer.biases += -step_size * layer.dbiases / bias_scale

    def post_update_params(self):
        """Count one finished iteration; call once after the updates."""
        self.iterations += 1

class OptimizerRMSprop:
    """RMSprop: per-parameter steps scaled by a moving average of squared gradients."""

    def __init__(self, learning_rate=0.001, decay=0., epsilon=1e-7, rho=0.9):
        """Store the hyper-parameters.

        Args:
            learning_rate: Initial step size.
            decay: Learning-rate decay per iteration; 0 disables it.
            epsilon: Small constant added to the denominator of the update.
            rho: Weight of the previous cache in the moving average.
        """
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0
        self.epsilon = epsilon
        self.rho = rho

    def pre_update_params(self):
        """Refresh the current learning rate; call once before the updates."""
        if self.decay:
            # Always decay from the *initial* rate. Scaling the already
            # decayed rate would compound the factor on every iteration.
            self.current_learning_rate = self.learning_rate * (1. / (1. + self.decay * self.iterations))

    def update_params(self, layer):
        """Update ``layer.weights`` / ``layer.biases`` in place from its gradients."""
        # Create the caches the first time this layer is seen.
        if not hasattr(layer, 'weight_cache'):
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_cache = np.zeros_like(layer.biases)

        # Moving average of the squared gradients.
        layer.weight_cache = self.rho * layer.weight_cache + (1 - self.rho) * layer.dweights**2
        layer.bias_cache = self.rho * layer.bias_cache + (1 - self.rho) * layer.dbiases**2

        layer.weights += -self.current_learning_rate * layer.dweights / (np.sqrt(layer.weight_cache) + self.epsilon)
        layer.biases += -self.current_learning_rate * layer.dbiases / (np.sqrt(layer.bias_cache) + self.epsilon)

    def post_update_params(self):
        """Count one finished iteration; call once after the updates."""
        self.iterations += 1


class OptimizerAdam:
    """Adam: momentum plus RMSprop-style scaling, both bias-corrected."""

    def __init__(self, learning_rate=0.001, decay=0., epsilon=1e-7, beta_1=0.9, beta_2=0.999):
        """Store the hyper-parameters.

        Args:
            learning_rate: Initial step size.
            decay: Learning-rate decay per iteration; 0 disables it.
            epsilon: Small constant added to the denominator of the update.
            beta_1: Weight of the previous value in the gradient average.
            beta_2: Weight of the previous value in the squared-gradient average.
        """
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0
        self.epsilon = epsilon
        self.beta_1 = beta_1
        self.beta_2 = beta_2

    def pre_update_params(self):
        """Refresh the current learning rate; call once before the updates."""
        if self.decay:
            # Always decay from the *initial* rate. Scaling the already
            # decayed rate would compound the factor on every iteration.
            self.current_learning_rate = self.learning_rate * (1. / (1. + self.decay * self.iterations))

    def update_params(self, layer):
        """Update ``layer.weights`` / ``layer.biases`` in place from its gradients."""
        # Create momentums and caches the first time this layer is seen.
        if not hasattr(layer, 'weight_cache'):
            layer.weight_momentums = np.zeros_like(layer.weights)
            layer.weight_cache = np.zeros_like(layer.weights)

            layer.bias_momentums = np.zeros_like(layer.biases)
            layer.bias_cache = np.zeros_like(layer.biases)

        # `iterations` starts at 0, so the first update is step 1.
        step = self.iterations + 1
        momentum_correction = 1 - self.beta_1 ** step
        cache_correction = 1 - self.beta_2 ** step

        # Moving average of the gradients.
        layer.weight_momentums = self.beta_1 * layer.weight_momentums + (1 - self.beta_1) * layer.dweights
        layer.bias_momentums = self.beta_1 * layer.bias_momentums + (1 - self.beta_1) * layer.dbiases

        # Bias correction compensates for the zero-initialised averages.
        weight_momentums_corrected = layer.weight_momentums / momentum_correction
        bias_momentums_corrected = layer.bias_momentums / momentum_correction

        # Moving average of the squared gradients.
        layer.weight_cache = self.beta_2 * layer.weight_cache + (1 - self.beta_2) * layer.dweights**2
        layer.bias_cache = self.beta_2 * layer.bias_cache + (1 - self.beta_2) * layer.dbiases**2

        weight_cache_corrected = layer.weight_cache / cache_correction
        bias_cache_corrected = layer.bias_cache / cache_correction

        layer.weights += -self.current_learning_rate * weight_momentums_corrected / (np.sqrt(weight_cache_corrected) + self.epsilon)
        layer.biases += -self.current_learning_rate * bias_momentums_corrected / (np.sqrt(bias_cache_corrected) + self.epsilon)

    def post_update_params(self):
        """Count one finished iteration; call once after the updates."""
        self.iterations += 1



In [None]:
# Hand-written softmax outputs for three samples and their true class indices.
softmax_output = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08],
])
class_targets = np.array([0, 1, 1])

# Path 1: combined softmax + loss gradient.
# The notebook author reports this to be roughly 7 times faster.
softmax_loss = ActivationSoftmaxLossCategoricalCrossEntropy()
softmax_loss.backward(softmax_output, class_targets)
dvalues1 = softmax_loss.dinputs

# Path 2: loss gradient first, then back through the softmax Jacobian.
loss = LossCategoricalCrossEntropy()
loss.backward(softmax_output, class_targets)
activation = ActivationSoftmax()
activation.output = softmax_output
activation.backward(loss.dinputs)
dvalues2 = activation.dinputs

# Print both results so they can be compared by eye.
print("Gradients: combined loss and activation:", dvalues1, sep="\n")
print("Gradients: separate loss and activation:", dvalues2, sep="\n")

Gradients: combined loss and activation:
[[-0.1         0.03333333  0.06666667]
 [ 0.03333333 -0.16666667  0.13333333]
 [ 0.00666667 -0.03333333  0.02666667]]
Gradients: separate loss and activation:
[[-0.09999999  0.03333334  0.06666667]
 [ 0.03333334 -0.16666667  0.13333334]
 [ 0.00666667 -0.03333333  0.02666667]]


In [None]:
# Spiral dataset: 100 samples for each of 3 classes.
X, y = spiral_data(samples=100, classes=3)

# Network: 2 inputs -> 64 ReLU neurons -> 3 outputs, softmax fused with the loss.
dense1 = LayerDense(2, 64)
activation1 = ActivationReLU()
dense2 = LayerDense(64, 3)
loss_activation = ActivationSoftmaxLossCategoricalCrossEntropy()
# Separate activation / loss objects, as an alternative to the fused one:
# activation2 = ActivationSoftmax()
# loss_function = LossCategoricalCrossEntropy()

# Other optimizers that were tried:
# optimizer = OptimizerSGD(decay=1e-3)
# optimizer = OptimizerAdaGrad(decay=1e-4)
# optimizer = OptimizerRMSprop(learning_rate=0.02, decay=1e-4, rho=0.999)
optimizer = OptimizerAdam(learning_rate=0.05, decay=5e-7)

for epoch in range(10001):
    # Forward pass
    dense1.forward(X)
    activation1.forward(dense1.output)
    dense2.forward(activation1.output)
    # With separate objects this would be:
    # activation2.forward(dense2.output)
    # loss = loss_function.calculate(activation2.output, y)
    loss = loss_activation.forward(dense2.output, y)

    # Accuracy: share of samples whose most likely class equals the label.
    predictions = np.argmax(loss_activation.output, axis=1)
    if len(y.shape) == 2:
        y = np.argmax(y, axis=1)
    accuracy = np.mean(predictions == y)

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print(f'epoch: {epoch}, acc: {accuracy:.3f}, loss: {loss:.3f}, learning rate {optimizer.current_learning_rate}')

    # Backward pass
    loss_activation.backward(loss_activation.output, y)
    dense2.backward(loss_activation.dinputs)
    activation1.backward(dense2.dinputs)
    dense1.backward(activation1.dinputs)

    # Parameter update
    optimizer.pre_update_params()
    optimizer.update_params(dense1)
    optimizer.update_params(dense2)
    optimizer.post_update_params()

# Gradients left over from the last epoch.
print(dense1.dweights, dense1.dbiases, dense2.dweights, dense2.dbiases, sep="\n")

epoch: 0, acc: 0.350, loss: -1.099, learning rate 0.05
epoch: 100, acc: 0.570, loss: -0.902, learning rate 0.049876405061425835
epoch: 200, acc: 0.620, loss: -0.864, learning rate 0.0495049832512073
epoch: 300, acc: 0.640, loss: -0.821, learning rate 0.04889128328816377
epoch: 400, acc: 0.637, loss: -0.794, learning rate 0.0480444037704678
epoch: 500, acc: 0.643, loss: -0.780, learning rate 0.04697676874534755
epoch: 600, acc: 0.667, loss: -0.759, learning rate 0.045703824504893936
epoch: 700, acc: 0.660, loss: -0.743, learning rate 0.0442436679478586
epoch: 800, acc: 0.670, loss: -0.720, learning rate 0.042616618997994474
epoch: 900, acc: 0.687, loss: -0.693, learning rate 0.04084475118162425
epoch: 1000, acc: 0.750, loss: -0.630, learning rate 0.03895139548299276
epoch: 1100, acc: 0.780, loss: -0.602, learning rate 0.036960632988575076
epoch: 1200, acc: 0.780, loss: -0.572, learning rate 0.03489679160511355
epoch: 1300, acc: 0.790, loss: -0.547, learning rate 0.03278396132495571
epoc