<a href="https://colab.research.google.com/github/DannielWhatever/some_notebooks/blob/main/Modelo_para_deep_learning%2C_just_NumPy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Modelo para deep learning, just NumPy

### Funciones utilitarias  

- debug
- weights_initialization: Para inicializar los pesos (Kaiming He).
- scaler: Para normalizar los datos (basado en fit_transform de sklearn)
- dataset_split: Para divivir los datos en train y test (también basada en una función de sklearn).

In [24]:
import numpy as np
import time

DEBUG = True

def debug(str):
  if DEBUG:
    print(str)

def weights_initialization(out_features, in_features):
  """
  Función para inicializar los pesos de una capa.
  """
  return np.random.randn(out_features, in_features) * np.sqrt(2 / in_features)

def scaler(X):
  """
  Función para normalizar los datos.
  """
  mean = np.mean(X, axis=0)
  std = np.std(X, axis=0)
  return (X - mean) / std

def dataset_split(X, y, test_size=0.2):
  """
  Función para dividir los datos en train y test.
  """
  new_indexes = np.arange(len(X))
  np.random.shuffle(new_indexes)

  X = X[new_indexes]
  y = y[new_indexes]

  idx = actual_test_size = int(len(X) * test_size)

  X_train, X_test = X[:-idx], X[-idx:]
  y_train, y_test = y[:-idx], y[-idx:]

  return X_train, X_test, y_train, y_test


### Clase Model   

Constructor(layers, loss_function)
Métodos:
- forward
- backward
- fit
- zero_grad
- evaluate
- summary



In [31]:
class Model:
  """
  Clase que representa un modelo de red neuronal.
  """
  def __init__(self, layers, loss_function):
    self.layers = layers
    self.loss_function = loss_function
    self.training = False

  def forward(self, X):
    debug("Model.forward")
    for layer in self.layers:
      if hasattr(layer, 'training'):
        layer.training = self.training
      X = layer.forward(X)
    return X

  def backward(self, grad_output):
    debug("Model.backward")
    for layer in reversed(self.layers):
      grad_output = layer.backward(grad_output)
    return grad_output

  def fit(self, X, y, epochs, learning_rate, optimizer_class, print_every=1000):
    debug("Model.fit")
    start_time = time.time()
    self.training = True
    optimizer = optimizer_class(self, learning_rate)
    for epoch in range(epochs):
      debug(f"Epoch {epoch}")
      self.zero_grad()
      output = self.forward(X)
      debug(f"output shape: {output.shape}")
      debug(f"y shape: {y.shape}")
      loss, grad_loss = self.loss_function(output, y)
      self.backward(grad_loss)
      optimizer.step()
      if epoch % print_every == 0:
        print(f"Epoch {epoch}, Loss: {loss}")
    self.training = False
    print(f"Training complete. Time elapsed: {(time.time() - start_time):.2f} seconds")


  def zero_grad(self):
    for layer in self.layers:
      if hasattr(layer, 'zero_grad'):
        layer.zero_grad()

  def evaluate(self, X, y):
    output = self.forward(X)
    loss, _ = self.loss_function(output, y, calculate_grad=False)
    return loss, output

  def summary(self):
    for i, layer in enumerate(self.layers):
      if hasattr(layer, 'weights'):
        print(f"Layer {i + 1}: {layer.__class__.__name__}. Params: {layer.weights.shape}")
      else:
        print(f"Layer {i + 1}: {layer.__class__.__name__}")

### Layers disponibles

- LinearLayer
- ReLU
- Dropout




In [29]:
class LinearLayer:
  """
  Clase que representa una capa lineal en una red neuronal.
  """
  def __init__(self, in_features, out_features, bias=True):
    self.weights = weights_initialization(out_features, in_features)
    self.bias = np.random.randn(out_features) if bias else None
    self.grad_weights = np.zeros_like(self.weights)
    self.grad_bias = np.zeros_like(self.bias) if bias else None
    self.X = None

  def forward(self, X):
    debug(" LinearLayer.forward")
    debug(f"    X shape: {X.shape}")
    debug(f"    weights shape: {self.weights.shape}")
    self.X = X
    output = np.dot(X, self.weights.T)
    if self.bias is not None:
      output += self.bias
    debug(f"    output shape: {output.shape}")
    return output

  def backward(self, grad_output):
    debug(" LinearLayer.backward")
    debug(f"    grad_output shape: {grad_output.shape}")
    debug(f"    self.weights shape: {self.weights.shape}")
    debug(f"    self.X shape: {self.X.shape}")
    self.grad_weights = np.dot(grad_output.T, self.X)
    if self.bias is not None:
        self.grad_bias = np.sum(grad_output, axis=0)
    grad_input = np.dot(grad_output, self.weights)
    return grad_input

  def get_params(self):
    return {'weights': self.weights, 'grad_weights': self.grad_weights, 'bias': self.bias, 'grad_bias': self.grad_bias}

  def get_params(self):
    return {
        'weights': self.weights,
        'grad_weights': self.grad_weights,
        'bias': self.bias,
        'grad_bias': self.grad_bias
        }

  def zero_grad(self):
    if self.grad_weights is not None:
      self.grad_weights = np.zeros_like(self.weights)
    if self.bias is not None and self.grad_bias is not None:
      self.grad_bias = np.zeros_like(self.bias)


class ReLU:
  """
  Clase que representa la función de activación ReLU.
  """
  def __init__(self):
    self.input = None

  def forward(self, X):
    debug(" ReLU.forward")
    debug(f"    X shape: {X.shape}")
    self.input = X
    output = np.maximum(0, X)
    debug(f"    output shape: {output.shape}")
    return output

  def backward(self, grad_output):
    debug(" ReLU.backward")
    relu_grad = self.input > 0  # Derivada de ReLU: 1 si input > 0, 0 en caso contrario
    return grad_output * relu_grad  # Element-wise multiplicación por el gradiente de salida


class Dropout:
  """
  Clase que representa la capa de Dropout.
  """
  def __init__(self, dropout_rate):
    self.dropout_rate = dropout_rate
    self.mask = None
    self.training = True  # Para indicar si estamos en modo entrenamiento o inferencia

  def forward(self, X):
    debug(" Dropout.forward")
    if self.training:
        self.mask = (np.random.rand(*X.shape) > self.dropout_rate) / (1.0 - self.dropout_rate)
        output = X * self.mask
    else:
        output = X
    debug(f"    X shape: {X.shape}")
    debug(f"    output shape: {output.shape}")
    return output

  def backward(self, grad_output):
    debug(" Dropout.backward")
    return grad_output * self.mask  # Aplicar la máscara a los gradientes durante el backpropagation




### Funciones de pérdida

- MSELoss



In [5]:

class MSELoss:
  """
  Clase que representa la función de pérdida MSE.
  """
  def __call__(self, y_pred, y_true, calculate_grad=True):
    debug(" MSELoss")
    debug(f"    Shape y_pred: {y_pred.shape}")
    debug(f"    Shape y_true: {y_true.shape}")
    grad_loss = None
    loss = np.mean((y_pred - y_true) ** 2)
    debug(f"    Loss shape: {loss.shape}")
    if calculate_grad:
      grad_loss = 2 * (y_pred - y_true) / y_true.size
      debug(f"    grad_loss shape: {grad_loss.shape}")
    return loss, grad_loss

### Optimizers  
  
- SimpleOptimizer (Gradient descent)
- SGDOptimizer
- AdamOptimizer



In [33]:

class SimpleOptimizer:
  """
  Clase que representa el optimizador base.
  """
  def __init__(self, model, learning_rate=0.01):
    self.updatable_layers = model.layers.filter(lambda x: hasattr(x, 'get_params'))
    self.learning_rate = learning_rate

  def step(self):
    for i, layer in enumerate(self.updatable_layers):
      if hasattr(layer, 'get_params'):
        params = layer.get_params()
        grad_weights = params['grad_weights']
        grad_bias = params['grad_bias'] if 'grad_bias' in params else None
        layer.weights -= self.learning_rate * grad_weights
        if layer.bias is not None:
          layer.bias -= self.learning_rate * grad_bias


class SGDOptimizer(SimpleOptimizer):
  """
  Clase que representa el optimizador SGD.
  """
  def __init__(self, model, learning_rate=0.01, momentum=0.9):
    super().__init__(model, learning_rate)
    self.momentum = momentum
    self.velocities_weights = []
    self.velocities_bias = []
    for i, layer in enumerate(self.updatable_layers):
      params = layer.get_params()
      self.velocities_weights.append(np.zeros_like(params['weights']))
      self.velocities_bias.append(np.zeros_like(params['bias']) if 'bias' in params else None)

  def calculate_weight(self, layer, velocity_weights):
    params = layer.get_params()
    return self.momentum * velocity_weights + (1 - self.momentum) * params['grad_weights']

  def calculate_bias(self, layer, velocity_bias):
    params = layer.get_params()
    return self.momentum * velocity_bias + (1 - self.momentum) * params['grad_bias']

  def step(self):
    for i, layer in enumerate(self.updatable_layers):
      layer.weights -= self.learning_rate * self.calculate_weight(layer, self.velocities_weights[i])
      if layer.bias is not None:
        layer.bias -= self.learning_rate * self.calculate_bias(layer, self.velocities_bias[i])

class AdamOptimizer(SimpleOptimizer):
  """
  Clase que representa el optimizador Adam.
  """
  def __init__(self, model, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
    super().__init__(model, learning_rate)
    self.beta1 = beta1
    self.beta2 = beta2
    self.epsilon = epsilon
    self.m_weights = []
    self.v_weights = []
    self.m_bias = []
    self.v_bias = []

    for i, layer in enumerate(self.updatable_layers):
      params = layer.get_params()
      self.m_weights.append(np.zeros_like(params['weights']))
      self.v_weights.append(np.zeros_like(params['weights']))
      self.m_bias.append(np.zeros_like(params['bias']) if 'bias' in params else None)
      self.v_bias.append(np.zeros_like(params['bias']) if 'bias' in params else None)

  def step(self):
    layer_idx = 0
    for i, layer in enumerate(self.updatable_layers):
      params = layer.get_params()
      self.m_weights[i] = self.beta1 * self.m_weights[i] + (1 - self.beta1) * params['grad_weights']
      self.v_weights[i] = self.beta2 * self.v_weights[i] + (1 - self.beta2) * params['grad_weights'] ** 2
      self.m_bias[i] = self.beta1 * self.m_bias[i] + (1 - self.beta1) * params['grad_bias']
      self.v_bias[i] = self.beta2 * self.v_bias[i] + (1 - self.beta2) * params['grad_bias'] ** 2
      m_hat_weights = self.m_weights[i] / (1 - self.beta1)
      v_hat_weights = self.v_weights[i] / (1 - self.beta2)
      m_hat_bias = self.m_bias[i] / (1 - self.beta1)
      v_hat_bias = self.v_bias[i] / (1 - self.beta2)
      layer.weights -= self.learning_rate * m_hat_weights / (np.sqrt(v_hat_weights) + self.epsilon)
      if layer.bias is not None:
        layer.bias -= self.learning_rate * m_hat_bias / (np.sqrt(v_hat_bias) + self.epsilon)


## Utilizando el modelo

### Cargando y preparando los datos



In [1]:
# https://archive.ics.uci.edu/dataset/320/student+performance
!pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [20]:
from ucimlrepo import fetch_ucirepo

DATASET_ID = 320

student_performance = fetch_ucirepo(id=DATASET_ID)

X = student_performance.data.features
y = student_performance.data.targets

# Dividir los datos
X = X[['studytime', 'failures', 'goout', 'absences', 'Medu', 'Fedu', 'Dalc', 'Walc']]
y = y['G3']
X = scaler(X.values)
X_train, X_test, y_train, y_test = dataset_split(X, y, test_size=0.05)

X_train = X_train
y_train = y_train.values.reshape(-1, 1)
X_test = X_test
y_test = y_test.values.reshape(-1, 1)

print(f"X_train. Shape: {str(X_train.shape):<8} Type: {type(X_train)}")
print(f"y_train. Shape: {str(y_train.shape):<8} Type: {type(y_train)}")
print(f"X_test.  Shape: { str(X_test.shape):<8} Type: {type(X_test)}")
print(f"y_test.  Shape: { str(y_test.shape):<8} Type: {type(y_test)}")

X_train. Shape: (617, 8) Type: <class 'numpy.ndarray'>
y_train. Shape: (617, 1) Type: <class 'numpy.ndarray'>
X_test.  Shape: (32, 8)  Type: <class 'numpy.ndarray'>
y_test.  Shape: (32, 1)  Type: <class 'numpy.ndarray'>


### Define, entrena y prueba el modelo


In [32]:
DEBUG = False

in_params = X_train[0].shape[0]
layers = [
    LinearLayer(in_features=in_params, out_features=64), ReLU(),
    Dropout(0.3), LinearLayer(in_features=64, out_features=32), ReLU(),
    Dropout(0.3), LinearLayer(in_features=32, out_features=16), ReLU(),
    Dropout(0.3), LinearLayer(in_features=16, out_features=8), ReLU(),
    Dropout(0.3), LinearLayer(in_features=8, out_features=3), ReLU(),
    LinearLayer(in_features=3, out_features=1)
]

loss_function = MSELoss()

model = Model(layers=layers, loss_function=loss_function)

model.summary()
print()

loss, output = model.evaluate(X_train, y_train)
print(f"\nRendimiento inicial: {loss}")

myoptimizer = SGDOptimizer
model.fit(X_train, y_train, epochs=200000, learning_rate=5*10e-3, optimizer_class=myoptimizer, print_every=10000)

loss, output = model.evaluate(X_test, y_test)
print(f"\nRendimiento final: {loss}")



Layer 1: LinearLayer. Params: (64, 8)
Layer 2: ReLU
Layer 3: Dropout
Layer 4: LinearLayer. Params: (32, 64)
Layer 5: ReLU
Layer 6: Dropout
Layer 7: LinearLayer. Params: (16, 32)
Layer 8: ReLU
Layer 9: Dropout
Layer 10: LinearLayer. Params: (8, 16)
Layer 11: ReLU
Layer 12: Dropout
Layer 13: LinearLayer. Params: (3, 8)
Layer 14: ReLU
Layer 15: LinearLayer. Params: (1, 3)


Rendimiento inicial: 146.48474303134142
Epoch 0, Loss: 143.42244469314537
Epoch 10000, Loss: 6.7151308591265
Epoch 20000, Loss: 5.186779544065626
Epoch 30000, Loss: 4.481272698202704
Epoch 40000, Loss: 4.490842298020236
Epoch 50000, Loss: 3.9469239239994307
Epoch 60000, Loss: 4.756128021684299
Epoch 70000, Loss: 3.618981372661058
Epoch 80000, Loss: 3.6012584889415624
Epoch 90000, Loss: 4.0429252666439925
Epoch 100000, Loss: 3.7961230273755384
Epoch 110000, Loss: 4.115529356709682
Epoch 120000, Loss: 3.8188331746398574
Epoch 130000, Loss: 3.6289526040161304
Epoch 140000, Loss: 3.3146621807052807
Epoch 150000, Loss: 3.44