# Reimplementación desde cero de una red densa en PyTorch

**Objetivo:** replicar el ejemplo de Keras "from scratch" usando PyTorch, creando clases mínimas (`NaiveDense`, `NaiveSequential`), un generador de batches, el paso de entrenamiento, el lazo de entrenamiento (`fit`) y la evaluación.

> Nota: Para ser fieles al diseño original, la **última capa aplica `softmax`** y la pérdida se calcula **desde probabilidades** como `-log(p_true)`.


Enlace al notebook original (versión TensorFlow/Keras): https://github.com/fchollet/deep-learning-with-python-notebooks/blob/master/chapter02_mathematical-building-blocks.ipynb

## 1) Imports y utilidades

In [1]:
import math
import torch
import torch.nn.functional as F
from torchvision import datasets, transforms

torch.manual_seed(42)
print('PyTorch version:', torch.__version__)

PyTorch version: 2.8.0+cu126


## 2) `NaiveDense`: capa densa mínima

In [2]:
class NaiveDense:
    """Minimal fully connected layer computing ``activation(inputs @ W + b)``.

    ``W`` has shape ``(input_size, output_size)`` and is drawn uniformly from
    ``[-0.05, 0.05)``; ``b`` has shape ``(output_size,)`` and starts at zero.
    Both are ``torch.nn.Parameter`` objects so autograd tracks them.
    """

    def __init__(self, input_size, output_size, activation=None):
        # Uniform initialisation (similar to initializer="uniform")
        bound = 0.05
        kernel = torch.empty(input_size, output_size).uniform_(-bound, bound)
        self.W = torch.nn.Parameter(kernel)
        self.b = torch.nn.Parameter(torch.zeros(output_size))
        # Optional callable applied to the affine output, e.g. F.relu or a softmax
        self.activation = activation

    def __call__(self, inputs):
        """Apply the layer to ``inputs`` of shape ``(batch, input_size)``."""
        pre_activation = inputs @ self.W + self.b
        if self.activation is None:
            return pre_activation
        return self.activation(pre_activation)

    @property
    def weights(self):
        """Trainable parameters of the layer, as ``[W, b]``."""
        return [self.W, self.b]

## 3) `NaiveSequential`: composición de capas

In [3]:
class NaiveSequential:
    """Chain of layers applied one after another.

    Each element of ``layers`` must be callable and expose a ``weights``
    attribute holding its trainable parameters.
    """

    def __init__(self, layers):
        self.layers = layers

    def __call__(self, inputs):
        """Feed ``inputs`` through every layer in order and return the result."""
        out = inputs
        for apply_layer in self.layers:
            out = apply_layer(out)
        return out

    @property
    def weights(self):
        """Flat list with the weights of all layers, in layer order."""
        return [w for layer in self.layers for w in layer.weights]

## 4) Generador de mini-lotes `BatchGenerator`

In [4]:
class BatchGenerator:
    """Hands out consecutive mini-batches of ``(images, labels)``.

    ``num_batches`` is ``ceil(len(images) / batch_size)``, so the last batch
    may be smaller than ``batch_size``. The data is served in its stored
    order; no shuffling is performed.
    """

    def __init__(self, images, labels, batch_size=128):
        assert images.shape[0] == labels.shape[0]
        self.images, self.labels = images, labels
        self.batch_size = batch_size
        self.num_batches = math.ceil(len(images) / batch_size)
        self.index = 0  # start position of the next batch

    def next(self):
        """Return the next ``(images, labels)`` slice and advance the cursor."""
        window = slice(self.index, self.index + self.batch_size)
        batch = (self.images[window], self.labels[window])
        self.index = window.stop
        return batch

## 5) Pérdida `sparse_categorical_crossentropy` desde **probabilidades**
Como la última capa aplica `softmax`, calculamos la pérdida manualmente como `-log(p_true)`.

In [5]:
def sparse_categorical_crossentropy_from_probs(probs, true_labels):
    """Mean negative log-likelihood of the true class, computed from probabilities.

    probs: (batch, num_classes), with softmax already applied.
    true_labels: (batch,), integer class ids in [0..C-1].
    Returns a scalar tensor: the batch mean of ``-log(p_true + eps)``.
    """
    eps = 1e-12  # keeps log() finite when the true-class probability is 0
    batch_rows = torch.arange(probs.shape[0])
    # Pick, for every row, the probability assigned to its true class
    p_true = probs[batch_rows, true_labels]
    return torch.log(p_true + eps).neg().mean()

## 6) Paso de entrenamiento `one_training_step`
Hace: forward → pérdida → backward → update (SGD).

In [6]:
# Step size handed to the optimizer.
learning_rate = 0.001

# Placeholder for the optimizer that covers all model weights;
# it is assigned once the model has been created.
optimizer = None

def update_weights():
    """Apply one step of the module-level ``optimizer``, then clear gradients.

    Gradients are zeroed after the step so the next ``backward()`` call
    starts from a clean slate.
    """
    opt = optimizer
    opt.step()
    opt.zero_grad()

def one_training_step(model, images_batch, labels_batch):
    """Run forward pass, loss, backward pass and weight update for one batch.

    Returns the batch loss as a Python float.
    """
    # Forward: the model output is already a probability distribution
    # (softmax is applied by the last layer).
    probabilities = model(images_batch)
    batch_loss = sparse_categorical_crossentropy_from_probs(probabilities, labels_batch)

    # Backward pass, then apply the parameter update
    batch_loss.backward()
    update_weights()

    return batch_loss.item()

## 7) Lazo de entrenamiento `fit`

In [7]:
def fit(model, images, labels, epochs, batch_size=128):
    """Train ``model`` for ``epochs`` full passes over ``images``/``labels``.

    A fresh ``BatchGenerator`` is created for every epoch and the loss is
    printed every 100 batches.
    """
    for epoch_counter in range(epochs):
        print(f"Epoch {epoch_counter}")
        batches = BatchGenerator(images, labels, batch_size=batch_size)
        batches.index = 0  # explicit rewind to the start of the data
        for batch_counter in range(batches.num_batches):
            batch_images, batch_labels = batches.next()
            loss = one_training_step(model, batch_images, batch_labels)
            if not batch_counter % 100:
                print(f"loss at batch {batch_counter}: {loss:.2f}")

## 8) Cargar MNIST y preparar tensores `(N, 784)` en `[0,1]`

In [8]:
tfms = transforms.ToTensor()  # converts each image to float32 in [0, 1]
train_ds = datasets.MNIST(root='./data', train=True,  download=True, transform=tfms)
test_ds  = datasets.MNIST(root='./data', train=False, download=True, transform=tfms)


def _dataset_to_tensors(dataset):
    """Materialise a dataset of ``(image, label)`` samples as two tensors.

    The dataset is traversed only once, so the transform runs a single time
    per sample (reading images and labels in separate passes would decode
    every image twice).

    Returns ``(images, labels)``: the stacked images flattened to
    ``(N, 784)`` and the labels as an int64 tensor of shape ``(N,)``.
    """
    # Index access through __len__/__getitem__ is the map-style dataset protocol.
    samples = [dataset[i] for i in range(len(dataset))]
    images, labels = zip(*samples)
    # Stack to one big tensor and flatten each image to 784 values
    return torch.stack(images).view(-1, 28 * 28), torch.tensor(labels).long()


train_images, train_labels = _dataset_to_tensors(train_ds)
test_images, test_labels = _dataset_to_tensors(test_ds)

train_images.shape, train_labels.shape, test_images.shape, test_labels.shape

100%|██████████| 9.91M/9.91M [00:00<00:00, 17.9MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 485kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.49MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.85MB/s]


(torch.Size([60000, 784]),
 torch.Size([60000]),
 torch.Size([10000, 784]),
 torch.Size([10000]))

## 9) Definir el modelo `NaiveSequential` con `softmax` final y el optimizador SGD

In [9]:
def softmax_activation(x):
    """Softmax over the last (class) dimension of ``x``.

    Using ``dim=-1`` instead of a hard-coded ``dim=1`` gives the same result
    for the usual ``(batch, num_classes)`` input, and additionally handles a
    single un-batched vector as well as inputs with extra leading dimensions,
    where the class scores still live on the last axis.
    """
    return F.softmax(x, dim=-1)

# Two-layer network: 784 -> 512 (ReLU) -> 10 (softmax).
# The hidden layer is created first so the random initialisation order is fixed.
hidden_layer = NaiveDense(input_size=28*28, output_size=512, activation=F.relu)
output_layer = NaiveDense(input_size=512, output_size=10, activation=softmax_activation)
model = NaiveSequential([hidden_layer, output_layer])

assert len(model.weights) == 4  # W1, b1, W2, b2

# The model exists now, so the optimizer can be built over its weights
optimizer = torch.optim.SGD(model.weights, lr=learning_rate)
print('Parámetros totales:', sum(map(torch.numel, model.weights)))

Parámetros totales: 407050


## 10) Entrenamiento (epochs=10, batch_size=128)

In [10]:
# Train on the full training set: 10 epochs with mini-batches of 128 samples.
fit(
    model=model,
    images=train_images,
    labels=train_labels,
    epochs=10,
    batch_size=128,
)

Epoch 0
loss at batch 0: 2.32
loss at batch 100: 2.30
loss at batch 200: 2.24
loss at batch 300: 2.21
loss at batch 400: 2.17
Epoch 1
loss at batch 0: 2.14
loss at batch 100: 2.14
loss at batch 200: 2.07
loss at batch 300: 2.04
loss at batch 400: 2.00
Epoch 2
loss at batch 0: 1.96
loss at batch 100: 1.98
loss at batch 200: 1.89
loss at batch 300: 1.86
loss at batch 400: 1.82
Epoch 3
loss at batch 0: 1.77
loss at batch 100: 1.80
loss at batch 200: 1.69
loss at batch 300: 1.67
loss at batch 400: 1.63
Epoch 4
loss at batch 0: 1.57
loss at batch 100: 1.62
loss at batch 200: 1.49
loss at batch 300: 1.48
loss at batch 400: 1.46
Epoch 5
loss at batch 0: 1.38
loss at batch 100: 1.45
loss at batch 200: 1.30
loss at batch 300: 1.31
loss at batch 400: 1.31
Epoch 6
loss at batch 0: 1.22
loss at batch 100: 1.29
loss at batch 200: 1.14
loss at batch 300: 1.16
loss at batch 400: 1.18
Epoch 7
loss at batch 0: 1.08
loss at batch 100: 1.16
loss at batch 200: 1.00
loss at batch 300: 1.04
loss at batch 40… [salida truncada: faltan el resto de la época 7 y las épocas 8 y 9]

## 11) Evaluación: exactitud en el conjunto de prueba

In [11]:
# Inference only: gradient tracking is switched off while scoring the test set.
with torch.no_grad():
    probs = model(test_images)  # softmax already applied by the model
    # The predicted class is the one with the highest probability
    predicted = torch.argmax(probs, dim=1)
    matches = torch.eq(predicted, test_labels)
    # Fraction of correct predictions
    accuracy = matches.to(torch.float32).mean().item()
print(f"accuracy: {accuracy:.2f}")

accuracy: 0.83
