# Hoja de Trabajo 2 Deep Learning

Hoja de trabajo 2 – Deep Learning

Edwin Ortega 22305 - Esteban Zambrano 22119

Link del repositorio:<br>
https://github.com/EstebanZG999/HDT2_DL

### Task 1 Preparación del conjunto de datos

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Cargar dataset
iris = load_iris()
X, y = iris.data, iris.target

# Normalizar
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Dividir train/val
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Convertir a tensores
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.long)

print("Train shape:", X_train.shape, y_train.shape)
print("Validation shape:", X_val.shape, y_val.shape)


Train shape: torch.Size([120, 4]) torch.Size([120])
Validation shape: torch.Size([30, 4]) torch.Size([30])


### Task 2 Arquitectura modelo

In [2]:
import torch
import torch.nn as nn

def get_activation(name: str) -> nn.Module:
    """Devuelve una activación por nombre."""
    name = name.lower()
    if name == "relu":
        return nn.ReLU()
    if name == "tanh":
        return nn.Tanh()
    if name == "leakyrelu":
        return nn.LeakyReLU()
    if name == "gelu":
        return nn.GELU()
    raise ValueError(f"Activación no soportada: {name}")

class MLPClassifier(nn.Module):
    """
    MLP simple para clasificación en Iris:
    - Arquitectura parametrizable: hidden_layers, activación, dropout.
    - Última capa produce logits (sin softmax); compat. con CrossEntropyLoss.
    """
    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        hidden_layers=(16, 16),
        activation="relu",
        dropout_p=0.0,
        use_batchnorm=False,
    ):
        super().__init__()
        act = get_activation(activation)

        layers = []
        prev = input_dim
        for h in hidden_layers:
            layers.append(nn.Linear(prev, h))
            if use_batchnorm:
                layers.append(nn.BatchNorm1d(h))
            layers.append(act)
            if dropout_p and dropout_p > 0:
                layers.append(nn.Dropout(p=dropout_p))
            prev = h

        # Capa de salida (logits)
        layers.append(nn.Linear(prev, output_dim))

        self.net = nn.Sequential(*layers)

        # Inicialización razonable (Kaiming para ReLU/LeakyReLU, Xavier para otras)
        for m in self.modules():
            if isinstance(m, nn.Linear):
                if activation.lower() in ["relu", "leakyrelu"]:
                    nn.init.kaiming_uniform_(m.weight, nonlinearity="relu")
                else:
                    nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x):
        return self.net(x)

# Instancia recomendada para Iris
input_dim = 4   # iris.features
output_dim = 3  # 3 clases
model = MLPClassifier(
    input_dim=input_dim,
    output_dim=output_dim,
    hidden_layers=(32, 16),   
    activation="relu",        # "relu", "tanh", "gelu", "leakyrelu"
    dropout_p=0.0,
    use_batchnorm=False
)

print(model)
sum_params = sum(p.numel() for p in model.parameters())
print(f"Parámetros totales: {sum_params}")


MLPClassifier(
  (net): Sequential(
    (0): Linear(in_features=4, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=16, bias=True)
    (3): ReLU()
    (4): Linear(in_features=16, out_features=3, bias=True)
  )
)
Parámetros totales: 739


### Task 3 Funciones de Pérdida

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from copy import deepcopy
import math
import random

# --- Utilidades reproducibilidad ---
def set_seed(seed=42):
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
set_seed(42)

# --- DataLoaders ---
train_ds = TensorDataset(X_train, y_train)
val_ds   = TensorDataset(X_val, y_val)
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)
val_loader   = DataLoader(val_ds, batch_size=32, shuffle=False)

# --- Mapeo de pérdidas y transformaciones necesarias ---
class LossAdapter:
    """
    Adapta salida (logits) y targets para cada pérdida:
    - crossentropy: usa logits y targets long (clase)
    - mse: usa softmax(logits) vs one-hot (float)
    - kldiv: usa log_softmax(logits) vs one-hot (float)
    """
    def __init__(self, loss_name: str, num_classes: int):
        self.loss_name = loss_name.lower()
        self.num_classes = num_classes

        if self.loss_name == "crossentropy":
            self.criterion = nn.CrossEntropyLoss()
        elif self.loss_name == "mse":
            self.criterion = nn.MSELoss()
        elif self.loss_name in ("kldiv", "kl", "kl_div"):
            self.criterion = nn.KLDivLoss(reduction="batchmean")
        else:
            raise ValueError(f"Pérdida no soportada: {loss_name}")

    def prepare(self, logits, targets):
        """
        Devuelve (preds_para_loss, targets_para_loss) según la pérdida.
        """
        if self.loss_name == "crossentropy":
            # logits (N,C), targets long (N,)
            return logits, targets

        elif self.loss_name == "mse":
            probs = F.softmax(logits, dim=1)             
            onehot = F.one_hot(targets, self.num_classes).float() 
            return probs, onehot

        elif self.loss_name in ("kldiv", "kl", "kl_div"):
            log_probs = F.log_softmax(logits, dim=1)   
            onehot = F.one_hot(targets, self.num_classes).float()
            return log_probs, onehot

    def __call__(self, logits, targets):
        a, b = self.prepare(logits, targets)
        return self.criterion(a, b)

# --- Métrica de accuracy estándar ---
@torch.no_grad()
def accuracy_from_logits(logits, targets):
    preds = torch.argmax(logits, dim=1)
    return (preds == targets).float().mean().item()

# --- Ciclos de entrenamiento y evaluación ---
def train_one_epoch(model, loader, optimizer, loss_adapter, device="cpu"):
    model.train()
    running_loss, running_acc, n = 0.0, 0.0, 0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        logits = model(xb)
        loss = loss_adapter(logits, yb)
        loss.backward()
        optimizer.step()

        bs = yb.size(0)
        running_loss += loss.item() * bs
        running_acc  += accuracy_from_logits(logits, yb) * bs
        n += bs
    return running_loss / n, running_acc / n

@torch.no_grad()
def evaluate(model, loader, loss_adapter, device="cpu"):
    model.eval()
    total_loss, total_acc, n = 0.0, 0.0, 0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        logits = model(xb)
        loss = loss_adapter(logits, yb)
        bs = yb.size(0)
        total_loss += loss.item() * bs
        total_acc  += accuracy_from_logits(logits, yb) * bs
        n += bs
    return total_loss / n, total_acc / n

# --- Entrenamiento para un experimento con una pérdida dada ---
def fit_with_loss(
    base_model,
    loss_name="crossentropy",
    lr=1e-2,
    epochs=100,
    device="cpu"
):
    model = deepcopy(base_model).to(device)
    loss_adapter = LossAdapter(loss_name, num_classes=3)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    history = {"train_loss": [], "val_loss": [], "train_acc": [], "val_acc": []}

    for ep in range(1, epochs+1):
        tr_loss, tr_acc = train_one_epoch(model, train_loader, optimizer, loss_adapter, device)
        va_loss, va_acc = evaluate(model, val_loader, loss_adapter, device)
        history["train_loss"].append(tr_loss)
        history["val_loss"].append(va_loss)
        history["train_acc"].append(tr_acc)
        history["val_acc"].append(va_acc)

        if ep % 20 == 0 or ep == 1:
            print(f"[{loss_name:>11}] Epoch {ep:3d} | "
                  f"train_loss={tr_loss:.4f} val_loss={va_loss:.4f} | "
                  f"train_acc={tr_acc:.3f} val_acc={va_acc:.3f}")

    final = {
        "loss": loss_name,
        "train_loss": history["train_loss"][-1],
        "val_loss": history["val_loss"][-1],
        "train_acc": history["train_acc"][-1],
        "val_acc": history["val_acc"][-1],
        "history": history,
        "model": model, 
    }
    return final

# --- Ejecutar los tres experimentos solicitados ---
losses_to_try = ["crossentropy", "mse", "kldiv"]  # CE + MSE + KLDiv (3 pérdidas)
results = []
for lname in losses_to_try:
    res = fit_with_loss(model, loss_name=lname, lr=1e-2, epochs=120, device="cpu")
    results.append(res)

# --- Resumen ---
print("\n=== Resumen Task 3 ===")
for r in results:
    print(f"{r['loss']:>11} -> "
          f"train_loss={r['train_loss']:.4f} | val_loss={r['val_loss']:.4f} | "
          f"train_acc={r['train_acc']:.3f} | val_acc={r['val_acc']:.3f}")


[crossentropy] Epoch   1 | train_loss=1.1804 val_loss=0.6215 | train_acc=0.417 val_acc=0.767
[crossentropy] Epoch  20 | train_loss=0.1040 val_loss=0.1667 | train_acc=0.975 val_acc=0.933
[crossentropy] Epoch  40 | train_loss=0.0540 val_loss=0.1091 | train_acc=0.975 val_acc=0.967
[crossentropy] Epoch  60 | train_loss=0.0515 val_loss=0.1084 | train_acc=0.975 val_acc=0.967
[crossentropy] Epoch  80 | train_loss=0.0281 val_loss=0.0966 | train_acc=0.992 val_acc=0.967
[crossentropy] Epoch 100 | train_loss=0.0225 val_loss=0.0960 | train_acc=0.992 val_acc=0.967
[crossentropy] Epoch 120 | train_loss=0.0185 val_loss=0.0949 | train_acc=0.992 val_acc=0.967
[        mse] Epoch   1 | train_loss=0.3081 val_loss=0.2805 | train_acc=0.175 val_acc=0.167
[        mse] Epoch  20 | train_loss=0.0450 val_loss=0.0620 | train_acc=0.917 val_acc=0.867
[        mse] Epoch  40 | train_loss=0.0290 val_loss=0.0468 | train_acc=0.958 val_acc=0.900
[        mse] Epoch  60 | train_loss=0.0213 val_loss=0.0375 | train_acc=0