# Tutorial de TensorBoard y Checkpointing de Modelos

## Objetivo
Este notebook demuestra cómo usar TensorBoard para monitorear métricas y cómo implementar checkpointing durante el entrenamiento de modelos de machine learning.

In [None]:
# Instalación de librerías requeridas
!pip install torch torchvision tensorboard

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import os

In [None]:
# Example model: a small fully connected network
class SimpleModel(nn.Module):
    """Two-layer perceptron: Linear(10 -> 20), ReLU, Linear(20 -> 5)."""

    def __init__(self):
        super().__init__()
        # Layers are created in forward order and wrapped in a Sequential, so
        # they are registered under the indices 0, 1, 2 of `linear_stack`.
        layers = [
            nn.Linear(10, 20),
            nn.ReLU(),
            nn.Linear(20, 5),
        ]
        self.linear_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the layer stack and return the result."""
        out = self.linear_stack(x)
        return out

In [None]:
# Make sure the folder that will receive the checkpoints exists
os.makedirs('checkpoints', exist_ok=True)

# TensorBoard writer; event files for this run go under runs/experiment_1
writer = SummaryWriter(log_dir='runs/experiment_1')

In [None]:
# Training function with TensorBoard logging and checkpointing
def train_model(model, epochs=100, checkpoint_every=10,
                checkpoint_dir='checkpoints', summary_writer=None):
    """Train `model` on random data, logging to TensorBoard and saving checkpoints.

    Every epoch draws a fresh random batch (32 samples with 10 inputs and
    5 targets), performs one Adam step (lr=0.01) against an MSE loss, and logs
    the loss plus a histogram of `model.linear_stack[0].weight`.

    A checkpoint is written every `checkpoint_every` epochs (starting at
    epoch 0) and additionally after the last epoch, so the final trained
    state is always saved.

    Args:
        model: Module to train. It must expose `linear_stack`, whose first
            entry has a `weight` attribute.
        epochs: Number of training steps to run.
        checkpoint_every: Interval, in epochs, between checkpoints.
        checkpoint_dir: Directory that receives the checkpoint files; it is
            created if it does not exist.
        summary_writer: Object providing `add_scalar`, `add_histogram` and
            `close`. When None, the module-level `writer` is used.

    Raises:
        ValueError: If `checkpoint_every` is smaller than 1.

    Note:
        The writer that was used is closed before the function returns,
        including when training raises.
    """
    if checkpoint_every < 1:
        raise ValueError("checkpoint_every must be a positive integer")

    tb_writer = writer if summary_writer is None else summary_writer

    try:
        # Do not rely on another cell having created the directory.
        os.makedirs(checkpoint_dir, exist_ok=True)

        # Loss and optimizer
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01)

        # Make sure the module is in training mode.
        model.train()

        for epoch in range(epochs):
            # Synthetic batch for this step
            inputs = torch.randn(32, 10)
            targets = torch.randn(32, 5)

            # Training step
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Read the scalar once; it is used for logging and checkpointing.
            loss_value = loss.item()

            # Log metrics to TensorBoard
            tb_writer.add_scalar('Training Loss', loss_value, epoch)
            tb_writer.add_histogram('Model Weights', model.linear_stack[0].weight, epoch)

            # Periodic checkpoint, plus one for the last epoch so the final
            # state is not lost when `epochs - 1` is off the interval.
            is_last_epoch = epoch == epochs - 1
            if epoch % checkpoint_every == 0 or is_last_epoch:
                checkpoint = {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss_value
                }
                checkpoint_path = os.path.join(
                    checkpoint_dir, f'model_checkpoint_epoch_{epoch}.pth'
                )
                torch.save(checkpoint, checkpoint_path)
    finally:
        # Close the TensorBoard writer even if a training step failed.
        tb_writer.close()

In [None]:
# Seed PyTorch's global RNG so the initial weights and the synthetic batches
# are the same on every Restart & Run All.
torch.manual_seed(42)

# Instantiate the model and train it
model = SimpleModel()
train_model(model)

In [None]:
# Load a checkpoint
def load_checkpoint(checkpoint_path, model=None, optimizer=None):
    """Restore model and optimizer state from a checkpoint file.

    The file is expected to hold a dict with the keys `epoch`,
    `model_state_dict`, `optimizer_state_dict` and `loss`.

    Args:
        checkpoint_path: Path of the checkpoint file to read.
        model: Module to load the weights into. When None, a new
            `SimpleModel` is created.
        optimizer: Optimizer to load the state into. When None, a new Adam
            optimizer over `model.parameters()` is created.

    Returns:
        A `(model, optimizer)` tuple with the restored state.
    """
    # map_location='cpu' keeps checkpoints saved on a GPU loadable on a
    # CPU-only host. weights_only=True restricts unpickling to tensors and
    # plain containers, which is all this checkpoint format contains, so a
    # tampered file cannot execute arbitrary code while being loaded.
    checkpoint = torch.load(checkpoint_path, map_location='cpu', weights_only=True)

    if model is None:
        model = SimpleModel()
    model.load_state_dict(checkpoint['model_state_dict'])

    if optimizer is None:
        optimizer = optim.Adam(model.parameters())
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    print(f"Loaded checkpoint from epoch {checkpoint['epoch']}")
    print(f"Last training loss: {checkpoint['loss']}")

    return model, optimizer

In [None]:
# Show how to launch TensorBoard
print(
    "Para iniciar TensorBoard, ejecuta en la terminal:",
    "tensorboard --logdir=runs",
    sep="\n",
)

## Pasos para usar TensorBoard

1. Instalar TensorBoard: `pip install tensorboard`
2. Ejecutar en la terminal, desde el directorio de trabajo del notebook (donde se crea la carpeta `runs`): `tensorboard --logdir=runs`
3. Abrir el navegador en: `http://localhost:6006`

## Beneficios de Checkpointing
- Guardar progreso de entrenamiento
- Reanudar entrenamiento desde un punto específico
- Proteger contra fallos o interrupciones