In [1]:
import torch
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from tqdm import tqdm
import os

Define las transformaciones para las imágenes y crea los conjuntos de datos y los DataLoaders de entrenamiento y prueba:

In [None]:
# Per-split preprocessing. Both splits are resized to 224x224 and normalised
# with the ImageNet channel statistics; only the training split gets a random
# horizontal flip as augmentation.
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = './Datasets/food-101/images'

# The dict is deliberately NOT called `datasets`: that name is the imported
# torchvision module, and rebinding it made this cell fail when run a second
# time. ImageFolder is reached through `torchvision.datasets` so the cell also
# works in a kernel where `datasets` was already overwritten.
image_datasets = {
    'train': torchvision.datasets.ImageFolder(root=f"{data_dir}/train", transform=data_transforms['train']),
    'test': torchvision.datasets.ImageFolder(root=f"{data_dir}/test", transform=data_transforms['test'])
}

# os.cpu_count() may return None; fall back to 0 (load in the main process)
# because DataLoader requires an integer.
num_workers = os.cpu_count() or 0

dataloaders = {
    'train': DataLoader(image_datasets['train'], batch_size=32, shuffle=True, num_workers=num_workers),
    'test': DataLoader(image_datasets['test'], batch_size=32, shuffle=False, num_workers=num_workers)
}

class_names = image_datasets['train'].classes
print(class_names)

Cargar un modelo preentrenado

Usa un modelo como ResNet-50, preentrenado en ImageNet:

In [None]:
# Load ResNet-50 with the original ImageNet weights. Newer torchvision releases
# deprecate `pretrained=True` in favour of the `weights=` enum; fall back to the
# legacy flag when the enum is not available.
if hasattr(models, "ResNet50_Weights"):
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    model = models.resnet50(pretrained=True)

# Freeze the pretrained backbone. Only the new head is given to the optimizer,
# so without this autograd still computes gradients for every backbone layer
# that are never used.
for param in model.parameters():
    param.requires_grad = False

# Replace the final layer to match the Food-101 classes. It is created after
# the freeze, so its parameters keep requires_grad=True.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(class_names))

# Move the model to the GPU when one is available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(f'Using {device}')

Definir el criterio de pérdida y optimizador

In [None]:
# Classification loss applied to the model outputs and integer class labels.
criterion = nn.CrossEntropyLoss()

# Only the parameters of the replacement head (model.fc) are optimised.
optimizer = optim.SGD(
    params=model.fc.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Scale the learning rate by 0.1 after every 7 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=7,
    gamma=0.1,
)

Entrenar el modelo

In [None]:
from tqdm import tqdm

def train_model_verbose_with_accuracy(model, dataloaders, criterion, optimizer, scheduler, num_epochs=25):
    """Train `model` and evaluate it after every epoch, with a progress bar.

    Each epoch runs a 'train' phase (gradients enabled, optimizer stepped per
    batch, scheduler stepped once at the end of the phase) followed by a
    'test' phase (gradients disabled). Per-batch loss/accuracy are shown on
    the progress bar and per-phase averages are printed.

    Args:
        model: The network to train. Batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        dataloaders: Mapping with 'train' and 'test' entries. Each entry must
            be iterable over (inputs, labels) batches, support len(), and
            expose a `.dataset` attribute supporting len().
        criterion: Callable returning the loss for (outputs, labels).
        optimizer: Optimizer updating the trainable parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        num_epochs: Number of epochs to run.

    Returns:
        The same `model` object, as it stands after the last epoch.
    """
    # Use the device the model already lives on instead of depending on a
    # notebook-level `device` global being defined.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        print("-" * 10)

        for phase in ['train', 'test']:
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            loader = dataloaders[phase]
            running_loss = 0.0
            running_corrects = 0

            # One progress-bar tick per batch.
            with tqdm(total=len(loader), desc=f"{phase} phase") as pbar:
                for inputs, labels in loader:
                    inputs, labels = inputs.to(run_device), labels.to(run_device)
                    batch_size = inputs.size(0)

                    optimizer.zero_grad()

                    with torch.set_grad_enabled(is_training):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        if is_training:
                            loss.backward()
                            optimizer.step()

                    # Read each scalar from the device once and reuse it for
                    # both the running totals and the progress bar.
                    batch_loss = loss.item()
                    batch_corrects = (preds == labels).sum().item()

                    # The loss is scaled by the batch size so the epoch figure
                    # below is an average over samples.
                    running_loss += batch_loss * batch_size
                    running_corrects += batch_corrects

                    batch_acc = (batch_corrects / batch_size) * 100
                    pbar.set_postfix({"Loss": batch_loss, "Accuracy": f"{batch_acc:.2f}%"})
                    pbar.update(1)

            if is_training:
                scheduler.step()

            # Per-epoch metrics averaged over the whole split.
            num_samples = len(loader.dataset)
            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples * 100

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.2f}%")

    return model

Entrena el modelo:

In [6]:
# Train for 10 epochs. The function returns the model it was given, so `model`
# is rebound to the trained network.
model = train_model_verbose_with_accuracy(model, dataloaders, criterion, optimizer, scheduler, num_epochs=10)

train phase: 100%|██████████| 2368/2368 [19:19<00:00,  2.04it/s, Loss=2.44, Accuracy=50.00%]


train Loss: 1.9587 Acc: 52.76%


test phase: 100%|██████████| 790/790 [03:16<00:00,  4.01it/s, Loss=1.91, Accuracy=50.00%]  


test Loss: 1.6956 Acc: 57.62%
Epoch 5/10
----------


train phase: 100%|██████████| 2368/2368 [19:12<00:00,  2.05it/s, Loss=1.94, Accuracy=66.67%]


train Loss: 1.8831 Acc: 53.96%


test phase: 100%|██████████| 790/790 [03:14<00:00,  4.06it/s, Loss=1.9, Accuracy=50.00%]    


test Loss: 1.6368 Acc: 58.72%
Epoch 6/10
----------


train phase: 100%|██████████| 2368/2368 [19:02<00:00,  2.07it/s, Loss=1.93, Accuracy=50.00%]


train Loss: 1.8372 Acc: 54.63%


test phase: 100%|██████████| 790/790 [03:19<00:00,  3.95it/s, Loss=1.7, Accuracy=50.00%]    


test Loss: 1.6230 Acc: 59.03%
Epoch 7/10
----------


train phase: 100%|██████████| 2368/2368 [18:59<00:00,  2.08it/s, Loss=1.2, Accuracy=66.67%] 


train Loss: 1.7950 Acc: 55.56%


test phase: 100%|██████████| 790/790 [03:19<00:00,  3.96it/s, Loss=2.21, Accuracy=0.00%]    


test Loss: 1.5848 Acc: 59.50%
Epoch 8/10
----------


train phase: 100%|██████████| 2368/2368 [18:54<00:00,  2.09it/s, Loss=1.95, Accuracy=50.00%] 


train Loss: 1.7458 Acc: 56.91%


test phase: 100%|██████████| 790/790 [03:19<00:00,  3.96it/s, Loss=2.14, Accuracy=0.00%]    


test Loss: 1.5815 Acc: 59.86%
Epoch 9/10
----------


train phase: 100%|██████████| 2368/2368 [18:56<00:00,  2.08it/s, Loss=1.59, Accuracy=66.67%] 


train Loss: 1.7355 Acc: 57.02%


test phase: 100%|██████████| 790/790 [03:18<00:00,  3.97it/s, Loss=1.74, Accuracy=50.00%]   


test Loss: 1.5703 Acc: 59.87%
Epoch 10/10
----------


train phase: 100%|██████████| 2368/2368 [18:58<00:00,  2.08it/s, Loss=2.01, Accuracy=66.67%] 


train Loss: 1.7370 Acc: 56.92%


test phase: 100%|██████████| 790/790 [03:18<00:00,  3.97it/s, Loss=1.74, Accuracy=50.00%]  

test Loss: 1.5556 Acc: 60.25%





Guardar y evaluar el modelo

In [7]:
# Save only the model's state_dict (its parameters and buffers), not the module
# object itself, to a file in the current working directory.
torch.save(model.state_dict(), 'food101_resnet50.pth')

Evalúa el desempeño en el conjunto de prueba:

In [8]:
def evaluate_model(model, dataloader):
    """Compute and print the top-1 accuracy of `model` over `dataloader`.

    The model is switched to evaluation mode (and left in it) and gradients
    are disabled for the whole pass.

    Args:
        model: The network to evaluate. Batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        dataloader: Iterable of (inputs, labels) batches.

    Returns:
        The fraction of correctly classified samples as a float in [0, 1], or
        NaN when the dataloader yields no samples.
    """
    # Use the model's own device rather than a notebook-level `device` global.
    first_param = next(model.parameters(), None)
    eval_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(eval_device), labels.to(eval_device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    # An empty loader has no defined accuracy; report NaN instead of raising
    # ZeroDivisionError.
    accuracy = correct / total if total else float("nan")
    print(f"Accuracy: {accuracy:.4f}")
    return accuracy

# Report the trained model's accuracy on the test split.
evaluate_model(model, dataloaders['test'])

Accuracy: 0.6025
