In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from PIL import Image
import numpy as np

print("PyTorch version:", torch.__version__)
print("GPU available:", torch.cuda.is_available())

# Cargar etiquetas
labels_df = pd.read_csv('labels/labels.csv')

# Codificar etiquetas
label_encoder = LabelEncoder()
labels_df['label'] = label_encoder.fit_transform(labels_df['label'])

# Directorio base de imágenes
base_dir = 'images/'
labels_df['image_path'] = labels_df['image_name'].apply(lambda x: os.path.join(base_dir, x))

# Dividir el dataset en entrenamiento y prueba
train_df, test_df = train_test_split(labels_df, test_size=0.2, random_state=42)

# Función para calcular la media y desviación estándar
class CustomImageDatasetForStats(Dataset):
    def __init__(self, dataframe, transform=None):
        self.df = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.iloc[idx]['image_path']
        image = Image.open(img_name).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image

stats_transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor()
])

stats_dataset = CustomImageDatasetForStats(train_df, transform=stats_transform)
stats_loader = DataLoader(stats_dataset, batch_size=32, shuffle=False, num_workers=4)

def get_mean_and_std(loader):
    mean = 0.
    std = 0.
    total_images_count = 0
    for images in loader:
        batch_samples = images.size(0)  # número de imágenes en el lote
        images = images.view(batch_samples, images.size(1), -1)
        mean += images.mean(2).sum(0)
        std += images.std(2).sum(0)
        total_images_count += batch_samples
    
    mean /= total_images_count
    std /= total_images_count
    return mean, std

mean, std = get_mean_and_std(stats_loader)
print(f"Mean: {mean}")
print(f"Std: {std}")

# Transformaciones para los datos de entrenamiento y prueba
train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean.tolist(), std.tolist())
])

test_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean.tolist(), std.tolist())
])

class CustomImageDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.df = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.iloc[idx]['image_path']
        image = Image.open(img_name).convert("RGB")
        label = self.df.iloc[idx]['label']
        label = torch.tensor(label, dtype=torch.long)
        if self.transform:
            image = self.transform(image)
        return image, label

train_dataset = CustomImageDataset(train_df, transform=train_transform)
test_dataset = CustomImageDataset(test_df, transform=test_transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Cargar modelo ResNet-50 preentrenado
model = models.resnet50(pretrained=True)

# Congelar las capas convolucionales
for param in model.parameters():
    param.requires_grad = False

# Modificar la última capa para que coincida con el número de clases de nuestro problema
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(label_encoder.classes_))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=1e-3, weight_decay=1e-5)

# Scheduler para la tasa de aprendizaje
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.1)

train_losses = []
train_accuracy = []
val_accuracy = []

best_acc = 0.0

# Entrenamiento
for epoch in range(50):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_losses.append(running_loss / len(train_loader))
    train_accuracy.append(100 * correct / total)

    # Evaluación
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_acc = 100 * val_correct / val_total
    val_accuracy.append(val_acc)
    val_loss /= len(test_loader)

    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}, Train Accuracy: {100 * correct / total}%, Val Accuracy: {val_acc}%')

    # Ajustar la tasa de aprendizaje
    scheduler.step(val_loss)

    # Guardar el mejor modelo
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')


PyTorch version: 2.1.2+cu121
GPU available: True
Mean: tensor([0.5136, 0.5687, 0.6049])
Std: tensor([0.2106, 0.1811, 0.1898])




KeyboardInterrupt: 