In [None]:
# %pip install torch torchvision numpy opencv-python albumentations matplotlib

In [9]:
import torch
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image

In [10]:
CLASS_COLORS = {
    (237, 34, 236): 1,  # Advertisement
    (201, 158, 74): 2,  # Field
    (96, 32, 192): 3,   # Football
    (89, 134, 179): 4,  # Goal Bar
    (153, 223, 219): 5, # Goalkeeper 1
    (255, 106, 77): 6,  # Goalkeeper 2
    (22, 100, 252): 7,  # Referee
    (143, 182, 45): 8,  # Spectators
    (38, 198, 129): 9,  # Team 1
    (27, 154, 218): 10, # Team 2
    (0, 0, 0): 0        # Background
}

In [11]:
class SegmentationDataset(Dataset):
    def __init__(self, image_dir, mask_dir, tolerance=10):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.images = sorted(os.listdir(image_dir))
        self.tolerance = tolerance

        self.transform_image = transforms.Compose([
            transforms.ToTensor(),  # Convierte PIL Image a Tensor
        ])

    def __len__(self):
        return len(self.images)

    def _mask_to_class(mask, class_colors, tolerance=30):
        """
        Convierte una máscara RGB en una matriz de etiquetas de clase, con tolerancia en la comparación de colores.
        
        Args:
            mask: Imagen de la máscara en formato numpy array (H, W, 3).
            class_colors: Diccionario de colores {(R,G,B): clase_id}.
            tolerance: Rango de tolerancia en la comparación de colores.
        
        Returns:
            class_mask: Matriz de clases con dimensiones (H, W).
        """
        mask = np.array(mask, dtype=np.uint8)  # Asegurar que la máscara es uint8
        class_mask = np.zeros(mask.shape[:2], dtype=np.uint8)  # Crear matriz de etiquetas (solo H, W)

        unique_colors = np.unique(mask.reshape(-1, 3), axis=0)
        print(f"📌 Valores únicos en la máscara antes de conversión: {unique_colors}")

        for rgb_value, class_id in class_colors.items():
            # Crear un rango de tolerancia para cada color
            lower_bound = np.clip(np.array(rgb_value) - tolerance, 0, 255)
            upper_bound = np.clip(np.array(rgb_value) + tolerance, 0, 255)

            matches = np.all((mask >= lower_bound) & (mask <= upper_bound), axis=-1)
            class_mask[matches] = class_id  # Asignar la clase correspondiente

        print(f"📌 Valores únicos en la máscara después de conversión: {np.unique(class_mask)}")

        return class_mask

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.mask_dir, self.images[idx].replace('.jpg', '.png'))

        # 📌 Leer la imagen y la máscara en RGB
        image = Image.open(img_path).convert("RGB")  
        mask = Image.open(mask_path).convert("RGB")  

        # 📌 Convertir la máscara a clases usando self._mask_to_class
        mask = self._mask_to_class(mask, CLASS_COLORS, tolerance=self.tolerance)  # ✅ SOLUCIÓN

        # Convertir a tensores de PyTorch
        image = self.transform_image(image)
        mask = torch.tensor(mask, dtype=torch.long)

        return image, mask

In [None]:
image_files = sorted(os.listdir("../dataset/train/images/"))
mask_files = sorted(os.listdir("../dataset/train/masks/"))

for img, mask in zip(image_files, mask_files):
    print(f"Imagen: {img}  -->  Máscara: {mask}")

In [12]:
train_images = "../dataset/train/images/"
train_masks = "../dataset/train/masks/"

train_dataset = SegmentationDataset(train_images, train_masks)
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)

# Obtener una muestra
sample_img, sample_mask = train_dataset[0]

# Visualizar imagen y máscara
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
ax[0].imshow(sample_img.permute(1, 2, 0))  # Convertir de (C, H, W) a (H, W, C)
ax[0].set_title("Imagen")

ax[1].imshow(sample_mask, cmap="jet")  # Mostrar máscara con colormap
ax[1].set_title("Máscara")

plt.show()

TypeError: SegmentationDataset._mask_to_class() got multiple values for argument 'tolerance'

In [None]:
import torch.nn as nn

class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.conv(x)

class UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=11):
        super(UNet, self).__init__()
        self.encoder = nn.Sequential(
            DoubleConv(in_channels, 64),
            nn.MaxPool2d(2),
            DoubleConv(64, 128),
            nn.MaxPool2d(2),
        )
        self.middle = DoubleConv(128, 256)
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2),
            DoubleConv(128, 128),
            nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2),
            DoubleConv(64, 64),
        )
        self.final = nn.Conv2d(64, out_channels, kernel_size=1)

    def forward(self, x):
        x1 = self.encoder(x)
        x2 = self.middle(x1)
        x3 = self.decoder(x2)
        return self.final(x3)