In [1]:
import os
from PIL import Image

import torch
import torch.nn as nn

from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

from tqdm import tqdm

In [2]:
# Fully Convolutional Network Architecture according to the paper
class FullyConvolutionalNetwork(nn.Module):
    def __init__(self, num_classes):
        super(FullyConvolutionalNetwork, self).__init__()
        
        # First convolutional layer with kernel size 12x12, stride of 4, and 64 output channels
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=12, stride=4, padding=1)
        
        # Second convolutional layer with kernel size 4x4, stride of 1 (default), and 112 output channels
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=112, kernel_size=4, padding=1)
        
        # Third convolutional layer with kernel size 3x3, stride of 1 (default), and 80 output channels
        self.conv3 = nn.Conv2d(in_channels=112, out_channels=80, kernel_size=3, padding=1)
        
        # Deconvolutional layer to upsample the feature map to 16x16
        self.deconv = nn.ConvTranspose2d(in_channels=80, out_channels=num_classes, kernel_size=4, stride=2, padding=1)

    def forward(self, x):
        # Apply first convolutional layer and activation function
        x = nn.functional.relu(self.conv1(x))
        
        # Apply second convolutional layer and activation function
        x = nn.functional.relu(self.conv2(x))
        
        # Apply third convolutional layer and activation function
        x = nn.functional.relu(self.conv3(x))
        
        # Upsample the feature map to 16x16 using the deconvolutional layer
        x = self.deconv(x)
        return x


In [3]:
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Resize images to 256x256
    transforms.ToTensor(),  # Convert images to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize images
])

In [4]:
class CustomDataset():
    def __init__(self, data_dir, label_dir, transform_image, transform_mask):
        
        # Asignar a las variables de la instancia los valores de los parámetros
        self.image_dir = data_dir
        self.mask_dir = label_dir
        self.transform_image = transform_image
        self.transform_mask = transform_mask
        # Listar todos los archivos en el directorio de imágenes. Se asume que cada imagen tiene su correspondiente máscara.
        self.images = os.listdir(data_dir)

    def __len__(self):
        # Devuelve el número total de imágenes en el conjunto de datos
        return len(self.images)

    def __getitem__(self, idx):
        # Construir las rutas completas para la imagen y la máscara usando el índice proporcionado
        img_name = os.path.join(self.image_dir, self.images[idx])
        mask_name = os.path.join(self.mask_dir, self.images[idx])
        
        # Abrir la imagen y convertirla a color RGB
        image = Image.open(img_name).convert("RGB")
        # Abrir la máscara y convertirla a escala de grises para que tenga un solo canal
        mask = Image.open(mask_name).convert("L")  

        # Aplicar las transformaciones especificadas a la imagen y a la máscara, si se han proporcionado
        if self.transform_image:
            image = self.transform_image(image)
        if self.transform_mask:
            mask = self.transform_mask(mask)

        # Devolver la imagen y la máscara procesadas
        return image, mask

In [5]:
# from custom_dataset import CustomDataset  # You need to create a CustomDataset class if ImageFolder doesn't work

current_directory = os.getcwd()
data_dir = os.path.join(current_directory, 'MBDS/png/train')
label_dir = os.path.join(current_directory, 'MBDS/png/train_labels')

dataset = CustomDataset(data_dir, label_dir, transform, transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = FullyConvolutionalNetwork(num_classes=10).to(device)  # Adjust num_classes accordingly
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [7]:
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    train_tqdm = tqdm(dataloader, desc=f'Epoch {epoch+1}/{num_epochs} Training', leave=True)
    for image, mask in train_tqdm:
        image, mask = image.to(device), mask.to(device)
        
        # Forward pass
        outputs = model(image)
        loss = criterion(outputs, mask)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch 1/5 Training:   0%|          | 0/5 [00:00<?, ?it/s]


RuntimeError: output with shape [1, 64, 64] doesn't match the broadcast shape [3, 64, 64]