In [1]:
# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F # FFFFF

# Data loading
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

# Auxiliary functions
from torch.utils.tensorboard import SummaryWriter  # Used for Tensorboard logging
import os
import numpy as np
import matplotlib.pyplot as plt
from math import floor, ceil
import datetime

# Segmentación de imagen mediante arquitectura U-Net

In [3]:
# Project root; every other path below is built relative to it.
PATH_ROOT = os.path.join('.')
# Directory holding the input data.
PATH_DATA = os.path.join(PATH_ROOT, 'data')
# Report directories for saved models, results and per-run output (in that order).
PATH_MODELS, PATH_RESULTS, PATH_RUNS = (
    os.path.join(PATH_ROOT, 'reports', subdir)
    for subdir in ('models', 'results', 'runs')
)

In [4]:
# Each session gets its own directory, named after the date and time it was launched.
# The components are not zero-padded (e.g. 2025_1_5__9_3), matching the existing naming.
date = datetime.datetime.now()
test_name = f'{date.year}_{date.month}_{date.day}__{date.hour}_{date.minute}'
print('Nombre del directorio de pruebas: {}'.format(test_name))
models_folder = os.path.join(PATH_MODELS, test_name)
results_folder = os.path.join(PATH_RESULTS, test_name)
runs_folder = os.path.join(PATH_RUNS, test_name)
for session_folder in (models_folder, results_folder, runs_folder):
    try:
        os.makedirs(session_folder)
    except FileExistsError:
        # Re-running within the same minute reuses the directory. Any other
        # OSError (permissions, read-only disk, ...) propagates instead of
        # being misreported as "already existed".
        print(f'Folder {session_folder} already existed.')


Nombre del directorio de pruebas: 2025_11_12__21_25


In [5]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


## Definición del modelo

Como vamos a repetir varias veces la estructura convolución → convolución → pooling, definimos una clase que la encapsula.

In [None]:
class dobleConvolucionMaxPool(nn.Module):
    """Encoder stage: two 3x3 convolutions (each followed by ReLU), then 2x2 max pooling.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by both convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # kernel_size=3 with padding=1 keeps the spatial size unchanged.
        conv_kwargs = dict(kernel_size=3, padding=1)
        self.convolucion_1 = nn.Conv2d(in_channels, out_channels, **conv_kwargs)
        self.convolucion_2 = nn.Conv2d(out_channels, out_channels, **conv_kwargs)
        # return_indices=True also yields the position of every maximum before
        # pooling, which is what a later unpooling step needs to undo it.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, return_indices=True)

    def forward(self, x):
        """Apply the stage to ``x``.

        Returns:
            A tuple ``(pooled, indices, skip_connection)``: the pooled feature
            map, the max-pooling indices, and the pre-pooling activations.
        """
        features = x
        for conv in (self.convolucion_1, self.convolucion_2):
            features = F.relu(conv(features))
        pooled, indices = self.pool(features)
        return pooled, indices, features

Lo mismo para hacer las deconvoluciones y el unpooling

In [None]:
class dobleDeconvolucionMaxUnpool(nn.Module):
    """Decoder stage: max-unpool, concatenate the skip connection, two 3x3 deconvolutions.

    The stage mirrors an encoder stage whose pooling produced ``indices`` and
    whose pre-pooling activations are ``skip_connection``. Both carry the same
    number of channels as the incoming ``x``, so after concatenation the first
    deconvolution sees ``2 * in_channels`` channels.

    Args:
        in_channels: channels of the incoming tensor ``x`` (and therefore of
            ``indices`` and ``skip_connection``).
        out_channels: channels produced by the stage.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # The first deconvolution runs on [unpooled x, skip_connection], hence 2 * in_channels.
        self.convolucion_1 = nn.ConvTranspose2d(2 * in_channels, out_channels, kernel_size=3, padding=1)
        self.convolucion_2 = nn.ConvTranspose2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.unpool = nn.MaxUnpool2d(kernel_size=2, stride=2)

    def forward(self, x, indices, skip_connection):
        """Upsample ``x`` and fuse it with the skip connection.

        Args:
            x: tensor at the pooled resolution, with the same shape as ``indices``.
            indices: max-pooling indices returned by the matching encoder stage.
            skip_connection: pre-pooling activations of the matching encoder
                stage (a tensor, not just its shape).

        Returns:
            Tensor with ``out_channels`` channels and the spatial size of
            ``skip_connection``.
        """
        # Unpool first: the indices only match x while it still has the pooled
        # shape, and the concatenation below needs both tensors at the same
        # resolution. output_size resolves the ambiguity for odd input sizes.
        x = self.unpool(x, indices, output_size=skip_connection.shape)
        x = torch.cat([x, skip_connection], dim=1)
        x = F.relu(self.convolucion_1(x))
        x = F.relu(self.convolucion_2(x))
        return x

In [None]:
def calcular_num_filtros(num_base_filtros, num_niveles):
    """Return the number of filters per level, doubling at every level.

    Args:
        num_base_filtros: number of filters at the first level.
        num_niveles: number of levels.

    Returns:
        A list of length ``num_niveles`` whose i-th entry is
        ``num_base_filtros * 2**i`` (empty when ``num_niveles`` is 0 or negative).
    """
    return [num_base_filtros * (2**nivel) for nivel in range(num_niveles)]

In [9]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Both layers are created directly on the module-level ``device``.

    Args:
        in_dim: size of the last dimension of the input.
        hidden_sizes: sequence whose first entry is the hidden width and whose
            second entry is the output width. Only these two entries are used.
        bias: whether both linear layers have a bias term.
    """

    # The default is a tuple rather than a list so that it is not a mutable
    # object shared between calls.
    def __init__(self, in_dim, hidden_sizes=(10, 10), bias=True):
        super().__init__()
        self.capa_1 = nn.Linear(in_dim, hidden_sizes[0], bias=bias, device=device)
        self.capa_2 = nn.Linear(hidden_sizes[0], hidden_sizes[1], bias=bias, device=device)

    def forward(self, x):
        """Return ``capa_2(relu(capa_1(x)))``; no activation is applied to the output."""
        x = F.relu(self.capa_1(x))
        return self.capa_2(x)

In [None]:
class UnetModel(nn.Module):
    """U-Net style encoder/decoder built from the double-convolution stages.

    The encoder stacks ``num_niveles`` ``dobleConvolucionMaxPool`` stages, a
    ``MLP`` bottleneck is applied at the lowest resolution, and the decoder
    stacks ``num_niveles`` ``dobleDeconvolucionMaxUnpool`` stages that consume
    the pooling indices and skip connections in reverse order. A final 1x1
    convolution maps ``filtros[0]`` channels to ``num_clases`` channels.

    Note: ``MLP`` creates its layers on the module-level ``device``, so move the
    whole model there (``model.to(device)``) before running it.

    Args:
        num_base_filtros: number of filters of the first level; it doubles at
            every level (``[64, 128, 256, 512, 1024]`` with the defaults).
        num_niveles: number of encoder (and decoder) levels; must be >= 1.
        num_clases: number of output channels (one score map per class).
        in_channels: channels of the input image. The default of 3 assumes RGB
            input -- adjust to the dataset (e.g. 1 for grayscale).

    Raises:
        ValueError: if ``num_niveles`` is smaller than 1.
    """

    def __init__(self, num_base_filtros = 64, num_niveles = 5, num_clases = 10, in_channels = 3):
        super().__init__()
        if num_niveles < 1:
            raise ValueError(f'num_niveles must be >= 1, got {num_niveles}')
        self.encoders = nn.ModuleList()
        self.decoders = nn.ModuleList()
        # [64, 128, 256, 512, 1024] by default; raise num_niveles to try deeper models.
        self.filtros = calcular_num_filtros(num_base_filtros, num_niveles)

        # Encoder `nivel` maps canales_encoder[nivel] -> canales_encoder[nivel + 1];
        # prepending the image channels keeps every index inside the list.
        canales_encoder = [in_channels] + self.filtros
        for nivel in range(num_niveles):
            self.encoders.append(dobleConvolucionMaxPool(in_channels=canales_encoder[nivel], out_channels=canales_encoder[nivel + 1]))

        # Decoders run deepest level first. Decoder `nivel` receives the
        # filtros[nivel] channels produced by the matching encoder and hands
        # filtros[nivel - 1] channels to the next one; the last decoder keeps
        # filtros[0] channels, which is what convolucion_final expects.
        for nivel in reversed(range(num_niveles)):
            canales_salida = self.filtros[nivel - 1] if nivel > 0 else self.filtros[0]
            self.decoders.append(dobleDeconvolucionMaxUnpool(in_channels=self.filtros[nivel], out_channels=canales_salida))

        # The bottleneck output width equals its input width so the result can
        # be fed back into the decoder as a feature map of the same shape.
        self.cuello_botella = MLP(self.filtros[-1], [4096, self.filtros[-1]])
        self.convolucion_final = nn.Conv2d(self.filtros[0], num_clases, kernel_size=1)

    def forward(self, X):
        """Run the network on a batch of images.

        Args:
            X: input tensor laid out as (batch, in_channels, height, width).

        Returns:
            Tensor with ``num_clases`` channels (per-pixel class scores). It has
            the input's spatial size provided each decoder stage restores the
            spatial size of its skip connection.
        """
        indices = []
        skip_connections = []
        for encoder in self.encoders:
            X, indice, skip_connection = encoder(X)
            indices.append(indice)
            skip_connections.append(skip_connection)

        # Apply the MLP over the channel dimension at every spatial position
        # (nn.Linear acts on the last dimension). For a 1x1 feature map this is
        # the same as flattening, but it also works for larger feature maps.
        X = X.permute(0, 2, 3, 1)
        X = self.cuello_botella(X)
        X = X.permute(0, 3, 1, 2).contiguous()

        # Walk back up, pairing each decoder with the indices and the skip
        # tensor of the matching encoder. The decoder needs the tensor itself
        # (it concatenates it and reads its shape), not just the shape.
        for decoder, indice, skip_connection in zip(self.decoders, reversed(indices), reversed(skip_connections)):
            X = decoder(X, indice, skip_connection)
        X = self.convolucion_final(X)
        return X