Entrenar y evaluar la performance de una red con arquitectura U-Net
que realiza una tarea de segmentación semántica

### Preanálisis de Imágenes

In [1]:
import os
import cv2
import numpy as np
from collections import Counter
from tqdm import tqdm
import matplotlib.pyplot as plt

IMG_DIR = "oxford-pets/oxford-iiit-pet/images/images"
MASK_DIR = "oxford-pets/oxford-iiit-pet/annotations/annotations/trimaps"

# Accumulators filled by the two directory scans below.
img_exts, mask_exts = [], []
img_shapes, mask_shapes = [], []
img_channels = []

# Image scan: every directory entry contributes its extension; only entries
# that OpenCV manages to decode contribute a (width, height) and a channel count.
for name in tqdm(os.listdir(IMG_DIR), desc="Analizando imágenes"):
    img_exts.append(os.path.splitext(name)[1].lower())
    img = cv2.imread(os.path.join(IMG_DIR, name), cv2.IMREAD_UNCHANGED)
    if img is not None:
        height, width = img.shape[:2]
        img_shapes.append((width, height))
        # A 2-D array has no channel axis, so it is counted as one channel.
        img_channels.append(1 if img.ndim == 2 else img.shape[2])

# Mask scan: same idea, but shapes are stored as returned by OpenCV,
# i.e. (height, width).
for name in tqdm(os.listdir(MASK_DIR), desc="Analizando máscaras"):
    mask_exts.append(os.path.splitext(name)[1].lower())
    mask = cv2.imread(os.path.join(MASK_DIR, name), cv2.IMREAD_UNCHANGED)
    if mask is not None:
        mask_shapes.append(mask.shape[:2])

def resumen(lista, name):
    """Print the number of occurrences of each distinct value in ``lista``.

    Args:
        lista: iterable of hashable values to tally.
        name: label used in the header line of the report.

    Values are listed in order of first appearance.
    """
    print(f"\n{name} únicos:")
    conteo = Counter(lista)
    for valor in conteo:
        print(f"  {valor}: {conteo[valor]}")

# ---- Image report: count, extensions, channel counts and size statistics ----
print("ANÁLISIS DE IMÁGENES")
print(f"Total de imágenes: {len(img_shapes)}")
resumen(img_exts, "Extensiones")
resumen(img_channels, "Canales")

# img_shapes holds (width, height) pairs.
w, h = zip(*img_shapes)
mean_w, mean_h = np.mean(w), np.mean(h)
print(f"Ancho promedio: {mean_w:.1f}, Alto promedio: {mean_h:.1f}")
print(f"Dimensiones mínimas: ({min(w)}, {min(h)}) | máximas: ({max(w)}, {max(h)})")

# ---- Mask report ----
print("\nANÁLISIS DE MÁSCARAS")
print(f"Total de máscaras: {len(mask_shapes)}")
resumen(mask_exts, "Extensiones")

# mask_shapes holds (height, width) pairs, hence the swapped unpacking order.
mh, mw = zip(*mask_shapes)
mean_mw, mean_mh = np.mean(mw), np.mean(mh)
print(f"Ancho promedio: {mean_mw:.1f}, Alto promedio: {mean_mh:.1f}")
print(f"Dimensiones mínimas: ({min(mw)}, {min(mh)}) | máximas: ({max(mw)}, {max(mh)})")


Analizando imágenes: 100%|██████████| 7393/7393 [00:25<00:00, 284.49it/s]
Analizando máscaras: 100%|██████████| 14780/14780 [00:49<00:00, 298.74it/s] 

ANÁLISIS DE IMÁGENES
Total de imágenes: 7390

Extensiones únicos:
  .jpg: 7390
  .mat: 3

Canales únicos:
  3: 7383
  4: 4
  1: 3
Ancho promedio: 436.7, Alto promedio: 390.9
Dimensiones mínimas: (114, 103) | máximas: (3264, 2606)

ANÁLISIS DE MÁSCARAS
Total de máscaras: 7390

Extensiones únicos:
  .png: 14780
Ancho promedio: 436.7, Alto promedio: 390.9
Dimensiones mínimas: (114, 103) | máximas: (3264, 2606)





In [None]:
# Display every image that decodes with 4 channels.
# The original loop compared the whole `img_channels` list with 4 (always
# False) and called plt.imshow() without an image. `img_channels` does not
# keep the file names, so the folder is scanned again here.
for fname in os.listdir(IMG_DIR):
    img = cv2.imread(os.path.join(IMG_DIR, fname), cv2.IMREAD_UNCHANGED)
    if img is None or img.ndim != 3 or img.shape[2] != 4:
        continue
    plt.figure()
    # OpenCV returns channels in BGRA order; matplotlib expects RGBA.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA))
    plt.title(fname)
    plt.axis('off')
    plt.show()

### Red U-NET

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import numpy as np
import os
import random

# Hyper-parameters
# IMG_SIZE: side length (pixels) images and masks are resized to.
# BATCH_SIZE: default batch size of the tf.data pipelines.
# NUM_CLASSES: number of output channels of the network / classes in the IoU metric.
# EPOCHS: upper bound on training epochs (early stopping may end training sooner).
# VAL_SPLIT / TEST_SPLIT: fraction of the data reserved for validation / test.
IMG_SIZE = 128
BATCH_SIZE = 16
NUM_CLASSES = 3
EPOCHS = 100
VAL_SPLIT = 0.15
TEST_SPLIT = 0.15

# Dataset locations.
# NOTE(review): these are absolute, machine-specific paths, while the analysis
# cell uses relative paths that look like the same dataset - confirm and unify.
IMAGES_DIR = r"C:\Users\anapa\Documents\DeepLearning\Practica_3\oxford-pets\oxford-iiit-pet\images\images"
MASKS_DIR  = r"C:\Users\anapa\Documents\DeepLearning\Practica_3\oxford-pets\oxford-iiit-pet\annotations\annotations\trimaps"

# Load and pair image / mask files.
# Hidden "._*" entries are skipped in both folders (the original filtered them
# only for masks). The former `not f.endswith(".mat")` test was redundant: a
# name ending in ".jpg" cannot end in ".mat".
all_images = sorted(
    os.path.join(IMAGES_DIR, f)
    for f in os.listdir(IMAGES_DIR)
    if f.endswith(".jpg") and not f.startswith("._")
)

all_masks = sorted(
    os.path.join(MASKS_DIR, f)
    for f in os.listdir(MASKS_DIR)
    if f.endswith(".png") and not f.startswith("._")
)

# Equal length alone does not prove that the i-th image belongs to the i-th
# mask, so the base names are compared as well. Explicit raises are used
# because `assert` statements are stripped when Python runs with -O.
if len(all_images) != len(all_masks):
    raise ValueError("Cantidad de imágenes y máscaras no coincide")

mismatched = [
    (img, msk)
    for img, msk in zip(all_images, all_masks)
    if os.path.splitext(os.path.basename(img))[0]
    != os.path.splitext(os.path.basename(msk))[0]
]
if mismatched:
    raise ValueError(
        f"Imagen y máscara con nombres distintos: {mismatched[0][0]} / {mismatched[0][1]}"
    )

# Split into train / val / test.
# A dedicated, seeded RNG makes the split reproducible across runs (so the
# test set stays the same) without touching the global `random` state.
SPLIT_SEED = 42
data = list(zip(all_images, all_masks))
random.Random(SPLIT_SEED).shuffle(data)

n_total = len(data)
n_test = int(TEST_SPLIT * n_total)
n_val = int(VAL_SPLIT * n_total)
n_train = n_total - n_val - n_test

train_data = data[:n_train]
val_data = data[n_train:n_train + n_val]
test_data = data[n_train + n_val:]

# Función para cargar imagen y máscara
def load_image_mask(image_path, mask_path):
    """Read one image / trimap pair and resize both to IMG_SIZE x IMG_SIZE.

    Args:
        image_path: path of the image file, decoded as 3 channels.
        mask_path: path of the trimap PNG, decoded as a single channel.

    Returns:
        (image, mask): the resized image as float32 and the resized mask as
        int32 with 1 subtracted from every label, i.e. (1, 2, 3) -> (0, 1, 2).

    NOTE(review): per the TensorFlow docs, tf.image.resize with its default
    method already returns float32, so convert_image_dtype should not rescale
    here and pixel values presumably stay in [0, 255] rather than [0, 1].
    show_predictions casts the images straight to uint8, which relies on that
    range - confirm before adding normalization.
    """
    target_size = [IMG_SIZE, IMG_SIZE]

    # Image branch
    rgb = tf.io.decode_jpeg(tf.io.read_file(image_path), channels=3)
    image = tf.image.convert_image_dtype(
        tf.image.resize(rgb, target_size), tf.float32
    )

    # Mask branch: nearest-neighbour resizing so that no new label values
    # are produced by interpolation.
    trimap = tf.io.decode_png(tf.io.read_file(mask_path), channels=1)
    trimap = tf.image.resize(trimap, target_size, method='nearest')
    mask = tf.cast(trimap, tf.int32) - 1  # (1, 2, 3) -> (0, 1, 2)

    return image, mask

def create_tf_dataset(data_list, batch_size=BATCH_SIZE):
    """Build a batched, prefetched tf.data pipeline from (image, mask) path pairs.

    Args:
        data_list: sequence of (image_path, mask_path) tuples.
        batch_size: number of samples per batch.

    Returns:
        A tf.data.Dataset yielding (images, masks) batches produced by
        load_image_mask. No shuffling step is applied.
    """
    image_paths, mask_paths = zip(*data_list)
    path_pairs = (list(image_paths), list(mask_paths))

    pipeline = tf.data.Dataset.from_tensor_slices(path_pairs)
    pipeline = pipeline.map(load_image_mask, num_parallel_calls=tf.data.AUTOTUNE)
    pipeline = pipeline.batch(batch_size)
    return pipeline.prefetch(tf.data.AUTOTUNE)

# Build one pipeline per split (train, validation, test), in that order.
train_dataset, val_dataset, test_dataset = (
    create_tf_dataset(split) for split in (train_data, val_data, test_data)
)

# Bloque convolucional
def conv_block(inputs, filters):
    """Apply two stacked 3x3 'same'-padded ReLU convolutions.

    Args:
        inputs: input tensor of the block.
        filters: number of filters used by both convolutions.

    Returns:
        The output tensor of the second convolution.
    """
    x = inputs
    for _ in range(2):
        x = layers.Conv2D(filters, 3, padding='same', activation='relu')(x)
    return x

# Modelo U-Net
def unet_model(input_size=(IMG_SIZE, IMG_SIZE, 3), num_classes=NUM_CLASSES):
    """Build a U-Net with four encoder stages, a bottleneck and four decoder stages.

    Args:
        input_size: shape of one input sample passed to layers.Input.
        num_classes: number of channels of the final softmax layer.

    Returns:
        An uncompiled Keras Model mapping the input to per-pixel class
        probabilities.
    """
    encoder_filters = (64, 128, 256, 512)
    inputs = layers.Input(input_size)

    # Encoder: conv block followed by 2x2 max pooling; the pre-pooling
    # features are kept for the skip connections.
    x = inputs
    skips = []
    for filters in encoder_filters:
        features = conv_block(x, filters)
        skips.append(features)
        x = layers.MaxPooling2D((2, 2))(features)

    # Bottleneck
    x = conv_block(x, 1024)

    # Decoder: transposed-conv upsampling, concatenation with the matching
    # encoder features (deepest first), then a conv block.
    for filters, skip in zip(reversed(encoder_filters), reversed(skips)):
        x = layers.Conv2DTranspose(filters, 2, strides=2, padding='same')(x)
        x = layers.concatenate([x, skip])
        x = conv_block(x, filters)

    # 1x1 convolution producing one softmax score per class and pixel.
    outputs = layers.Conv2D(num_classes, 1, activation='softmax')(x)
    return models.Model(inputs, outputs)


# Metrica IoU personalizada
def iou_metric(y_true, y_pred):
    """Mean IoU over NUM_CLASSES classes for integer (sparse) labels.

    Args:
        y_true: label tensor with a trailing channel axis of size 1.
        y_pred: per-class score tensor; the arg-max over the last axis is
            taken as the predicted class.

    Returns:
        Scalar tensor: the average of the per-class IoU values computed over
        all pixels passed in. A class that appears in neither the labels nor
        the predictions gets an IoU of 1.0.
    """
    labels = tf.cast(tf.squeeze(y_true, axis=-1), tf.int32)
    predicted = tf.cast(tf.argmax(y_pred, axis=-1), tf.int32)

    def class_iou(class_id):
        in_true = tf.equal(labels, class_id)
        in_pred = tf.equal(predicted, class_id)
        overlap = tf.reduce_sum(tf.cast(tf.logical_and(in_true, in_pred), tf.float32))
        combined = tf.reduce_sum(tf.cast(tf.logical_or(in_true, in_pred), tf.float32))
        # An empty union (class absent everywhere) is scored as a perfect match.
        return tf.where(tf.equal(combined, 0.0), 1.0, overlap / combined)

    return tf.reduce_mean([class_iou(class_id) for class_id in range(NUM_CLASSES)])

# Name under which Keras logs the custom metric ('iou_metric' / 'val_iou_metric').
iou_metric.__name__ = 'iou_metric'

# Build and compile the network; labels are integer class ids, hence the
# sparse variant of the cross-entropy loss.
model = unet_model()
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy', iou_metric],
)
model.summary()

# Stop once the validation loss has not improved for 5 epochs and roll back
# to the best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Training
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
    callbacks=[early_stop],
)

# Plot the training curves: one panel per logged quantity, each showing the
# training and the validation series.
curve_panels = [
    # (history key, train label, val label, title, y-axis label)
    ('loss', 'Train Loss', 'Val Loss', 'Loss por época', 'Loss'),
    ('accuracy', 'Train Acc', 'Val Acc', 'Accuracy por época', 'Accuracy'),
    ('iou_metric', 'Train IoU', 'Val IoU', 'Mean IoU por época', 'Mean IoU'),
]

plt.figure(figsize=(15, 5))
for position, (key, train_label, val_label, title, y_label) in enumerate(curve_panels, start=1):
    plt.subplot(1, 3, position)
    plt.plot(history.history[key], label=train_label)
    plt.plot(history.history['val_' + key], label=val_label)
    plt.title(title)
    plt.xlabel('Época')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

# Evaluate on train / val / test.
# return_dict=True makes Keras pair every value with its own metric name.
# The original zipped model.metrics_names with the list returned by
# evaluate(); NOTE(review): on newer Keras releases metrics_names may not list
# each compiled metric individually, which would mislabel or drop values.
train_results = model.evaluate(train_dataset, verbose=0, return_dict=True)
val_results = model.evaluate(val_dataset, verbose=0, return_dict=True)
test_results = model.evaluate(test_dataset, verbose=0, return_dict=True)

# Keep the list-shaped names this cell exposed before, in case later cells
# read them.
metric_names = list(train_results)
train_metrics = [train_results[name] for name in metric_names]
val_metrics = [val_results[name] for name in metric_names]
test_metrics = [test_results[name] for name in metric_names]

print("\nResultados finales:")
for header, results in (
    ("Entrenamiento:", train_results),
    ("\nValidación:", val_results),
    ("\nTest:", test_results),
):
    print(header)
    for name, value in results.items():
        print(f"  {name:25s}: {value:.4f}")


In [None]:
from tensorflow.keras.utils import plot_model

# Render the architecture diagram to a PNG file. The call is kept as the last
# expression of the cell so that its return value is still displayed.
diagram_options = {
    "to_file": "unet_architecture.png",
    "show_shapes": True,
    "show_layer_names": True,
    "show_layer_activations": True,
    "dpi": 98,
}
plot_model(model, **diagram_options)


In [None]:
def _count_true(condition):
    """Return the number of True entries of a boolean tensor as a Python int."""
    return int(tf.reduce_sum(tf.cast(condition, tf.int64)))


def compute_metrics(model, dataset, num_classes=NUM_CLASSES):
    """Compute pixel accuracy and mean IoU of ``model`` over a whole dataset.

    Pixel counts are accumulated over every batch and the ratios are taken
    once at the end. Compared with averaging per-batch values this
      * weights every pixel equally even when the last batch is smaller, and
      * does not award an IoU of 1.0 to a class merely because it is absent
        from one particular batch.
    As a consequence the IoU reported here is a dataset-level value and can
    differ from the batch-averaged ``iou_metric`` logged by Keras.

    Args:
        model: trained model; called on each image batch.
        dataset: tf.data.Dataset yielding (images, masks) batches, where the
            masks carry a trailing channel axis of size 1.
        num_classes: number of classes in the segmentation.

    Returns:
        (mean_acc, mean_iou) as Python floats. Both are NaN when the dataset
        yields no pixels or ``num_classes`` is 0. A class that never occurs in
        the labels nor in the predictions contributes an IoU of 1.0.
    """
    correct_pixels = 0
    total_pixels = 0
    intersections = [0] * num_classes
    unions = [0] * num_classes

    for images, masks in dataset:
        # Direct call instead of model.predict(): the Keras docs recommend it
        # for single batches processed inside a Python loop.
        probs = model(images, training=False)
        preds_classes = tf.cast(tf.argmax(probs, axis=-1), tf.int32)
        masks_squeezed = tf.cast(tf.squeeze(masks, axis=-1), tf.int32)

        # --- Accuracy counts ---
        correct_pixels += _count_true(preds_classes == masks_squeezed)
        total_pixels += int(tf.size(masks_squeezed))

        # --- Per-class intersection / union counts ---
        for i in range(num_classes):
            in_pred = preds_classes == i
            in_true = masks_squeezed == i
            intersections[i] += _count_true(in_pred & in_true)
            unions[i] += _count_true(in_pred | in_true)

    if total_pixels == 0 or num_classes == 0:
        return float("nan"), float("nan")

    mean_acc = correct_pixels / total_pixels
    class_ious = [
        1.0 if union == 0 else inter / union
        for inter, union in zip(intersections, unions)
    ]
    mean_iou = sum(class_ious) / len(class_ious)

    return mean_acc, mean_iou

# Accuracy / IoU for each split, evaluated in the order train, val, test.
(train_acc, train_iou), (val_acc, val_iou), (test_acc, test_iou) = (
    compute_metrics(model, split)
    for split in (train_dataset, val_dataset, test_dataset)
)

print(f"Train -> Accuracy: {train_acc:.4f}, IoU: {train_iou:.4f}")
print(f"Val   -> Accuracy: {val_acc:.4f}, IoU: {val_iou:.4f}")
print(f"Test  -> Accuracy: {test_acc:.4f}, IoU: {test_iou:.4f}")


In [None]:
# Visualize predictions together with per-image metrics
def show_predictions(model, dataset, num=3, num_batches=10):
    """Plot image, ground-truth mask and predicted mask for a few samples.

    Args:
        model: trained segmentation model.
        dataset: tf.data.Dataset yielding (images, masks) batches.
        num: maximum number of samples shown per batch.
        num_batches: number of batches taken from ``dataset`` (10 was
            hard-coded before).

    Each figure is annotated with the pixel accuracy and the mean IoU of that
    sample. The IoU is averaged over all classes of the model output, using
    the same convention as ``iou_metric`` (a class absent from both masks
    counts as 1.0). The previous version treated label 0 as background via
    ``mask > 0``, which does not match the 0-based class ids used elsewhere
    in this file.
    """
    for images, masks in dataset.take(num_batches):
        probs = model.predict(images)
        num_classes = probs.shape[-1]
        preds = tf.argmax(probs, axis=-1)[..., tf.newaxis]

        # A batch (typically the last one) may hold fewer than `num` samples.
        for i in range(min(num, int(images.shape[0]))):
            # NOTE(review): assumes pixel values are in [0, 255] - confirm
            # against the loader before changing its normalization.
            img = tf.cast(images[i], tf.uint8)
            true_mask = tf.cast(masks[i], tf.int32)
            pred_mask = tf.cast(preds[i], tf.int32)

            # Pixel-wise accuracy
            acc = tf.reduce_mean(
                tf.cast(tf.equal(true_mask, pred_mask), tf.float32)
            ).numpy()

            # Mean IoU over classes
            class_ious = []
            for class_id in range(num_classes):
                in_true = tf.equal(true_mask, class_id)
                in_pred = tf.equal(pred_mask, class_id)
                intersection = float(tf.reduce_sum(tf.cast(in_true & in_pred, tf.float32)))
                union = float(tf.reduce_sum(tf.cast(in_true | in_pred, tf.float32)))
                class_ious.append(intersection / union if union > 0 else 1.0)
            iou = sum(class_ious) / len(class_ious) if class_ious else 1.0

            # --- Show the three panels ---
            plt.figure(figsize=(12, 4))
            plt.subplot(1, 3, 1)
            plt.imshow(img)
            plt.title("Imagen")
            plt.axis('off')

            plt.subplot(1, 3, 2)
            plt.imshow(tf.squeeze(true_mask))
            plt.title("Máscara real")
            plt.axis('off')

            plt.subplot(1, 3, 3)
            plt.imshow(tf.squeeze(pred_mask))
            plt.title("Predicción")
            plt.axis('off')

            plt.suptitle(
                f"Accuracy: {acc:.3f}   |   IoU: {iou:.3f}",
                fontsize=12, y=0.005
            )
            plt.tight_layout()
            plt.show()

# Qualitative check on the test split: up to 3 samples per visited batch.
show_predictions(model, test_dataset, num=3)