In [3]:
import multiprocessing
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class CloudDataset(Dataset):
    """Paired image / binary-mask dataset for cloud segmentation.

    Images and masks are read from two directories and paired *by position*
    after sorting the file names of each directory. Both directories must
    therefore hold the same number of files, with names that sort in the same
    order.

    Args:
        images_dir: Directory containing the input images.
        masks_dir: Directory containing the segmentation masks.
        size: ``(height, width)`` every image and mask is resized to.

    Raises:
        ValueError: If the two directories do not contain the same number of
            files with a supported extension.
    """

    # Lower-case file extensions accepted as images or masks.
    EXTENSIONS = ('.png', '.jpg', '.jpeg', '.tif')

    def __init__(self, images_dir, masks_dir, size=(256, 256)):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.size = size  # common size shared by every sample

        self.image_files = self._list_files(images_dir)
        self.mask_files = self._list_files(masks_dir)

        # Pairing is positional, so a count mismatch would silently misalign
        # images and masks (or fail late with an IndexError). Fail early.
        if len(self.image_files) != len(self.mask_files):
            raise ValueError(
                f"Found {len(self.image_files)} images in {images_dir!r} but "
                f"{len(self.mask_files)} masks in {masks_dir!r}; "
                "every image needs exactly one mask."
            )

        # Fixed image pipeline: resize, convert to tensor, normalize per channel.
        self.img_transform = transforms.Compose([
            transforms.Resize(self.size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        # Fixed mask pipeline: NO normalization. Nearest-neighbour resizing
        # avoids blending label values; the result is then binarized.
        # A named function is used instead of a lambda so the dataset can be
        # pickled when DataLoader workers are spawned.
        self.mask_transform = transforms.Compose([
            transforms.Resize(self.size, interpolation=transforms.InterpolationMode.NEAREST),
            transforms.ToTensor(),
            transforms.Lambda(self._binarize)
        ])

    @classmethod
    def _list_files(cls, directory):
        """Return the sorted file names in ``directory`` with a supported extension.

        The extension check is case-insensitive (``.PNG`` and ``.png`` both match).
        """
        return sorted(
            name for name in os.listdir(directory)
            if name.lower().endswith(cls.EXTENSIONS)
        )

    @staticmethod
    def _binarize(mask):
        """Map a mask tensor to {0.0, 1.0} using a 0.5 threshold."""
        return (mask > 0.5).float()

    def __len__(self):
        """Return the number of image files found."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load, transform and return the ``(image, mask)`` pair at ``idx``."""
        img_path = os.path.join(self.images_dir, self.image_files[idx])
        mask_path = os.path.join(self.masks_dir, self.mask_files[idx])

        # Context managers release the underlying file handles right after
        # the pixel data has been converted.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')
        with Image.open(mask_path) as mask_file:
            mask = mask_file.convert('L')

        return self.img_transform(image), self.mask_transform(mask)

# Build the training dataset; it resizes and normalizes every sample itself.
train_dataset = CloudDataset(images_dir="overall-mask", masks_dir="masked", size=(256, 256))

# Stand-alone transform for training images. It is not passed to the dataset
# above, which builds its own equivalent pipeline internally.
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # resize if needed
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # standard normalization
                         std=[0.229, 0.224, 0.225])
])


# DataLoader for batched training.
batch_size = 8
# Worker processes that are not forked (the default on Windows and macOS) must
# pickle the dataset and re-import its class. A class defined in a notebook
# cell cannot be re-imported by those workers, so loading falls back to the
# main process unless the start method is "fork".
num_workers = 2 if multiprocessing.get_start_method() == "fork" else 0
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers  # number of processes used for parallel loading
)

# Sanity check.
print(f"Número de imágenes en el dataset: {len(train_dataset)}")

# Inspect a single batch.
for images, masks in train_loader:
    print(f"Tamaño del batch de imágenes: {images.shape}")
    print(f"Tamaño del batch de máscaras: {masks.shape}")
    print(f"Rango de valores en imágenes: [{images.min():.3f}, {images.max():.3f}]")
    print(f"Valores únicos en máscaras: {torch.unique(masks)}")
    break

import torchvision.utils as vutils

def save_processed_dataset(dataset, output_img_dir, output_mask_dir,
                           mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Write every ``(image, mask)`` pair of ``dataset`` to disk as PNG files.

    Images are written as ``cloud_<i>.png`` and masks as ``mask_<i>.png``,
    where ``<i>`` is the sample index. The per-channel normalization is
    inverted before saving so the stored images are viewable; saving the
    normalized tensor directly would clip it to ``[0, 1]`` and distort it.

    Args:
        dataset: Map-style dataset yielding ``(image, mask)`` tensor pairs.
        output_img_dir: Directory for the images (created if missing).
        output_mask_dir: Directory for the masks (created if missing).
        mean: Per-channel mean used when the images were normalized. Pass
            ``None`` (for ``mean`` or ``std``) to save images unchanged.
        std: Per-channel standard deviation used when the images were
            normalized.
    """
    os.makedirs(output_img_dir, exist_ok=True)
    os.makedirs(output_mask_dir, exist_ok=True)

    print(f"Guardando {len(dataset)} imágenes procesadas...")

    # A plain, unshuffled loader keeps the file index equal to the sample index.
    temp_loader = DataLoader(dataset, batch_size=1, shuffle=False)

    for i, (image, mask) in enumerate(temp_loader):
        img_to_save = image.squeeze(0)  # drop the batch dimension

        if mean is not None and std is not None:
            # Invert Normalize: x = x_norm * std + mean, broadcast per channel.
            mean_t = img_to_save.new_tensor(mean).view(-1, 1, 1)
            std_t = img_to_save.new_tensor(std).view(-1, 1, 1)
            img_to_save = (img_to_save * std_t + mean_t).clamp(0.0, 1.0)

        # Save image and mask.
        vutils.save_image(img_to_save, os.path.join(output_img_dir, f"cloud_{i}.png"))
        vutils.save_image(mask.squeeze(0), os.path.join(output_mask_dir, f"mask_{i}.png"))

    print("¡Hecho! Los datos procesados están listos en las carpetas.")

# Ejecutar:
# save_processed_dataset(train_dataset, "dataset_listo/images", "dataset_listo/masks")


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.4.1 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Amalia\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\Amalia\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\traitlets\config\application.py", line 1075, in laun

AttributeError: _ARRAY_API not found

ImportError: numpy.core.multiarray failed to import

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

class AttentionBlock(nn.Module):
    """Additive attention gate that re-weights skip features with a gating signal.

    Both inputs are projected to ``F_int`` channels with 1x1 convolutions,
    summed, passed through ReLU and reduced to a single-channel sigmoid map
    that multiplies the skip features.

    Args:
        F_g: Number of channels of the gating signal ``g``.
        F_l: Number of channels of the skip features ``x``.
        F_int: Number of channels of the intermediate projection.
    """

    def __init__(self, F_g, F_l, F_int):
        super().__init__()
        self.W_g = self._pointwise(F_g, F_int)
        self.W_x = self._pointwise(F_l, F_int)
        self.psi = nn.Sequential(
            nn.Conv2d(F_int, 1, kernel_size=1),
            nn.BatchNorm2d(1),
            nn.Sigmoid(),
        )
        self.relu = nn.ReLU(inplace=True)

    @staticmethod
    def _pointwise(in_channels, out_channels):
        """Build a 1x1 convolution followed by batch normalization."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1),
            nn.BatchNorm2d(out_channels),
        )

    def forward(self, g, x):
        """Return ``x`` scaled element-wise by the attention map computed from ``g`` and ``x``."""
        combined = self.W_g(g) + self.W_x(x)
        attention = self.psi(self.relu(combined))
        return x * attention

class CloudAttentionUNet(nn.Module):
    """Attention U-Net with a ResNet18 encoder that outputs a cloud probability map.

    The encoder stages come from torchvision's ResNet18. Each decoder stage
    gates the matching encoder feature map with an ``AttentionBlock``,
    concatenates it with the decoder features and fuses the result with a
    3x3 convolution. The output has one channel, the spatial size of the
    input, and values in ``[0, 1]`` (sigmoid).

    Note: the attention and decoder layers defined here determine the
    ``state_dict`` layout; checkpoints saved from a different variant of this
    class will not load.

    Args:
        pretrained: If True (default) the encoder starts from the ImageNet
            weights; if False it is randomly initialized, which avoids the
            weight download (e.g. when a checkpoint is loaded right after).
    """

    def __init__(self, pretrained=True):
        super().__init__()

        # Encoder: ResNet18 (pre-trained by default, suited to small datasets).
        weights = models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
        base_model = models.resnet18(weights=weights)
        # First three children only (stem without the max-pool).
        self.encoder0 = nn.Sequential(*list(base_model.children())[:3])  # 64
        self.encoder1 = base_model.layer1  # 64
        self.encoder2 = base_model.layer2  # 128
        self.encoder3 = base_model.layer3  # 256
        self.encoder4 = base_model.layer4  # 512

        # Bottleneck
        self.bottleneck = nn.Sequential(
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(inplace=True)
        )

        # Attention blocks. Channel counts match the tensors they actually
        # receive: the gating signal has as many channels as the skip feature
        # it gates (output of the corresponding up-convolution).
        self.att4 = AttentionBlock(F_g=256, F_l=256, F_int=128)
        self.att3 = AttentionBlock(F_g=128, F_l=128, F_int=64)
        self.att2 = AttentionBlock(F_g=64, F_l=64, F_int=32)
        self.att1 = AttentionBlock(F_g=64, F_l=64, F_int=32)

        # Decoder up-convolutions
        self.up4 = nn.ConvTranspose2d(512, 256, 2, stride=2)
        self.up3 = nn.ConvTranspose2d(256, 128, 2, stride=2)
        self.up2 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.up1 = nn.ConvTranspose2d(64, 64, 2, stride=2)

        # Fusion convolutions applied after each (gated skip, decoder) concat.
        self.dec4 = self._conv_block(512, 256)
        self.dec3 = self._conv_block(256, 128)
        self.dec2 = self._conv_block(128, 64)
        self.dec1 = self._conv_block(128, 64)

        self.out_conv = nn.Conv2d(64, 1, kernel_size=1)

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build a 3x3 convolution + batch norm + ReLU fusion block."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    @staticmethod
    def _decode(gate, skip, attention, fuse):
        """Gate ``skip`` with ``gate``, concatenate both and fuse them.

        ``gate`` is first resized to the spatial size of ``skip`` when they
        differ, which happens for inputs whose size is not a multiple of the
        encoder's total stride.
        """
        if gate.shape[2:] != skip.shape[2:]:
            gate = F.interpolate(gate, size=skip.shape[2:], mode='bilinear', align_corners=True)
        gated_skip = attention(g=gate, x=skip)
        return fuse(torch.cat((gated_skip, gate), dim=1))

    def forward(self, x):
        """Return per-pixel cloud probabilities of shape ``(N, 1, H, W)`` for input ``(N, 3, H, W)``."""
        # Encoder
        e0 = self.encoder0(x)
        e1 = self.encoder1(e0)
        e2 = self.encoder2(e1)
        e3 = self.encoder3(e2)
        e4 = self.encoder4(e3)

        # Bottleneck
        b = self.bottleneck(e4)

        # Decoder + attention, from the deepest skip connection to the stem.
        d4 = self._decode(self.up4(b), e3, self.att4, self.dec4)
        d3 = self._decode(self.up3(d4), e2, self.att3, self.dec3)
        d2 = self._decode(self.up2(d3), e1, self.att2, self.dec2)
        # e0 is not upsampled relative to e1 in the encoder, so d2 gates it
        # directly without an up-convolution.
        d1 = self._decode(d2, e0, self.att1, self.dec1)

        # Final up-convolution, 1x1 projection, then resize to the input size.
        logits = self.out_conv(self.up1(d1))
        out = F.interpolate(logits, size=x.shape[2:], mode='bilinear', align_corners=True)
        return torch.sigmoid(out)

Para probar el modelo: cargar los pesos entrenados y segmentar una imagen de prueba.

In [None]:
import torch
import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt

def predict_cloud(image_path, model_path, size=(256, 256), threshold=0.5):
    """Segment the clouds of one image with a saved model and plot the result.

    Shows the resized input image next to the binarized prediction.

    Args:
        image_path: Path of the image to segment.
        model_path: Path of the saved ``state_dict`` for ``CloudAttentionUNet``.
        size: ``(height, width)`` the image is resized to before inference;
            it should match the size used for training.
        threshold: Probability above which a pixel is labelled as cloud.
    """
    # 1. Pick the device.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # 2. Rebuild the architecture and load the weights.
    #    CloudAttentionUNet must already be defined in the session.
    model = CloudAttentionUNet().to(device)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()  # IMPORTANT: switch to evaluation mode

    # 3. Pre-process the image (must be THE SAME as during training).
    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # The context manager closes the file once the pixels have been converted.
    with Image.open(image_path) as source:
        image_org = source.convert('RGB')
    image_tensor = transform(image_org).unsqueeze(0).to(device)  # add batch dimension

    # 4. Run the prediction.
    with torch.no_grad():  # no gradients needed; saves memory
        output = model(image_tensor)
        # Binarize the probabilities.
        mask_pred = (output > threshold).float().cpu().squeeze().numpy()

    # 5. Visualize.
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.title("Imagen Original")
    # PIL expects (width, height) whereas `size` is (height, width).
    plt.imshow(image_org.resize((size[1], size[0])))

    plt.subplot(1, 2, 2)
    plt.title("Segmentación de Nubes")
    plt.imshow(mask_pred, cmap='gray')
    plt.show()

# Usage: segment one test image with the saved weights.
# NOTE(review): the image path has no file extension and "wohle" may be a
# typo — confirm it points to an actual image file.
predict_cloud(
    image_path='data/cloud detection/Test/masked/wohle',
    model_path='cloud_segmentation_model.pth',
)