In [3]:
import os

base_dir = '/content/SEN12MS-CR'

# Crear directorios
os.makedirs(os.path.join(base_dir, 'ROIs1158_spring', 's1'), exist_ok=True)
os.makedirs(os.path.join(base_dir, 'ROIs1158_spring', 's2_cloudy'), exist_ok=True)
os.makedirs(os.path.join(base_dir, 'ROIs1158_spring', 's2_clear'), exist_ok=True)
os.makedirs(os.path.join(base_dir, 'splits'), exist_ok=True)

# Crear archivos vacíos en 'splits'
with open(os.path.join(base_dir, 'splits', 'train.txt'), 'w') as f: f.write('')
with open(os.path.join(base_dir, 'splits', 'val.txt'), 'w') as f: f.write('')
with open(os.path.join(base_dir, 'splits', 'test.txt'), 'w') as f: f.write('')

print(f"Estructura de directorios y archivos creados en {base_dir}")
print("Verificando estructura:")
!ls -R /content/SEN12MS-CR

Estructura de directorios y archivos creados en /content/SEN12MS-CR
Verificando estructura:


"ls" no se reconoce como un comando interno o externo,
programa o archivo por lotes ejecutable.


In [None]:
import rasterio
import numpy as np
from torch.utils.data import Dataset

class SEN12MSCRDataset(Dataset):
    def __init__(self, list_file, root_dir, transform=None):
        # list_file = archivo con IDs (train/val/test)
        with open(list_file) as f:
            self.ids = [line.strip() for line in f]
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        patch_id = self.ids[idx]

        # Abrir imágenes
        s2_cloudy = rasterio.open(f"{self.root_dir}/S2_cloudy_{patch_id}.tif").read()
        s2_clear  = rasterio.open(f"{self.root_dir}/S2_clear_{patch_id}.tif").read()
        s1        = rasterio.open(f"{self.root_dir}/S1_{patch_id}.tif").read()

        # Normalizar a float32
        s2_cloudy = s2_cloudy.astype(np.float32) / 10000.0
        s2_clear  = s2_clear.astype(np.float32) / 10000.0
        s1        = s1.astype(np.float32) / 10000.0

        if self.transform:
            s2_cloudy, s1, s2_clear = self.transform(s2_cloudy, s1, s2_clear)

        return (s2_cloudy, s1), s2_clear


Arquitectura Recomendada: U-Net con Skip Connections
¿Por qué U-Net?

Excelente para tareas de segmentación y reconstrucción

Las conexiones skip ayudan a preservar detalles de bajo nivel

Funciona bien con datos limitados (relativamente)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class DoubleConv(nn.Module):
    """(Conv2D -> BatchNorm -> ReLU) * 2"""
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
    
    def forward(self, x):
        return self.double_conv(x)

class DeCloudUNet(nn.Module):
    def __init__(self, n_channels=3, n_classes=3):
        super().__init__()
        # Contraction Path (Encoder)
        self.inc = DoubleConv(n_channels, 64)
        self.down1 = nn.MaxPool2d(2)
        self.conv1 = DoubleConv(64, 128)
        self.down2 = nn.MaxPool2d(2)
        self.conv2 = DoubleConv(128, 256)
        self.down3 = nn.MaxPool2d(2)
        self.conv3 = DoubleConv(256, 512)
        self.down4 = nn.MaxPool2d(2)
        self.conv4 = DoubleConv(512, 1024)
        
        # Expansion Path (Decoder)
        self.up1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.conv5 = DoubleConv(1024, 512)  # 512 + 512
        self.up2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.conv6 = DoubleConv(512, 256)   # 256 + 256
        self.up3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.conv7 = DoubleConv(256, 128)   # 128 + 128
        self.up4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.conv8 = DoubleConv(128, 64)    # 64 + 64
        
        self.outc = nn.Conv2d(64, n_classes, kernel_size=1)
    
    def forward(self, x):
        # Encoder
        x1 = self.inc(x)
        x2 = self.conv1(self.down1(x1))
        x3 = self.conv2(self.down2(x2))
        x4 = self.conv3(self.down3(x3))
        x5 = self.conv4(self.down4(x4))
        
        # Decoder with skip connections
        x = self.up1(x5)
        x = torch.cat([x, x4], dim=1)
        x = self.conv5(x)
        
        x = self.up2(x)
        x = torch.cat([x, x3], dim=1)
        x = self.conv6(x)
        
        x = self.up3(x)
        x = torch.cat([x, x2], dim=1)
        x = self.conv7(x)
        
        x = self.up4(x)
        x = torch.cat([x, x1], dim=1)
        x = self.conv8(x)
        
        return torch.sigmoid(self.outc(x))

In [None]:
class CloudRemovalLoss(nn.Module):
    def __init__(self, lambda_p=0.1, lambda_g=0.01):
        super().__init__()
        self.l1_loss = nn.L1Loss()
        self.mse_loss = nn.MSELoss()
        self.lambda_p = lambda_p  # Peso para pérdida perceptual
        self.lambda_g = lambda_g  # Peso para pérdida adversarial
        
    def perceptual_loss(self, pred, target):
        # Usar una VGG preentrenada para extraer características
        vgg = torchvision.models.vgg16(pretrained=True).features[:16].eval()
        for param in vgg.parameters():
            param.requires_grad = False
        
        pred_features = vgg(pred)
        target_features = vgg(target)
        return self.mse_loss(pred_features, target_features)
    
    def forward(self, pred, target):
        # Pérdida L1 (preserva colores)
        l1 = self.l1_loss(pred, target)
        
        # Pérdida perceptual (preserva estructura)
        perceptual = self.perceptual_loss(pred, target)
        
        # Pérdida total
        total_loss = l1 + self.lambda_p * perceptual
        
        return total_loss, {"l1": l1, "perceptual": perceptual}

In [None]:
import os
import gdown
import zipfile
import tarfile
import kaggle
from google.colab import drive

class SEN12MSDataLoader:
    def __init__(self, platform='colab'):
        self.platform = platform
        self.data_paths = {
            'cloudy': [],
            'clean': [],
            'masks': []
        }
    
    def download_from_kaggle(self):
        """Descarga dataset de Kaggle (necesita API key)"""
        print("Downloading SEN12MS dataset from Kaggle...")
        
        # Configurar API key (subir kaggle.json primero)
        !mkdir -p ~/.kaggle
        !cp kaggle.json ~/.kaggle/
        !chmod 600 ~/.kaggle/kaggle.json
        
        # Descargar dataset
        !kaggle datasets download -d benjimx/sen12mscr-s2
        !unzip -q sen12mscr-s2.zip -d ./sen12ms_data
        !rm sen12mscr-s2.zip
    
    def download_from_ftp(self):
        """Descarga directamente del FTP oficial"""
        print("Downloading from TUM FTP server...")
        
        # Crear directorios
        os.makedirs('data/sen12ms', exist_ok=True)
        os.makedirs('data/sen12ms/ROIs1158_spring', exist_ok=True)
        
        # Descargar archivos (ejemplo para una temporada)
        base_url = "ftp://m1474000:m1474000@dataserv.ub.tum.de/ROIs1158_spring/"
        
        files = [
            "ROIs1158_spring_s1.tar.gz",
            "ROIs1158_spring_s2.tar.gz",
            "ROIs1158_spring_s2_cloudy.tar.gz"
        ]
        
        for file in files:
            print(f"Downloading {file}...")
            !wget -q {base_url}{file} -P data/sen12ms/
            print(f"Extracting {file}...")
            !tar -xzf data/sen12ms/{file} -C data/sen12ms/ROIs1158_spring/
            !rm data/sen12ms/{file}
    
    def download_cloud_masks(self):
        """Descargar máscaras de nubes (Cloud Detection Dataset)"""
        print("Downloading cloud masks...")
        
        # Opción 1: 38-Cloud dataset from Kaggle
        !kaggle datasets download -d soroush/38cloud-cloud-segmentation-in-satellite-images
        !unzip -q 38cloud-cloud-segmentation-in-satellite-images.zip
        !rm 38cloud-cloud-segmentation-in-satellite-images.zip
        
        # Opción 2: SPARCS dataset
        !wget -q https://landsat.usgs.gov/sites/default/files/sparcs/SPARCS_val_data.zip
        !unzip -q SPARCS_val_data.zip -d ./sparcs_data
        !rm SPARCS_val_data.zip
    
    def mount_google_drive(self):
        """Montar Google Drive para almacenamiento persistente"""
        print("Mounting Google Drive...")
        drive.mount('/content/drive')
        
        # Crear estructura de directorios
        base_path = '/content/drive/MyDrive/DeCould_Project'
        os.makedirs(f'{base_path}/data/raw', exist_ok=True)
        os.makedirs(f'{base_path}/data/processed', exist_ok=True)
        os.makedirs(f'{base_path}/models', exist_ok=True)
        os.makedirs(f'{base_path}/results', exist_ok=True)
        
        return base_path
    
    def setup_colab_environment(self):
        """Configurar entorno completo en Colab"""
        print("="*50)
        print("Setting up Google Colab Environment")
        print("="*50)
        
        # 1. Instalar dependencias
        !pip install torch torchvision torchaudio
        !pip install rasterio
        !pip install kaggle
        !pip install earthpy
        !pip install pyproj
        !pip install tensorboardX
        
        # 2. Verificar GPU
        import torch
        print(f"PyTorch version: {torch.__version__}")
        print(f"GPU Available: {torch.cuda.is_available()}")
        if torch.cuda.is_available():
            print(f"GPU: {torch.cuda.get_device_name(0)}")
        
        # 3. Montar Drive para almacenamiento persistente
        drive_path = self.mount_google_drive()
        
        return drive_path
    
    def load_sen12ms_sample(self, sample_path):
        """Cargar un sample de SEN12MS"""
        import rasterio
        import numpy as np
        
        # SEN12MS tiene 13 bandas Sentinel-2
        s2_bands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B10', 'B11', 'B12']
        
        # Para nuestro caso, usaremos RGB (B4, B3, B2) y NIR (B8)
        with rasterio.open(f'{sample_path}_s2.tif') as src:
            img = src.read()
            
        # Normalizar y extraer bandas RGB
        rgb = img[[3, 2, 1], :, :]  # B4, B3, B2
        rgb = np.transpose(rgb, (1, 2, 0))
        
        # Normalizar a [0, 1]
        rgb = (rgb - rgb.min()) / (rgb.max() - rgb.min() + 1e-8)
        
        return rgb
    
    def create_cloudy_clean_pairs(self, data_dir):
        """Crear pares de imágenes con/sin nubes"""
        import glob
        from sklearn.model_selection import train_test_split
        
        # Encontrar todas las imágenes
        all_images = glob.glob(f'{data_dir}/**/*_s2.tif', recursive=True)
        
        # Para SEN12MS-CR, buscar imágenes correspondientes
        pairs = []
        
        for clean_path in all_images:
            # Buscar versión con nubes (si existe)
            base_name = clean_path.replace('_s2.tif', '')
            cloudy_path = clean_path.replace('_s2.tif', '_s2_cloudy.tif')
            
            if os.path.exists(cloudy_path):
                pairs.append({
                    'clean': clean_path,
                    'cloudy': cloudy_path,
                    'mask': f'{base_name}_cloud_mask.tif' if os.path.exists(f'{base_name}_cloud_mask.tif') else None
                })
        
        print(f"Found {len(pairs)} cloudy-clean pairs")
        
        # Dividir en train/val/test
        train_pairs, temp_pairs = train_test_split(pairs, test_size=0.3, random_state=42)
        val_pairs, test_pairs = train_test_split(temp_pairs, test_size=0.5, random_state=42)
        
        return train_pairs, val_pairs, test_pairs

In [None]:
def train_decloud(model, dataloader, val_dataloader, epochs=100, lr=1e-4):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
    criterion = CloudRemovalLoss()
    
    # TensorBoard para monitoreo
    writer = SummaryWriter('logs/decould')
    
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        
        for batch_idx, (cloudy, clean, mask) in enumerate(dataloader):
            cloudy, clean = cloudy.to(device), clean.to(device)
            
            optimizer.zero_grad()
            
            # Predicción
            output = model(cloudy)
            
            # Calcular pérdida
            loss, loss_dict = criterion(output, clean)
            
            # Backpropagation
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            
            # Log cada 10 batches
            if batch_idx % 10 == 0:
                print(f'Epoch: {epoch}, Batch: {batch_idx}, Loss: {loss.item():.4f}')
                
                # Guardar imágenes en TensorBoard
                if batch_idx == 0:
                    writer.add_images('Input/Cloudy', cloudy[:4], epoch)
                    writer.add_images('Target/Clean', clean[:4], epoch)
                    writer.add_images('Output/Predicted', output[:4], epoch)
        
        # Validación
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for cloudy_val, clean_val, _ in val_dataloader:
                cloudy_val, clean_val = cloudy_val.to(device), clean_val.to(device)
                output_val = model(cloudy_val)
                loss_val, _ = criterion(output_val, clean_val)
                val_loss += loss_val.item()
        
        # Log en TensorBoard
        avg_train_loss = train_loss / len(dataloader)
        avg_val_loss = val_loss / len(val_dataloader)
        
        writer.add_scalar('Loss/Train', avg_train_loss, epoch)
        writer.add_scalar('Loss/Validation', avg_val_loss, epoch)
        
        print(f'Epoch {epoch}: Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')
        
        # Ajustar learning rate
        scheduler.step(avg_val_loss)
        
        # Guardar checkpoint
        if epoch % 10 == 0:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': avg_val_loss,
            }, f'checkpoints/decould_epoch_{epoch}.pth')
    
    writer.close()
    return model

In [None]:
def evaluate_model(model, dataloader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    
    metrics = {
        'psnr': [],
        'ssim': [],
        'l1_error': []
    }
    
    with torch.no_grad():
        for cloudy, clean, mask in dataloader:
            cloudy, clean = cloudy.to(device), clean.to(device)
            output = model(cloudy)
            
            # Calcular PSNR
            mse = F.mse_loss(output, clean)
            psnr = 20 * torch.log10(1.0 / torch.sqrt(mse))
            metrics['psnr'].append(psnr.item())
            
            # Calcular SSIM (simplificado)
            # En la práctica, usaría una implementación completa de SSIM
            ssim = calculate_ssim(output, clean)
            metrics['ssim'].append(ssim)
            
            # Error L1
            l1 = F.l1_loss(output, clean)
            metrics['l1_error'].append(l1.item())
    
    # Promediar métricas
    avg_metrics = {k: np.mean(v) for k, v in metrics.items()}
    return avg_metrics

def calculate_ssim(img1, img2, window_size=11):
    """
    Implementación simplificada de SSIM
    """
    C1 = 0.01**2
    C2 = 0.03**2
    
    mu1 = F.avg_pool2d(img1, window_size, stride=1, padding=window_size//2)
    mu2 = F.avg_pool2d(img2, window_size, stride=1, padding=window_size//2)
    
    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1 * mu2
    
    sigma1_sq = F.avg_pool2d(img1*img1, window_size, stride=1, padding=window_size//2) - mu1_sq
    sigma2_sq = F.avg_pool2d(img2*img2, window_size, stride=1, padding=window_size//2) - mu2_sq
    sigma12 = F.avg_pool2d(img1*img2, window_size, stride=1, padding=window_size//2) - mu1_mu2
    
    ssim_map = ((2*mu1_mu2 + C1)*(2*sigma12 + C2)) / ((mu1_sq + mu2_sq + C1)*(sigma1_sq + sigma2_sq + C2))
    
    return ssim_map.mean().item()

In [None]:
import os
import glob
from torch.utils.data import DataLoader

def main():
    # 1. Configuración
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    # 2. Preparar datos
    # Suponiendo que tienes imágenes en una carpeta
    clean_images = glob.glob("data/satellite_images/*.jpg")[:1000]
    
    # Dividir en train/val
    split_idx = int(len(clean_images) * 0.8)
    train_images = clean_images[:split_idx]
    val_images = clean_images[split_idx:]
    
    # Transformaciones
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    
    # Crear datasets
    train_dataset = CloudDataset(train_images, transform=transform)
    val_dataset = CloudDataset(val_images, transform=transform)
    
    # DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4)
    
    # 3. Crear modelo
    model = DeCloudUNet(n_channels=3, n_classes=3)
    
    # 4. Entrenar
    trained_model = train_decloud(
        model=model,
        dataloader=train_loader,
        val_dataloader=val_loader,
        epochs=50,
        lr=1e-4
    )
    
    # 5. Evaluar
    metrics = evaluate_model(trained_model, val_loader)
    print(f"Metrics: {metrics}")
    
    # 6. Guardar modelo final
    torch.save(trained_model.state_dict(), "decould_final.pth")
    print("Model saved as decould_final.pth")

if __name__ == "__main__":
    main()

In [None]:
class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(6, 64, 4, 2, 1),  # Concatenar input y output
            nn.LeakyReLU(0.2),
            nn.Conv2d(64, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.Conv2d(128, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2),
            nn.Conv2d(256, 512, 4, 2, 1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2),
            nn.Conv2d(512, 1, 4, 1, 0),
            nn.Sigmoid()
        )
    
    def forward(self, img_A, img_B):
        img_input = torch.cat([img_A, img_B], dim=1)
        return self.model(img_input)

In [None]:
class AttentionBlock(nn.Module):
    def __init__(self, F_g, F_l, F_int):
        super().__init__()
        self.W_g = nn.Sequential(
            nn.Conv2d(F_g, F_int, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(F_int)
        )
        
        self.W_x = nn.Sequential(
            nn.Conv2d(F_l, F_int, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(F_int)
        )
        
        self.psi = nn.Sequential(
            nn.Conv2d(F_int, 1, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(1),
            nn.Sigmoid()
        )
        
        self.relu = nn.ReLU(inplace=True)
    
    def forward(self, g, x):
        g1 = self.W_g(g)
        x1 = self.W_x(x)
        psi = self.relu(g1 + x1)
        psi = self.psi(psi)
        return x * psi