In [1]:
from google.colab import drive
import sys
import os

# Montar Google Drive
drive.mount('/content/drive')

# Definir rutas base y repo
BASE_PATH = '/content/drive/MyDrive/Tesis'
REPO_NAME = 'Tesis_NN'
REPO_PATH = os.path.join(BASE_PATH, REPO_NAME)

# Agregar repo al path de Python para imports
if REPO_PATH not in sys.path:
    sys.path.append(REPO_PATH)

# Cambiar directorio actual al repo (para comandos git)
os.chdir(REPO_PATH)

print(f"Drive montado. Carpeta repo seteada en: {REPO_PATH}")


Mounted at /content/drive
Drive montado. Carpeta repo seteada en: /content/drive/MyDrive/Tesis/Tesis_NN


In [2]:
# ───────────────────────────────────────────────────────────────────────────────
# 📦 Standard Library Imports
# ───────────────────────────────────────────────────────────────────────────────
import os
from pathlib import Path    # For file and directory manipulation
import sys                  # To modify Python path for custom module imports
import csv                  # To handle CSV file reading/writing
import random               # For generating random numbers
import numpy as np          # Numerical operations and array handling
import pandas as pd         # DataFrame handling for structured data
import matplotlib.pyplot as plt  # Plotting and visualization
import time


# ───────────────────────────────────────────────────────────────────────────────
# 🌍 Third-Party Library Imports
# ───────────────────────────────────────────────────────────────────────────────
import torch                # PyTorch: deep learning framework
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms.functional as functional_transforms

# ───────────────────────────────────────────────────────────────────────────────
# 🧩 Custom Project Modules
# ───────────────────────────────────────────────────────────────────────────────

# Add custom project folder to system path to enable local module imports
sys.path.append('C:/Users/nnobi/Desktop/FIUBA/Tesis/Project')

# Import common training routines
from project_package.utils import train_common_routines as tcr

# Import model
from project_package.conv_net.ConvNet_model import SRCNN

# Import dataset manager
from project_package.dataset_manager.tensor_images_dataset import Tensor_images_dataset

# Import Sentinel-2 to Venus preprocessing utilities
from project_package.data_processing import sen2venus_routines as s2v

# Import general utility functions
from project_package.utils import utils as utils


In [17]:
!pip install -q webdataset

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/85.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [15]:
import torch

# Cargá el tensor
dataset = 'my_dataset3'
low_res_file = os.path.join(REPO_PATH,'datasets',dataset,'10m.pt')
high_res_file = os.path.join(REPO_PATH,'datasets',dataset,'05m.pt')


# Leer tensores
tensor_low_res = torch.load(low_res_file)
tensor_high_res = torch.load(high_res_file)

# Castear a float32 y escalar
scale_value = 10000
tensor_low_res = tensor_low_res.float() / scale_value
tensor_high_res = tensor_high_res.float() / scale_value

# Normalización min-max por imagen y canal
max_val_low = tensor_low_res.amax(dim=(2, 3), keepdim=True)  # shape: [N, C, 1, 1]
min_val_low = tensor_low_res.amin(dim=(2, 3), keepdim=True)
tensor_low_res = (tensor_low_res - min_val_low) / (max_val_low - min_val_low + 1e-8)

max_val_high = tensor_high_res.amax(dim=(2, 3), keepdim=True)
min_val_high = tensor_high_res.amin(dim=(2, 3), keepdim=True)
tensor_high_res = (tensor_high_res - min_val_high) / (max_val_high - min_val_high + 1e-8)

# Interpolar low-res a 256x256 (bilinear)
tensor_low_res = F.interpolate(tensor_low_res, size=(256, 256), mode='bilinear', align_corners=False)


print(tensor_low_res.shape)
print(tensor_high_res.shape)

torch.Size([129, 3, 256, 256])
torch.Size([129, 3, 256, 256])


In [27]:
import os
import torch
import torch.nn.functional as F
import webdataset as wds
from pathlib import Path
import io

def create_webdataset_shards_pt(tensor_low_res, tensor_high_res, output_dir, shard_size=1000):
    """
    Guarda pares de tensores [N,3,H,W] low-res y high-res como archivos .pt dentro de shards WebDataset (.tar).
    """
    os.makedirs(output_dir, exist_ok=True)
    N = tensor_low_res.shape[0]

    def save_shard(shard_idx, start_idx, end_idx):
        shard_path = os.path.join(output_dir, f"shard-{shard_idx:05d}.tar")
        with wds.TarWriter(shard_path) as sink:
            for i in range(start_idx, end_idx):
                sample_id = f"{i:08d}"

                # Serializar tensor a bytes con torch.save
                low_buffer = io.BytesIO()
                torch.save(tensor_low_res[i], low_buffer)
                low_bytes = low_buffer.getvalue()

                high_buffer = io.BytesIO()
                torch.save(tensor_high_res[i], high_buffer)
                high_bytes = high_buffer.getvalue()

                sample = {
                    "__key__": sample_id,
                    "low.pt": low_bytes,
                    "high.pt": high_bytes,
                }
                sink.write(sample)

    num_shards = (N + shard_size - 1) // shard_size
    for shard_idx in range(num_shards):
        start = shard_idx * shard_size
        end = min(start + shard_size, N)
        print(f"Creando shard {shard_idx+1}/{num_shards} con índices {start} a {end-1}")
        save_shard(shard_idx, start, end)

    print("Shards creados en:", output_dir)


def torch_decoder(data):
    """Decodificador para cargar tensores desde bytes en WebDataset"""
    buffer = io.BytesIO(data)
    return torch.load(buffer)


def get_webdataset_loader_pt(shard_pattern, batch_size=16, shuffle=True):
    """
    Carga WebDataset con pares low.pt y high.pt, devuelve tensores en batches.
    """
    dataset = (
        wds.WebDataset(shard_pattern)
        .decode(torch_decoder)
        .to_tuple("low.pt", "high.pt")
    )

    if shuffle:
        dataset = dataset.shuffle(1000)

    loader = torch.utils.data.DataLoader(dataset.batched(batch_size), batch_size=None)
    return loader


# --- EJEMPLO de uso ---

# Supongamos que tensor_low_res y tensor_high_res ya están creados con shape [N,3,256,256]

BASE_PATH = "/content/drive/MyDrive/Tesis"  # Cambiá al path que uses

# Crear shards (esto puede demorar un poco si N es grande)
create_webdataset_shards_pt(tensor_low_res, tensor_high_res, output_dir=os.path.join(BASE_PATH,'webdata_test'), shard_size=1000)

import glob

shard_paths = sorted(glob.glob(os.path.join(BASE_PATH, 'webdata_test', 'shard-*.tar')))
loader = get_webdataset_loader_pt(shard_paths, batch_size=8)

for i, (low_batch, high_batch) in enumerate(loader):
    print(f"Batch {i}: low_res {low_batch.shape}, high_res {high_batch.shape}")
    # Aquí podés usar tus batches para entrenar, validar, etc.
    if i == 1:  # solo para probar 2 batches
        break


Creando shard 1/1 con índices 0 a 128
Shards creados en: /content/drive/MyDrive/Tesis/webdata_test


IndexError: list index out of range