In [None]:
#!git clone https://github.com/AICONSlab/3DINO.git

In [None]:
# Mount Google Drive at /content/drive; the dataset, weights and 3DINO paths
# used by the later cells all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install -r /content/drive/MyDrive/tesis/3DINO-main/3DINO-main/requirements.txt


Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu117, https://pypi.nvidia.com


In [None]:
import os, glob, random
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import tifffile

In [1]:
# Reproducibility configuration
SEED = 42


def seed_everything(seed=SEED):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer handed to Python's ``random``, NumPy's global generator
            and PyTorch (CPU and all CUDA devices). Defaults to ``SEED``.
    """
    # Ask cuDNN for deterministic algorithms and switch off its autotuner.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


seed_everything(SEED)

# Dataset locations (train / test patch folders on Google Drive)
TRAIN_DIR = "/content/drive/MyDrive/tesis/patches_train"
TEST_DIR = "/content/drive/MyDrive/tesis/patches_test"

# Class ids 1..5
LABELS = list(range(1, 6))

# Initial crop size (depth, height, width)
TARGET_DHW0 = (80,) * 3
# Final size fed to the model
TARGET_DHW = (112,) * 3

# DataLoader settings
BATCH_SIZE = 8
NUM_WORKERS = 2


NameError: name 'random' is not defined

In [None]:
# Rescale a 3D volume to [-1, 1] using quantile limits to damp the effect of outliers
def normalize_to_minus1_1(vol, p_low=0.0005, p_high=0.9995):
    """Map intensities linearly so the low/high quantiles land on -1 / +1.

    Args:
        vol: NumPy array of intensities; it is cast to float32 first.
        p_low: Quantile (0..1) mapped to -1.
        p_high: Quantile (0..1) mapped to +1.

    Returns:
        Array with the same shape as ``vol``; values outside the quantile
        window are clipped to [-1, 1].
    """
    data = vol.astype(np.float32)
    lower = np.quantile(data, p_low)
    upper = np.quantile(data, p_high)
    # The small epsilon keeps the division finite for constant volumes.
    span = upper - lower + 1e-8
    unit = (data - lower) / span
    return np.clip(unit * 2 - 1, -1, 1)

# Centre-crop and/or symmetrically pad a volume so it matches the target (D, H, W) size
def center_crop_or_pad_3d(vol, target_dhw):
    """Return ``vol`` centred inside a window of shape ``target_dhw``.

    Axes shorter than the target are padded on both sides with the volume's
    minimum value (the extra voxel of an odd pad goes at the end); axes longer
    than the target are cropped around the centre.

    Args:
        vol: 3D NumPy array (D, H, W).
        target_dhw: Three target sizes (tD, tH, tW).

    Returns:
        Array of shape ``target_dhw``.
    """
    depth, height, width = vol.shape
    t_d, t_h, t_w = target_dhw
    wanted = (t_d, t_h, t_w)

    # Missing voxels per axis (0 when the axis is already large enough).
    missing = [max(0, want - have) for want, have in zip(wanted, (depth, height, width))]
    if any(missing):
        pad_widths = tuple((gap // 2, gap - gap // 2) for gap in missing)
        vol = np.pad(
            vol,
            pad_widths,
            mode="constant",
            constant_values=float(vol.min()),
        )

    # After padding every axis is >= its target, so only a centred crop remains.
    starts = [(have - want) // 2 for have, want in zip(vol.shape, wanted)]
    window = tuple(slice(start, start + want) for start, want in zip(starts, wanted))
    return vol[window]

# Geometric augmentation: random flips along each axis plus a 2D rotation in the (H, W) plane
def random_flip_rot(vol, rng: np.random.RandomState):
    """Randomly flip a volume along each axis and rotate it by a multiple of 90 degrees.

    Args:
        vol: 3D NumPy array (D, H, W).
        rng: Random source; three ``rand()`` draws decide the flips (one per
            axis, each applied with probability 0.5), then one
            ``randint(0, 4)`` draw picks the number of quarter turns.

    Returns:
        A fresh copy of the transformed volume (never a view of the input).
    """
    for axis in range(3):
        if rng.rand() < 0.5:
            vol = np.flip(vol, axis=axis)

    quarter_turns = int(rng.randint(0, 4))
    rotated = np.rot90(vol, k=quarter_turns, axes=(1, 2))
    # Flips and rot90 produce views; copy so the caller gets its own buffer.
    return rotated.copy()

# Intensity augmentation: contrast/brightness scale-and-shift plus additive Gaussian noise
def intensity_aug(vol, rng: np.random.RandomState):
    """Randomly perturb the intensities of a volume, keeping them in [-1, 1].

    Args:
        vol: NumPy array of intensities.
        rng: Random source. Draw order: ``rand()``; if below 0.8, two
            ``uniform`` draws (gain offset in [-0.2, 0.2], shift in
            [-0.1, 0.1]); ``rand()``; if below 0.5, one ``normal`` draw
            (mean 0, std 0.03) with the shape of ``vol``.

    Returns:
        The augmented volume, or ``vol`` itself when neither step fires.
    """
    if rng.rand() < 0.8:
        gain = 1.0 + rng.uniform(-0.2, 0.2)
        offset = rng.uniform(-0.1, 0.1)
        vol = np.clip(vol * gain + offset, -1, 1)

    # Mild Gaussian noise (original note: meant to mimic microscopy noise).
    if rng.rand() < 0.5:
        noise = rng.normal(0, 0.03, size=vol.shape).astype(np.float32)
        vol = np.clip(vol + noise, -1, 1)

    return vol

def augment_one(vol, rng: np.random.RandomState):
    """Apply the geometric augmentation followed by the intensity augmentation.

    Args:
        vol: Volume to augment.
        rng: Random source shared by both steps (geometric draws come first).

    Returns:
        The augmented volume.
    """
    return intensity_aug(random_flip_rot(vol, rng), rng)

# TODO: aumentar el gaussian blur y el ruido; visualizar las aumentaciones de imágenes y verificarlas en Fiji; hacer los embeddings sin aumentar datos, 500, 3500 y 7000, primero solo en el random forest

In [None]:
# Dataset that loads 3D TIFF volumes organised in `label_X` folders
class Tif3DDatasetSingle(Dataset):
    """Map-style dataset of single-channel 3D TIFF volumes with integer labels.

    Files are discovered under ``<base_dir>/label_<lv>/`` (``*.tif`` and
    ``*.tiff``) for every ``lv`` in ``labels``. Each item is read, normalised
    to [-1, 1], centre-cropped/padded to ``target_dhw0``, resampled to
    ``target_dhw`` with trilinear interpolation and optionally augmented.

    NOTE: earlier revisions used ``np.resize`` for the resampling step.
    ``np.resize`` does not interpolate; it repeats the flattened data until
    the new shape is filled, which scrambles the volume. Embeddings produced
    with that revision are not comparable with the ones produced here.

    Args:
        base_dir: Root folder containing the ``label_<lv>`` sub-folders.
        labels: Iterable of label values to scan.
        target_dhw0: (D, H, W) size of the centre crop/pad.
        target_dhw: (D, H, W) size of the returned volume.
        do_aug: When True, ``augment_one`` is applied to every item.
        seed: Base seed; item ``idx`` is augmented with seed ``seed + idx``.
    """

    def __init__(self, base_dir, labels, target_dhw0, target_dhw, do_aug=False, seed=SEED):
        # List of (path, label) pairs; callers may replace it with a resampled index.
        self.items = []
        self.target_dhw0 = target_dhw0
        self.target_dhw = target_dhw
        self.do_aug = do_aug
        self.seed = seed

        for lv in labels:
            folder = os.path.join(base_dir, f"label_{lv}")
            files = sorted(glob.glob(os.path.join(folder, "*.tif")) + glob.glob(os.path.join(folder, "*.tiff")))
            self.items.extend([(f, lv) for f in files])

    def __len__(self):
        """Return the number of (path, label) entries in the index."""
        return len(self.items)

    @staticmethod
    def _resample(vol, size):
        """Resample a (D, H, W) array to ``size`` with trilinear interpolation."""
        size = tuple(int(s) for s in size)
        if tuple(vol.shape) == size:
            return vol
        # interpolate() expects (N, C, D, H, W) for trilinear mode.
        tensor = torch.from_numpy(np.ascontiguousarray(vol, dtype=np.float32))[None, None]
        resized = torch.nn.functional.interpolate(
            tensor, size=size, mode="trilinear", align_corners=False
        )
        return resized[0, 0].numpy()

    def __getitem__(self, idx):
        """Load, preprocess and (optionally) augment the item at ``idx``.

        Returns:
            Tuple ``(x, y, path)`` where ``x`` is a float32 tensor of shape
            (1, D, H, W), ``y`` is the integer label and ``path`` is the
            source file path.

        Raises:
            ValueError: If the file does not contain a 3D volume.
        """
        path, y = self.items[idx]
        vol = tifffile.imread(path).astype(np.float32)  # (Z,H,W)
        if vol.ndim != 3:
            raise ValueError(f"Expected a 3D volume (Z,H,W) in {path}, got shape {vol.shape}")

        vol = normalize_to_minus1_1(vol)
        vol = center_crop_or_pad_3d(vol, self.target_dhw0)
        # Real spatial upsampling; trilinear weights keep values inside [-1, 1].
        vol = self._resample(vol, self.target_dhw)

        # Augment if enabled (deterministic per index: seed + idx)
        if self.do_aug:
            rng = np.random.RandomState(self.seed + idx)
            vol = augment_one(vol, rng)

        x = torch.from_numpy(vol).unsqueeze(0)  # (1,D,H,W)
        return x, int(y), path

# Build a class-balanced index by sampling (with replacement) target_per_label entries per class
def build_augmented_index(items, labels, target_per_label, seed=SEED):
    """Resample ``items`` so every label contributes ``target_per_label`` entries.

    Args:
        items: Iterable of ``(path, label)`` pairs.
        labels: Labels to include; every label seen in ``items`` must be listed.
        target_per_label: Number of entries drawn (with replacement) per label.
        seed: Seed of the private ``random.Random`` used for sampling and shuffling.

    Returns:
        Shuffled list of ``(path, label)`` pairs of length
        ``len(labels) * target_per_label``.

    Raises:
        ValueError: If a label in ``labels`` has no items.
    """
    sampler = random.Random(seed)

    pools = {label: [] for label in labels}
    for entry_path, entry_label in items:
        pools[entry_label].append((entry_path, entry_label))

    balanced = []
    for lv in labels:
        pool = pools[lv]
        if not pool:
            raise ValueError(f"No hay archivos en label_{lv}")
        balanced.extend(sampler.choice(pool) for _ in range(target_per_label))

    sampler.shuffle(balanced)
    return balanced


In [None]:
# Target number of samples per class after balancing (oversampling with augmentation)
TARGET_PER_LABEL = 7000

# Un-augmented dataset; used here for its scanned file index and its size.
base_train = Tif3DDatasetSingle(TRAIN_DIR, LABELS, TARGET_DHW0, TARGET_DHW, do_aug=False, seed=SEED)
# Balanced index: TARGET_PER_LABEL entries per label, drawn with replacement.
aug_items = build_augmented_index(base_train.items, LABELS, TARGET_PER_LABEL, seed=SEED)

# Augmenting dataset whose scanned index is then replaced by the balanced one.
train_aug = Tif3DDatasetSingle(TRAIN_DIR, LABELS, TARGET_DHW0, TARGET_DHW, do_aug=True, seed=SEED)
train_aug.items = aug_items

print("Train reales:", len(base_train), "| Train augmentado:", len(train_aug))


Train reales: 11959 | Train augmentado: 35000


In [None]:
# Establece el directorio actual en la ruta donde se encuentra el código fuente de 3DINO
cd /content/drive/MyDrive/tesis/3DINO-main/3DINO-main

/content/drive/MyDrive/tesis/3DINO-main/3DINO-main


In [None]:
# Import the helpers that load the 3D config and build the model.
# NOTE(review): these imports presumably resolve relative to the working
# directory set in the previous cell — confirm the cell order on a fresh kernel.
from dinov2.eval.setup import build_model_for_eval
from dinov2.configs import load_and_merge_config_3d

# Config name and pretrained checkpoint, both stored on Google Drive.
config_file = "/content/drive/MyDrive/tesis/3DINO-main/3DINO-main/dinov2/configs/train/vit3d_highres"
pretrained_weights = "/content/drive/MyDrive/tesis/3dino_vit_weights.pth"

cfg = load_and_merge_config_3d(config_file)
model = build_model_for_eval(cfg, pretrained_weights)

# Use the GPU when one is available and put the model in eval mode.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

print("Modelo listo en:", device)




Modelo listo en: cuda


In [None]:
# DataLoader reproducibility
def seed_worker(worker_id):
    """Seed Python's and NumPy's global RNGs inside a DataLoader worker.

    Args:
        worker_id: Index of the worker; it is added to ``SEED`` to form the
            seed used in that worker.
    """
    per_worker_seed = worker_id + SEED
    for apply_seed in (np.random.seed, random.seed):
        apply_seed(per_worker_seed)


# Generator handed to the DataLoader, seeded with SEED.
g = torch.Generator().manual_seed(SEED)

# Embedding extraction with 3DINO
def extract_embeddings(dataset, save_path):
    """Run the model over ``dataset`` and save embeddings, labels and paths.

    Relies on notebook-level names: ``model``, ``device``, ``BATCH_SIZE``,
    ``NUM_WORKERS``, ``seed_worker`` and the generator ``g``.

    Args:
        dataset: Dataset yielding ``(x, y, path)`` items.
        save_path: Destination of the compressed ``.npz`` archive. It stores
            the arrays ``embeddings``, ``labels`` and ``paths``.

    Returns:
        Tuple ``(embeddings, labels)`` of NumPy arrays, in dataset order.

    Raises:
        ValueError: If ``dataset`` is empty.
    """
    if len(dataset) == 0:
        raise ValueError("El dataset está vacío; no hay volúmenes para extraer embeddings")

    loader = DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,  # keep dataset order so rows line up with `paths`
        num_workers=NUM_WORKERS,
        # Pinned host memory only helps (and only avoids a warning) on CUDA.
        pin_memory=(device.type == "cuda"),
        worker_init_fn=seed_worker,
        generator=g,
    )

    all_emb, all_y, all_paths = [], [], []

    with torch.no_grad():
        for x, y, paths in loader:
            x = x.to(device, non_blocking=True)
            out = model(x)  # one embedding row per volume in the batch
            all_emb.append(out.cpu().numpy())
            all_y.append(np.asarray(y))
            all_paths.extend(paths)

    embeddings = np.concatenate(all_emb, axis=0)
    labels = np.concatenate(all_y, axis=0)

    np.savez_compressed(
        save_path,
        embeddings=embeddings,
        labels=labels,
        # Stored as a unicode array (not dtype=object) so the archive can be
        # read back with the default np.load(allow_pickle=False).
        paths=np.array(all_paths, dtype=str),
    )

    print("Guardado:", save_path)
    print("Embeddings:", embeddings.shape, "| Labels:", labels.shape)
    return embeddings, labels


In [None]:
# Output path for the train embeddings
# NOTE(review): the file name says "aug10k" while TARGET_PER_LABEL is 7000
# (35000 samples in total) — confirm the name is intended.
save_train = "/content/drive/MyDrive/tesis/embeddings_3dino_TRAIN_aug10k_seed42_finn.npz"
train_embeddings, train_labels = extract_embeddings(train_aug, save_train)

Guardado: /content/drive/MyDrive/tesis/embeddings_3dino_TRAIN_aug10k_seed42_finn.npz
Embeddings: (35000, 1024) | Labels: (35000,)


In [None]:
# Test dataset WITHOUT augmentation; only the 80 -> 112 size preprocessing is applied
test_ds = Tif3DDatasetSingle(TEST_DIR, LABELS, TARGET_DHW0, TARGET_DHW, do_aug=False, seed=SEED)
print("Test reales:", len(test_ds))

# Output path for the test embeddings
save_test = "/content/drive/MyDrive/tesis/embeddings_3dino_TEST_seed42_fin.npz"
test_embeddings, test_labels = extract_embeddings(test_ds, save_test)


Test reales: 2542
Guardado: /content/drive/MyDrive/tesis/embeddings_3dino_TEST_seed42_fin.npz
Embeddings: (2542, 1024) | Labels: (2542,)
