In [6]:
import pandas as pd
import cv2
import sys
import os

sys.path.append("../../")
from src import utils
from src.dataset3D import MRIDataset3D
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
import matplotlib.pyplot as plt
# Seed RNGs through the project helper (implemented in src.utils, not shown in this notebook).
utils.set_seed(42)

# Directory holding the preprocessed CSV splits, relative to this notebook.
results_dir = '../../results/preprocessed_data/'

# Names of the per-artifact label columns.
# NOTE(review): `labels` and `LABELS` hold the same seven names; only `LABELS` is
# referenced by the functions defined in the visible cells of this notebook.
labels=["Noise", "Zipper", "Positioning", "Banding", "Motion", "Contrast", "Distortion"]
LABELS = ["Noise","Zipper","Positioning","Banding","Motion","Contrast","Distortion"]

df_train = pd.read_csv(os.path.join(results_dir, 'df_train.csv'))
df_test = pd.read_csv(os.path.join(results_dir, 'df_test.csv'))
# Derive a patient identifier from the filename ("LISA_<digits>" for train,
# "LISA_VALIDATION_<digits>" for test). make_folds() uses it as the grouping key.
df_train["patient_id"] = df_train["filename"].str.extract(r"(LISA_\d+)")
df_test["patient_id"]  = df_test["filename"].str.extract(r"(LISA_VALIDATION_\d+)")
    
# Last expression of the cell: displays the first two training rows.
df_train.head(2)

Unnamed: 0,filename,Noise,Zipper,Positioning,Banding,Motion,Contrast,Distortion,path,patient_id,dim_x,dim_y,dim_z,spacing_x,spacing_y,spacing_z,view
0,LISA_0001_LF_axi.nii.gz,0,0,0,0,0,0,0,/data/cristian/projects/med_data/rise-miccai/t...,LISA_0001,36,120,146,5.0,1.5,1.5,axi
1,LISA_0001_LF_cor.nii.gz,0,0,0,0,0,0,0,/data/cristian/projects/med_data/rise-miccai/t...,LISA_0001,40,120,120,5.0,1.5,1.5,cor


In [15]:
# =========================
# 1) Notebook-friendly Args container
# =========================
class Args:
    """Plain attribute container for the run configuration used in this notebook.

    Every constructor argument is stored unchanged on the instance under the
    same name. Notable fields:

    - ``mode``: ``"ordinal"`` or ``"multiclass"``.
    - ``thr1``: ordinal mode only, decision threshold for ``y >= 1``.
    - ``thr2``: ordinal mode only, decision threshold for ``y == 2``.
    - ``lr`` / ``wd``: learning rate and weight decay handed to the optimizer.
    """

    def __init__(self,
                 train_csv,
                 test_csv,
                 device="cuda:5",
                 out_dir="./runs_lisa_nb",
                 folds=5,
                 epochs=8,
                 batch_size=2,
                 num_workers=2,
                 lr=3e-4,
                 wd=1e-4,
                 dropout=0.3,
                 freeze_n=2,
                 spatial_size=(40,120,120),
                 use_aug=True,
                 seed=42,
                 amp=True,
                 mode="ordinal",   # "ordinal" or "multiclass"
                 thr1=0.5,         # ordinal: threshold for y>=1
                 thr2=0.5):        # ordinal: threshold for y==2
        # Data locations and runtime placement.
        self.train_csv, self.test_csv = train_csv, test_csv
        self.out_dir, self.device = out_dir, device
        # Cross-validation and loop sizes.
        self.folds, self.epochs = folds, epochs
        self.batch_size, self.num_workers = batch_size, num_workers
        # Optimisation and regularisation.
        self.lr, self.wd = lr, wd
        self.dropout, self.freeze_n = dropout, freeze_n
        # Input geometry, augmentation, reproducibility, mixed precision.
        self.spatial_size, self.use_aug = spatial_size, use_aug
        self.seed, self.amp = seed, amp
        # Output head type and its decision thresholds.
        self.mode = mode
        self.thr1, self.thr2 = thr1, thr2

# =========================
# 2) Imports and utilities
# =========================
import os, json, numpy as np, pandas as pd
from tqdm import tqdm
import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn.model_selection import GroupKFold
from sklearn.metrics import f1_score, accuracy_score
from torchvision.models.video import r3d_18


def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch with the same value.

    When CUDA is available, every CUDA device is seeded as well.
    """
    import random
    import numpy as np
    import torch

    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

def ensure_dir(p):
    """Create directory ``p`` (with any missing parents); no error if it already exists."""
    os.makedirs(p, exist_ok=True)

def count_pred012(y_pred):
    """Count how often each predicted value occurs.

    Returns a dict that always contains the keys 0, 1 and 2 (zero when absent).
    Any other value present in ``y_pred`` is added under its ``int()`` cast.
    """
    tally = dict.fromkeys((0, 1, 2), 0)
    uniq, freq = np.unique(y_pred, return_counts=True)
    tally.update({int(value): int(n) for value, n in zip(uniq, freq)})
    return tally

# =========================
# 3) Dataset with a single target (one label column)
#    Standalone torch Dataset built on MONAI transforms; it reads one label column per sample
# =========================
from monai.transforms import (
    LoadImaged, EnsureChannelFirstd, Orientationd, Resized, Spacingd,
    ScaleIntensityd, EnsureTyped,
    RandFlipd, RandAffined, RandZoomd, Compose
)
import numpy as np
import torch

from monai.transforms import MapTransform
import torch

class ReorientToViewAxisd(MapTransform):
    """
    Permute a (C, D, H, W) volume so that 'depth' lines up with the acquisition view,
    keeping the tensor type (the author's intent is to preserve MetaTensor).

    As stated by the author, apply it AFTER EnsureTyped(track_meta=True), Orientationd
    and Spacingd, and BEFORE Resized/ScaleIntensityd.

    ``view_axis_getter`` is called with the sample dict and must return one of
    "sag", "cor" or "axi"; anything else raises ValueError.
    """
    def __init__(self, keys, view_axis_getter):
        super().__init__(keys)
        self.view_axis_getter = view_axis_getter

    def __call__(self, data):
        sample = dict(data)
        view_axis = self.view_axis_getter(sample)

        # (C, D, H, W) -> (C, newD, newH, newW); views are checked in this order.
        permutations = (
            ("sag", (0, 1, 2, 3)),   # D, H, W unchanged
            ("cor", (0, 2, 1, 3)),   # swap D <-> H
            ("axi", (0, 3, 1, 2)),   # W becomes 'depth'
        )
        for view_name, candidate in permutations:
            if view_axis == view_name:
                order = candidate
                break
        else:
            raise ValueError(f"Invalid view_axis: {view_axis}")

        for key in self.keys:
            volume = sample[key]
            # Non-tensor inputs (e.g. NumPy arrays) are converted first.
            tensor = volume if isinstance(volume, torch.Tensor) else torch.as_tensor(volume)
            # permute() is applied on the tensor as-is, so its type is kept.
            sample[key] = tensor.permute(*order).contiguous()

        return sample


class MRIDataset3DOneTarget(torch.utils.data.Dataset):
    """Dataset yielding one 3D volume and the label of a single target column.

    Each item is ``(image, y, filename, view_onehot)``:

    - ``image``: the volume after the MONAI pipeline built in ``__init__``.
    - ``y``: ``row[target_label]`` as a long tensor when ``is_train`` is true, else ``-1``.
    - ``view_onehot``: float one-hot over (axi, cor, sag).

    The view is parsed from the file path: the text between the last "_LF_" and ".nii".
    """

    def __init__(self, df, target_label, is_train=False, use_aug=False, spatial_size=(40,120,120)):
        self.df = df.reset_index(drop=True)
        self.is_train = is_train
        self.target_label = target_label
        self.view2onehot = {
            "axi": torch.tensor([1,0,0],dtype=torch.float),
            "cor": torch.tensor([0,1,0],dtype=torch.float),
            "sag": torch.tensor([0,0,1],dtype=torch.float),
        }

        # Deterministic part: load, orient, resample, align depth with the view,
        # resize to the network input size, rescale intensities.
        pipeline = [
            LoadImaged(keys=["image"], image_only=False),
            EnsureChannelFirstd(keys=["image"]),
            EnsureTyped(keys=["image"], track_meta=True),
            Orientationd(keys=["image"], axcodes="RAS"),
            Spacingd(keys=["image"], pixdim=(5, 1.5, 1.5), mode="bilinear"),
            ReorientToViewAxisd(keys=["image"], view_axis_getter=lambda d: d["view_axis"]),
            Resized(keys=["image"], spatial_size=spatial_size, mode="trilinear"),
            ScaleIntensityd(keys=["image"]),
        ]

        # Random augmentations are only instantiated when requested.
        if use_aug:
            pipeline.extend([
                RandFlipd(keys=["image"], spatial_axis=0, prob=0.3),
                RandFlipd(keys=["image"], spatial_axis=1, prob=0.3),
                RandAffined(keys=["image"], prob=0.2,
                            rotate_range=(0.1,0.1,0.1),
                            scale_range=(0.05,0.05,0.05)),
                RandZoomd(keys=["image"], min_zoom=0.9, max_zoom=1.1, prob=0.2),
            ])
        self.transform = Compose(pipeline)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        record = self.df.iloc[idx]
        path = record["path"]
        view_axis = path.split("_LF_")[-1].split(".nii")[0]
        image = self.transform({"image": path, "view_axis": view_axis})["image"]
        view_onehot = self.view2onehot[view_axis]
        if not self.is_train:
            return image, -1, record["filename"], view_onehot
        target = torch.tensor(int(record[self.target_label]), dtype=torch.long)
        return image, target, record["filename"], view_onehot

# =========================
# 4) Model (one label)
# =========================
class Model3DResnetOne(nn.Module):
    """r3d_18 backbone for a single label, conditioned on the acquisition view.

    A small conv adapter maps ``in_channels`` to the 3 channels the backbone expects.
    The pooled backbone features are concatenated with a 3-element view one-hot and
    passed through an MLP head that replaces ``backbone.fc``.

    The head emits 3 logits when ``mode == "multiclass"`` and 2 logits otherwise.
    ``num_classes`` is accepted but not used; the output size depends only on ``mode``.
    The first ``freeze_n`` stages among (stem, layer1..layer4) get ``requires_grad=False``.
    """

    def __init__(self, in_channels=1, num_classes=3, pretrained=True, dropout_p=0.3, freeze_n=2, mode="ordinal"):
        super().__init__()
        self.mode = mode
        self.backbone = r3d_18(pretrained=pretrained)
        adapter_layers = [
            nn.Conv3d(in_channels, 8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv3d(8, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv3d(16, 3, kernel_size=1),
        ]
        self.adapter = nn.Sequential(*adapter_layers)

        # Freeze the earliest `freeze_n` backbone stages.
        stages = (
            self.backbone.stem,
            self.backbone.layer1,
            self.backbone.layer2,
            self.backbone.layer3,
            self.backbone.layer4,
        )
        for stage_idx, stage in enumerate(stages):
            trainable = stage_idx >= freeze_n
            for param in stage.parameters():
                param.requires_grad = trainable

        feat_dim = self.backbone.fc.in_features
        n_outputs = 2 if mode != "multiclass" else 3
        # The "+ 3" accounts for the concatenated view one-hot.
        self.backbone.fc = nn.Sequential(
            nn.Dropout(p=dropout_p),
            nn.Linear(feat_dim + 3, feat_dim),
            nn.ReLU(),
            nn.Dropout(p=dropout_p),
            nn.Linear(feat_dim, n_outputs),
        )

    def forward(self, x, view_onehot):
        net = self.backbone
        feats = self.adapter(x)
        for stage in (net.stem, net.layer1, net.layer2, net.layer3, net.layer4):
            feats = stage(feats)
        pooled = net.avgpool(feats).flatten(1)
        fused = torch.cat([pooled, view_onehot], dim=1)
        return net.fc(fused)        # (B,3) or (B,2)

# =========================
# 5) Folds and loaders
# =========================
def make_folds(df, n_splits=5):
    """Return a copy of ``df`` with a ``fold`` column assigned by patient group.

    Rows sharing a ``patient_id`` always land in the same fold (GroupKFold).

    Args:
        df: DataFrame with a ``patient_id`` column and the columns listed in ``LABELS``.
        n_splits: number of folds.

    Returns:
        A copy of ``df`` (original index preserved) with an integer ``fold`` column
        in ``[0, n_splits)``.
    """
    df = df.copy()
    df["fold"] = -1
    gkf = GroupKFold(n_splits=n_splits)
    groups = df["patient_id"].values
    # NOTE(review): GroupKFold ignores `y`, so this sum does not stratify the folds.
    # Use StratifiedGroupKFold if label balance across folds is actually wanted.
    y_balance = df[LABELS].sum(axis=1).values
    fold_col = df.columns.get_loc("fold")
    for k, (_, val_idx) in enumerate(gkf.split(df, y_balance, groups)):
        # split() yields POSITIONAL indices, so assign with iloc. Label-based .loc
        # would hit the wrong rows whenever the index is not 0..n-1 (e.g. a filtered frame).
        df.iloc[val_idx, fold_col] = k
    return df

def build_loaders_one(df_tr, df_va, target_label, args):
    """Build the (train, validation) DataLoaders for one target label.

    Both datasets return labels (``is_train=True``). Only the training set uses
    augmentation (when ``args.use_aug`` is set) and only the training loader shuffles.
    """
    common = dict(batch_size=args.batch_size, num_workers=args.num_workers, pin_memory=True)
    train_set = MRIDataset3DOneTarget(df_tr, target_label, is_train=True,
                                      use_aug=args.use_aug, spatial_size=args.spatial_size)
    valid_set = MRIDataset3DOneTarget(df_va, target_label, is_train=True,
                                      use_aug=False, spatial_size=args.spatial_size)
    train_loader = DataLoader(train_set, shuffle=True, **common)
    valid_loader = DataLoader(valid_set, shuffle=False, **common)
    return train_loader, valid_loader

# =========================
# 6) Training per label and fold
# =========================
def ordinal_decode_one(logits, thr1=0.5, thr2=0.5):
    """Decode two-logit ordinal outputs into class predictions 0/1/2.

    Column 0 is read as P(y >= 1) and column 1 as P(y == 2), after a sigmoid.
    A sample is predicted 2 whenever ``p_eq2 >= thr2``, regardless of ``p_ge1``;
    otherwise it is 1 when ``p_ge1 >= thr1``, else 0.

    Returns:
        ``(pred, p_ge1, p_eq2)`` where ``pred`` is a long tensor on ``logits.device``.
    """
    probs = torch.sigmoid(logits)
    p_ge1, p_eq2 = probs[:, 0], probs[:, 1]
    at_least_one = (p_ge1 >= thr1).long()
    pred = torch.where(p_eq2 >= thr2, torch.full_like(at_least_one, 2), at_least_one)
    return pred, p_ge1, p_eq2

def evaluate_one(model, loader, device, mode, thr1, thr2):
    """Evaluate ``model`` on ``loader`` and return ``(macro_f1, accuracy, pred_counts)``.

    In "multiclass" mode predictions are the argmax of the logits. In any other mode
    they come from ``ordinal_decode_one`` with the given thresholds.
    ``pred_counts`` is the dict produced by ``count_pred012``.
    """
    model.eval()
    true_chunks, pred_chunks = [], []
    with torch.no_grad():
        for imgs, y, _, view1h in loader:
            imgs, y, view1h = imgs.to(device), y.to(device), view1h.to(device)
            logits = model(imgs, view1h)
            if mode == "multiclass":
                batch_pred = logits.argmax(-1)
            else:
                batch_pred = ordinal_decode_one(logits, thr1, thr2)[0]
            true_chunks.append(y.cpu().numpy())
            pred_chunks.append(batch_pred.cpu().numpy())

    y_true = np.concatenate(true_chunks, 0)
    y_pred = np.concatenate(pred_chunks, 0)
    macro_f1 = f1_score(y_true, y_pred, average="macro")
    accuracy = accuracy_score(y_true, y_pred)
    return macro_f1, accuracy, count_pred012(y_pred)

def train_one_fold_one_label(fold, target_label, df_folds, args, device):
    """Train one model for ``target_label`` on all folds except ``fold``; validate on ``fold``.

    After every epoch the model is evaluated on the held-out fold. Whenever the
    validation macro-F1 improves, a checkpoint is written to
    ``<args.out_dir>/<target_label>/fold<fold>/best.pt``.

    Args:
        fold: index of the validation fold.
        target_label: name of the label column to learn.
        df_folds: DataFrame with a ``fold`` column (see ``make_folds``).
        args: configuration object (see ``Args``).
        device: torch device for the model and batches.

    Returns:
        The best validation macro-F1 reached, as a plain Python float.
    """
    out_dir = os.path.join(args.out_dir, target_label, f"fold{fold}")
    ensure_dir(out_dir)
    df_tr = df_folds[df_folds.fold != fold].reset_index(drop=True)
    df_va = df_folds[df_folds.fold == fold].reset_index(drop=True)

    dl_tr, dl_va = build_loaders_one(df_tr, df_va, target_label, args)

    model = Model3DResnetOne(
        in_channels=1, num_classes=3,
        pretrained=True, dropout_p=args.dropout,
        freeze_n=args.freeze_n, mode=args.mode
    ).to(device)

    # One criterion per mode, used consistently in the loop below.
    if args.mode == "multiclass":
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = nn.BCEWithLogitsLoss()

    # Only parameters left trainable by the model's freezing logic are optimised.
    opt = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
                            lr=args.lr, weight_decay=args.wd)
    # `verbose` is deprecated in recent PyTorch; the default behaviour (silent) is what we want.
    sched = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode="max", factor=0.5, patience=2)
    scaler = torch.cuda.amp.GradScaler(enabled=args.amp)

    best_f1 = -1.0
    for epoch in range(args.epochs):
        model.train()
        losses = []
        pbar = tqdm(dl_tr, desc=f"[{target_label}] Fold {fold} Epoch {epoch}")
        for imgs, y, _, view1h in pbar:
            imgs, y, view1h = imgs.to(device), y.to(device), view1h.to(device)
            opt.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast(enabled=args.amp):
                logits = model(imgs, view1h)  # (B,3) or (B,2)
                if args.mode == "multiclass":
                    loss = criterion(logits, y)
                else:
                    # Ordinal targets: head 0 learns y>=1, head 1 learns y==2.
                    t1 = (y >= 1).float()
                    t2 = (y == 2).float()
                    loss = criterion(logits[:,0], t1) + criterion(logits[:,1], t2)
                    loss = loss / 2.0
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()
            losses.append(loss.item())
            pbar.set_postfix(loss=f"{np.mean(losses):.4f}")

        f1, acc, counts = evaluate_one(model, dl_va, device, args.mode, args.thr1, args.thr2)
        # Keep a plain float: a NumPy scalar inside the checkpoint can make it
        # unloadable with torch.load(weights_only=True).
        f1 = float(f1)
        print(f"Val | {target_label} | fold {fold} | F1_macro={f1:.4f} Acc={acc:.4f} | preds 0:{counts[0]} 1:{counts[1]} 2:{counts[2]}")
        sched.step(f1)

        if f1 > best_f1:
            best_f1 = f1
            save_path = os.path.join(out_dir, "best.pt")
            torch.save({"state_dict": model.state_dict(),
                        "epoch": epoch,
                        "best_f1": best_f1,
                        "mode": args.mode,
                        "thr1": args.thr1,
                        "thr2": args.thr2}, save_path)

    return best_f1

def train_all_labels(args):
    """Run the cross-validated training for every label in ``LABELS``.

    Uses the notebook-level ``df_train`` (not ``args.train_csv``). Writes the fold
    assignment to ``<out_dir>/df_folds.csv`` and a per-label summary to
    ``<out_dir>/offcv_summary.json``.

    Returns:
        dict mapping label -> {"bests_per_fold": [...], "offcv_mean_f1": float}.
    """
    set_seed(args.seed)
    ensure_dir(args.out_dir)
    device = args.device

    required_cols = {"filename","patient_id","path","view"}
    assert required_cols.issubset(df_train.columns)

    df_folds = make_folds(df_train, n_splits=args.folds)
    df_folds.to_csv(os.path.join(args.out_dir, "df_folds.csv"), index=False)

    summary = {}
    for lbl in LABELS:
        fold_scores = [
            train_one_fold_one_label(fold, lbl, df_folds, args, device)
            for fold in range(args.folds)
        ]
        mean_f1 = float(np.mean(fold_scores))
        summary[lbl] = {"bests_per_fold": fold_scores, "offcv_mean_f1": mean_f1}
        print(f"[{lbl}] OFF-CV F1_macro mean = {mean_f1:.4f}")

    summary_path = os.path.join(args.out_dir, "offcv_summary.json")
    with open(summary_path, "w") as f:
        json.dump(summary, f, indent=2)
    return summary

# =========================
# 7) Inference on df_test, per label
#    Returns a DataFrame with pred_* columns plus probability / score01 columns
# =========================
def load_best_model_for_label(args, label_name, fold, device):
    """Load ``<out_dir>/<label_name>/fold<fold>/best.pt`` and rebuild its model in eval mode.

    The head type and decision thresholds are taken from the checkpoint when present,
    falling back to the values on ``args``.

    Returns:
        ``(model, thr1, thr2)``.
    """
    ckpt_path = os.path.join(args.out_dir, label_name, f"fold{fold}", "best.pt")
    ckpt = torch.load(ckpt_path, map_location=device)
    net = Model3DResnetOne(
        in_channels=1,
        num_classes=3,
        pretrained=False,   # weights come from the checkpoint
        dropout_p=args.dropout,
        freeze_n=args.freeze_n,
        mode=ckpt.get("mode", args.mode),
    )
    net = net.to(device)
    net.load_state_dict(ckpt["state_dict"])
    net.eval()
    return net, ckpt.get("thr1", args.thr1), ckpt.get("thr2", args.thr2)

def predict_df_test_one_label(args, df_test, label_name, fold_for_infer=0, return_probs=True):
    """Predict one label on ``df_test`` with the best checkpoint of a single fold.

    Args:
        args: configuration object (see ``Args``).
        df_test: DataFrame with at least ``filename`` and ``path`` columns.
        label_name: label whose checkpoint is loaded.
        fold_for_infer: fold index of the checkpoint to use.
        return_probs: accepted for compatibility; currently unused, probability
            columns are always included.

    Returns:
        DataFrame with one row per file and the columns ``filename``,
        ``pred_<label>`` and ``score01_<label>``, plus ``p0/p1/p2_<label>``
        (multiclass) or ``p_ge1/p_eq2_<label>`` (ordinal).
    """
    # Use the same device selection as training; fall back to auto-detection
    # when args carries no device.
    device = getattr(args, "device", None) or ("cuda" if torch.cuda.is_available() else "cpu")
    model, thr1, thr2 = load_best_model_for_label(args, label_name, fold_for_infer, device)

    ds = MRIDataset3DOneTarget(df_test, target_label=label_name, is_train=False,
                               use_aug=False, spatial_size=args.spatial_size)
    dl = DataLoader(ds, batch_size=args.batch_size, shuffle=False,
                    num_workers=args.num_workers, pin_memory=True)

    recs = []
    with torch.no_grad():
        for imgs, _, fnames, view1h in tqdm(dl, desc=f"Infer {label_name} (fold{fold_for_infer})"):
            imgs, view1h = imgs.to(device), view1h.to(device)
            logits = model(imgs, view1h)
            if model.mode == "multiclass":
                probs = F.softmax(logits, dim=-1)
                pred = probs.argmax(-1).cpu().numpy()
                # Heuristic score in [0, 1]: 0*P0 + 0.5*P1 + 1*P2.
                score01 = (probs[:,1]*0.5 + probs[:,2]*1.0).cpu().numpy()
                probs_np = probs.cpu().numpy()
                for f, p, s, row in zip(fnames, pred, score01, probs_np):
                    recs.append({"filename": f,
                                 f"pred_{label_name}": int(p),
                                 f"score01_{label_name}": float(s),
                                 f"p0_{label_name}": float(row[0]),
                                 f"p1_{label_name}": float(row[1]),
                                 f"p2_{label_name}": float(row[2])})
            else:
                # Ordinal: logits -> p_ge1, p_eq2; score01 = (p_ge1 + p_eq2)/2.
                pred, p_ge1, p_eq2 = ordinal_decode_one(logits, thr1=thr1, thr2=thr2)
                score01 = ((p_ge1 + p_eq2)/2.0).cpu().numpy()
                for f, p, s, a, b in zip(fnames, pred.cpu().numpy(), score01,
                                         p_ge1.cpu().numpy(), p_eq2.cpu().numpy()):
                    recs.append({"filename": f,
                                 f"pred_{label_name}": int(p),
                                 f"score01_{label_name}": float(s),
                                 f"p_ge1_{label_name}": float(a),
                                 f"p_eq2_{label_name}": float(b)})
    return pd.DataFrame(recs)

def predict_df_test_all_labels(args):
    """Predict every label in ``LABELS`` on the notebook-level ``df_test`` using fold 0.

    The per-label frames are outer-merged on ``filename`` and written to
    ``<out_dir>/preds_test.csv``.

    NOTE: a later cell of this notebook redefines this name with a multi-fold
    ensembling variant, which shadows this definition once that cell runs.
    """
    assert {"filename","path","view"}.issubset(df_test.columns)
    per_label = [
        predict_df_test_one_label(args, df_test, lbl, fold_for_infer=0)
        for lbl in LABELS
    ]
    # Merge on filename, one label frame at a time.
    merged = per_label[0]
    for frame in per_label[1:]:
        merged = merged.merge(frame, on="filename", how="outer")
    out_path = os.path.join(args.out_dir, "preds_test.csv")
    merged.to_csv(out_path, index=False)
    print(f"[OK] guardado: {out_path}")
    return merged


In [14]:
import pandas as pd
import cv2
import sys
import os

sys.path.append("../../")
from src import utils
from src.dataset3D import MRIDataset3D
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
import matplotlib.pyplot as plt
# Seed RNGs through the project helper (implemented in src.utils, not shown in this notebook).
utils.set_seed(42)

# Directory holding the preprocessed CSV splits, relative to this notebook.
results_dir = '../../results/preprocessed_data/'

# Restrict this run to a single label; the remaining names are left commented out.
#labels=["Noise", "Zipper", "Positioning", "Banding", "Motion", "Contrast", "Distortion"]
LABELS = ["Noise"]#,"Zipper","Positioning","Banding","Motion","Contrast","Distortion"]

df_train = pd.read_csv(os.path.join(results_dir, 'df_train.csv'))
df_test = pd.read_csv(os.path.join(results_dir, 'df_test.csv'))
# Patient identifier parsed from the filename; make_folds() uses it as the grouping key.
df_train["patient_id"] = df_train["filename"].str.extract(r"(LISA_\d+)")
df_test["patient_id"]  = df_test["filename"].str.extract(r"(LISA_VALIDATION_\d+)")
    
# NOTE: not the last expression of the cell, so this line displays nothing.
df_train.head(2)

# NOTE(review): train_csv/test_csv are placeholder paths. The training and inference
# functions in this notebook read the global df_train / df_test instead.
args = Args(
    train_csv="/ruta/a/df_train.csv",
    test_csv ="/ruta/a/df_test.csv",
    out_dir  ="./runs_nb_5fold_ord_perlabel",
    folds=5, epochs=8, batch_size=2, num_workers=2,
    lr=3e-4, wd=1e-4, dropout=0.3, freeze_n=2,
    spatial_size=(40,120,120),
    use_aug=True, seed=42, amp=True,
    mode="ordinal",   # try the ordinal head first
    thr1=0.55,        # these thresholds can be tuned
    thr2=0.45,
    device="cuda:5"
)

summary = train_all_labels(args)   # trains len(LABELS) x folds models (here 1 label x 5 folds) and saves them




[Noise] Fold 0 Epoch 0: 100%|██████████| 212/212 [00:15<00:00, 13.26it/s, loss=0.3674]


Val | Noise | fold 0 | F1_macro=0.5936 Acc=0.8962 | preds 0:100 1:0 2:6


[Noise] Fold 0 Epoch 1: 100%|██████████| 212/212 [00:15<00:00, 13.43it/s, loss=0.3157]


Val | Noise | fold 0 | F1_macro=0.5618 Acc=0.8774 | preds 0:96 1:0 2:10


[Noise] Fold 0 Epoch 2: 100%|██████████| 212/212 [00:15<00:00, 13.45it/s, loss=0.2943]


Val | Noise | fold 0 | F1_macro=0.5703 Acc=0.8868 | preds 0:99 1:0 2:7


[Noise] Fold 0 Epoch 3: 100%|██████████| 212/212 [00:15<00:00, 13.66it/s, loss=0.2422]


Val | Noise | fold 0 | F1_macro=0.6170 Acc=0.8868 | preds 0:99 1:2 2:5


[Noise] Fold 0 Epoch 4: 100%|██████████| 212/212 [00:15<00:00, 13.53it/s, loss=0.3048]


Val | Noise | fold 0 | F1_macro=0.6233 Acc=0.8962 | preds 0:98 1:1 2:7


[Noise] Fold 0 Epoch 5: 100%|██████████| 212/212 [00:15<00:00, 13.51it/s, loss=0.2639]


Val | Noise | fold 0 | F1_macro=0.6770 Acc=0.8962 | preds 0:97 1:2 2:7


[Noise] Fold 0 Epoch 6: 100%|██████████| 212/212 [00:15<00:00, 13.56it/s, loss=0.2639]


Val | Noise | fold 0 | F1_macro=0.6171 Acc=0.8774 | preds 0:94 1:2 2:10


[Noise] Fold 0 Epoch 7: 100%|██████████| 212/212 [00:15<00:00, 13.49it/s, loss=0.2506]


Val | Noise | fold 0 | F1_macro=0.6491 Acc=0.9057 | preds 0:100 1:0 2:6


[Noise] Fold 1 Epoch 0: 100%|██████████| 212/212 [00:15<00:00, 13.47it/s, loss=0.3726]


Val | Noise | fold 1 | F1_macro=0.4890 Acc=0.8585 | preds 0:103 1:0 2:3


[Noise] Fold 1 Epoch 1: 100%|██████████| 212/212 [00:15<00:00, 13.41it/s, loss=0.2821]


Val | Noise | fold 1 | F1_macro=0.4906 Acc=0.8585 | preds 0:102 1:1 2:3


[Noise] Fold 1 Epoch 2: 100%|██████████| 212/212 [00:16<00:00, 13.23it/s, loss=0.2830]


Val | Noise | fold 1 | F1_macro=0.5849 Acc=0.8396 | preds 0:95 1:5 2:6


[Noise] Fold 1 Epoch 3: 100%|██████████| 212/212 [00:16<00:00, 13.04it/s, loss=0.2998]


Val | Noise | fold 1 | F1_macro=0.4576 Acc=0.7925 | preds 0:85 1:0 2:21


[Noise] Fold 1 Epoch 4: 100%|██████████| 212/212 [00:16<00:00, 13.23it/s, loss=0.3093]


Val | Noise | fold 1 | F1_macro=0.5649 Acc=0.8679 | preds 0:100 1:1 2:5


[Noise] Fold 1 Epoch 5: 100%|██████████| 212/212 [00:16<00:00, 13.05it/s, loss=0.2399]


Val | Noise | fold 1 | F1_macro=0.5120 Acc=0.8585 | preds 0:101 1:0 2:5


[Noise] Fold 1 Epoch 6: 100%|██████████| 212/212 [00:16<00:00, 13.22it/s, loss=0.2481]


Val | Noise | fold 1 | F1_macro=0.5668 Acc=0.8774 | preds 0:101 1:0 2:5


[Noise] Fold 1 Epoch 7: 100%|██████████| 212/212 [00:16<00:00, 13.11it/s, loss=0.2647]


Val | Noise | fold 1 | F1_macro=0.5975 Acc=0.8774 | preds 0:98 1:2 2:6


[Noise] Fold 2 Epoch 0: 100%|██████████| 212/212 [00:16<00:00, 12.92it/s, loss=0.3444]


Val | Noise | fold 2 | F1_macro=0.4403 Acc=0.8113 | preds 0:103 1:0 2:3


[Noise] Fold 2 Epoch 1: 100%|██████████| 212/212 [00:16<00:00, 12.98it/s, loss=0.3448]


Val | Noise | fold 2 | F1_macro=0.3793 Acc=0.7358 | preds 0:93 1:3 2:10


[Noise] Fold 2 Epoch 2: 100%|██████████| 212/212 [00:16<00:00, 13.00it/s, loss=0.2737]


Val | Noise | fold 2 | F1_macro=0.6005 Acc=0.8679 | preds 0:93 1:1 2:12


[Noise] Fold 2 Epoch 3: 100%|██████████| 212/212 [00:16<00:00, 12.96it/s, loss=0.2393]


Val | Noise | fold 2 | F1_macro=0.5530 Acc=0.7925 | preds 0:78 1:5 2:23


[Noise] Fold 2 Epoch 4: 100%|██████████| 212/212 [00:16<00:00, 13.25it/s, loss=0.2551]


Val | Noise | fold 2 | F1_macro=0.5758 Acc=0.8491 | preds 0:96 1:3 2:7


[Noise] Fold 2 Epoch 5: 100%|██████████| 212/212 [00:16<00:00, 13.18it/s, loss=0.3050]


Val | Noise | fold 2 | F1_macro=0.5668 Acc=0.8585 | preds 0:91 1:0 2:15


[Noise] Fold 2 Epoch 6: 100%|██████████| 212/212 [00:16<00:00, 13.05it/s, loss=0.2888]


Val | Noise | fold 2 | F1_macro=0.5735 Acc=0.8679 | preds 0:94 1:0 2:12


[Noise] Fold 2 Epoch 7: 100%|██████████| 212/212 [00:16<00:00, 12.80it/s, loss=0.2303]


Val | Noise | fold 2 | F1_macro=0.5837 Acc=0.8679 | preds 0:90 1:0 2:16


[Noise] Fold 3 Epoch 0: 100%|██████████| 212/212 [00:16<00:00, 12.82it/s, loss=0.3211]


Val | Noise | fold 3 | F1_macro=0.5194 Acc=0.8019 | preds 0:97 1:0 2:9


[Noise] Fold 3 Epoch 1: 100%|██████████| 212/212 [00:16<00:00, 12.72it/s, loss=0.2962]


Val | Noise | fold 3 | F1_macro=0.3810 Acc=0.7547 | preds 0:104 1:0 2:2


[Noise] Fold 3 Epoch 2: 100%|██████████| 212/212 [00:16<00:00, 12.93it/s, loss=0.2633]


Val | Noise | fold 3 | F1_macro=0.5305 Acc=0.8019 | preds 0:97 1:1 2:8


[Noise] Fold 3 Epoch 3: 100%|██████████| 212/212 [00:17<00:00, 12.28it/s, loss=0.2474]


Val | Noise | fold 3 | F1_macro=0.5614 Acc=0.8208 | preds 0:95 1:0 2:11


[Noise] Fold 3 Epoch 4: 100%|██████████| 212/212 [00:16<00:00, 13.20it/s, loss=0.2448]


Val | Noise | fold 3 | F1_macro=0.4058 Acc=0.7642 | preds 0:95 1:4 2:7


[Noise] Fold 3 Epoch 5: 100%|██████████| 212/212 [00:16<00:00, 12.71it/s, loss=0.2472]


Val | Noise | fold 3 | F1_macro=0.5490 Acc=0.8019 | preds 0:96 1:1 2:9


[Noise] Fold 3 Epoch 6: 100%|██████████| 212/212 [00:16<00:00, 12.96it/s, loss=0.2174]


Val | Noise | fold 3 | F1_macro=0.5523 Acc=0.8208 | preds 0:94 1:0 2:12


[Noise] Fold 3 Epoch 7: 100%|██████████| 212/212 [00:16<00:00, 12.99it/s, loss=0.2102]


Val | Noise | fold 3 | F1_macro=0.4933 Acc=0.7830 | preds 0:97 1:1 2:8


[Noise] Fold 4 Epoch 0: 100%|██████████| 212/212 [00:16<00:00, 13.13it/s, loss=0.3293]


Val | Noise | fold 4 | F1_macro=0.5643 Acc=0.8476 | preds 0:89 1:4 2:12


[Noise] Fold 4 Epoch 1: 100%|██████████| 212/212 [00:16<00:00, 13.03it/s, loss=0.2985]


Val | Noise | fold 4 | F1_macro=0.5646 Acc=0.8571 | preds 0:91 1:2 2:12


[Noise] Fold 4 Epoch 2: 100%|██████████| 212/212 [00:15<00:00, 13.47it/s, loss=0.2682]


Val | Noise | fold 4 | F1_macro=0.5813 Acc=0.8762 | preds 0:93 1:1 2:11


[Noise] Fold 4 Epoch 3: 100%|██████████| 212/212 [00:15<00:00, 13.36it/s, loss=0.2823]


Val | Noise | fold 4 | F1_macro=0.5443 Acc=0.8571 | preds 0:90 1:1 2:14


[Noise] Fold 4 Epoch 4: 100%|██████████| 212/212 [00:16<00:00, 12.70it/s, loss=0.2454]


Val | Noise | fold 4 | F1_macro=0.5793 Acc=0.8667 | preds 0:92 1:2 2:11


[Noise] Fold 4 Epoch 5: 100%|██████████| 212/212 [00:16<00:00, 12.90it/s, loss=0.2520]


Val | Noise | fold 4 | F1_macro=0.6001 Acc=0.8762 | preds 0:91 1:2 2:12


[Noise] Fold 4 Epoch 6: 100%|██████████| 212/212 [00:16<00:00, 12.66it/s, loss=0.2406]


Val | Noise | fold 4 | F1_macro=0.6021 Acc=0.8857 | preds 0:92 1:1 2:12


[Noise] Fold 4 Epoch 7: 100%|██████████| 212/212 [00:15<00:00, 13.31it/s, loss=0.2768]


Val | Noise | fold 4 | F1_macro=0.5909 Acc=0.8857 | preds 0:91 1:1 2:13
[Noise] OFF-CV F1_macro mean = 0.6077


In [16]:
def _infer_fold_probs(model, loader, device, thr1, thr2):
    """Run ``model`` over ``loader``; return ``(probs (N, 3), preds (N,))`` as NumPy arrays."""
    fold_probs, fold_preds = [], []
    with torch.no_grad():
        for imgs, _, fnames, view1h in loader:
            imgs, view1h = imgs.to(device), view1h.to(device)
            logits = model(imgs, view1h)
            if model.mode == "multiclass":
                p = F.softmax(logits, dim=-1).cpu().numpy()
                fold_probs.append(p)
                fold_preds.append(p.argmax(-1))
            else:
                pred, p_ge1, p_eq2 = ordinal_decode_one(logits, thr1, thr2)
                # Multiclass-like vector [P0, P1, P2] derived from the two ordinal heads.
                # NOTE(review): the two sigmoids are independent, so P1 = p_ge1 - p_eq2
                # can be negative. The three values still sum to 1.
                p0 = 1 - p_ge1
                p1 = p_ge1 - p_eq2
                p2 = p_eq2
                p = torch.stack([p0, p1, p2], dim=1).cpu().numpy()
                fold_probs.append(p)
                fold_preds.append(pred.cpu().numpy())
    return np.concatenate(fold_probs, axis=0), np.concatenate(fold_preds, axis=0)


def predict_df_test_all_labels(args, strategy="mean"):
    """Predict every label in ``LABELS`` on the notebook-level ``df_test``.

    Args:
        args: configuration object (see ``Args``).
        strategy: how fold checkpoints are combined.
            - "fold0": use the fold-0 checkpoint only.
            - "best":  use the fold with the highest score in ``offcv_summary.json``.
            - "mean":  average the [P0, P1, P2] vectors of all folds, then argmax.
            - "vote":  majority vote over the per-fold predictions (probabilities
              reported are still the fold mean).

    Returns:
        DataFrame with ``filename`` plus, per label, ``pred_``, ``p0_``, ``p1_``,
        ``p2_`` and ``score01_`` columns. It is also written to
        ``<out_dir>/preds_test_<strategy>.csv``.

    Raises:
        ValueError: unknown ``strategy`` (checked before any inference runs).
        FileNotFoundError: ``strategy == "best"`` and the summary file is missing.
    """
    # Fail fast instead of discovering a typo after running every fold.
    if strategy not in ("fold0", "best", "mean", "vote"):
        raise ValueError(f"Estrategia '{strategy}' no soportada")

    assert {"filename","path","view"}.issubset(df_test.columns)
    # Use the same device selection as training; fall back to auto-detection.
    device = getattr(args, "device", None) or ("cuda" if torch.cuda.is_available() else "cpu")

    summary = None
    if strategy == "best":
        # Per-fold validation scores written by train_all_labels.
        summary_path = os.path.join(args.out_dir, "offcv_summary.json")
        if not os.path.exists(summary_path):
            raise FileNotFoundError("No summary encontrado para estrategia 'best'")
        import json
        with open(summary_path) as f:
            summary = json.load(f)

    dfs_out = []
    for lbl in LABELS:
        print(f"[Predict] Label={lbl} Strategy={strategy}")

        # Only run the folds the strategy actually needs.
        if strategy == "fold0":
            folds_needed = [0]
        elif strategy == "best":
            folds_needed = [int(np.argmax(summary[lbl]["bests_per_fold"]))]
        else:
            folds_needed = list(range(args.folds))

        # The test dataset/loader do not depend on the fold: build them once per label.
        ds = MRIDataset3DOneTarget(df_test, target_label=lbl, is_train=False,
                                   use_aug=False, spatial_size=args.spatial_size)
        dl = DataLoader(ds, batch_size=args.batch_size, shuffle=False,
                        num_workers=args.num_workers, pin_memory=True)

        probs_list, preds_list = [], []
        for fold in folds_needed:
            # Thresholds come from each fold's own checkpoint.
            model, thr1, thr2 = load_best_model_for_label(args, lbl, fold, device)
            fold_probs, fold_preds = _infer_fold_probs(model, dl, device, thr1, thr2)
            probs_list.append(fold_probs)
            preds_list.append(fold_preds)

        # Combine folds.
        if strategy in ("fold0", "best"):
            final_probs = probs_list[0]
            final_preds = preds_list[0]
        elif strategy == "mean":
            final_probs = np.mean(probs_list, axis=0)
            final_preds = np.argmax(final_probs, axis=-1)
        else:  # "vote"
            stacked_preds = np.stack(preds_list, axis=0)  # (folds, N)
            from scipy.stats import mode
            final_preds, _ = mode(stacked_preds, axis=0, keepdims=False)
            final_probs = np.mean(probs_list, axis=0)

        # One frame per label; rows follow df_test order (the loader does not shuffle).
        df_lbl = pd.DataFrame({
            "filename": df_test["filename"],
            f"pred_{lbl}": final_preds,
            f"p0_{lbl}": final_probs[:,0],
            f"p1_{lbl}": final_probs[:,1],
            f"p2_{lbl}": final_probs[:,2],
            f"score01_{lbl}": final_probs[:,1]*0.5 + final_probs[:,2]
        })
        dfs_out.append(df_lbl)

    # Final merge on filename.
    df_final = dfs_out[0]
    for i in range(1, len(dfs_out)):
        df_final = df_final.merge(dfs_out[i], on="filename", how="outer")

    out_path = os.path.join(args.out_dir, f"preds_test_{strategy}.csv")
    df_final.to_csv(out_path, index=False)
    print(f"[OK] Guardado en {out_path}")
    return df_final


In [17]:
# Fold 0 checkpoint only
preds_fold0 = predict_df_test_all_labels(args, strategy="fold0")

# Best fold per label (highest per-fold validation macro-F1 stored in offcv_summary.json)
preds_best = predict_df_test_all_labels(args, strategy="best")

# Mean of the per-fold probabilities (the author's recommended strategy)
preds_mean = predict_df_test_all_labels(args, strategy="mean")

# Majority vote across folds
preds_vote = predict_df_test_all_labels(args, strategy="vote")


[Predict] Label=Noise Strategy=fold0




[OK] Guardado en ./runs_nb_5fold_ord_perlabel/preds_test_fold0.csv
[Predict] Label=Noise Strategy=best
[OK] Guardado en ./runs_nb_5fold_ord_perlabel/preds_test_best.csv
[Predict] Label=Noise Strategy=mean
[OK] Guardado en ./runs_nb_5fold_ord_perlabel/preds_test_mean.csv
[Predict] Label=Noise Strategy=vote
[OK] Guardado en ./runs_nb_5fold_ord_perlabel/preds_test_vote.csv
