In [1]:
import os
import glob
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from utils_functions.sort_files import alphanumeric_sort


# ---------------------------
# 0. Compute device
# ---------------------------
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [2]:
# ---------------------------
# Chemins des données
# ---------------------------
#PATH = "./data/"
#train_images = sorted(glob.glob(os.path.join(PATH, "train-images", "*.png")))

# MONAI imports (corrected)
from monai.transforms import (
    Compose,
    EnsureChannelFirstd,
    ScaleIntensityd,
    Resized,
    ToTensord,
)
from monai.networks.nets import UNet
from monai.losses import DiceCELoss
#from monai.metrics import compute_meandice
from monai.utils import set_determinism

# ---------------------------
# 0. Random seed and device
# ---------------------------
# Fix the seed through MONAI's set_determinism helper.
set_determinism(seed=42)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ---------------------------
# Data paths
# ---------------------------
PATH = "./data/"
# PNG files under <PATH>/train-images, ordered with the project's alphanumeric_sort key
# (presumably a natural sort so "10.png" follows "9.png" — TODO confirm).
train_images = sorted(glob.glob(os.path.join(PATH, "train-images", "*.png")),key=alphanumeric_sort)
# CSV file that holds the training masks.
mask_csv = os.path.join(PATH, "y_train.csv")

In [3]:
import warnings
from typing import Union

import torch

from monai.metrics.utils import do_metric_reduction, ignore_background
from monai.utils import MetricReduction


class DiceMetric:
    """
    Callable that scores a batch of segmentations with the Dice coefficient and reduces the result.

    The prediction `y_pred` and the ground truth `y` are both expected as BNHW[D] tensors
    (N = number of classes), one-hot encoded and binarized. Per-sample, per-class Dice values
    are obtained from ``compute_meandice`` and then aggregated with MONAI's
    ``do_metric_reduction``.

    Setting `include_background` to ``False`` drops the first channel (index 0, background by
    convention) before scoring, which is useful when small foreground structures would
    otherwise be dominated by the background.

    Args:
        include_background: whether channel 0 takes part in the Dice computation.
            Defaults to ``True``.
        reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
            ``"mean_channel"``, ``"sum_channel"``}
            How the per-sample, per-class values of one batch are reduced. Defaults to ``"mean"``.

    """

    def __init__(
        self,
        include_background: bool = True,
        reduction: Union[MetricReduction, str] = MetricReduction.MEAN,
    ) -> None:
        super().__init__()
        self.reduction = reduction
        self.include_background = include_background

    def __call__(self, y_pred: torch.Tensor, y: torch.Tensor):
        """
        Args:
            y_pred: binarized, one-hot prediction with the batch as first dimension,
                e.g. shape [16, 3, 32, 32].
            y: binarized, one-hot ground truth with the batch as first dimension.

        Returns:
            The pair produced by ``do_metric_reduction``: the reduced Dice value(s) and the
            matching ``not_nans`` count.

        Raises:
            ValueError: when `y` is not a binarized tensor.
            ValueError: when `y_pred` has less than three dimensions.
        """
        # A non-binary prediction is tolerated (warning only) ...
        pred_is_binary = torch.all(y_pred.byte() == y_pred)
        if not pred_is_binary:
            warnings.warn("y_pred is not a binarized tensor here!")

        # ... whereas a non-binary ground truth is rejected outright.
        target_is_binary = torch.all(y.byte() == y)
        if not target_is_binary:
            raise ValueError("y should be a binarized tensor.")

        if y_pred.ndimension() < 3:
            raise ValueError("y_pred should have at least three dimensions.")

        # Dice per sample and per channel, shape (B, C).
        per_class_dice = compute_meandice(
            y_pred=y_pred,
            y=y,
            include_background=self.include_background,
        )

        # Aggregate according to the configured reduction mode.
        reduced, not_nans = do_metric_reduction(per_class_dice, self.reduction)
        return reduced, not_nans



def compute_meandice(
    y_pred: torch.Tensor,
    y: torch.Tensor,
    include_background: bool = True,
) -> torch.Tensor:
    """Dice score for every sample and every class of a batch.

    Args:
        y_pred: binarized, one-hot prediction with the batch as first dimension,
            e.g. shape [16, 3, 32, 32].
        y: binarized, one-hot ground truth with the batch as first dimension.
        include_background: when False, the first channel is removed from both tensors
            (via ``ignore_background``) before scoring. Defaults to True.

    Returns:
        Tensor of shape [batch_size, n_classes]. An entry is NaN when the corresponding
        ground-truth channel is empty.

    Raises:
        ValueError: when `y_pred` and `y` have different shapes.

    """
    if not include_background:
        y_pred, y = ignore_background(y_pred=y_pred, y=y)

    target = y.float()
    prediction = y_pred.float()
    if target.shape != prediction.shape:
        raise ValueError("y_pred and y should have same shapes.")

    # Sum over the spatial axes only; batch (0) and channel (1) are kept.
    spatial_axes = list(range(2, len(prediction.shape)))
    overlap = torch.sum(target * prediction, dim=spatial_axes)
    target_volume = torch.sum(target, spatial_axes)
    predicted_volume = torch.sum(prediction, dim=spatial_axes)

    dice = (2.0 * overlap) / (target_volume + predicted_volume)
    undefined = torch.tensor(float("nan"), device=target_volume.device)
    # Dice is left undefined (NaN) where the ground truth has no foreground.
    return torch.where(target_volume > 0, dice, undefined)


In [4]:
# Load every mask into memory: read the CSV (first column as index), transpose it,
# and reshape the values into a stack of 256x256 int64 arrays.
# NOTE(review): assumes each CSV column holds one flattened 256x256 mask — confirm against the data.
masks = pd.read_csv(mask_csv, index_col=0).T.values.reshape(-1, 256, 256).astype(np.int64)

# ---------------------------
# 1. Dataset MONAI
# ---------------------------
class CTScanDataset(Dataset):
    """Dictionary-style dataset pairing grayscale images with their label masks.

    Args:
        image_paths: indexable collection of image file paths; its length is the dataset size.
        masks: indexable collection of per-image label arrays.
        transform: optional callable applied to the ``{"image", "mask"}`` sample dictionary.
    """

    def __init__(self, image_paths, masks, transform=None):
        self.image_paths, self.masks, self.transform = image_paths, masks, transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a dict with float32 "image" and int64 "mask"."""
        # Single-channel ("L") pixels as float32.
        pixels = np.array(Image.open(self.image_paths[idx]).convert("L"), dtype=np.float32)
        labels = self.masks[idx].astype(np.int64)

        sample = {"image": pixels, "mask": labels}
        return self.transform(sample) if self.transform else sample

# Dictionary-based MONAI transforms applied to every {"image", "mask"} sample.
train_transforms = Compose([
    # The keys must match the sample dictionary ("mask", not "label"). The dataset yields
    # plain 2-D (H, W) arrays without metadata, so the absence of a channel axis has to be
    # declared explicitly for the transform to add one.
    EnsureChannelFirstd(keys=["image", "mask"], channel_dim="no_channel"),
    ScaleIntensityd(keys=["image"]),               # rescale the image intensities
    # Nearest-neighbour for the mask so resizing can never blend class indices.
    Resized(keys=["image", "mask"], spatial_size=[256, 256], mode=("area", "nearest")),
    ToTensord(keys=["image", "mask"]),             # convert to tensors
])

# Build the dataset and the dataloader.
train_ds = CTScanDataset(train_images, masks, transform=train_transforms)
# On Windows the worker processes are spawned and cannot import a Dataset class defined in a
# notebook cell, which typically leaves the loader stuck on the first batch; load in the
# main process there and keep 4 workers elsewhere.
train_loader = DataLoader(
    train_ds,
    batch_size=8,
    shuffle=True,
    num_workers=0 if os.name == "nt" else 4,
)


In [5]:

# ---------------------------
# 2. MONAI UNet model
# ---------------------------
# 2-D UNet taking 1-channel inputs and producing 55 output channels, moved to `device`.
model = UNet(
    spatial_dims=2,
    in_channels=1,
    out_channels=55,
    channels=(32, 64, 128, 256, 512),
    strides=(2, 2, 2, 2),
    num_res_units=2,
).to(device)

# ---------------------------
# 3. Loss and metric
# ---------------------------
# Combined Dice + cross-entropy loss, configured to one-hot encode the target and to apply
# softmax to the predictions.
loss_fn = DiceCELoss(to_onehot_y=True, softmax=True)
# Metric: per-sample, per-class Dice scores.
metric_fn = compute_meandice

# ---------------------------
# 4. Training setup
# ---------------------------
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
num_epochs = 2

In [6]:
from tqdm import tqdm

In [None]:


for epoch in range(1, num_epochs+1):
    model.train()
    epoch_loss = 0.0

    for batch in tqdm(train_loader):
        imgs = batch['image'].to(device)          # (B,1,256,256)
        labels = batch['mask'].to(device)         # (B,1,256,256)

        optimizer.zero_grad()
        outputs = model(imgs)                     # (B,55,256,256)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(train_loader)

    # Quick Mean Dice estimate on a single mini-batch.
    model.eval()
    with torch.no_grad():
        sample = next(iter(train_loader))
        imgs = sample['image'].to(device)
        labels = sample['mask'].to(device)
        preds = model(imgs)

        # The metric takes binarized one-hot tensors of identical shape and has no
        # `to_onehot_y` / `softmax` options, so both sides are converted here:
        # logits -> arg-max class -> one-hot, and index labels -> one-hot.
        num_classes = preds.shape[1]
        pred_onehot = F.one_hot(preds.argmax(dim=1), num_classes).permute(0, 3, 1, 2)
        true_onehot = F.one_hot(labels.squeeze(1).long(), num_classes).permute(0, 3, 1, 2)

        dice = metric_fn(
            y_pred=pred_onehot, y=true_onehot,
            include_background=True,
        )
        # Classes absent from the ground truth come back as NaN; average the rest only.
        present = ~torch.isnan(dice)
        mean_dice = dice[present].mean().item() if present.any() else float("nan")

    print(f"Epoch {epoch}/{num_epochs} — Loss: {avg_loss:.4f} — Mean Dice: {mean_dice:.4f}")

# Save the trained weights.
torch.save(model.state_dict(), "unet_monai.pth")

  0%|                                                                                          | 0/250 [00:00<?, ?it/s]

In [None]:
import os
import glob
import numpy as np
import pandas as pd
from PIL import Image



# ---------------------------
# Data paths
# ---------------------------
PATH = "./data/"
# Directory holding the training images.
train_path = os.path.join(PATH, "train-images/")
# CSV file that holds the training masks.
mask_csv = os.path.join(PATH, "y_train.csv")

# ---------------------------
# 1. Dataset Definition
# ---------------------------
class HFCTScanDataset(Dataset):
    """Dataset yielding ``{"pixel_values", "labels"}`` dictionaries for a segmentation model.

    Args:
        image_dir: directory searched for ``*.png`` files (ordered with ``alphanumeric_sort``).
        mask_csv: CSV file whose transposed values are reshaped into 256x256 int64 masks.
        image_processor: callable invoked as ``processor(images=..., return_tensors="pt")``
            whose result exposes ``pixel_values``.
    """

    def __init__(self, image_dir, mask_csv, image_processor):
        pattern = os.path.join(image_dir, "*.png")
        self.image_paths = sorted(glob.glob(pattern), key=alphanumeric_sort)
        # The CSV is read with its first column as index and transposed before reshaping.
        flat_masks = pd.read_csv(mask_csv, index_col=0).T.values
        self.masks = flat_masks.reshape(-1, 256, 256).astype(np.int64)
        self.processor = image_processor

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the processed image and the mask tensor for sample ``idx``."""
        # Grayscale pixels replicated along a trailing axis to form a 3-channel image.
        gray = np.array(Image.open(self.image_paths[idx]).convert("L"))
        three_channel = np.stack([gray, gray, gray], axis=-1)
        encoded = self.processor(images=three_channel, return_tensors="pt")
        return {
            # Drop the leading batch axis added by the processor.
            "pixel_values": encoded.pixel_values.squeeze(0),
            # Labels stay as a 2-D (H, W) tensor.
            "labels": torch.from_numpy(self.masks[idx]),
        }

# ---------------------------
# 2. Feature Extractor & Model
# ---------------------------
# NOTE(review): AutoImageProcessor and SegformerForSemanticSegmentation are not imported in
# any visible cell — presumably they come from `transformers`; confirm and import them
# before this cell runs.
checkpoint = "nvidia/segformer-b0-finetuned-ade-512-512"
# NOTE(review): from_tf=True is passed to both loaders — verify it is intended for this checkpoint.
image_processor = AutoImageProcessor.from_pretrained(checkpoint,from_tf=True)
# The head is re-created for 55 labels; ignore_mismatched_sizes=True is passed because the
# label count differs from the checkpoint's (presumably its original classifier head — TODO confirm).
model = SegformerForSemanticSegmentation.from_pretrained(
    checkpoint,
    num_labels=55,
    ignore_mismatched_sizes=True,
    from_tf=True
    
)

# ---------------------------
# 3. Metrics (pixel accuracy)
# ---------------------------
import numpy as np
def compute_metrics(eval_pred):
    """Pixel-wise accuracy of arg-max predictions.

    Args:
        eval_pred: pair ``(logits, labels)``; the class axis of ``logits`` is axis 1 and
            ``labels`` holds class indices, with negative values marking pixels to skip.

    Returns:
        ``{"pixel_accuracy": value}`` where value is the share of non-negative-label pixels
        whose predicted class equals the label.
    """
    scores, targets = eval_pred
    predicted = np.argmax(scores, axis=1)
    # Only pixels carrying a non-negative label are scored.
    keep = targets >= 0
    n_correct = np.equal(predicted[keep], targets[keep]).sum()
    return {"pixel_accuracy": n_correct / keep.sum()}

# ---------------------------
# 4. Entraînement
# ---------------------------






In [2]:
# Fetch the 'dinov2_vits14' entry point of the facebookresearch/dinov2 torch.hub repository.
dinov2_vits14 = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')

Using cache found in C:\Users\rmondelice/.cache\torch\hub\facebookresearch_dinov2_main
Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth" to C:\Users\rmondelice/.cache\torch\hub\checkpoints\dinov2_vits14_pretrain.pth
100%|██████████████████████████████████████████████████████████████████████████████| 84.2M/84.2M [05:55<00:00, 249kB/s]


In [1]:
from utils_functions.sort_files import alphanumeric_sort

In [2]:
import os
import glob
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Load DINOv2 from torch.hub and switch it to evaluation mode.
dinov2_model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14', pretrained=True)
dinov2_model.eval()

# ---------------------------
# Data paths
# ---------------------------
PATH = "./data/"
# PNG files under <PATH>/train-images, ordered with the project's alphanumeric_sort key
# (presumably a natural sort — TODO confirm).
train_images = sorted(glob.glob(os.path.join(PATH, "train-images", "*.png")),key=alphanumeric_sort)
mask_csv = os.path.join(PATH, "y_train.csv")

# Load every mask into memory as a stack of 256x256 int64 arrays.
# NOTE(review): assumes each CSV column holds one flattened 256x256 mask — confirm against the data.
masks = pd.read_csv(mask_csv, index_col=0).T.values.reshape(-1, 256, 256).astype(np.int64)

# ---------------------------
# 1. Dataset
# ---------------------------
class CTScanDataset(Dataset):
    """Pairs grayscale images with label masks and returns 3-channel image tensors.

    Args:
        image_paths: indexable collection of image file paths; its length is the dataset size.
        masks: indexable collection of per-image NumPy label arrays.
        transform: optional callable applied to the (H, W) float32 image scaled to [0, 1].
            It may return a NumPy array or a tensor, with or without a leading channel axis.
    """

    def __init__(self, image_paths, masks, transform=None):
        self.image_paths = image_paths
        self.masks = masks
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    @staticmethod
    def _to_three_channels(img):
        """Return ``img`` as a (C, H, W) tensor, replicating a single gray channel 3 times."""
        img = torch.as_tensor(img)
        if img.ndim == 2:
            # Bare (H, W) image: add the channel axis first.
            img = img.unsqueeze(0)
        if img.ndim != 3:
            raise ValueError(f"expected a 2-D or 3-D image, got shape {tuple(img.shape)}")
        if img.shape[0] == 1:
            img = img.repeat(3, 1, 1)
        return img

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for sample ``idx``; the image has shape (3, H, W)."""
        img = Image.open(self.image_paths[idx]).convert("L")
        img = np.array(img, dtype=np.float32) / 255.0
        if self.transform:
            img = self.transform(img)
        # Duplicate the gray channel for the 3-channel backbone. A tensor-producing
        # transform already yields (1, H, W); stacking that three times would create a
        # (3, 1, H, W) sample, so the existing channel axis is reused instead.
        img3 = self._to_three_channels(img)
        mask = self.masks[idx]
        return img3, torch.from_numpy(mask)

# Transforms: tensor conversion, resize and normalisation.
transform = transforms.Compose([
    transforms.ToTensor(),          # converts to [C,H,W]
    transforms.Resize((224,224)),   # 224x224 input size used for DINOv2
    transforms.Normalize(mean=0.5, std=0.5)
])

train_ds = CTScanDataset(train_images, masks, transform=transform)
# On Windows the worker processes are spawned and cannot import a Dataset class defined in a
# notebook cell, which typically leaves the loader stuck on the first batch; load in the
# main process there and keep 4 workers elsewhere.
train_loader = DataLoader(
    train_ds,
    batch_size=8,
    shuffle=True,
    num_workers=0 if os.name == "nt" else 4,
)

# ---------------------------
# 2. Segmentation model
# ---------------------------
class Dinov2Seg(nn.Module):
    """Light convolutional segmentation head on top of a frozen backbone.

    Args:
        backbone: module exposing ``embed_dim`` and ``forward_features``. The latter may
            return either a dict containing ``"x_norm_patchtokens"`` of shape (B, P, D)
            (the DINOv2 hub models) or a (B, P+1, D) tensor whose first token is CLS.
        num_classes: number of output channels of the head.
    """

    def __init__(self, backbone, num_classes=55):
        super().__init__()
        self.backbone = backbone
        # Freeze the backbone: only the head below is trainable.
        for p in self.backbone.parameters():
            p.requires_grad = False
        embed_dim = self.backbone.embed_dim
        # 1x1 projection followed by three stride-2 transposed convolutions (x8 in total).
        self.conv1 = nn.Conv2d(embed_dim, 256, kernel_size=1)
        self.up1 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.up2 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.up3 = nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2)
        self.out_conv = nn.Conv2d(32, num_classes, kernel_size=1)

    def forward(self, x):
        """Map a (B, 3, H, W) batch to (B, num_classes, 256, 256) logits.

        Raises:
            ValueError: when the number of patch tokens is not a perfect square.
        """
        feats = self.backbone.forward_features(x)
        if isinstance(feats, dict):
            # DINOv2 returns a dict; the patch tokens already exclude the CLS token.
            feats = feats["x_norm_patchtokens"]    # (B, P, D)
        else:
            feats = feats[:, 1:, :]                # drop the CLS token -> (B, P, D)
        B, P, D = feats.shape
        h = w = int(round(P ** 0.5))
        if h * w != P:
            raise ValueError(f"cannot arrange {P} patch tokens on a square grid")
        feats = feats.permute(0, 2, 1).reshape(B, D, h, w)  # (B, D, h, w)

        y = self.conv1(feats)
        y = self.up1(y)   # x2
        y = self.up2(y)   # x4
        y = self.up3(y)   # x8
        # e.g. 224x224 input with 14-pixel patches: 16x16 grid -> 32 -> 64 -> 128
        y = self.out_conv(y)
        # Final bilinear resize to the 256x256 mask resolution.
        y = nn.functional.interpolate(y, size=(256,256), mode='bilinear', align_corners=False)
        return y  # (B, num_classes, 256, 256)

# Instantiation: pick the device and move the segmentation model onto it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Dinov2Seg(dinov2_model).to(device)


Using cache found in C:\Users\rmondelice/.cache\torch\hub\facebookresearch_dinov2_main


In [None]:
from tqdm import tqdm
# ---------------------------
# 3. Training
# ---------------------------
criterion = nn.CrossEntropyLoss()
# Parameters with requires_grad=False are skipped by the optimizer.
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-2)

num_epochs = 10
for epoch in range(1, num_epochs+1):
    model.train()
    total_loss = 0.0
    # The batch_* names keep the loop from overwriting the module-level `masks` array,
    # so the dataset cell can still be re-run after training.
    for batch_imgs, batch_masks in tqdm(train_loader):
        batch_imgs = batch_imgs.to(device)
        batch_masks = batch_masks.to(device)
        optimizer.zero_grad()
        logits = model(batch_imgs)
        loss = criterion(logits, batch_masks)
        loss.backward()
        optimizer.step()
        # Weight by batch size so the epoch figure is a per-sample average.
        total_loss += loss.item() * batch_imgs.size(0)
    print(f"Epoch {epoch}/{num_epochs} - Loss: {total_loss/len(train_loader.dataset):.4f}")

# Save the trained weights.
torch.save(model.state_dict(), "dinov2_seg.pt")


  0%|                                                                                          | 0/250 [00:00<?, ?it/s]