In [None]:
from monai.data import PersistentDataset, DataLoader
import pandas as pd
import numpy as np
from pathlib import Path
import monai.transforms as T
import torch
# Using MONAI for medical imaging tasks
# ---- Config ----
csv_path = Path("/home/tibia/Projet_Hemorragie/MBH_label_case/splits/train_split.csv")
nii_dir = Path("/home/tibia/Projet_Hemorragie/MBH_label_case")
cache_dir = Path("./persistent_cache/3D_train_cache")  
# Ensure cache directory exists
cache_dir.mkdir(parents=True, exist_ok=True)

label_cols = ['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']
df = pd.read_csv(csv_path)

# ---- Build MONAI-style data list ----
data_list = [
    {
        "image": str(nii_dir / f"{row['patientID_studyID']}.nii.gz"),
        "label": np.array([row[col] for col in label_cols], dtype=np.float32)
    }
    for _, row in df.iterrows()
]

#print (data_list[:5] ) # Print first 5 entries for debugging
# ---- Transforms ----
window_preset = {"window_center": 40, "window_width": 80}


window_preset = {"window_center": 40, "window_width": 80}

train_transforms = T.Compose([
    # Load image only
    T.LoadImaged(keys=["image"], image_only=True),  
    T.EnsureChannelFirstd(keys=["image"]),
    
    # Harmonisation spatiale
    T.Orientationd(keys=["image"], axcodes='RAS'),  # standard orientation
    T.Spacingd(keys=["image"], pixdim=(1.0, 1.0, 1.0), mode="bilinear"),  # isometric voxels
    
    # Padding/cropping
   
    T.ResizeD(keys=["image"], spatial_size=(224, 224, -1 )), 
    T.SpatialPadd(keys=["image"], spatial_size=(128, 128, 64)) ,# Padding to ensure consistent size : on garde toujours tous les slices mais on ajoute du vide

    # Intensity
    T.ScaleIntensityRanged(
        keys=["image"],
        a_min=window_preset["window_center"] - window_preset["window_width"] // 2,
        a_max=window_preset["window_center"] + window_preset["window_width"] // 2,
        b_min=0.0,
        b_max=1.0,
        clip=True
    ),

    # Augmentations
    T.RandFlipd(keys=["image"], spatial_axis=[0, 1, 2], prob=0.5),
    T.RandRotate90d(keys=["image"], spatial_axes=(0, 1), prob=0.5),
    T.RandScaleIntensityd(keys=["image"], factors=0.1, prob=0.5),
    T.RandShiftIntensityd(keys=["image"], offsets=0.1, prob=0.5),

    # Final tensor
    T.ToTensord(keys=["image", "label"])
])

for data in data_list:
    try:
        sample = train_transforms(data)
    except Exception as e:
        print(f"❌ Error loading: {data['image']}")
        raise e
# ---- PersistentDataset ----
train_dataset = PersistentDataset(
    data=data_list,
    transform=train_transforms,
    cache_dir=str(cache_dir),
)



print(f" Dataset ready with {len(train_dataset)} samples and cached transforms at {cache_dir}")



In [20]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE


device(type='cuda')

In [21]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.models import resnet18
from monai.networks.nets import densenet121,SEResNet50,ResNet
from monai.transforms import Compose, Resize, ToTensor
from monai.data import DataLoader, PersistentDataset, Dataset
from tqdm import tqdm
import os
# === Hyperparams ===

NUM_CLASSES = 6
BATCH_SIZE = 32
EPOCHS = 3
LR = 1e-3

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=8,persistent_workers= True,pin_memory=True)
print(f"Using device: {DEVICE}")
print(f"Number of Batches in the dataset: {len(train_loader)}")

# === Model ===
model = ResNet(
    block='basic',           # BasicBlock for ResNet18/34
    layers=[2, 2, 2, 2],    # ResNet18 architecture
    block_inplanes=[64, 128, 256, 512],
    spatial_dims=3,
    n_input_channels=1,     # Your grayscale input
    num_classes=NUM_CLASSES,
    conv1_t_size=7,
    conv1_t_stride=2
)
# model = SEResNet50(
#     spatial_dims=2,         # Because you work with 2D CT slices
#     in_channels=1,          # 1 channel for grayscale CT (unless you use 3-slice input, see tip below)
#     num_classes= NUM_CLASSES,          # Set this to number of hemorrhage types you want to classify
#     pretrained=False,       # Can be True if input has 3 channels and you're okay fine-tuning from ImageNet
#     dropout_prob=0.2,       # Helps regularize on smaller datasets
#     reduction=16,           # Default for Squeeze-and-Excitation; works well
#     input_3x3=True,         # Enables better local feature extraction at first layer
#     downsample_kernel_size=3  # Slightly better spatial feature preservation
#)
#model = densenet121(spatial_dims=2, in_channels=1, out_channels=NUM_CLASSES)
model.to(DEVICE)
#print (model)
# === Loss (Inspiré Al )

pos_weights = torch.tensor([1.0] * NUM_CLASSES, dtype=torch.float).to(DEVICE)
print(f"répartition des poids : {pos_weights}")
loss_fn= nn.BCEWithLogitsLoss(pos_weight=pos_weights)

# === Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=LR)



Using device: cuda
Number of Batches in the dataset: 40
répartition des poids : tensor([1., 1., 1., 1., 1., 1.], device='cuda:0')


In [22]:
try : 
    ! pip install torchinfo

except:
    print("torchinfo is already installed or installation failed.")
    
import torchinfo
from torchinfo import summary
# Display model summary
model = model.to(DEVICE)
summary(model, input_size=(BATCH_SIZE, 1, 224, 224,64))


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [32, 6]                   --
├─Conv3d: 1-1                            [32, 64, 112, 112, 32]    21,952
├─BatchNorm3d: 1-2                       [32, 64, 112, 112, 32]    128
├─ReLU: 1-3                              [32, 64, 112, 112, 32]    --
├─MaxPool3d: 1-4                         [32, 64, 56, 56, 16]      --
├─Sequential: 1-5                        [32, 64, 56, 56, 16]      --
│    └─ResNetBlock: 2-1                  [32, 64, 56, 56, 16]      --
│    │    └─Conv3d: 3-1                  [32, 64, 56, 56, 16]      110,592
│    │    └─BatchNorm3d: 3-2             [32, 64, 56, 56, 16]      128
│    │    └─ReLU: 3-3                    [32, 64, 56, 56, 16]      --
│    │    └─Conv3d: 3-4                  [32, 64, 56, 56, 16]      110,592
│    │    └─BatchNorm3d: 3-5             [32, 64, 56, 56, 16]      128
│    │    └─ReLU: 3-6                    [32, 64, 56, 56, 16]      -

In [14]:
from timeit import default_timer as timer
def print_train_time(start:float,end:float,device:torch.device=None):
    total_time=end-start
    print(f"Train time on {device}: {total_time:.3f} seconds")
    return total_time

def compute_accuracy(y_pred, y_true, threshold=0.5):
    """
    Renvoie l'accuracy multilabel (exact match pour chaque label indépendamment).
    """
    preds = torch.sigmoid(y_pred) > threshold
    #print(f"preds: {preds}")
    correct = (preds == y_true.bool()).float()
    #print(f"correct: {correct}")
    return correct.mean().item()



In [24]:
def train_step(model:torch.nn.Module,
                dataloader:torch.utils.data.DataLoader,
                loss_fn:torch.nn.Module,
                optimizer:torch.optim,
                device:torch.device=DEVICE):
   
    train_loss=0


    model.train()

    for i, batch in enumerate(dataloader):
      X = batch["image"].to(device)
      y = batch["label"].to(device)
    
      #1. forward pass (output the raw logits from the model )
      y_pred=model(X)

      #2. Calculate loss and accuracy (per batch)
      loss=loss_fn(y_pred,y)
      train_loss += loss


      #3. Optimizer zero grad 
      optimizer.zero_grad()

      #4. Loss backward
      loss.backward()

      #5. Optimizer step
      
      optimizer.step()
      #Divide total train loss and acc by lenght of train dataloader
    
      if (i% 400 == 0):
        print(f"Looked at {i * len(X)}/{len(dataloader.dataset)} samples")
      
    train_loss /= len(dataloader)
   

    print(f"Train loss: {train_loss:.5f} ")


def val_step(model:torch.nn.Module,
                dataloader:torch.utils.data.DataLoader,
                loss_fn:torch.nn.Module,
                device:torch.device=DEVICE):
    """Performs a testing loop step on model going over data_loader"""
   
    test_loss=0


    model.eval()

    with torch.inference_mode():
      for batch,(X_test,y_test) in enumerate(dataloader):
     
        X_test,y_test=X_test.to(device),y_test.to(device)

      #1. forward pass (output the raw logits from the model )
        test_pred=model(X_test)

      #2. Calculate loss and accuracy (per batch)
        loss=loss_fn(test_pred,y_test)
        test_loss += loss
        test_acc += compute_accuracy(y_true=y_test,y_pred=test_pred) # go from logits -> prediction labels
     
        if (batch % 400 == 0):
          print(f"Looked at {batch * len(X_test)}/{len(dataloader.dataset)} samples")
    #Divide total test loss and acc by lenght of test dataloader
      test_loss /= len(dataloader)
   

      print(f"Test loss: {test_loss:.5f} ")

In [None]:
# # === Training Loop ===
# from tqdm import tqdm

torch.manual_seed(42)
torch.cuda.manual_seed(42)

start_time = timer()

for epoch in tqdm(range(EPOCHS)):
    print(f"Epoch: {epoch}\n-------")
    train_step(model=model,
               dataloader=train_loader,
               loss_fn=loss_fn,
               optimizer=optimizer,
     
               device=DEVICE)
    

end_time = timer()
total_train_time = print_train_time(start_time, end_time, DEVICE)

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
-------





RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/monai/transforms/transform.py", line 141, in apply_transform
    return _apply_transform(transform, data, unpack_items, lazy, overrides, log_stats)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/monai/transforms/transform.py", line 98, in _apply_transform
    return transform(data, lazy=lazy) if isinstance(transform, LazyTrait) else transform(data)
                                                                               ^^^^^^^^^^^^^^^
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/monai/transforms/io/dictionary.py", line 163, in __call__
    data = self._loader(d[key], reader)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/monai/transforms/io/array.py", line 264, in __call__
    img = reader.read(filename)
          ^^^^^^^^^^^^^^^^^^^^^
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/monai/data/image_reader.py", line 491, in read
    ds = pydicom.dcmread(fp=name, **kwargs_)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/pydicom/filereader.py", line 1042, in dcmread
    fp = open(fp, "rb")
         ^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: '/home/tibia/Projet_Hemorragie/MBH_label_case/ID_4e069b3d_ID_0320ad6a17'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
            ~~~~~~~~~~~~^^^^^
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/monai/data/dataset.py", line 108, in __getitem__
    return self._transform(index)
           ^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/monai/data/dataset.py", line 412, in _transform
    pre_random_item = self._cachecheck(self.data[index])
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/monai/data/dataset.py", line 385, in _cachecheck
    _item_transformed = self._pre_transform(deepcopy(item_transformed))  # keep the original hashed
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/monai/data/dataset.py", line 323, in _pre_transform
    item_transformed = self.transform(item_transformed, end=first_random, threading=True)
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/monai/transforms/compose.py", line 335, in __call__
    result = execute_compose(
             ^^^^^^^^^^^^^^^^
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/monai/transforms/compose.py", line 111, in execute_compose
    data = apply_transform(
           ^^^^^^^^^^^^^^^^
  File "/home/tibia/Projet_Hemorragie/hemorragie-env/lib/python3.12/site-packages/monai/transforms/transform.py", line 171, in apply_transform
    raise RuntimeError(f"applying transform {transform}") from e
RuntimeError: applying transform <monai.transforms.io.dictionary.LoadImaged object at 0x7fbaea698050>
