In [None]:
from monai.data import PersistentDataset, DataLoader
import pandas as pd
import numpy as np
from pathlib import Path
import monai.transforms as T
import torch
# Using MONAI for medical imaging tasks
# ---- Config ----
# NOTE(review): these are absolute, machine-specific paths; point them at your own
# copy of the data (or derive them from a configurable data root) before running elsewhere.
csv_path = Path("/home/tibia/Projet_Hemorragie/Seg_hemorragie/Classification_RSNA/data/csv/train_fold0.csv")
dicom_dir = Path("/home/tibia/Projet_Hemorragie/Seg_hemorragie/Classification_RSNA/data/rsna-intracranial-hemorrhage-detection/stage_2_train")
cache_dir = Path("./persistent_cache/fold0")

# One target per column; the order here is the order of the entries in each label vector.
label_cols = ['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']
df = pd.read_csv(csv_path)

# ---- Build MONAI-style data list ----
# Extract all label columns at once as a float32 matrix instead of walking the frame
# with DataFrame.iterrows(), which builds a Series per row and is very slow on large CSVs.
label_matrix = df[label_cols].to_numpy(dtype=np.float32)
data_list = [
    {
        # Full path to the image file, as a plain string.
        "image": str(dicom_dir / filename),
        # np.array(...) copies the row, so every sample owns a standalone 1-D float32 vector.
        "label": np.array(label_row, dtype=np.float32),
    }
    for filename, label_row in zip(df["filename"], label_matrix)
]

# ---- Transforms ----
from monai.transforms import (
    Compose, LoadImaged, EnsureChannelFirstd, ResizeD, ScaleIntensityRanged, ToTensord
)

# Intensity window (center / width); values outside it are clipped and the window
# itself is rescaled to [0, 1] by ScaleIntensityRanged below.
window_preset = {"window_center": 40, "window_width": 80}

train_transforms = Compose([
    LoadImaged(keys=["image"]),
    ScaleIntensityRanged(
        keys=["image"],
        a_min=window_preset["window_center"] - window_preset["window_width"] // 2,
        a_max=window_preset["window_center"] + window_preset["window_width"] // 2,
        b_min=0.0,
        b_max=1.0,
        clip=True
    ),
    EnsureChannelFirstd(keys=["image"]),
    ResizeD(keys=["image"], spatial_size=(224, 224)),
    ToTensord(keys=["image", "label"])
])


# ---- PersistentDataset ----
# Transformed samples are stored on disk under cache_dir.
train_dataset = PersistentDataset(
    data=data_list,
    transform=train_transforms,
    cache_dir=str(cache_dir),
)



print(f" Dataset ready with {len(train_dataset)} samples and cached transforms at {cache_dir}")


KeyboardInterrupt: 

In [3]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
DEVICE


device(type='cuda')

In [4]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from monai.networks.nets import densenet121
from monai.transforms import Compose, Resize, ToTensor
from monai.data import DataLoader, PersistentDataset, Dataset
from tqdm import tqdm
import os
# === Hyperparameters ===
NUM_CLASSES = 6
BATCH_SIZE = 32
EPOCHS = 3
LR = 1e-3

# === Data loader ===
# Shuffled batches over the cached training set, served by 8 worker processes
# that stay alive between epochs and hand back pinned-memory tensors.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=8,
    persistent_workers=True,
    pin_memory=True,
)
print(f"Using device: {DEVICE}")
print(f"Number of Batches in the dataset: {len(train_loader)}")

# === Model ===
# 2-D DenseNet-121 taking single-channel images and emitting one logit per class.
model = densenet121(spatial_dims=2, in_channels=1, out_channels=NUM_CLASSES)
model.to(DEVICE)
print (model)

# === Loss (original note: "inspired by Al") ===
# Every class currently gets the same positive weight of 1.0.
pos_weights = torch.ones(NUM_CLASSES, dtype=torch.float).to(DEVICE)
print(f"répartition des poids : {pos_weights}")
loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weights)

# === Optimizer ===
optimizer = torch.optim.Adam(model.parameters(), lr=LR)



Using device: cuda
Number of Batches in the dataset: 16859
DenseNet121(
  (features): Sequential(
    (conv0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (layers): Sequential(
          (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu1): ReLU(inplace=True)
          (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu2): ReLU(inplace=True)
          (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
      )
      (denselayer2): _DenseLay

In [5]:
from timeit import default_timer as timer
def print_train_time(start:float,end:float,device:torch.device=None):
    """Print and return the time elapsed between two timer readings.

    Args:
        start: Timer reading taken before the work began.
        end: Timer reading taken after the work finished.
        device: Device the work ran on; only used in the printed message.

    Returns:
        The difference ``end - start``.
    """
    total_time = end - start
    message = f"Train time on {device}: {total_time:.3f} seconds"
    print(message)
    return total_time

def compute_accuracy(y_pred, y_true, threshold=0.5):
    """
    Return the multi-label accuracy, scoring every label independently.

    Args:
        y_pred: Raw logits; a sigmoid is applied before thresholding.
        y_true: Ground-truth labels, compared after conversion to bool.
        threshold: Probability strictly above which a label counts as predicted.

    Returns:
        The fraction of individual label entries predicted correctly, as a Python float.
    """
    predicted_labels = torch.sigmoid(y_pred).gt(threshold)
    is_correct = predicted_labels.eq(y_true.bool())
    return is_correct.float().mean().item()



In [12]:
def train_step(model:torch.nn.Module,
                dataloader:torch.utils.data.DataLoader,
                loss_fn:torch.nn.Module,
                optimizer:torch.optim.Optimizer,
                compute_accuracy,
                device:torch.device=DEVICE):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        dataloader: Yields dict batches with an ``"image"`` and a ``"label"`` entry.
        loss_fn: Callable taking ``(logits, targets)`` and returning a scalar loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        compute_accuracy: Callable taking ``y_pred`` (logits) and ``y_true`` keyword
            arguments and returning a number.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(train_loss, train_acc)``: the per-batch loss and accuracy averaged over
        the number of batches in ``dataloader``.
    """
    train_loss, train_acc = 0.0, 0.0

    model.train()

    for i, batch in enumerate(dataloader):
        X = batch["image"].to(device)
        y = batch["label"].to(device)

        # 1. Forward pass (raw logits from the model)
        y_pred = model(X)

        # 2. Loss and accuracy for this batch
        loss = loss_fn(y_pred, y)
        # Accumulate a plain float: adding the loss tensor itself would make the running
        # total hold on to every batch's autograd graph for the whole epoch.
        train_loss += loss.item()
        train_acc += compute_accuracy(y_pred=y_pred, y_true=y)  # logits -> predicted labels

        # 3. Reset gradients left over from the previous batch
        optimizer.zero_grad()

        # 4. Backpropagate
        loss.backward()

        # 5. Update the parameters
        optimizer.step()

        if (i % 400 == 0):
            print(f"Looked at {i * len(X)}/{len(dataloader.dataset)} samples")

    # Average the accumulated loss and accuracy over the number of batches
    train_loss /= len(dataloader)
    train_acc /= len(dataloader)

    print(f"Train loss: {train_loss:.5f} | Train acc: {train_acc:.4f}")
    return train_loss, train_acc


def val_step(model:torch.nn.Module,
                dataloader:torch.utils.data.DataLoader,
                loss_fn:torch.nn.Module,
                compute_accuracy,
                device:torch.device=DEVICE):
    """Evaluate ``model`` on one pass over ``dataloader`` without updating it.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        dataloader: Yields either dict batches with ``"image"`` / ``"label"`` entries
            (the format ``train_step`` consumes) or ``(inputs, targets)`` pairs.
        loss_fn: Callable taking ``(logits, targets)`` and returning a scalar loss tensor.
        compute_accuracy: Callable taking ``y_pred`` (logits) and ``y_true`` keyword
            arguments and returning a number.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(test_loss, test_acc)``: the per-batch loss and accuracy averaged over
        the number of batches in ``dataloader``.
    """
    test_loss, test_acc = 0.0, 0.0

    model.eval()

    with torch.inference_mode():
        for batch, batch_data in enumerate(dataloader):
            # Unpacking a dict batch as a tuple would yield its string keys, so read
            # dict batches by key and only unpack real (inputs, targets) pairs.
            if isinstance(batch_data, dict):
                X_test, y_test = batch_data["image"], batch_data["label"]
            else:
                X_test, y_test = batch_data
            X_test, y_test = X_test.to(device), y_test.to(device)

            # 1. Forward pass (raw logits from the model)
            test_pred = model(X_test)

            # 2. Loss and accuracy for this batch, accumulated as plain floats
            loss = loss_fn(test_pred, y_test)
            test_loss += loss.item()
            test_acc += compute_accuracy(y_true=y_test, y_pred=test_pred)  # logits -> predicted labels

            if (batch % 400 == 0):
                print(f"Looked at {batch * len(X_test)}/{len(dataloader.dataset)} samples")

    # Average the accumulated loss and accuracy over the number of batches
    test_loss /= len(dataloader)
    test_acc /= len(dataloader)

    print(f"Test loss: {test_loss:.5f} | Test acc: {test_acc:.4f}")
    return test_loss, test_acc

In [None]:
# === Training loop ===
# Seed the default and the CUDA random number generators before iterating.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Wall-clock reading taken right before the first epoch.
start_time = timer()

for epoch in tqdm(range(EPOCHS)):
    print(f"Epoch: {epoch}\n-------")
    # One full pass over the training loader per epoch.
    train_step(model, train_loader, loss_fn, optimizer, compute_accuracy, DEVICE)

# Report how long all epochs took.
end_time = timer()
total_train_time = print_train_time(start=start_time, end=end_time, device=DEVICE)

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
-------
Looked at 0/539469 samples
Looked at 12800/539469 samples
Looked at 25600/539469 samples
Looked at 38400/539469 samples


  0%|          | 0/3 [06:24<?, ?it/s]


KeyboardInterrupt: 

: 