In [2]:
from monai.data import PersistentDataset, DataLoader
import pandas as pd
import numpy as np
from pathlib import Path
import monai.transforms as T
import torch
# Using MONAI for medical imaging tasks
# ---- Config ----
# NOTE(review): these are absolute, machine-specific paths; consider deriving
# them from a single configurable data root so the notebook runs elsewhere.
csv_path = Path("/home/tibia/Projet_Hemorragie/Seg_hemorragie/Classification_RSNA/data/csv/train_fold0.csv")
dicom_dir = Path("/home/tibia/Projet_Hemorragie/Seg_hemorragie/Classification_RSNA/data/rsna-intracranial-hemorrhage-detection/stage_2_train")
cache_dir = Path("./persistent_cache/fold0")

# One binary target per column; the order here fixes the order of the label vector.
label_cols = ['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']
df = pd.read_csv(csv_path)

# Fail early with a readable message if the CSV lacks a column used below.
missing_cols = [col for col in ["filename", *label_cols] if col not in df.columns]
if missing_cols:
    raise KeyError(f"{csv_path} is missing required columns: {missing_cols}")

# ---- Build MONAI-style data list ----
# Convert all labels to float32 in one vectorised step instead of iterating with
# DataFrame.iterrows(), which builds a Series per row and is slow on large CSVs.
label_matrix = df[label_cols].to_numpy(dtype=np.float32)
data_list = [
    {
        "image": str(dicom_dir / filename),
        # .copy() gives every sample its own array rather than a view into label_matrix
        "label": labels.copy(),
    }
    for filename, labels in zip(df["filename"], label_matrix)
]

# ---- Transforms ----
# Intensity window: values are clipped to [center - width/2, center + width/2]
# (here [0, 80]) and rescaled linearly to [0, 1].
# NOTE(review): presumably Hounsfield units (a brain window) — confirm that the
# loader returns rescaled HU values for these DICOM files.
window_preset = {"window_center": 40, "window_width": 80}

train_transforms = T.Compose([
    T.LoadImaged(keys=["image"], image_only=True),
    T.ScaleIntensityRanged(
        keys=["image"],
        a_min=window_preset["window_center"] - window_preset["window_width"] // 2,
        a_max=window_preset["window_center"] + window_preset["window_width"] // 2,
        b_min=0.0,
        b_max=1.0,
        clip=True,
    ),
    T.EnsureChannelFirstd(keys=["image"]),
    T.ResizeD(keys=["image"], spatial_size=(224, 224)),
    T.ToTensord(keys=["image", "label"]),
])

# ---- PersistentDataset ----
# Transform results are cached on disk under cache_dir.
train_dataset = PersistentDataset(
    data=data_list,
    transform=train_transforms,
    cache_dir=str(cache_dir),
)

print(f" Dataset ready with {len(train_dataset)} samples and cached transforms at {cache_dir}")


  from .autonotebook import tqdm as notebook_tqdm


 Dataset ready with 539469 samples and cached transforms at persistent_cache/fold0


In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
DEVICE


device(type='cuda')

In [4]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.models import resnet18
from monai.networks.nets import densenet121,SEResNet50,ResNet
from monai.transforms import Compose, Resize, ToTensor
from monai.data import DataLoader, PersistentDataset, Dataset
from tqdm import tqdm
import os
# === Hyperparameters ===
NUM_CLASSES = 6
BATCH_SIZE = 32
EPOCHS = 3
LR = 1e-3

# === Data loader ===
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=8,
    persistent_workers=True,
    pin_memory=True,
)
print(f"Using device: {DEVICE}")
print(f"Number of Batches in the dataset: {len(train_loader)}")

# === Model ===
# 2D ResNet-18 layout (basic blocks, two per stage) taking a single-channel
# image and producing one logit per class.
model = ResNet(
    block='basic',
    layers=[2, 2, 2, 2],
    block_inplanes=[64, 128, 256, 512],
    spatial_dims=2,
    n_input_channels=1,
    num_classes=NUM_CLASSES,
    conv1_t_size=7,
    conv1_t_stride=2,
)
# Alternatives kept for reference (not active):
#   SEResNet50(spatial_dims=2, in_channels=1, num_classes=NUM_CLASSES,
#              pretrained=False, dropout_prob=0.2, reduction=16,
#              input_3x3=True, downsample_kernel_size=3)
#   densenet121(spatial_dims=2, in_channels=1, out_channels=NUM_CLASSES)
model.to(DEVICE)

# === Loss ===
# Per-class positive weights for the multilabel BCE loss; all ones means the
# positive examples of every class are weighted equally.
pos_weights = torch.ones(NUM_CLASSES, dtype=torch.float).to(DEVICE)
print(f"répartition des poids : {pos_weights}")
loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weights)

# === Optimizer ===
optimizer = torch.optim.Adam(model.parameters(), lr=LR)



Using device: cuda
Number of Batches in the dataset: 16859


répartition des poids : tensor([1., 1., 1., 1., 1., 1.], device='cuda:0')


In [5]:
try : 
    ! pip install torchinfo

except:
    print("torchinfo is already installed or installation failed.")
    
import torchinfo
from torchinfo import summary
# Display model summary
model = model.to(DEVICE)
summary(model, input_size=(BATCH_SIZE, 1, 224, 224))


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [32, 6]                   --
├─Conv2d: 1-1                            [32, 64, 112, 112]        3,136
├─BatchNorm2d: 1-2                       [32, 64, 112, 112]        128
├─ReLU: 1-3                              [32, 64, 112, 112]        --
├─MaxPool2d: 1-4                         [32, 64, 56, 56]          --
├─Sequential: 1-5                        [32, 64, 56, 56]          --
│    └─ResNetBlock: 2-1                  [32, 64, 56, 56]          --
│    │    └─Conv2d: 3-1                  [32, 64, 56, 56]          36,864
│    │    └─BatchNorm2d: 3-2             [32, 64, 56, 56]          128
│    │    └─ReLU: 3-3                    [32, 64, 56, 56]          --
│    │    └─Conv2d: 3-4                  [32, 64, 56, 56]          36,864
│    │    └─BatchNorm2d: 3-5             [32, 64, 56, 56]          128
│    │    └─ReLU: 3-6                    [32, 64, 56, 56]          --
│

In [6]:
from timeit import default_timer as timer
def print_train_time(start: float, end: float, device: torch.device = None):
    """Print and return the elapsed time between two timestamps.

    Args:
        start: Timestamp taken before training, in seconds.
        end: Timestamp taken after training, in seconds.
        device: Device label included in the printed message (printed as-is).

    Returns:
        The difference ``end - start``.
    """
    elapsed = end - start
    print(f"Train time on {device}: {elapsed:.3f} seconds")
    return elapsed

def compute_accuracy(y_pred, y_true, threshold=0.5):
    """
    Return the multilabel accuracy, scoring every label independently.

    The logits in ``y_pred`` are passed through a sigmoid and compared to
    ``threshold`` to obtain boolean predictions. Each prediction is then
    compared with the boolean cast of ``y_true``, and the fraction of matching
    entries over the whole tensor is returned as a Python float.
    """
    probabilities = torch.sigmoid(y_pred)
    predicted = probabilities > threshold
    expected = y_true.bool()
    matches = predicted.eq(expected)
    return matches.float().mean().item()



In [7]:
def train_step(model:torch.nn.Module,
                dataloader:torch.utils.data.DataLoader,
                loss_fn:torch.nn.Module,
                optimizer:torch.optim.Optimizer,
                compute_accuracy,
                device:torch.device=DEVICE):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        dataloader: Yields dict batches with an ``"image"`` and a ``"label"`` entry.
        loss_fn: Callable taking ``(logits, targets)`` and returning a scalar loss.
        optimizer: Optimizer bound to ``model``'s parameters.
        compute_accuracy: Callable taking ``y_pred`` and ``y_true`` keyword
            arguments and returning a number for the batch.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        ``(train_loss, train_acc)``: the per-batch loss and accuracy averaged
        over the number of batches, as Python numbers.

    Raises:
        ValueError: If ``dataloader`` contains no batches.
    """
    num_batches = len(dataloader)
    if num_batches == 0:
        raise ValueError("dataloader is empty; nothing to train on")

    train_loss, train_acc = 0.0, 0.0
    samples_seen = 0

    model.train()

    for i, batch in enumerate(dataloader):
        X = batch["image"].to(device)
        y = batch["label"].to(device)

        # 1. Forward pass (raw logits)
        y_pred = model(X)

        # 2. Loss and accuracy for this batch.
        # Accumulate a plain float: adding the loss tensor itself would keep
        # every batch's autograd history alive for the whole epoch.
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()
        train_acc += compute_accuracy(y_pred=y_pred, y_true=y)

        # 3. Reset gradients, 4. backpropagate, 5. update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Progress report; a running count stays correct even if a batch is
        # smaller than the others.
        if i % 400 == 0:
            print(f"Looked at {samples_seen}/{len(dataloader.dataset)} samples")
        samples_seen += len(X)

    # Average the accumulated metrics over the number of batches
    train_loss /= num_batches
    train_acc /= num_batches

    print(f"Train loss: {train_loss:.5f} | Train acc: {train_acc:.4f}")
    return train_loss, train_acc


def val_step(model:torch.nn.Module,
                dataloader:torch.utils.data.DataLoader,
                loss_fn:torch.nn.Module,
                compute_accuracy,
                device:torch.device=DEVICE):
    """Evaluate ``model`` over ``dataloader`` without updating it.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        dataloader: Yields either dict batches with ``"image"`` / ``"label"``
            entries (the format consumed by ``train_step``) or ``(inputs, targets)``
            pairs.
        loss_fn: Callable taking ``(logits, targets)`` and returning a scalar loss.
        compute_accuracy: Callable taking ``y_pred`` and ``y_true`` keyword
            arguments and returning a number for the batch.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        ``(test_loss, test_acc)``: the per-batch loss and accuracy averaged over
        the number of batches, as Python numbers.

    Raises:
        ValueError: If ``dataloader`` contains no batches.
    """
    num_batches = len(dataloader)
    if num_batches == 0:
        raise ValueError("dataloader is empty; nothing to evaluate")

    test_loss, test_acc = 0.0, 0.0
    samples_seen = 0

    model.eval()

    with torch.inference_mode():
        for batch_idx, batch in enumerate(dataloader):
            # Unpacking a dict batch directly would yield its key strings, so
            # read the tensors by key when the loader produces dicts.
            if isinstance(batch, dict):
                X_test, y_test = batch["image"], batch["label"]
            else:
                X_test, y_test = batch
            X_test, y_test = X_test.to(device), y_test.to(device)

            # 1. Forward pass (raw logits)
            test_pred = model(X_test)

            # 2. Loss and accuracy for this batch, accumulated as plain numbers
            loss = loss_fn(test_pred, y_test)
            test_loss += loss.item()
            test_acc += compute_accuracy(y_true=y_test, y_pred=test_pred)

            if batch_idx % 400 == 0:
                print(f"Looked at {samples_seen}/{len(dataloader.dataset)} samples")
            samples_seen += len(X_test)

        # Average the accumulated metrics over the number of batches
        test_loss /= num_batches
        test_acc /= num_batches

        print(f"Test loss: {test_loss:.5f} | Test acc: {test_acc:.4f}")

    return test_loss, test_acc

In [8]:
# === Training loop ===
# Seed PyTorch's random number generators before training starts.
# NOTE(review): the model is constructed in an earlier cell, so this seed does
# not cover weight initialisation — confirm whether that is intended.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

start_time = timer()

for epoch in tqdm(range(EPOCHS)):
    print(f"Epoch: {epoch}\n-------")
    # One full pass over the training loader per epoch
    train_step(model, train_loader, loss_fn, optimizer, compute_accuracy, DEVICE)

end_time = timer()
total_train_time = print_train_time(start=start_time, end=end_time, device=DEVICE)

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
-------


  return torch.load(hashfile)
  return torch.load(hashfile)
  return torch.load(hashfile)
  return torch.load(hashfile)
  return torch.load(hashfile)
  return torch.load(hashfile)
  return torch.load(hashfile)
  return torch.load(hashfile)


Looked at 0/539469 samples
Looked at 12800/539469 samples
Looked at 25600/539469 samples
Looked at 38400/539469 samples
Looked at 51200/539469 samples
Looked at 64000/539469 samples
Looked at 76800/539469 samples
Looked at 89600/539469 samples
Looked at 102400/539469 samples
Looked at 115200/539469 samples
Looked at 128000/539469 samples
Looked at 140800/539469 samples
Looked at 153600/539469 samples
Looked at 166400/539469 samples
Looked at 179200/539469 samples
Looked at 192000/539469 samples
Looked at 204800/539469 samples
Looked at 217600/539469 samples
Looked at 230400/539469 samples
Looked at 243200/539469 samples
Looked at 256000/539469 samples
Looked at 268800/539469 samples
Looked at 281600/539469 samples
Looked at 294400/539469 samples
Looked at 307200/539469 samples
Looked at 320000/539469 samples
Looked at 332800/539469 samples
Looked at 345600/539469 samples
Looked at 358400/539469 samples
Looked at 371200/539469 samples
Looked at 384000/539469 samples
Looked at 396800/539

 33%|███▎      | 1/3 [18:41<37:23, 1121.82s/it]

Epoch: 1
-------
Looked at 0/539469 samples
Looked at 12800/539469 samples
Looked at 25600/539469 samples
Looked at 38400/539469 samples
Looked at 51200/539469 samples
Looked at 64000/539469 samples
Looked at 76800/539469 samples
Looked at 89600/539469 samples
Looked at 102400/539469 samples
Looked at 115200/539469 samples
Looked at 128000/539469 samples
Looked at 140800/539469 samples
Looked at 153600/539469 samples
Looked at 166400/539469 samples
Looked at 179200/539469 samples
Looked at 192000/539469 samples
Looked at 204800/539469 samples
Looked at 217600/539469 samples
Looked at 230400/539469 samples
Looked at 243200/539469 samples
Looked at 256000/539469 samples
Looked at 268800/539469 samples
Looked at 281600/539469 samples
Looked at 294400/539469 samples
Looked at 307200/539469 samples
Looked at 320000/539469 samples
Looked at 332800/539469 samples
Looked at 345600/539469 samples
Looked at 358400/539469 samples
Looked at 371200/539469 samples
Looked at 384000/539469 samples
Loo

 67%|██████▋   | 2/3 [50:41<26:31, 1591.34s/it]

Epoch: 2
-------
Looked at 0/539469 samples
Looked at 12800/539469 samples
Looked at 25600/539469 samples
Looked at 38400/539469 samples
Looked at 51200/539469 samples
Looked at 64000/539469 samples
Looked at 76800/539469 samples
Looked at 89600/539469 samples
Looked at 102400/539469 samples
Looked at 115200/539469 samples
Looked at 128000/539469 samples
Looked at 140800/539469 samples
Looked at 153600/539469 samples
Looked at 166400/539469 samples
Looked at 179200/539469 samples
Looked at 192000/539469 samples
Looked at 204800/539469 samples
Looked at 217600/539469 samples
Looked at 230400/539469 samples
Looked at 243200/539469 samples
Looked at 256000/539469 samples
Looked at 268800/539469 samples
Looked at 281600/539469 samples
Looked at 294400/539469 samples
Looked at 307200/539469 samples
Looked at 320000/539469 samples
Looked at 332800/539469 samples
Looked at 345600/539469 samples
Looked at 358400/539469 samples
Looked at 371200/539469 samples
Looked at 384000/539469 samples
Loo

100%|██████████| 3/3 [1:13:18<00:00, 1466.02s/it]


Train time on cuda: 4399.478 seconds
