# RT-DETR-X Training

Training RT-DETR-X with ResNet-101 backbone for WBC Classification on Raabin-WBC dataset.

## Model Details
- **Backbone**: ResNet-101
- **Training**: Pretrained weights (fine-tuning)
- **Dataset**: Raabin-WBC with 5 cell types

## 1. Setup and Imports

In [None]:
# %pip install -U ultralytics torch torchvision pillow tqdm scikit-learn seaborn timm

In [13]:
%matplotlib inline

import os
import json
import yaml
from datetime import datetime

import numpy as np
import torch

from sklearn.metrics import classification_report

# Import common training utilities
from training_utils import (
    create_sampled_dataset,
    create_full_dataset_config,
    train_model,
    evaluate_model,
    save_results,
    print_training_summary,
)

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

PyTorch version: 2.6.0+cu124
CUDA available: True


## 2. Configuration

In [14]:
# =============================================================================
# MODEL CONFIGURATION
# =============================================================================
MODEL_NAME = "RT-DETR-X"
BACKBONE = "ResNet-101"
IS_PRETRAINED = True  # Using pretrained weights

# Pretrained model file
MODEL_FILE = "rtdetr-x.pt"

# =============================================================================
# BASE DIRECTORY
# =============================================================================
NOTEBOOK_DIR = os.getcwd()
BASE_DIR = os.path.join(NOTEBOOK_DIR, "output")

# Dataset path (contains separate Train and val folders)
DATA_ROOT = r"C:\D drive\mydata\MSML\DataSets\Raabin_datsets_withlabels"

print(f"Notebook directory: {NOTEBOOK_DIR}")
print(f"Base directory: {BASE_DIR}")
print(f"Data root: {DATA_ROOT}")

# =============================================================================
# SAMPLING CONFIGURATION
# =============================================================================
USE_FULL_DATASET = True  # Set to True to use ALL images, False for sampling

# Sample sizes per class (only used when USE_FULL_DATASET=False)
TRAIN_SAMPLE_SIZE = 100   # Number of training samples per class
VAL_SAMPLE_SIZE = 20      # Number of validation samples per class

# =============================================================================
# CHECKPOINT CONFIGURATION (for resume training on full dataset)
# =============================================================================
CHECKPOINT_DIR = os.path.join(BASE_DIR, "checkpoints", MODEL_NAME)
CHECKPOINT_MODEL_PATH = os.path.join(CHECKPOINT_DIR, "last.pt")
CHECKPOINT_META_PATH = os.path.join(CHECKPOINT_DIR, "training_meta.json")

# Create checkpoint directory
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# Data paths (separate train and validation directories)
TRAIN_IMAGES_DIR = os.path.join(DATA_ROOT, "Train", "images")
TRAIN_LABELS_DIR = os.path.join(DATA_ROOT, "Train", "labels")
VAL_IMAGES_DIR = os.path.join(DATA_ROOT, "val", "images")
VAL_LABELS_DIR = os.path.join(DATA_ROOT, "val", "labels")

# For evaluation (uses training images by default)
IMAGES_DIR = TRAIN_IMAGES_DIR

# Output directories
os.makedirs(BASE_DIR, exist_ok=True)
MODEL_DIR = os.path.join(BASE_DIR, "models")
RESULTS_DIR = os.path.join(BASE_DIR, "results")
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)

# Device configuration
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Class definitions
CLASSES = {
    "Basophil": 0,
    "Eosinophil": 1,
    "Lymphocyte": 2,
    "Monocyte": 3,
    "Neutrophil": 4
}
ID2LABEL = {v: k for k, v in CLASSES.items()}
NUM_CLASSES = len(CLASSES)

print(f"\nUsing device: {DEVICE}")
if USE_FULL_DATASET:
    print(f"Dataset mode: FULL DATASET")
    print(f"Checkpoint directory: {CHECKPOINT_DIR}")
    # Check for existing checkpoint
    if os.path.exists(CHECKPOINT_MODEL_PATH) and os.path.exists(CHECKPOINT_META_PATH):
        with open(CHECKPOINT_META_PATH, 'r') as f:
            meta = json.load(f)
        print(f"  -> Found existing checkpoint: {meta['total_epochs']} epochs completed")
        print(f"  -> Training will RESUME from epoch {meta['total_epochs'] + 1}")
    else:
        print(f"  -> No checkpoint found. Training will start from scratch.")
else:
    print(f"Dataset mode: SAMPLED (Train: {TRAIN_SAMPLE_SIZE}/class, Val: {VAL_SAMPLE_SIZE}/class)")
    print(f"  -> Sampled mode: Always starts fresh (no resume)")
print(f"\nTraining data: {TRAIN_IMAGES_DIR}")
print(f"Validation data: {VAL_IMAGES_DIR}")
print(f"\nModel: {MODEL_NAME} ({BACKBONE})")
print(f"Training mode: {'Pretrained (fine-tuning)' if IS_PRETRAINED else 'From scratch'}")

Notebook directory: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells
Base directory: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output
Data root: C:\D drive\mydata\MSML\DataSets\Raabin_datsets_withlabels

Using device: cuda
Dataset mode: FULL DATASET
Checkpoint directory: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\checkpoints\RT-DETR-X
  -> Found existing checkpoint: 1 epochs completed
  -> Training will RESUME from epoch 2

Training data: C:\D drive\mydata\MSML\DataSets\Raabin_datsets_withlabels\Train\images
Validation data: C:\D drive\mydata\MSML\DataSets\Raabin_datsets_withlabels\val\images

Model: RT-DETR-X (ResNet-101)
Training mode: Pretrained (fine-tuning)


## 3. Training Hyperparameters

In [15]:
# =============================================================================
# TRAINING HYPERPARAMETERS (PRETRAINED CONFIG - OPTIMIZED FOR SPEED)
# =============================================================================
# Fewer epochs needed since backbone is already trained

TRAINING_CONFIG = {
    "epochs": 2,
    "imgsz": 512,           # Reduced from 640 for faster processing
    "batch": 3,             # Increased from 4 (AMP enables larger batch)
    "lr0": 0.01,            
    "lrf": 0.0001,
    "momentum": 0.937,
    "weight_decay": 0.0005,
    "workers": 4,           # Reduced to lower RAM usage
    "patience": 15,
    "cos_lr": True,
    "warmup_epochs": 1,
    "warmup_momentum": 0.8,
    "warmup_bias_lr": 0.1,
    "amp": True,            # Mixed precision - enables larger batch & speeds up
}

print("Training Configuration (OPTIMIZED FOR 8GB VRAM):")
print("="*60)
for k, v in TRAINING_CONFIG.items():
    print(f"  {k}: {v}")

Training Configuration (OPTIMIZED FOR 8GB VRAM):
  epochs: 2
  imgsz: 512
  batch: 3
  lr0: 0.01
  lrf: 0.0001
  momentum: 0.937
  weight_decay: 0.0005
  workers: 4
  patience: 15
  cos_lr: True
  warmup_epochs: 1
  warmup_momentum: 0.8
  warmup_bias_lr: 0.1
  amp: True


## 4. Data Preparation

In [16]:
# create_sampled_dataset is imported from training_utils.py
# See training_utils.py for the implementation

In [17]:
# Create data configuration
if USE_FULL_DATASET:
    print("Using FULL DATASET\n")
    print(f"Training: {TRAIN_IMAGES_DIR}")
    print(f"Validation: {VAL_IMAGES_DIR}")
    
    DATA_YAML = create_full_dataset_config(DATA_ROOT, BASE_DIR, NUM_CLASSES, ID2LABEL)
    print(f"\nData config: {DATA_YAML}")
else:
    print(f"Creating SAMPLED dataset...")
    print(f"  Train samples: {TRAIN_SAMPLE_SIZE} per class")
    print(f"  Val samples: {VAL_SAMPLE_SIZE} per class\n")
    
    DATA_YAML = create_sampled_dataset(
        DATA_ROOT, 
        BASE_DIR, 
        CLASSES, 
        train_samples_per_class=TRAIN_SAMPLE_SIZE,
        val_samples_per_class=VAL_SAMPLE_SIZE,
        random_seed=42
    )

Using FULL DATASET

Training: C:\D drive\mydata\MSML\DataSets\Raabin_datsets_withlabels\Train\images
Validation: C:\D drive\mydata\MSML\DataSets\Raabin_datsets_withlabels\val\images

Data config: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\data_full.yaml


## 5. Training

In [18]:
# train_model is imported from training_utils.py
# See training_utils.py for the implementation

In [25]:
# Train the model
training_result = train_model(
    model_source=MODEL_FILE,
    model_name=MODEL_NAME,
    data_yaml=DATA_YAML,
    training_config=TRAINING_CONFIG,
    base_dir=BASE_DIR,
    use_full_dataset=USE_FULL_DATASET,
    checkpoint_dir=CHECKPOINT_DIR if USE_FULL_DATASET else None,
    default_warmup_epochs=1  # Pretrained model needs less warmup
)

print(f"\nTraining completed in {training_result['training_time']:.1f}s")
print(f"Best model saved to: {training_result['best_model_path']}")

if training_result['resumed']:
    print(f"\nResumed from epoch {training_result['previous_epochs'] + 1}")
print(f"Total epochs trained: {training_result['total_epochs']}")


Training: RT-DETR-X

*** RESUMING FROM CHECKPOINT ***
  Previous epochs completed: 3
  This session will train epochs: 4 to 5
  Loading model from: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\checkpoints\RT-DETR-X\last.pt
New https://pypi.org/project/ultralytics/8.4.11 available  Update with 'pip install -U ultralytics'
Ultralytics 8.4.8  Python-3.12.10 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 4060 Ti, 8188MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, angle=1.0, augment=False, auto_augment=randaugment, batch=3, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\data_full.yaml, degrees=0.0, deterministic=True, devi

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K        1/2      3.55G     0.2123     0.5358     0.1007         10        512: 100% ━━━━━━━━━━━━ 3392/3392 2.4it/s 23:52<0.4s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 711/711 8.8it/s 1:21<0.1ss
                   all       4261       6074      0.663      0.781      0.738      0.689

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size
[K        2/2      3.92G     0.5234     0.5698     0.3346          6        512: 0% ──────────── 0/3392  0.5s

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K        2/2      3.93G     0.1962     0.4719    0.09124         14        512: 100% ━━━━━━━━━━━━ 3392/3392 2.4it/s 23:40<0.4s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 711/711 8.8it/s 1:21<0.1ss
                   all       4261       6074      0.831      0.673      0.638      0.599

2 epochs completed in 0.839 hours.
Optimizer stripped from C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\training_runs\RT-DETR-X\weights\last.pt, 135.4MB
Optimizer stripped from C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\training_runs\RT-DETR-X\weights\best.pt, 135.4MB

Validating C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\training_runs\RT-DETR-X\weights\best

## 6. Evaluation

In [26]:
# evaluate_model is imported from training_utils.py
# See training_utils.py for the implementation

In [27]:
# Evaluate the model
CONF_THRESH = 0.1
EVAL_PER_CLASS = 100

print(f"Evaluating: {MODEL_NAME}")
evaluation_result = evaluate_model(
    model_path=training_result["best_model_path"],
    images_dir=IMAGES_DIR,
    classes=CLASSES,
    id2label=ID2LABEL,
    conf_thresh=CONF_THRESH,
    eval_per_class=EVAL_PER_CLASS,
)

print(f"\nResults:")
print(f"  Accuracy: {evaluation_result['accuracy']:.4f}")
print(f"  Avg inference time: {evaluation_result['avg_inference_time']*1000:.2f}ms")
print(f"  No predictions: {evaluation_result['no_prediction_count']}/{evaluation_result['total_samples']}")

Evaluating: RT-DETR-X


                                                                                                                       


Results:
  Accuracy: 0.9140
  Avg inference time: 48.73ms
  No predictions: 0/500




In [28]:
# Print classification report
if evaluation_result["classification_report"] is not None:
    y_true = np.array(evaluation_result["y_true"])
    y_pred = np.array(evaluation_result["y_pred"])
    valid = y_pred != -1
    
    print(f"\n--- {MODEL_NAME} Classification Report ---")
    print(classification_report(
        y_true[valid],
        y_pred[valid],
        target_names=list(CLASSES.keys()),
        labels=list(range(NUM_CLASSES)),
        zero_division=0
    ))


--- RT-DETR-X Classification Report ---
              precision    recall  f1-score   support

    Basophil       1.00      1.00      1.00       100
  Eosinophil       0.97      0.72      0.83       100
  Lymphocyte       0.96      0.90      0.93       100
    Monocyte       0.76      0.97      0.85       100
  Neutrophil       0.93      0.98      0.96       100

    accuracy                           0.91       500
   macro avg       0.93      0.91      0.91       500
weighted avg       0.93      0.91      0.91       500



## 7. Save Results to Disk

In [29]:
# Save results to JSON
results_file = save_results(
    results_dir=RESULTS_DIR,
    model_name=MODEL_NAME,
    backbone=BACKBONE,
    is_pretrained=IS_PRETRAINED,
    training_result=training_result,
    evaluation_result=evaluation_result,
    training_config=TRAINING_CONFIG,
    classes=CLASSES
)

print(f"Results saved to: {results_file}")

Results saved to: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\results\RT-DETR-X_results.json


In [30]:
# Print training summary
print_training_summary(
    model_name=MODEL_NAME,
    backbone=BACKBONE,
    training_result=training_result,
    evaluation_result=evaluation_result,
    training_config=TRAINING_CONFIG,
    checkpoint_model_path=CHECKPOINT_MODEL_PATH if USE_FULL_DATASET else None,
    results_file=results_file
)


TRAINING COMPLETE
Model: RT-DETR-X (ResNet-101)
Total Epochs: 5
  (Resumed from epoch 4)
Accuracy: 0.9140
Inference Time: 48.73ms
Training Time (this session): 3152.4s

Best model: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\training_runs\RT-DETR-X\weights\best.pt
Checkpoint: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\checkpoints\RT-DETR-X\last.pt
Results JSON: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\results\RT-DETR-X_results.json
