# RT-DETR-L Training

Training RT-DETR-L with ResNet-50 backbone for WBC Classification on Raabin-WBC dataset.

## Model Details
- **Backbone**: ResNet-50
- **Training**: Pretrained weights (fine-tuning)
- **Dataset**: Raabin-WBC with 5 cell types

## 1. Setup and Imports

In [None]:
# %pip install -U ultralytics torch torchvision pillow tqdm scikit-learn seaborn timm

In [1]:
%matplotlib inline

import os
import json
import random
import shutil
import yaml
import time
from datetime import datetime

import numpy as np
import torch
from tqdm import tqdm

from ultralytics import RTDETR

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

PyTorch version: 2.6.0+cu124
CUDA available: True


## 2. Configuration

In [2]:
# =============================================================================
# MODEL CONFIGURATION
# =============================================================================
MODEL_NAME = "RT-DETR-L"
BACKBONE = "ResNet-50"
IS_PRETRAINED = True  # Using pretrained weights

# Pretrained model file
MODEL_FILE = "rtdetr-l.pt"

# =============================================================================
# BASE DIRECTORY
# =============================================================================
NOTEBOOK_DIR = os.getcwd()
BASE_DIR = os.path.join(NOTEBOOK_DIR, "output")

# Dataset path
DATA_ROOT = r"C:\D drive\mydata\MSML\DataSets\Raabin_datsets_withlabels"

print(f"Notebook directory: {NOTEBOOK_DIR}")
print(f"Base directory: {BASE_DIR}")
print(f"Data root: {DATA_ROOT}")

# =============================================================================
# SAMPLING CONFIGURATION
# =============================================================================
SAMPLES_PER_CLASS = 100  # Set to None for full dataset

# Data paths
IMAGES_DIR = os.path.join(DATA_ROOT, "Train", "images")
LABELS_DIR = os.path.join(DATA_ROOT, "Train", "labels")

# Output directories
os.makedirs(BASE_DIR, exist_ok=True)
MODEL_DIR = os.path.join(BASE_DIR, "models")
RESULTS_DIR = os.path.join(BASE_DIR, "results")
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)

# Device configuration
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Class definitions
CLASSES = {
    "Basophil": 0,
    "Eosinophil": 1,
    "Lymphocyte": 2,
    "Monocyte": 3,
    "Neutrophil": 4
}
ID2LABEL = {v: k for k, v in CLASSES.items()}
NUM_CLASSES = len(CLASSES)

print(f"\nUsing device: {DEVICE}")
print(f"Samples per class: {SAMPLES_PER_CLASS if SAMPLES_PER_CLASS else 'ALL'}")
print(f"\nModel: {MODEL_NAME} ({BACKBONE})")
print(f"Training mode: {'Pretrained (fine-tuning)' if IS_PRETRAINED else 'From scratch'}")

Notebook directory: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells
Base directory: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output
Data root: C:\D drive\mydata\MSML\DataSets\Raabin_datsets_withlabels

Using device: cuda
Samples per class: 100

Model: RT-DETR-L (ResNet-50)
Training mode: Pretrained (fine-tuning)


## 3. Training Hyperparameters

In [3]:
# =============================================================================
# TRAINING HYPERPARAMETERS (PRETRAINED CONFIG)
# =============================================================================
# Fewer epochs needed since backbone is already trained

TRAINING_CONFIG = {
    "epochs": 3,            
    "imgsz": 640,
    "batch": 4,
    "lr0": 0.01,            
    "lrf": 0.0001,
    "momentum": 0.937,
    "weight_decay": 0.0005,
    "workers": 8,
    "patience": 15,
    "cos_lr": True,
    "warmup_epochs": 1,
    "warmup_momentum": 0.8,
    "warmup_bias_lr": 0.1,
}

print("Training Configuration (Pretrained Fine-tuning):")
print("="*60)
for k, v in TRAINING_CONFIG.items():
    print(f"  {k}: {v}")

Training Configuration (Pretrained Fine-tuning):
  epochs: 3
  imgsz: 640
  batch: 4
  lr0: 0.01
  lrf: 0.0001
  momentum: 0.937
  weight_decay: 0.0005
  workers: 8
  patience: 15
  cos_lr: True
  warmup_epochs: 1
  warmup_momentum: 0.8
  warmup_bias_lr: 0.1


## 4. Data Preparation

In [4]:
def create_training_subset(data_root, base_dir, classes, samples_per_class=None, random_seed=42):
    """
    Create a training subset with ZERO image duplication.
    - Creates train.txt/val.txt pointing to original images
    - Corrects class IDs in original label files (fixes the dataset)
    - No image files created in output folder
    """
    if random_seed is not None:
        random.seed(random_seed)
    
    # Define paths
    subset_dir = os.path.join(base_dir, "data_subset")
    
    # Source paths
    src_train_images = os.path.join(data_root, "Train", "images")
    src_train_labels = os.path.join(data_root, "Train", "labels")
    src_val_images = os.path.join(data_root, "val", "images")
    src_val_labels = os.path.join(data_root, "val", "labels")
    
    # Clean up existing subset directory
    if os.path.exists(subset_dir):
        shutil.rmtree(subset_dir)
    os.makedirs(subset_dir, exist_ok=True)
    
    # Lists to store image paths for txt files
    train_image_paths = []
    val_image_paths = []
    labels_corrected = 0
    
    total_train = 0
    total_val = 0
    
    for cls_name, cls_id in classes.items():
        # --- Training data ---
        src_cls_images = os.path.join(src_train_images, cls_name)
        src_cls_labels = os.path.join(src_train_labels, cls_name)
        
        if os.path.exists(src_cls_images):
            image_files = [f for f in os.listdir(src_cls_images) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
            
            if samples_per_class is not None and len(image_files) > samples_per_class:
                image_files = random.sample(image_files, samples_per_class)
            
            for img_file in image_files:
                base_name = os.path.splitext(img_file)[0]
                
                # Store path to ORIGINAL image
                original_img_path = os.path.join(src_cls_images, img_file)
                train_image_paths.append(original_img_path)
                
                # Correct the ORIGINAL label file (fix class ID)
                label_file = base_name + ".txt"
                label_path = os.path.join(src_cls_labels, label_file)
                
                if os.path.exists(label_path):
                    with open(label_path, 'r') as f:
                        lines = f.readlines()
                    
                    new_lines = []
                    needs_correction = False
                    for line in lines:
                        parts = line.strip().split()
                        if len(parts) > 1:
                            if parts[0] != str(cls_id):
                                needs_correction = True
                                parts[0] = str(cls_id)
                            new_lines.append(' '.join(parts) + '\n')
                    
                    if needs_correction:
                        with open(label_path, 'w') as f:
                            f.writelines(new_lines)
                        labels_corrected += 1
            
            total_train += len(image_files)
            print(f"  {cls_name} (class {cls_id}): {len(image_files)} training images")
        
        # --- Validation data ---
        src_cls_val_images = os.path.join(src_val_images, cls_name)
        src_cls_val_labels = os.path.join(src_val_labels, cls_name)
        
        if os.path.exists(src_cls_val_images):
            val_files = [f for f in os.listdir(src_cls_val_images) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
            
            if samples_per_class is not None:
                val_sample_size = min(len(val_files), max(20, samples_per_class // 5))
                if len(val_files) > val_sample_size:
                    val_files = random.sample(val_files, val_sample_size)
            
            for img_file in val_files:
                base_name = os.path.splitext(img_file)[0]
                
                # Store path to ORIGINAL image
                original_img_path = os.path.join(src_cls_val_images, img_file)
                val_image_paths.append(original_img_path)
                
                # Correct the ORIGINAL label file
                label_file = base_name + ".txt"
                label_path = os.path.join(src_cls_val_labels, label_file)
                
                if os.path.exists(label_path):
                    with open(label_path, 'r') as f:
                        lines = f.readlines()
                    
                    new_lines = []
                    needs_correction = False
                    for line in lines:
                        parts = line.strip().split()
                        if len(parts) > 1:
                            if parts[0] != str(cls_id):
                                needs_correction = True
                                parts[0] = str(cls_id)
                            new_lines.append(' '.join(parts) + '\n')
                    
                    if needs_correction:
                        with open(label_path, 'w') as f:
                            f.writelines(new_lines)
                        labels_corrected += 1
            
            total_val += len(val_files)
    
    # Write train.txt with paths to original images
    train_txt_path = os.path.join(subset_dir, "train.txt")
    with open(train_txt_path, 'w') as f:
        for img_path in train_image_paths:
            f.write(img_path + '\n')
    
    # Write val.txt with paths to original images
    val_txt_path = os.path.join(subset_dir, "val.txt")
    with open(val_txt_path, 'w') as f:
        for img_path in val_image_paths:
            f.write(img_path + '\n')
    
    # Create data.yaml pointing to txt files
    data_yaml_path = os.path.join(subset_dir, "data.yaml")
    data_config = {
        'path': data_root,  # Base path for label lookup
        'train': train_txt_path,  # Absolute path to train.txt
        'val': val_txt_path,  # Absolute path to val.txt
        'nc': len(classes),
        'names': {v: k for k, v in classes.items()}
    }
    
    with open(data_yaml_path, 'w') as f:
        yaml.dump(data_config, f, default_flow_style=False)
    
    print(f"\nSubset created:")
    print(f"  Total training images: {total_train}")
    print(f"  Total validation images: {total_val}")
    print(f"  Labels corrected: {labels_corrected}")
    print(f"  Data config: {data_yaml_path}")
    print(f"  NO image files copied - using original dataset directly!")
    
    return data_yaml_path

In [5]:
# Create training subset
print(f"Creating training subset with {SAMPLES_PER_CLASS if SAMPLES_PER_CLASS else 'ALL'} images per class...\n")
DATA_YAML = create_training_subset(
    DATA_ROOT, 
    BASE_DIR, 
    CLASSES, 
    samples_per_class=SAMPLES_PER_CLASS,
    random_seed=42
)

Creating training subset with 100 images per class...

  Basophil (class 0): 100 training images
  Eosinophil (class 1): 100 training images
  Lymphocyte (class 2): 100 training images
  Monocyte (class 3): 100 training images
  Neutrophil (class 4): 100 training images

Subset created:
  Total training images: 500
  Total validation images: 100
  Labels corrected: 0
  Data config: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\data_subset\data.yaml
  NO image files copied - using original dataset directly!


## 5. Training

In [6]:
def train_model(model_file, model_name, data_yaml, training_config, base_dir):
    """
    Train the RT-DETR model and return training results.
    Clears previous training runs for this model before starting.
    """
    print(f"\n{'='*60}")
    print(f"Training: {model_name}")
    print(f"{'='*60}")

    # Project directory
    project_dir = os.path.join(base_dir, "training_runs")
    os.makedirs(project_dir, exist_ok=True)

    # Clear previous training runs for this model
    for folder in os.listdir(project_dir):
        if folder.startswith(model_name):
            old_run_path = os.path.join(project_dir, folder)
            print(f"Removing previous run: {folder}")
            shutil.rmtree(old_run_path)

    # Load pretrained model
    model = RTDETR(model_file)

    run_name = f"{model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    
    # Record start time
    start_time = time.time()
    
    # Train
    results = model.train(
        data=data_yaml,
        epochs=training_config["epochs"],
        imgsz=training_config["imgsz"],
        batch=training_config["batch"],
        lr0=training_config["lr0"],
        lrf=training_config["lrf"],
        momentum=training_config["momentum"],
        weight_decay=training_config["weight_decay"],
        workers=training_config["workers"],
        patience=training_config["patience"],
        cos_lr=training_config["cos_lr"],
        warmup_epochs=training_config.get("warmup_epochs", 1),
        warmup_momentum=training_config.get("warmup_momentum", 0.8),
        warmup_bias_lr=training_config.get("warmup_bias_lr", 0.1),
        project=project_dir,
        name=run_name,
        save=True,
        plots=True,
        verbose=True,
    )
    
    training_time = time.time() - start_time
    
    # Get best model path
    best_model_path = os.path.join(project_dir, run_name, "weights", "best.pt")
    
    return {
        "model_name": model_name,
        "best_model_path": best_model_path,
        "training_time": training_time,
        "run_dir": os.path.join(project_dir, run_name),
        "results": results,
    }

In [7]:
# Train the model
training_result = train_model(
    MODEL_FILE,
    MODEL_NAME,
    DATA_YAML,
    TRAINING_CONFIG,
    BASE_DIR
)

print(f"\nTraining completed in {training_result['training_time']:.1f}s")
print(f"Best model saved to: {training_result['best_model_path']}")


Training: RT-DETR-L
New https://pypi.org/project/ultralytics/8.4.9 available  Update with 'pip install -U ultralytics'
Ultralytics 8.4.8  Python-3.12.10 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 4060 Ti, 8188MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, angle=1.0, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\data_subset\data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, end2end=None, epochs=3, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K        1/3       3.6G     0.6263      5.737     0.4378         12        640: 100% ━━━━━━━━━━━━ 125/125 2.5it/s 49.7s0.4s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 13/13 8.1it/s 1.6s0.1s
                   all        100        155      0.193      0.342      0.219      0.189

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size
[K        2/3      3.88G     0.3795      1.299     0.1482         11        640: 0% ──────────── 0/125  0.4s

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K        2/3      3.88G     0.3037      1.099     0.1573          8        640: 100% ━━━━━━━━━━━━ 125/125 2.6it/s 48.0s0.4s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 13/13 8.5it/s 1.5s0.1s
                   all        100        155      0.367      0.388       0.32      0.287

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size
[K        3/3      3.97G     0.3295     0.7725     0.2182         13        640: 0% ──────────── 0/125  0.4s

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K        3/3      3.97G     0.2821      0.908     0.1509         12        640: 100% ━━━━━━━━━━━━ 125/125 2.6it/s 47.9s0.4s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 13/13 8.4it/s 1.5s0.1s
                   all        100        155       0.58       0.57      0.467      0.414

3 epochs completed in 0.043 hours.
Optimizer stripped from C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\training_runs\RT-DETR-L_20260131_105955\weights\last.pt, 66.2MB
Optimizer stripped from C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\training_runs\RT-DETR-L_20260131_105955\weights\best.pt, 66.2MB

Validating C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\training_runs

## 6. Evaluation

In [8]:
def evaluate_model(model_path, images_dir, classes, id2label, 
                   conf_thresh=0.1, eval_per_class=100, random_seed=123):
    """
    Evaluate the trained model on the dataset.
    """
    model = RTDETR(model_path)
    
    random.seed(random_seed)
    
    y_true = []
    y_pred = []
    inference_times = []
    
    for gt_class, gt_id in classes.items():
        cls_dir = os.path.join(images_dir, gt_class)
        files = [f for f in os.listdir(cls_dir) if f.lower().endswith(".jpg")]
        
        if len(files) > eval_per_class:
            files = random.sample(files, eval_per_class)
        
        for fname in tqdm(files, desc=f"Evaluating {gt_class}", leave=False):
            img_path = os.path.join(cls_dir, fname)
            
            start = time.time()
            results = model(img_path, conf=conf_thresh, verbose=False)[0]
            inference_times.append(time.time() - start)
            
            y_true.append(gt_id)
            
            if len(results.boxes) == 0:
                y_pred.append(-1)
            else:
                best_idx = results.boxes.conf.argmax()
                y_pred.append(int(results.boxes.cls[best_idx].cpu().item()))
    
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    
    # Calculate metrics
    valid = y_pred != -1
    valid_count = np.sum(valid)
    
    if valid_count > 0:
        accuracy = accuracy_score(y_true[valid], y_pred[valid])
        cm = confusion_matrix(y_true[valid], y_pred[valid], labels=list(range(len(classes))))
        report = classification_report(
            y_true[valid], y_pred[valid],
            target_names=list(classes.keys()),
            labels=list(range(len(classes))),
            zero_division=0,
            output_dict=True
        )
    else:
        accuracy = 0.0
        cm = None
        report = None
    
    return {
        "accuracy": accuracy,
        "no_prediction_count": len(y_true) - valid_count,
        "total_samples": len(y_true),
        "confusion_matrix": cm.tolist() if cm is not None else None,
        "classification_report": report,
        "avg_inference_time": np.mean(inference_times),
        "y_true": y_true.tolist(),
        "y_pred": y_pred.tolist(),
    }

In [9]:
# Evaluate the model
CONF_THRESH = 0.1
EVAL_PER_CLASS = 100

print(f"Evaluating: {MODEL_NAME}")
evaluation_result = evaluate_model(
    model_path=training_result["best_model_path"],
    images_dir=IMAGES_DIR,
    classes=CLASSES,
    id2label=ID2LABEL,
    conf_thresh=CONF_THRESH,
    eval_per_class=EVAL_PER_CLASS,
)

print(f"\nResults:")
print(f"  Accuracy: {evaluation_result['accuracy']:.4f}")
print(f"  Avg inference time: {evaluation_result['avg_inference_time']*1000:.2f}ms")
print(f"  No predictions: {evaluation_result['no_prediction_count']}/{evaluation_result['total_samples']}")

Evaluating: RT-DETR-L


                                                                                                               


Results:
  Accuracy: 0.6914
  Avg inference time: 39.89ms
  No predictions: 1/500




In [10]:
# Print classification report
if evaluation_result["classification_report"] is not None:
    y_true = np.array(evaluation_result["y_true"])
    y_pred = np.array(evaluation_result["y_pred"])
    valid = y_pred != -1
    
    print(f"\n--- {MODEL_NAME} Classification Report ---")
    print(classification_report(
        y_true[valid],
        y_pred[valid],
        target_names=list(CLASSES.keys()),
        labels=list(range(NUM_CLASSES)),
        zero_division=0
    ))


--- RT-DETR-L Classification Report ---
              precision    recall  f1-score   support

    Basophil       0.73      1.00      0.84       100
  Eosinophil       0.86      0.06      0.11       100
  Lymphocyte       0.60      0.89      0.72        99
    Monocyte       0.90      0.57      0.70       100
  Neutrophil       0.64      0.94      0.76       100

    accuracy                           0.69       499
   macro avg       0.75      0.69      0.63       499
weighted avg       0.75      0.69      0.63       499



## 7. Save Results to Disk

In [11]:
# Prepare results for saving (convert numpy types to native Python)
def convert_to_native(obj):
    """Convert numpy types to native Python types for JSON serialization."""
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, (np.int64, np.int32, np.int16, np.int8)):
        return int(obj)
    elif isinstance(obj, (np.float64, np.float32, np.float16)):
        return float(obj)
    elif isinstance(obj, dict):
        return {k: convert_to_native(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [convert_to_native(i) for i in obj]
    return obj

results_to_save = {
    "model_name": MODEL_NAME,
    "backbone": BACKBONE,
    "is_pretrained": IS_PRETRAINED,
    "best_model_path": training_result["best_model_path"],
    "run_dir": training_result["run_dir"],
    "training_time_s": float(training_result["training_time"]),
    "training_config": TRAINING_CONFIG,
    "accuracy": float(evaluation_result["accuracy"]),
    "avg_inference_time_ms": float(evaluation_result["avg_inference_time"]) * 1000,
    "no_prediction_count": int(evaluation_result["no_prediction_count"]),
    "total_samples": int(evaluation_result["total_samples"]),
    "confusion_matrix": convert_to_native(evaluation_result["confusion_matrix"]),
    "classification_report": convert_to_native(evaluation_result["classification_report"]),
    "y_true": convert_to_native(evaluation_result["y_true"]),
    "y_pred": convert_to_native(evaluation_result["y_pred"]),
    "classes": CLASSES,
    "timestamp": datetime.now().isoformat(),
}

# Save to JSON
results_file = os.path.join(RESULTS_DIR, f"{MODEL_NAME}_results.json")
with open(results_file, 'w') as f:
    json.dump(results_to_save, f, indent=2)

print(f"Results saved to: {results_file}")

Results saved to: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\results\RT-DETR-L_results.json


In [12]:
# Summary
print("\n" + "="*60)
print("TRAINING COMPLETE")
print("="*60)
print(f"Model: {MODEL_NAME} ({BACKBONE})")
print(f"Accuracy: {evaluation_result['accuracy']:.4f}")
print(f"Inference Time: {evaluation_result['avg_inference_time']*1000:.2f}ms")
print(f"Training Time: {training_result['training_time']:.1f}s")
print(f"\nBest model: {training_result['best_model_path']}")
print(f"Results JSON: {results_file}")
print("="*60)


TRAINING COMPLETE
Model: RT-DETR-L (ResNet-50)
Accuracy: 0.6914
Inference Time: 39.89ms
Training Time: 238.5s

Best model: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\training_runs\RT-DETR-L_20260131_105955\weights\best.pt
Results JSON: C:\D drive\mydata\MSML\GitHub\RT-DETR-Based-Explainable-CAD-System-for-Automated-Detection-and-Classification-of-White-Blood-Cells\output\results\RT-DETR-L_results.json
