<a href="https://colab.research.google.com/github/Akash8292/RT-DETR/blob/main/RTDETR2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RT-DETR Training on VisDrone Dataset

This notebook guides you through setting up the RT-DETR environment, preparing the VisDrone dataset, and training the model in Google Colab.

## Step 0: Prerequisites

1.  **Enable GPU:** Make sure you have enabled a GPU runtime in Colab (`Runtime` -> `Change runtime type` -> `Hardware accelerator` -> `GPU`).
2.  **Upload Data:** Upload the following VisDrone zip files to `/content/` (the default working directory of a Colab session), keeping these exact file names:
    *   `VisDrone2019-DET-train.zip`
    *   `VisDrone2019-DET-val.zip`
    *   `VisDrone2019-DET-test-dev.zip`

In [9]:
# @title Force Remove Existing RT-DETR Directory (if it exists)
import os
import shutil

dir_to_remove = "/content/RT-DETR"

if os.path.exists(dir_to_remove):
  print(f"Directory '{dir_to_remove}' exists. Removing it...")
  # Using shutil.rmtree is generally safer in Python than !rm -rf
  try:
      shutil.rmtree(dir_to_remove)
      print(f"✅ Successfully removed '{dir_to_remove}'.")
  except Exception as e:
      print(f"❌ Error removing directory with shutil: {e}")
      print("Attempting removal with shell command...")
      !rm -rf {dir_to_remove} # Use shell command as fallback
      if not os.path.exists(dir_to_remove):
           print(f"✅ Successfully removed '{dir_to_remove}' using shell command.")
      else:
           print(f"❌ Failed to remove '{dir_to_remove}' even with shell command.")
else:
    print(f"Directory '{dir_to_remove}' does not exist. No need to remove.")

Directory '/content/RT-DETR' exists. Removing it...
✅ Successfully removed '/content/RT-DETR'.


In [10]:
# @title 1. Clone RT-DETR Repository
!git clone https://github.com/lyuwenyu/RT-DETR.git

Cloning into 'RT-DETR'...
remote: Enumerating objects: 1020, done.[K
remote: Counting objects: 100% (220/220), done.[K
remote: Compressing objects: 100% (100/100), done.[K
remote: Total 1020 (delta 145), reused 120 (delta 120), pack-reused 800 (from 1)[K
Receiving objects: 100% (1020/1020), 626.16 KiB | 22.36 MiB/s, done.
Resolving deltas: 100% (496/496), done.


In [11]:
# @title 2. Change Directory to PyTorch Implementation
%cd RT-DETR/rtdetrv2_pytorch
!pwd

/content/RT-DETR/rtdetrv2_pytorch
/content/RT-DETR/rtdetrv2_pytorch


In [12]:
# @title 3. Install Requirements
!pip install -q -r requirements.txt
# Install wget if not present (usually is in Colab)
!apt-get install -qq wget

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m114.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m91.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m55.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m34.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [13]:
# @title 4. Download Pretrained Weights
# Download the R50 VD pretrained weights (trained on COCO)
!wget https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth -O rtdetr_r50vd_6x_coco_from_paddle.pth
print("✅ Pretrained weights downloaded.")

--2025-04-01 08:56:36--  https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth
Resolving github.com (github.com)... 20.205.243.166
Connecting to github.com (github.com)|20.205.243.166|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/676791659/bd85c705-7c81-4059-9a22-dbd22b0b8c29?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20250401%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250401T085636Z&X-Amz-Expires=300&X-Amz-Signature=10a2a35065d7f742220e24cbcc96b7d4e8f7ea9722b62f3422d3d3fe86ad51eb&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Drtdetr_r50vd_6x_coco_from_paddle.pth&response-content-type=application%2Foctet-stream [following]
--2025-04-01 08:56:36--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/676791659/bd85c705-7c81-4059-9a22-dbd22b0b8c29?

---
**IMPORTANT:** Ensure you have uploaded the VisDrone zip files to `/content/` before running the next cell.

In [14]:
# @title 5. Unzip VisDrone Datasets
import zipfile
import os
import time
import shutil

# --- IMPORTANT ---
# --- Upload your VisDrone zip files to /content/ before running this cell ---
# Expected files:
# /content/VisDrone2019-DET-train.zip
# /content/VisDrone2019-DET-val.zip
# /content/VisDrone2019-DET-test-dev.zip
# ---------------

# Maps each uploaded archive to the directory it is extracted into.
zip_files = {
    "/content/VisDrone2019-DET-train.zip": "/content/VisDrone2019-DET-train",
    "/content/VisDrone2019-DET-val.zip": "/content/VisDrone2019-DET-val",
    "/content/VisDrone2019-DET-test-dev.zip": "/content/VisDrone2019-DET-test-dev"
}

# Collect every archive that has not been uploaded yet.
missing_files = []
for archive in zip_files:
    if not os.path.exists(archive):
        missing_files.append(archive)

if not missing_files:
    print("👍 Found all necessary zip files.")
    start_time = time.time()
    for zip_path, extract_path in zip_files.items():
        print(f"📦 Unzipping {os.path.basename(zip_path)}...")
        try:
            # Always extract into a fresh, empty directory.
            if os.path.exists(extract_path):
                shutil.rmtree(extract_path)
            os.makedirs(extract_path, exist_ok=True)

            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(extract_path)
        except zipfile.BadZipFile:
            print(f"❌ Error: {zip_path} is not a valid zip file.")
        except Exception as e:
            print(f"❌ An error occurred during extraction: {e}")
        else:
            print(f"✅ Extracted to: {extract_path}")
    end_time = time.time()
    print(f"\n⏱️ Unzipping completed in {end_time - start_time:.2f} seconds.")
else:
    # Nothing is extracted unless all three archives are present.
    print("🚨 ERROR: The following zip files are missing in /content/:")
    print("\n".join(f"- {f}" for f in missing_files))
    print("🛑 Please upload the files and try again.")

👍 Found all necessary zip files.
📦 Unzipping VisDrone2019-DET-train.zip...
✅ Extracted to: /content/VisDrone2019-DET-train
📦 Unzipping VisDrone2019-DET-val.zip...
✅ Extracted to: /content/VisDrone2019-DET-val
📦 Unzipping VisDrone2019-DET-test-dev.zip...
✅ Extracted to: /content/VisDrone2019-DET-test-dev

⏱️ Unzipping completed in 9.04 seconds.


In [16]:
# @title 6. Clean Up Dataset Structure
import shutil
import os

def clean_visdrone_folder(folder_path, expected_name):
    """Flatten a doubly-nested VisDrone extraction and delete archive junk.

    Some archives unpack as ``<folder_path>/<expected_name>/...`` instead of
    directly into ``folder_path``. When that nested directory exists and every
    other entry next to it is junk (``.DS_Store``, ``__MACOSX`` or a ``*.zip``
    file), its contents are moved up one level and the nested directory is
    removed. Afterwards the junk entries directly inside ``folder_path`` are
    deleted. Progress and problems are reported with ``print``.

    Args:
        folder_path: Directory the archive was extracted into.
        expected_name: Name of the possibly nested top-level directory.

    Returns:
        None. A missing ``folder_path`` is reported and otherwise ignored.
    """
    print(f"\n🧹 Cleaning up {folder_path}...")
    if not os.path.exists(folder_path):
        print(f"⚠️ Folder not found: {folder_path}")
        return

    items_to_remove = ['.DS_Store', '__MACOSX'] # Add other unwanted items if needed

    def _is_junk(name):
        # Entries that are never part of the dataset itself.
        return name in items_to_remove or name.endswith('.zip')

    # Handle potential nesting (e.g., VisDrone2019-DET-train/VisDrone2019-DET-train/).
    # Junk siblings (such as a macOS-created __MACOSX folder) must not prevent the
    # nested layout from being recognised.
    nested_path = os.path.join(folder_path, expected_name)
    siblings = [name for name in os.listdir(folder_path) if name != expected_name]

    if os.path.isdir(nested_path) and all(_is_junk(name) for name in siblings):
        print(f"  -> Detected nested folder: {nested_path}. Moving contents up.")
        for item in os.listdir(nested_path):
            source = os.path.join(nested_path, item)
            try:
                shutil.move(source, folder_path)
            except Exception as e:
                print(f"  -> Error moving {item}: {e}. It might already exist.")
                # Best effort: drop the leftover source so the nested folder
                # can be removed below. Failures are reported, not raised.
                try:
                    if os.path.isdir(source):
                        shutil.rmtree(source)
                    elif os.path.isfile(source):
                        os.remove(source)
                except OSError as cleanup_error:
                    print(f"  -> Warning: Could not remove leftover {item}: {cleanup_error}")

        # Attempt to remove the now empty nested directory
        try:
            os.rmdir(nested_path) # Succeeds only when the directory is empty
            print(f"  -> Removed empty nested folder.")
        except OSError:
            try:
                shutil.rmtree(nested_path) # Force remove if not empty
                print(f"  -> Force removed nested folder structure.")
            except OSError as e:
                print(f"  -> Warning: Could not remove nested folder {nested_path}: {e}")
                if os.path.exists(nested_path):
                    print(f"  -> Contents of nested folder: {os.listdir(nested_path)}")

    # Remove specific unwanted files/folders
    for item in os.listdir(folder_path):
        if not _is_junk(item):
            continue
        item_path = os.path.join(folder_path, item)
        try:
            if os.path.isfile(item_path):
                os.remove(item_path)
                print(f"  -> Removed file: {item}")
            elif os.path.isdir(item_path):
                shutil.rmtree(item_path)
                print(f"  -> Removed directory: {item}")
        except Exception as e:
            print(f"  -> Error removing {item}: {e}")

    print(f"✅ Cleanup finished for {folder_path}.")
    if os.path.exists(folder_path):
        print(f"   Contents: {os.listdir(folder_path)}")
    else:
        print(f"   Folder {folder_path} no longer exists after cleanup?")


# Clean each dataset directory. A nested folder, if present, carries the same
# name as the extraction directory itself.
dataset_folders = [
    "/content/VisDrone2019-DET-train",
    "/content/VisDrone2019-DET-val",
    "/content/VisDrone2019-DET-test-dev",
]
for folder in dataset_folders:
    clean_visdrone_folder(folder, os.path.basename(folder))

# Final check: list every split and flag missing key subfolders.
print("\n📁 Final Data Structure Check:")
for folder in dataset_folders:
    if not os.path.exists(folder):
        print(f"  {folder}: ❌ NOT FOUND")
        continue
    print(f"  {folder}:")
    for sub in os.listdir(folder):
        print(f"    - {sub}")
    for required in ("images", "annotations"):
        if not os.path.exists(os.path.join(folder, required)):
            print(f"    ⚠️ Missing '{required}' folder in {folder}")


🧹 Cleaning up /content/VisDrone2019-DET-train...
✅ Cleanup finished for /content/VisDrone2019-DET-train.
   Contents: ['images', 'annotations']

🧹 Cleaning up /content/VisDrone2019-DET-val...
✅ Cleanup finished for /content/VisDrone2019-DET-val.
   Contents: ['images', 'annotations']

🧹 Cleaning up /content/VisDrone2019-DET-test-dev...
✅ Cleanup finished for /content/VisDrone2019-DET-test-dev.
   Contents: ['images', 'annotations']

📁 Final Data Structure Check:
  /content/VisDrone2019-DET-train:
    - images
    - annotations
  /content/VisDrone2019-DET-val:
    - images
    - annotations
  /content/VisDrone2019-DET-test-dev:
    - images
    - annotations


In [17]:
# @title 7. Convert VisDrone Annotations to COCO Format
import json
from pathlib import Path
from tqdm.notebook import tqdm # Use notebook version for better Colab display
import cv2 # Import OpenCV to get image dimensions
import os

def visdrone_to_coco(images_dir, annotations_dir, output_json):
    """Convert one VisDrone-DET split into a COCO-style detection JSON file.

    Every ``*.jpg`` in ``images_dir`` (sorted by path) becomes a COCO image
    entry with ids starting at 1; images that cannot be read are skipped.
    The matching annotation file ``<annotations_dir>/<image stem>.txt``, when it
    exists, is parsed line by line as
    ``bbox_left,bbox_top,width,height,score,category,truncation,occlusion``.
    Only boxes with category 1-10, a non-zero score and positive width and
    height are written. Blank lines are ignored; malformed lines are reported
    and skipped.

    Args:
        images_dir: Directory containing the split's ``.jpg`` images.
        annotations_dir: Directory containing the per-image ``.txt`` files.
        output_json: Path of the JSON file to write (parent dirs are created).

    Returns:
        None. If no JPG images are found, an error is printed and no file is
        written.
    """
    images_path = Path(images_dir)
    annotations_path = Path(annotations_dir)
    output_file = Path(output_json)

    print(f"\n🔄 Converting {images_path.parent.name} to COCO format...")
    print(f"   Images: {images_dir}")
    print(f"   Annotations: {annotations_dir}")
    print(f"   Output: {output_json}")

    coco = {
        "images": [],
        "annotations": [],
        "categories": [
            # All 12 VisDrone categories are listed with their original ids (0-11),
            # but only annotations with ids 1-10 are emitted below.
            # NOTE(review): the training configs in this notebook set num_classes to 10;
            # confirm whether the training code expects 0-based contiguous labels
            # (ids 1-10 would then need remapping to 0-9).
            {"id": 0, "name": "ignored regions"}, # Often ignored in training
            {"id": 1, "name": "pedestrian"},
            {"id": 2, "name": "people"}, # Often grouped with pedestrian
            {"id": 3, "name": "bicycle"},
            {"id": 4, "name": "car"},
            {"id": 5, "name": "van"},
            {"id": 6, "name": "truck"},
            {"id": 7, "name": "tricycle"},
            {"id": 8, "name": "awning-tricycle"},
            {"id": 9, "name": "bus"},
            {"id": 10, "name": "motor"},
            {"id": 11, "name": "others"} # Often ignored
        ]
    }

    # VisDrone category IDs we care about (the 10 main classes)
    valid_category_ids = set(range(1, 11)) # 1 to 10

    image_files = sorted(images_path.glob('*.jpg'))
    if not image_files:
        print(f"❌ No JPG images found in {images_dir}. Please check the path.")
        return

    print(f"   Found {len(image_files)} images.")

    annotation_id = 1
    current_image_id = 1
    for image_path in tqdm(image_files, desc=f"Processing {images_path.parent.name}"):
        # Get image dimensions using OpenCV
        try:
            img = cv2.imread(str(image_path))
            if img is None:
                print(f"   ⚠️ Warning: Could not read image {image_path.name}. Skipping.")
                continue
            # Only the first two axes are needed; slicing does not depend on
            # the array having exactly three dimensions.
            height, width = img.shape[:2]
        except Exception as e:
            print(f"   ⚠️ Warning: Error reading {image_path.name}: {e}. Skipping.")
            continue

        coco["images"].append({
            "file_name": image_path.name,
            "id": current_image_id,
            "height": height,
            "width": width
        })

        ann_file = annotations_path / image_path.with_suffix('.txt').name
        if ann_file.exists():
            with open(ann_file, 'r') as f:
                for line in f:
                    record = line.strip()
                    if not record:
                        # Blank (e.g. trailing) lines are not malformed annotations.
                        continue
                    try:
                        # <bbox_left>,<bbox_top>,<width>,<height>,<score>,<category>,<truncation>,<occlusion>
                        # Slicing to 8 fields tolerates a trailing comma.
                        x, y, w, h, score, cat_id, _trunc, _occ = map(int, record.split(',')[:8])

                        # Skip categories outside 1-10 (0: ignored regions, 11: others)
                        # and boxes whose score field is 0.
                        if cat_id not in valid_category_ids or score == 0:
                            continue

                        # Basic sanity check for bounding boxes
                        if w <= 0 or h <= 0:
                            continue

                        coco["annotations"].append({
                            "id": annotation_id,
                            "image_id": current_image_id,
                            "category_id": cat_id,        # Original VisDrone category id
                            "bbox": [x, y, w, h],         # COCO format: [top_left_x, top_left_y, width, height]
                            "area": float(w * h),
                            "iscrowd": 0,
                        })
                        annotation_id += 1
                    except ValueError:
                        # Raised for non-integer fields and for lines with fewer than 8 fields.
                        print(f"   ⚠️ Warning: Could not parse line in {ann_file.name}: '{record}'. Skipping.")
                    except Exception as e:
                        print(f"   ⚠️ Error processing line in {ann_file.name}: {e}. Skipping annotation.")

        # Ids advance only for images that were actually added.
        current_image_id += 1

    print(f"   Processed {len(coco['images'])} images and {len(coco['annotations'])} annotations.")

    # Create output directory if it doesn't exist
    output_file.parent.mkdir(parents=True, exist_ok=True)

    with open(output_file, 'w') as f:
        json.dump(coco, f, indent=4)
    print(f"✅ Saved COCO annotations to: {output_file}\n")

# Per-split input directories and output JSON path.
# These must match the directories produced by the unzip/cleanup steps.
splits = {
    "train": {
        "images": "/content/VisDrone2019-DET-train/images",
        "annotations": "/content/VisDrone2019-DET-train/annotations",
        "output": "/content/visdrone_train_coco.json" # Save JSON in /content/
    },
    "val": {
        "images": "/content/VisDrone2019-DET-val/images",
        "annotations": "/content/VisDrone2019-DET-val/annotations",
        "output": "/content/visdrone_val_coco.json"
    },
    "test": { # Using test-dev for testing/inference later
        "images": "/content/VisDrone2019-DET-test-dev/images",
        "annotations": "/content/VisDrone2019-DET-test-dev/annotations", # VisDrone test annotations might be empty or dummy
        "output": "/content/visdrone_test_coco.json"
    }
}

# Validate every directory up front; nothing is converted unless all checks pass.
conversion_successful = True
for split_name, paths in splits.items():
    if not os.path.exists(paths["images"]):
        print(f"🚨 ERROR: Image directory not found for '{split_name}': {paths['images']}")
        conversion_successful = False
    if os.path.exists(paths["annotations"]):
        continue
    print(f"🚨 ERROR: Annotation directory not found for '{split_name}': {paths['annotations']}")
    # A missing annotation directory is tolerated for the 'test' split only.
    conversion_successful = conversion_successful and split_name == 'test'

if not conversion_successful:
    print("🛑 Conversion skipped due to missing directories. Please check the paths and the output of Step 6.")
else:
    for paths in splits.values():
        visdrone_to_coco(paths["images"], paths["annotations"], paths["output"])
    print("✅ All conversions attempted.")


🔄 Converting VisDrone2019-DET-train to COCO format...
   Images: /content/VisDrone2019-DET-train/images
   Annotations: /content/VisDrone2019-DET-train/annotations
   Output: /content/visdrone_train_coco.json
   Found 6471 images.


Processing VisDrone2019-DET-train:   0%|          | 0/6471 [00:00<?, ?it/s]

   Processed 6471 images and 343204 annotations.
✅ Saved COCO annotations to: /content/visdrone_train_coco.json


🔄 Converting VisDrone2019-DET-val to COCO format...
   Images: /content/VisDrone2019-DET-val/images
   Annotations: /content/VisDrone2019-DET-val/annotations
   Output: /content/visdrone_val_coco.json
   Found 548 images.


Processing VisDrone2019-DET-val:   0%|          | 0/548 [00:00<?, ?it/s]

   Processed 548 images and 38759 annotations.
✅ Saved COCO annotations to: /content/visdrone_val_coco.json


🔄 Converting VisDrone2019-DET-test-dev to COCO format...
   Images: /content/VisDrone2019-DET-test-dev/images
   Annotations: /content/VisDrone2019-DET-test-dev/annotations
   Output: /content/visdrone_test_coco.json
   Found 1610 images.


Processing VisDrone2019-DET-test-dev:   0%|          | 0/1610 [00:00<?, ?it/s]

   Processed 1610 images and 75102 annotations.
✅ Saved COCO annotations to: /content/visdrone_test_coco.json

✅ All conversions attempted.


In [18]:
# @title 8. Create Dataset Configuration File
import os

# Create the directory structure within the rtdetrv2_pytorch folder
# This is where the training script expects config files relative to its location
dataset_config_dir = "/content/RT-DETR/rtdetrv2_pytorch/configs/datasets"
os.makedirs(dataset_config_dir, exist_ok=True)

yaml_path = os.path.join(dataset_config_dir, 'visdrone_coco.yaml')

# YAML content using the paths where we saved the COCO JSON files
# and the absolute paths to the image directories
yaml_content = f"""
# Dataset specific configurations
dataset_type: coco # Specify the dataset type reader
train_dataset:
  name: coco_train # Can be any name
  img_folder: /content/VisDrone2019-DET-train/images # Absolute path to train images
  ann_file: /content/visdrone_train_coco.json      # Absolute path to train COCO json
  transforms:

val_dataset:
  name: coco_val
  img_folder: /content/VisDrone2019-DET-val/images   # Absolute path to val images
  ann_file: /content/visdrone_val_coco.json        # Absolute path to val COCO json
  transforms:

test_dataset: # Optional: if you want to run evaluation on the test set later
  name: coco_test
  img_folder: /content/VisDrone2019-DET-test-dev/images # Absolute path to test images
  ann_file: /content/visdrone_test_coco.json       # Absolute path to test COCO json
  transforms:

# Define label map if needed, otherwise defaults might be used
# For VisDrone (10 classes + background), this might not be strictly necessary if num_classes is set correctly
# label_map: ...
num_classes: 10 # IMPORTANT: Number of main VisDrone classes (1-10)
"""

# Write the content to the file
with open(yaml_path, 'w') as f:
    f.write(yaml_content)

print(f"✅ Created Dataset Config: {yaml_path}")
# Verify creation
!ls -l /content/RT-DETR/rtdetrv2_pytorch/configs/datasets/

✅ Created Dataset Config: /content/RT-DETR/rtdetrv2_pytorch/configs/datasets/visdrone_coco.yaml
total 4
-rw-r--r-- 1 root root 1080 Apr  1 09:01 visdrone_coco.yaml


In [65]:
# @title 9. Create Training Configuration File (Consolidated v9 - Default Matcher Name)
import os

# Create the directory structure within the rtdetrv2_pytorch folder
model_config_dir = "/content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr"
os.makedirs(model_config_dir, exist_ok=True)

# Use a new distinct name
config_path = os.path.join(model_config_dir, "rtdetr_r50vd_6x_visdrone_consolidated_v9.yml")

# Define the YAML structure template
# Define HungarianMatcher component using its default name
visdrone_config_template = """
# --- Consolidated Config for VisDrone (v9 - Default Matcher Name) ---

task: detection

# --- Runtime Settings ---
use_ema: true
ema_decay: 0.9999
# epoches: 72
# snapshot_epoch: 1
# log_iter: 20
# save_dir: ./output
# sync_bn: false

# --- Dataset Settings ---
dataset_type: coco
train_dataset:
  name: coco_train
  img_folder: /content/VisDrone2019-DET-train/images
  ann_file: /content/visdrone_train_coco.json
val_dataset:
  name: coco_val
  img_folder: /content/VisDrone2019-DET-val/images
  ann_file: /content/visdrone_val_coco.json

num_classes: {num_classes_placeholder}

# --- DataLoader Settings ---
train_dataloader:
  batch_size: 8
  num_workers: 4
  shuffle: True
val_dataloader:
  batch_size: 8
  num_workers: 4
  shuffle: False

# --- Linking Model Components ---
model: RTDETR
criterion: RTDETRCriterion
postprocessor: RTDETRPostProcessor

# --- Component Definitions ---
use_focal_loss: {use_focal_loss_placeholder}
eval_spatial_size: [640, 640]

RTDETR:
  backbone: PResNet
  encoder: HybridEncoder
  decoder: RTDETRTransformer

PResNet:
  depth: 50
  variant: d
  freeze_at: 0
  return_idx: [1, 2, 3]
  num_stages: 4
  freeze_norm: True
  pretrained: True

HybridEncoder:
  in_channels: [512, 1024, 2048]
  feat_strides: [8, 16, 32]
  hidden_dim: 256
  use_encoder_idx: [2]
  num_encoder_layers: 1
  nhead: 8
  dim_feedforward: 1024
  dropout: 0.
  enc_act: 'gelu'
  expansion: 1.0
  depth_mult: 1.0
  act: 'silu'

RTDETRTransformer:
  num_classes: {num_classes_placeholder}
  feat_channels: [256, 256, 256]
  feat_strides: [8, 16, 32]
  hidden_dim: 256
  num_levels: 3
  num_layers: 6
  num_queries: 300
  num_denoising: 100
  label_noise_ratio: 0.5
  box_noise_scale: 1.0
  eval_idx: -1

RTDETRPostProcessor:
  num_classes: {num_classes_placeholder}
  num_top_queries: 300

# --- Define the Matcher Component using its CLASS NAME as the key ---
# This mirrors the structure seen in the default included rtdetr_r50vd.yml
HungarianMatcher: # Use the actual class name as the component key
  # 'type' key might not be needed if the key matches the class name
  cost_class: 2
  cost_bbox: 5
  cost_giou: 2

RTDETRCriterion:
  num_classes: {num_classes_placeholder}
  use_focal_loss: {use_focal_loss_placeholder}
  weight_dict: {{loss_vfl: 1, loss_bbox: 5, loss_giou: 2}}
  losses: ['vfl', 'boxes']
  alpha: 0.75
  gamma: 2.0
  matcher: HungarianMatcher # Reference the component defined above by its name (which is the class name)

# --- Optimizer Settings ---
optimizer:
  type: AdamW
  lr: 0.0001
  weight_decay: 0.0001

lr_scheduler:
  type: MultiStep
  milestones: [60, 68]
  gamma: 0.1

# --- Training Overrides ---
output_dir: output/rtdetr_r50vd_6x_visdrone_consolidated_v9
"""

# Write the config file using .format() to substitute the placeholders
with open(config_path, "w") as f:
    f.write(visdrone_config_template.format(
        num_classes_placeholder=10,
        use_focal_loss_placeholder=True
    ))
print(f"✅ Created Consolidated Config (v9): {config_path}")

# Verify the created file
print(f"\n--- Verifying created config: {config_path} ---")
!cat {config_path}
print("--- End config verification ---")

# IMPORTANT: Update the training command
print("\n❗ Remember to update the training command in Cell 11 to use:")
print(f"   -c {config_path}")

✅ Created Consolidated Config (v9): /content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_visdrone_consolidated_v9.yml

--- Verifying created config: /content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_visdrone_consolidated_v9.yml ---

# --- Consolidated Config for VisDrone (v9 - Default Matcher Name) ---

task: detection

# --- Runtime Settings ---
use_ema: true
ema_decay: 0.9999
# epoches: 72
# snapshot_epoch: 1
# log_iter: 20
# save_dir: ./output
# sync_bn: false

# --- Dataset Settings ---
dataset_type: coco
train_dataset:
  name: coco_train
  img_folder: /content/VisDrone2019-DET-train/images
  ann_file: /content/visdrone_train_coco.json
val_dataset:
  name: coco_val
  img_folder: /content/VisDrone2019-DET-val/images
  ann_file: /content/visdrone_val_coco.json

num_classes: 10

# --- DataLoader Settings ---
train_dataloader:
  batch_size: 8
  num_workers: 4
  shuffle: True
val_dataloader:
  batch_size: 8
  num_workers: 4
  shuffle: False

# --- Linking Mode

In [68]:
# @title Restore Original coco_detection.yml (Optional)
import os

# Re-clone repo to a temp directory to get original file
print("Cloning repo temporarily to get original dataset config...")
!git clone https://github.com/lyuwenyu/RT-DETR.git /tmp/RT-DETR-temp >> /dev/null # Suppress output

original_file = '/tmp/RT-DETR-temp/rtdetrv2_pytorch/configs/dataset/coco_detection.yml'
target_file = '/content/RT-DETR/rtdetrv2_pytorch/configs/dataset/coco_detection.yml'

if os.path.exists(original_file):
    try:
        !cp {original_file} {target_file}
        print(f"✅ Restored {target_file} from repository.")
        # Clean up temp clone
        !rm -rf /tmp/RT-DETR-temp
        print("   Cleaned up temporary clone.")
        # Verify
        print("\n--- Verifying restored file content: ---")
        !cat {target_file}
        print("\n--- End verification ---")
    except Exception as e:
        print(f"❌ Error restoring file: {e}")
else:
    print("❌ Could not find original file in temporary clone.")

Cloning repo temporarily to get original dataset config...
Cloning into '/tmp/RT-DETR-temp'...
remote: Enumerating objects: 1020, done.[K
remote: Counting objects: 100% (220/220), done.[K
remote: Compressing objects: 100% (100/100), done.[K
remote: Total 1020 (delta 145), reused 120 (delta 120), pack-reused 800 (from 1)[K
Receiving objects: 100% (1020/1020), 626.16 KiB | 2.75 MiB/s, done.
Resolving deltas: 100% (496/496), done.
✅ Restored /content/RT-DETR/rtdetrv2_pytorch/configs/dataset/coco_detection.yml from repository.
   Cleaned up temporary clone.

--- Verifying restored file content: ---
task: detection

evaluator:
  type: CocoEvaluator
  iou_types: ['bbox', ]

# num_classes: 365
# remap_mscoco_category: False

# num_classes: 91
# remap_mscoco_category: False

num_classes: 80
remap_mscoco_category: True


train_dataloader: 
  type: DataLoader
  dataset: 
    type: CocoDetection
    img_folder: ./dataset/coco/train2017/
    ann_file: ./dataset/coco/annotations/instances_train

In [70]:
# @title Install ruamel.yaml dependency
print("Installing ruamel.yaml...")
!pip install ruamel.yaml
print("✅ ruamel.yaml installed.")

Installing ruamel.yaml...
Collecting ruamel.yaml
  Downloading ruamel.yaml-0.18.10-py3-none-any.whl.metadata (23 kB)
Collecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml)
  Downloading ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.7 kB)
Downloading ruamel.yaml-0.18.10-py3-none-any.whl (117 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.7/117.7 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (739 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m739.1/739.1 kB[0m [31m44.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ruamel.yaml.clib, ruamel.yaml
Successfully installed ruamel.yaml-0.18.10 ruamel.yaml.clib-0.2.12
✅ ruamel.yaml installed.


In [71]:
# @title Add/Override Dataset Settings in Main Config (rtdetr_r50vd_6x_coco.yml)
import os
from ruamel.yaml import YAML

main_config_path = '/content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml'
print(f"--- Adding/Overriding Dataset settings in: {main_config_path} ---")

yaml_loader = YAML()
yaml_loader.preserve_quotes = True
yaml_loader.indent(mapping=2, sequence=4, offset=2) # Adjust indent as needed

try:
    with open(main_config_path, 'r') as f:
        main_config = yaml_loader.load(f)

    # --- Define VisDrone Dataset Blocks ---
    # These will override anything included from coco_detection.yml
    visdrone_train_dataset = {
        'name': 'visdrone_train',
        'img_folder': '/content/VisDrone2019-DET-train/images',
        'ann_file': '/content/visdrone_train_coco.json',
        # Add 'transforms:' if needed, copying structure from default if available
        # 'transforms': {}
    }
    visdrone_val_dataset = {
        'name': 'visdrone_val',
        'img_folder': '/content/VisDrone2019-DET-val/images',
        'ann_file': '/content/visdrone_val_coco.json',
        # 'transforms': {}
    }
    visdrone_test_dataset = {
        'name': 'visdrone_test',
        'img_folder': '/content/VisDrone2019-DET-test-dev/images',
        'ann_file': '/content/visdrone_test_coco.json',
        # 'transforms': {}
    }

    # --- Add/Update keys in the loaded config ---
    print("  Updating num_classes...")
    main_config['num_classes'] = 10
    print("  Updating train_dataset...")
    main_config['train_dataset'] = visdrone_train_dataset
    print("  Updating val_dataset...")
    main_config['val_dataset'] = visdrone_val_dataset
    print("  Updating test_dataset...")
    main_config['test_dataset'] = visdrone_test_dataset
    print("  Ensuring output_dir is set for VisDrone...")
    main_config['output_dir'] = './output/rtdetr_r50vd_6x_visdrone' # Use the name we wanted


    # --- Remove specific dataloader settings if they exist at top level ---
    # These should ideally come from the included dataloader.yml
    keys_to_remove = ['train_dataloader', 'val_dataloader']
    for key in keys_to_remove:
      if key in main_config:
          print(f"  Removing top-level '{key}' (should be included)")
          del main_config[key]


    # --- Write the modified content back ---
    with open(main_config_path, 'w') as f:
        yaml_loader.dump(main_config, f)

    print(f"\n✅ Successfully updated: {main_config_path}")
    print("\n--- Verifying updated file content: ---")
    !cat {main_config_path}
    print("\n--- End verification ---")


except Exception as e:
    print(f"❌ Error modifying {main_config_path}: {e}")

--- Adding/Overriding Dataset settings in: /content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml ---
  Updating num_classes...
  Updating train_dataset...
  Updating val_dataset...
  Updating test_dataset...
  Ensuring output_dir is set for VisDrone...

✅ Successfully updated: /content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml

--- Verifying updated file content: ---
__include__: ['../dataset/coco_detection.yml', '../runtime.yml', './include/dataloader.yml',
  './include/optimizer.yml', './include/rtdetr_r50vd.yml']


output_dir: ./output/rtdetr_r50vd_6x_visdrone



num_classes: 10
train_dataset:
  name: visdrone_train
  img_folder: /content/VisDrone2019-DET-train/images
  ann_file: /content/visdrone_train_coco.json
val_dataset:
  name: visdrone_val
  img_folder: /content/VisDrone2019-DET-val/images
  ann_file: /content/visdrone_val_coco.json
test_dataset:
  name: visdrone_test
  img_folder: /content/VisDrone2019-DET-test-dev/images
  ann_file

In [74]:
# @title Modify Dataloader Dataset Settings in Main Config (rtdetr_r50vd_6x_coco.yml)
import os
from ruamel.yaml import YAML
import sys # For error handling

main_config_path = '/content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml'
print(f"--- Modifying nested Dataset settings in: {main_config_path} ---")

yaml_loader = YAML()
yaml_loader.preserve_quotes = True
yaml_loader.indent(mapping=2, sequence=4, offset=2)

try:
    # Load the existing main config
    with open(main_config_path, 'r') as f:
        main_config = yaml_loader.load(f)

    # --- Ensure top-level num_classes is correct ---
    print("  Setting top-level num_classes...")
    main_config['num_classes'] = 10

    # --- Define the VisDrone dataset configuration blocks ---
    visdrone_train_dataset_def = {
        'type': 'CocoDetection', # Assuming this is the correct type from includes
        'img_folder': '/content/VisDrone2019-DET-train/images',
        'ann_file': '/content/visdrone_train_coco.json',
        'return_masks': False,
        # If transforms were defined in the original dataloader include,
        # they might still be inherited unless explicitly overridden here.
        # We'll leave transforms out for now to inherit defaults.
    }
    visdrone_val_dataset_def = {
        'type': 'CocoDetection',
        'img_folder': '/content/VisDrone2019-DET-val/images',
        'ann_file': '/content/visdrone_val_coco.json',
        'return_masks': False,
    }

    # --- Explicitly define/override dataloader sections in the main config ---
    # This ensures these definitions take precedence over includes.

    print("  Defining/Overriding train_dataloader settings...")
    if 'train_dataloader' not in main_config:
        main_config['train_dataloader'] = {} # Create block if missing
    # Set the nested dataset key to our VisDrone definition
    main_config['train_dataloader']['dataset'] = visdrone_train_dataset_def
    # Set other dataloader params directly here if needed (overrides includes)
    main_config['train_dataloader']['type'] = 'DataLoader' # Ensure type is set
    main_config['train_dataloader']['shuffle'] = True
    main_config['train_dataloader']['num_workers'] = 4
    main_config['train_dataloader']['drop_last'] = True
    # Use a default collate_fn type if needed, or let it inherit
    # if 'collate_fn' not in main_config['train_dataloader']:
    #     main_config['train_dataloader']['collate_fn'] = {'type': 'BatchImageCollateFuncion'}


    print("  Defining/Overriding val_dataloader settings...")
    if 'val_dataloader' not in main_config:
        main_config['val_dataloader'] = {}
    main_config['val_dataloader']['dataset'] = visdrone_val_dataset_def
    main_config['val_dataloader']['type'] = 'DataLoader'
    main_config['val_dataloader']['shuffle'] = False
    main_config['val_dataloader']['num_workers'] = 4 # Adjusted from default 8
    main_config['val_dataloader']['drop_last'] = False
    # if 'collate_fn' not in main_config['val_dataloader']:
    #     main_config['val_dataloader']['collate_fn'] = {'type': 'BatchImageCollateFuncion'}


    # --- Remove the separate top-level dataset definitions we added previously ---
    print("  Removing previous top-level dataset definitions (if present)...")
    keys_to_remove = ['train_dataset', 'val_dataset', 'test_dataset']
    for key in keys_to_remove:
        if key in main_config:
            try:
                del main_config[key]
                print(f"    Removed '{key}'")
            except KeyError:
                pass # Already removed or never existed

    # --- Ensure output_dir is still correct ---
    print("  Ensuring output_dir...")
    main_config['output_dir'] = './output/rtdetr_r50vd_6x_visdrone'

    # --- Write the modified content back ---
    with open(main_config_path, 'w') as f:
        yaml_loader.dump(main_config, f)

    print(f"\n✅ Successfully updated: {main_config_path}")
    print("\n--- Verifying updated file content: ---")
    !cat {main_config_path}
    print("\n--- End verification ---")


except Exception as e:
    print(f"❌ Error modifying {main_config_path}: {e}", file=sys.stderr)
    print(f"   Type: {type(e).__name__}", file=sys.stderr)
    # Print existing content for debugging
    try:
        print("\n--- Existing file content (on error): ---", file=sys.stderr)
        with open(main_config_path, 'r') as f_err:
            print(f_err.read(), file=sys.stderr)
        print("\n--- End existing content ---", file=sys.stderr)
    except Exception as read_e:
         print(f"  (Could not read file content on error: {read_e})", file=sys.stderr)

--- Modifying nested Dataset settings in: /content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml ---
  Setting top-level num_classes...
  Defining/Overriding train_dataloader settings...
  Defining/Overriding val_dataloader settings...
  Removing previous top-level dataset definitions (if present)...
    Removed 'train_dataset'
    Removed 'val_dataset'
    Removed 'test_dataset'
  Ensuring output_dir...

✅ Successfully updated: /content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml

--- Verifying updated file content: ---
__include__: ['../dataset/coco_detection.yml', '../runtime.yml', './include/dataloader.yml',
  './include/optimizer.yml', './include/rtdetr_r50vd.yml']


output_dir: ./output/rtdetr_r50vd_6x_visdrone



num_classes: 10
train_dataloader:
  dataset:
    type: CocoDetection
    img_folder: /content/VisDrone2019-DET-train/images
    ann_file: /content/visdrone_train_coco.json
    return_masks: false
  type: DataLoader
  shuffle: true

In [77]:
# @title Simplify Transforms in Included Config (./include/dataloader.yml)
import os
from ruamel.yaml import YAML
import sys

# Path to the included dataloader config
include_config_path = '/content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr/include/dataloader.yml'
print(f"--- Simplifying Transforms in included file: {include_config_path} ---")

# Ensure ruamel.yaml is installed (just in case)
try:
    from ruamel.yaml import YAML
except ImportError:
    print("Installing ruamel.yaml...")
    !pip install ruamel.yaml -q
    from ruamel.yaml import YAML


yaml_loader = YAML()
yaml_loader.preserve_quotes = True
# Adjust indentation based on the target file's style if needed
yaml_loader.indent(mapping=2, sequence=4, offset=2)


try:
    # Load the included dataloader config
    with open(include_config_path, 'r') as f:
        include_config = yaml_loader.load(f)

    # --- Define Simplified Transform Ops ---
    simplified_train_transforms_ops = [
        {'type': 'RandomHorizontalFlip'},
        {'type': 'Resize', 'size': [640, 640]},
        {'type': 'ConvertPILImage', 'dtype': 'float32', 'scale': True},
        # Keep SanitizeBoundingBoxes after resize if needed
        {'type': 'SanitizeBoundingBoxes', 'min_size': 1},
        # Keep ConvertBoxes if model expects normalized cxcywh format
        {'type': 'ConvertBoxes', 'fmt': 'cxcywh', 'normalize': True}
    ]

    # --- Update the train_dataloader transforms in this included file ---
    # Navigate the structure carefully
    found_path = False
    if 'train_dataloader' in include_config and \
       isinstance(include_config.get('train_dataloader'), dict) and \
       'dataset' in include_config['train_dataloader'] and \
       isinstance(include_config['train_dataloader'].get('dataset'), dict) and \
       'transforms' in include_config['train_dataloader']['dataset'] and \
       isinstance(include_config['train_dataloader']['dataset'].get('transforms'), dict) and \
       'ops' in include_config['train_dataloader']['dataset']['transforms']:

        print("  Simplifying train_dataloader -> dataset -> transforms -> ops...")
        include_config['train_dataloader']['dataset']['transforms']['ops'] = simplified_train_transforms_ops
        found_path = True
    else:
        # Attempt to find transforms directly under dataset if ops isn't there
        if 'train_dataloader' in include_config and \
           isinstance(include_config.get('train_dataloader'), dict) and \
           'dataset' in include_config['train_dataloader'] and \
           isinstance(include_config['train_dataloader'].get('dataset'), dict) and \
           'transforms' in include_config['train_dataloader']['dataset']:
             print("  Found transforms directly under dataset. Assuming it's a list of ops...")
             # Check if it's already a list (might not need 'ops' key)
             if isinstance(include_config['train_dataloader']['dataset']['transforms'], list):
                 include_config['train_dataloader']['dataset']['transforms'] = simplified_train_transforms_ops
                 found_path = True
             # Or if it's a dict with an 'ops' key we missed somehow
             elif isinstance(include_config['train_dataloader']['dataset']['transforms'], dict) and \
                  'ops' in include_config['train_dataloader']['dataset']['transforms']:
                   print("    Re-simplifying train_dataloader -> dataset -> transforms -> ops...")
                   include_config['train_dataloader']['dataset']['transforms']['ops'] = simplified_train_transforms_ops
                   found_path = True


    if not found_path:
        print("  Could not find expected structure to simplify transforms in dataloader.yml.", file=sys.stderr)
        print("  Please check the file manually: ", include_config_path, file=sys.stderr)


    # --- Write the modified content back ---
    if found_path:
        with open(include_config_path, 'w') as f:
            yaml_loader.dump(include_config, f)
        print(f"\n✅ Successfully simplified transforms in: {include_config_path}")
        print("\n--- Verifying updated file content (showing relevant part): ---")
        !grep -A 20 "train_dataloader:" {include_config_path} # Show train_dataloader section
        print("\n--- End verification ---")
    else:
         print("\n❌ No changes written due to missing config structure.", file=sys.stderr)


except FileNotFoundError:
     print(f"❌ Error: File not found at {include_config_path}. Please check the path.", file=sys.stderr)
except Exception as e:
    print(f"❌ Error modifying {include_config_path}: {e}", file=sys.stderr)
    print(f"   Type: {type(e).__name__}", file=sys.stderr)

--- Simplifying Transforms in included file: /content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr/include/dataloader.yml ---
  Simplifying train_dataloader -> dataset -> transforms -> ops...

✅ Successfully simplified transforms in: /content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr/include/dataloader.yml

--- Verifying updated file content (showing relevant part): ---
train_dataloader:
  dataset:
    return_masks: false
    transforms:
      ops:
        - type: RandomHorizontalFlip
        - type: Resize
          size:
            - 640
            - 640
        - type: ConvertPILImage
          dtype: float32
          scale: true
        - type: SanitizeBoundingBoxes
          min_size: 1
        - type: ConvertBoxes
          fmt: cxcywh
          normalize: true
  collate_fn:
    type: BatchImageCollateFuncion
    scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]

--- End verification ---


In [72]:
# @title 10. (Check/Fix Solver Init - Revised v3) Remove Bad Import ONLY
# Deletes every line of the solver package's __init__.py that contains a
# specific import statement, leaving all other lines untouched.

import os

init_file_path = '/content/RT-DETR/rtdetrv2_pytorch/src/solver/__init__.py'
incorrect_import = "from src.solver.detection import DetectionSolver"


def remove_import_line(path, import_stmt):
    """Remove every line containing `import_stmt` from the text file at `path`.

    The file is only rewritten when at least one line matched.

    Args:
        path: File to clean.
        import_stmt: Substring identifying the lines to drop.

    Returns:
        The list of removed lines (with their line endings); empty when
        nothing matched.

    Raises:
        FileNotFoundError: `path` does not exist.
        OSError: The cleaned content could not be written back.
    """
    with open(path, 'r') as file:
        lines = file.readlines()

    kept_lines, removed_lines = [], []
    for line in lines:
        if import_stmt in line:
            removed_lines.append(line)
            print(f"  -> Found and skipping line: {line.strip()}")
        else:
            kept_lines.append(line)

    if not removed_lines:
        return removed_lines

    print("  -> Attempting to write file back without the incorrect import...")
    try:
        with open(path, 'w') as file:
            file.writelines(kept_lines)
    except OSError as write_err:
        # Re-raise with context so the caller can tell a write failure apart
        # from a read failure.
        raise OSError(f"Failed to write cleaned content back to file: {write_err}") from write_err
    return removed_lines


print(f"Attempting to remove incorrect import from: {init_file_path}")

try:
    if remove_import_line(init_file_path, incorrect_import):
        print("  ✅ File successfully rewritten.")

        # Show the resulting file so the removal can be checked by eye.
        print("\n--- Verifying file content after removal ---")
        with open(init_file_path, 'r') as file:
            print(file.read())
        print("--- End of file content ---")
    else:
        print(f"  ✅ Incorrect import '{incorrect_import}' not found. File should be okay.")

except FileNotFoundError:
    print(f"❌ Error: Solver init file not found at {init_file_path}. Cannot check/patch.")
except Exception as e:
    print(f"❌ Error processing solver init file: {e}")

Attempting to remove incorrect import from: /content/RT-DETR/rtdetrv2_pytorch/src/solver/__init__.py
  ✅ Incorrect import 'from src.solver.detection import DetectionSolver' not found. File should be okay.


In [79]:
# @title 11. Start Training! (Using Main Config with NESTED Overrides)
# Ensure we are in the right directory: /content/RT-DETR/rtdetrv2_pytorch
%cd /content/RT-DETR/rtdetrv2_pytorch

print("\n🚀 Starting Training using main config with NESTED dataset overrides...")
# Use the original main config file - it now contains overrides for the dataloader's dataset section.
!python tools/train.py \
    -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml \
    # Add --amp if needed

print("\n🏁 Training script finished.")

/content/RT-DETR/rtdetrv2_pytorch

🚀 Starting Training using main config with NESTED dataset overrides...
2025-04-01 11:45:47.994507: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-01 11:45:48.012664: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743507948.034632   56991 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743507948.041368   56991 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-01 11:45:48.063778: I tensorf

In [34]:
# @title Display Base Config Content (rtdetr_r50vd_6x_coco.yml)
print("--- Content of: /content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml ---")
!cat /content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml
print("--- End of file content ---")

--- Content of: /content/RT-DETR/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml ---

__include__: [
  '../dataset/coco_detection.yml',
  '../runtime.yml',
  './include/dataloader.yml',
  './include/optimizer.yml',
  './include/rtdetr_r50vd.yml',
]


output_dir: ./output/rtdetr_r50vd_6x_coco



--- End of file content ---


In [54]:
# @title 12. (Optional) Evaluate Model on Validation Set
# After training finishes, you can evaluate the final model (or a specific checkpoint)
import os

# Ensure we are in the correct directory
%cd /content/RT-DETR/rtdetrv2_pytorch

# Path to the final weights (adjust if you saved checkpoints differently)
OUTPUT_DIR = "output/rtdetr_r50vd_6x_visdrone" # Matches config
FINAL_WEIGHTS = os.path.join(OUTPUT_DIR, "model_final.pth") # Default save location
BEST_CHECKPOINT = os.path.join(OUTPUT_DIR, "best_checkpoint.pth") # Common name for best ckpt
CONFIG_FILE = "configs/rtdetr/rtdetr_r50vd_6x_visdrone.yml"

eval_weights = None
if os.path.exists(FINAL_WEIGHTS):
    eval_weights = FINAL_WEIGHTS
elif os.path.exists(BEST_CHECKPOINT):
    print(f"\n⚠️ Final weights not found at {FINAL_WEIGHTS}. Using best checkpoint instead.")
    eval_weights = BEST_CHECKPOINT
else:
    print(f"\n⚠️ Evaluation skipped: Neither final weights ({FINAL_WEIGHTS}) nor best checkpoint ({BEST_CHECKPOINT}) found.")
    print("   Check the 'output_dir' in your config and the training logs.")

if eval_weights:
    print(f"\n🧪 Evaluating model: {eval_weights}")
    !python tools/eval.py \
        -c {CONFIG_FILE} \
        -w {eval_weights} \
        --amp # Use amp if you trained with it

/content/RT-DETR/rtdetrv2_pytorch

⚠️ Evaluation skipped: Neither final weights (output/rtdetr_r50vd_6x_visdrone/model_final.pth) nor best checkpoint (output/rtdetr_r50vd_6x_visdrone/best_checkpoint.pth) found.
   Check the 'output_dir' in your config and the training logs.


In [None]:
# @title 13. (Optional) Inference on Test Images
# Perform inference on a few images from the test set
import os
import glob
import random
from IPython.display import Image, display

# Ensure we are in the correct directory
%cd /content/RT-DETR/rtdetrv2_pytorch

OUTPUT_DIR = "output/rtdetr_r50vd_6x_visdrone" # Matches config
FINAL_WEIGHTS = os.path.join(OUTPUT_DIR, "model_final.pth")
BEST_CHECKPOINT = os.path.join(OUTPUT_DIR, "best_checkpoint.pth")
CONFIG_FILE = "configs/rtdetr/rtdetr_r50vd_6x_visdrone.yml"

infer_weights = None
if os.path.exists(FINAL_WEIGHTS):
    infer_weights = FINAL_WEIGHTS
elif os.path.exists(BEST_CHECKPOINT):
    print(f"\n⚠️ Final weights not found. Using best checkpoint for inference.")
    infer_weights = BEST_CHECKPOINT
else:
    print(f"\n⚠️ Inference skipped: Trained weights not found in {OUTPUT_DIR}")

if infer_weights:
    print(f"\n🖼️ Running inference using weights: {infer_weights}")

    # Path to test images
    test_image_dir = "/content/VisDrone2019-DET-test-dev/images"
    if os.path.exists(test_image_dir):
        test_images = glob.glob(os.path.join(test_image_dir, "*.jpg"))

        if test_images:
            # Select a few random images
            num_images_to_show = 3
            selected_images = random.sample(test_images, min(num_images_to_show, len(test_images)))

            output_infer_dir = os.path.join(OUTPUT_DIR, "inference_results")
            os.makedirs(output_infer_dir, exist_ok=True)

            for img_path in selected_images:
                print(f"  -> Processing: {os.path.basename(img_path)}")
                !python tools/infer.py \
                    --infer_img {img_path} \
                    --output_dir {output_infer_dir} \
                    -c {CONFIG_FILE} \
                    -w {infer_weights} \
                    --draw_threshold 0.4 # Adjust confidence threshold for drawing boxes

                # Display the result image
                result_img_path = os.path.join(output_infer_dir, os.path.basename(img_path))
                if os.path.exists(result_img_path):
                     print(f"  -> Result saved to: {result_img_path}")
                     display(Image(filename=result_img_path, width=600))
                else:
                     print(f"  -> Output image not found at {result_img_path}")

        else:
            print(f"⚠️ No test images (*.jpg) found in {test_image_dir}")
    else:
      print(f"⚠️ Test image directory not found: {test_image_dir}")
