In [1]:
pip install pycocotools opencv-python numpy tqdm albumentations



In [2]:
!mkdir coco
!cd coco && wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
!cd coco && wget http://images.cocodataset.org/zips/train2017.zip
!cd coco && wget http://images.cocodataset.org/zips/val2017.zip
!cd coco && unzip annotations_trainval2017.zip
!cd coco && unzip train2017.zip
!cd coco && unzip val2017.zip


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 extracting: val2017/000000212226.jpg  
 extracting: val2017/000000231527.jpg  
 extracting: val2017/000000578922.jpg  
 extracting: val2017/000000062808.jpg  
 extracting: val2017/000000119038.jpg  
 extracting: val2017/000000114871.jpg  
 extracting: val2017/000000463918.jpg  
 extracting: val2017/000000365745.jpg  
 extracting: val2017/000000320425.jpg  
 extracting: val2017/000000481404.jpg  
 extracting: val2017/000000314294.jpg  
 extracting: val2017/000000335328.jpg  
 extracting: val2017/000000513688.jpg  
 extracting: val2017/000000158548.jpg  
 extracting: val2017/000000132116.jpg  
 extracting: val2017/000000415238.jpg  
 extracting: val2017/000000321333.jpg  
 extracting: val2017/000000081738.jpg  
 extracting: val2017/000000577584.jpg  
 extracting: val2017/000000346905.jpg  
 extracting: val2017/000000433980.jpg  
 extracting: val2017/000000228144.jpg  
 extracting: val2017/000000041872.jpg  
 extracting: va

In [3]:
import json
import os

# Input / output annotation files and the area cut-off for a "small" object.
coco_annotation_path = 'coco/annotations/instances_train2017.json'
filtered_annotation_path = 'coco/annotations/instances_train2017_small.json'
SMALL_OBJECT_THRESHOLD = 32 * 32  # 1024 px²

# Read the complete train2017 instance annotations into memory.
with open(coco_annotation_path, 'r') as f:
    coco_data = json.load(f)

# Keep only annotations whose bounding-box area (bbox[2] * bbox[3], i.e.
# width * height) is strictly below the threshold.
small_annotations = [
    annotation
    for annotation in coco_data['annotations']
    if annotation["bbox"][2] * annotation["bbox"][3] < SMALL_OBJECT_THRESHOLD
]

# Ids of every image that owns at least one of the kept annotations.
small_image_ids = {annotation["image_id"] for annotation in small_annotations}

# Image records restricted to those ids, in their original order.
filtered_images = [
    image_entry
    for image_entry in coco_data['images']
    if image_entry['id'] in small_image_ids
]

# Assemble the reduced dataset; info / licenses / categories are passed through
# unchanged, falling back to empty containers when a key is absent.
filtered_coco_data = dict(
    info=coco_data.get("info", {}),
    licenses=coco_data.get("licenses", []),
    images=filtered_images,
    annotations=small_annotations,
    categories=coco_data.get("categories", []),
)

with open(filtered_annotation_path, 'w') as f:
    json.dump(filtered_coco_data, f)

print(f"Filtered dataset saved at {filtered_annotation_path}")


Filtered dataset saved at coco/annotations/instances_train2017_small.json


In [4]:
!pip install albumentations==1.3.0


Collecting albumentations==1.3.0
  Downloading albumentations-1.3.0-py3-none-any.whl.metadata (34 kB)
Collecting qudida>=0.0.4 (from albumentations==1.3.0)
  Downloading qudida-0.0.4-py3-none-any.whl.metadata (1.5 kB)
Downloading albumentations-1.3.0-py3-none-any.whl (123 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m123.5/123.5 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading qudida-0.0.4-py3-none-any.whl (3.5 kB)
Installing collected packages: qudida, albumentations
  Attempting uninstall: albumentations
    Found existing installation: albumentations 2.0.3
    Uninstalling albumentations-2.0.3:
      Successfully uninstalled albumentations-2.0.3
Successfully installed albumentations-1.3.0 qudida-0.0.4


In [5]:
import cv2
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2
import json

# Augmentation pipeline, written against the albumentations 1.3 keyword
# arguments (height=/width= on RandomResizedCrop, var_limit= on GaussNoise).
transform = A.Compose([
    A.RandomResizedCrop(height=640, width=640, scale=(0.5, 1.0), ratio=(0.75, 1.333), p=1.0),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.GaussNoise(var_limit=(10, 50), p=0.3),
    ToTensorV2()
])

# NOTE(review): the pipeline crops, resizes and flips each image but receives
# no bounding boxes, so the boxes stored in the filtered annotation file do not
# describe the images written below. To train on this output, pass
# bbox_params / bboxes through A.Compose and save a matching annotation file.

# Load filtered dataset
with open(filtered_annotation_path, 'r') as f:
    filtered_coco_data = json.load(f)

image_dir = "coco/train2017/"
augmented_dir = "coco/train2017_augmented/"
os.makedirs(augmented_dir, exist_ok=True)

# File names that could not be read from image_dir or written to augmented_dir.
skipped_files = []

for img_info in filtered_coco_data['images']:
    img_path = os.path.join(image_dir, img_info['file_name'])
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; skip it instead of crashing inside cvtColor.
        skipped_files.append(img_info['file_name'])
        continue
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    augmented = transform(image=image)
    aug_img = augmented['image']

    # ToTensorV2 returns a (C, H, W) tensor; convert it back to a contiguous
    # (H, W, C) NumPy array so OpenCV can consume it.
    aug_img = np.ascontiguousarray(aug_img.permute(1, 2, 0).numpy())

    # The image was converted to RGB above, but cv2.imwrite interprets the
    # channels as BGR. Convert back, otherwise red and blue are swapped on disk.
    aug_img = cv2.cvtColor(aug_img, cv2.COLOR_RGB2BGR)

    aug_img_path = os.path.join(augmented_dir, img_info['file_name'])
    # cv2.imwrite returns False on failure instead of raising.
    if not cv2.imwrite(aug_img_path, aug_img):
        skipped_files.append(img_info['file_name'])

print(f"Augmented images saved in {augmented_dir}")
if skipped_files:
    print(f"Warning: {len(skipped_files)} image(s) could not be read or written, e.g. {skipped_files[:5]}")


Augmented images saved in coco/train2017_augmented/


In [6]:
# Clone the RT-DETR repository into ./RT-DETR, relative to the current working
# directory. No branch, tag or commit is given, so whatever the remote's default
# branch currently points at is checked out.
!git clone https://github.com/lyuwenyu/RT-DETR.git




Cloning into 'RT-DETR'...
remote: Enumerating objects: 1010, done.[K
remote: Counting objects: 100% (202/202), done.[K
remote: Compressing objects: 100% (86/86), done.[K
remote: Total 1010 (delta 136), reused 116 (delta 116), pack-reused 808 (from 1)[K
Receiving objects: 100% (1010/1010), 624.62 KiB | 3.55 MiB/s, done.
Resolving deltas: 100% (489/489), done.


In [7]:
# Navigate into the RT-DETR directory
%cd RT-DETR

# Install the required packages
%cd /content/RT-DETR/rtdetr_pytorch/
!pip install -r requirements.txt

/content/RT-DETR
/content/RT-DETR/rtdetr_pytorch
Collecting torch==2.0.1 (from -r requirements.txt (line 1))
  Downloading torch-2.0.1-cp311-cp311-manylinux1_x86_64.whl.metadata (24 kB)
Collecting torchvision==0.15.2 (from -r requirements.txt (line 2))
  Downloading torchvision-0.15.2-cp311-cp311-manylinux1_x86_64.whl.metadata (11 kB)
Collecting onnx==1.14.0 (from -r requirements.txt (line 3))
  Downloading onnx-1.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (15 kB)
Collecting onnxruntime==1.15.1 (from -r requirements.txt (line 4))
  Downloading onnxruntime-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)
Collecting nvidia-cuda-nvrtc-cu11==11.7.99 (from torch==2.0.1->-r requirements.txt (line 1))
  Downloading nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu11==11.7.99 (from torch==2.0.1->-r requirements.txt (line 1))
  Downloading nvidia_cuda_runtime_cu11-1

In [1]:
# Train RT-DETR with the rtdetr_r50vd_6x_coco.yml config from the cloned repository.
# NOTE(review): no cell visible in this notebook edits that config, so it is
# unclear whether training reads instances_train2017_small.json or
# coco/train2017_augmented/ — confirm the dataset paths in the config.
!python /content/RT-DETR/rtdetr_pytorch/tools/train.py -c /content/RT-DETR/rtdetr_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml


2025-02-09 13:23:32.781046: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1739107413.085504   10599 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1739107413.169014   10599 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-09 13:23:33.782128: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Not init distributed mode.
Start training
Downloading: "https://github.com/lyuwenyu/storage/releases/download/v0.1/Re

In [None]:
# List the COCO annotation directory (e.g. to check that
# instances_train2017_small.json was written by the filtering cell).
!ls /content/coco/annotations/

captions_train2017.json   instances_train2017_small.json   person_keypoints_val2017.json
captions_val2017.json	  instances_val2017.json
instances_train2017.json  person_keypoints_train2017.json
