In [3]:
import json
import os
import shutil

# Paths
TRAIN_IMG_DIR = './dataset/train/'
VALID_IMG_DIR = './dataset/valid/'
VAL_JSON = './dataset/valid/_annotations.coco.json'

# 1. Load the new validation JSON we created.
#    The encoding is given explicitly so the file decodes the same way
#    regardless of the platform's default locale encoding.
with open(VAL_JSON, 'r', encoding='utf-8') as f:
    val_data = json.load(f)

# 2. Ensure valid directory exists
os.makedirs(VALID_IMG_DIR, exist_ok=True)

# 3. Move the images mentioned in the validation JSON from train to valid.
#    Entries whose source file is absent are no longer skipped silently:
#    they are sorted into "already in valid/" (e.g. the cell was re-run)
#    and "missing from both folders" (annotations without an image file).
moved_count = 0
already_in_valid = []
missing_images = []
for img_entry in val_data['images']:
    file_name = img_entry['file_name']
    src = os.path.join(TRAIN_IMG_DIR, file_name)
    dst = os.path.join(VALID_IMG_DIR, file_name)

    if os.path.exists(src):
        shutil.move(src, dst)
        moved_count += 1
    elif os.path.exists(dst):
        already_in_valid.append(file_name)
    else:
        missing_images.append(file_name)

print(f"✅ Successfully moved {moved_count} images to the valid folder.")

# Extra diagnostics are printed only when there is something to report, so a
# clean first run produces exactly the same output as before.
if already_in_valid:
    print(f"ℹ️ {len(already_in_valid)} listed images were already in the valid folder.")
if missing_images:
    print(f"⚠️ {len(missing_images)} listed images were not found in train or valid:")
    for file_name in missing_images:
        print(f"   - {file_name}")

✅ Successfully moved 30 images to the valid folder.


In [None]:
import os
from rfdetr import RFDETRMedium  # or RFDETRBase/Large depending on your needs

def main(
    dataset_path="./dataset",
    output_path="./output",
    epochs=100,
    batch_size=2,
    grad_accum_steps=8,
    lr=2e-5,
    resolution=640,
    **train_overrides,
):
    """Fine-tune an ``RFDETRMedium`` model and write results to ``output_path``.

    Calling ``main()`` with no arguments reproduces the original hard-coded
    configuration exactly.

    Args:
        dataset_path: Path to the COCO dataset folder, passed to ``train`` as
            ``dataset_dir`` (per the original note it must have ``train/``
            and ``valid/`` subfolders).
        output_path: Directory created if missing and passed to ``train`` as
            ``output_dir``.
        epochs: Number of training epochs.
        batch_size: Per-step batch size.
        grad_accum_steps: Gradient accumulation steps. The original author
            targeted ``batch_size * grad_accum_steps == 16``.
        lr: Learning rate passed to ``train``.
        resolution: Input resolution passed to ``train``.
        **train_overrides: Extra keyword arguments forwarded to
            ``model.train``; they take precedence over the values above.
    """
    # Ensure output directory exists
    os.makedirs(output_path, exist_ok=True)

    # The model is built with default constructor arguments; no class count
    # is passed here. The recorded training log reports num_classes=2.
    # NOTE(review): an earlier comment mentioned use_ema=False, but no such
    # argument was ever passed; supply it via train_overrides if intended.
    model = RFDETRMedium()

    print("--- Starting Training: 2 Classes (Lightbulb, Sea Shell) ---")

    train_args = {
        "dataset_dir": dataset_path,
        "epochs": epochs,
        "batch_size": batch_size,
        "grad_accum_steps": grad_accum_steps,
        "lr": lr,
        "output_dir": output_path,
        # Kept disabled by the original author "to avoid the save-error".
        "early_stopping": False,
        "resolution": resolution,
    }
    # Caller-supplied overrides win over the defaults assembled above.
    train_args.update(train_overrides)
    model.train(**train_args)

    print(f"--- Training Complete. Weights saved to {output_path} ---")

# Entry-point guard: start training only when this code runs as the top-level
# module (__name__ == "__main__"), not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

Using a different number of positional encodings than DINOv2, which means we're not loading DINOv2 backbone weights. This is not a problem if finetuning a pretrained RF-DETR model.
Using patch size 16 instead of 14, which means we're not loading DINOv2 backbone weights. This is not a problem if finetuning a pretrained RF-DETR model.
Loading pretrain weights
--- Starting Training: 2 Classes (Lightbulb, Sea Shell) ---
TensorBoard logging initialized. To monitor logs, use 'tensorboard --logdir ./output' and open http://localhost:6006/ in browser.
Not using distributed mode
git:
  sha: N/A, status: clean, branch: N/A

Namespace(num_classes=2, grad_accum_steps=8, amp=True, lr=2e-05, lr_encoder=0.00015, batch_size=2, weight_decay=0.0001, epochs=100, lr_drop=100, clip_max_norm=0.1, lr_vit_layer_decay=0.8, lr_component_decay=0.7, do_benchmark=False, dropout=0, drop_path=0.0, drop_mode='standard', drop_schedule='constant', cutoff_epoch=0, pretrained_encoder=None, pretrain_weights='rf-detr-mediu

fatal: not a git repository (or any of the parent directories): .git


Epoch: [0]  [ 0/10]  eta: 0:00:21  lr: 0.000020  class_error: 100.00  loss: 8.4111 (8.4111)  loss_ce: 1.1131 (1.1131)  loss_bbox: 0.2782 (0.2782)  loss_giou: 0.2612 (0.2612)  loss_ce_0: 1.1911 (1.1911)  loss_bbox_0: 0.2375 (0.2375)  loss_giou_0: 0.2226 (0.2226)  loss_ce_1: 1.1453 (1.1453)  loss_bbox_1: 0.3021 (0.3021)  loss_giou_1: 0.2893 (0.2893)  loss_ce_2: 1.1375 (1.1375)  loss_bbox_2: 0.2789 (0.2789)  loss_giou_2: 0.2579 (0.2579)  loss_ce_enc: 1.1638 (1.1638)  loss_bbox_enc: 0.2614 (0.2614)  loss_giou_enc: 0.2710 (0.2710)  loss_ce_unscaled: 1.1131 (1.1131)  class_error_unscaled: 100.0000 (100.0000)  loss_bbox_unscaled: 0.0556 (0.0556)  loss_giou_unscaled: 0.1306 (0.1306)  cardinality_error_unscaled: 1.0000 (1.0000)  loss_ce_0_unscaled: 1.1911 (1.1911)  loss_bbox_0_unscaled: 0.0475 (0.0475)  loss_giou_0_unscaled: 0.1113 (0.1113)  cardinality_error_0_unscaled: 35.5000 (35.5000)  loss_ce_1_unscaled: 1.1453 (1.1453)  loss_bbox_1_unscaled: 0.0604 (0.0604)  loss_giou_1_unscaled: 0.1447 (

Exception ignored in: <function _releaseLock at 0x151aae0a8540>
Traceback (most recent call last):
  File "/usr/lib/python3.12/logging/__init__.py", line 243, in _releaseLock
    def _releaseLock():
    
KeyboardInterrupt: 
