# Object Detection

In [None]:
import os
from dataset import HFCocoDetection
from datasets import load_dataset

from training import TrainingCommand, TrainingArguments
from transformers import DeformableDetrForObjectDetection, DeformableDetrImageProcessor

%load_ext autoreload
%autoreload 2

In [None]:
# Identifier of the pretrained Deformable DETR checkpoint; the image processor
# below is loaded from the same identifier.
model_checkpoint = "SenseTime/deformable-detr"

# Root of the local COCO copy ("~" is expanded to the current user's home).
base_dir = os.path.expanduser("~/DeepLearningProjects/mmdetection/data/coco")

# Image processor configured for COCO-detection style annotations.
processor = DeformableDetrImageProcessor.from_pretrained(
    model_checkpoint,
    format="coco_detection",
)

# Dataset built from the train2017 images and the "mixed" training annotation
# file, using the processor created above.
dataset = HFCocoDetection(
    processor=processor,
    img_dir=f"{base_dir}/train2017",
    ann_file=f"{base_dir}/annotations/instances_train2017_mixed.json",
)

In [None]:
# Index the first dataset item; as the last expression of the notebook cell its
# value is rendered as the cell output (exact contents depend on HFCocoDetection).
dataset[0]

In [None]:
# Load the pretrained detector with a single-class head.
# NOTE: despite its name, `config` holds the model instance returned by
# `from_pretrained`, not a configuration object. The name is kept because other
# cells may reference it and it is forwarded as `config=` below.
config = DeformableDetrForObjectDetection.from_pretrained(
    pretrained_model_name_or_path=model_checkpoint,
    num_labels=1,
    ignore_mismatched_sizes=True,  # safely re-initialises the class head
)

# Hyper-parameters for the training run.
training_args = TrainingArguments(
    output_dir="../../checkpoints/object-detection",
    per_device_train_batch_size=4,
    # `per_gpu_eval_batch_size` is the deprecated spelling in Hugging Face
    # TrainingArguments; use the per-device name to match the train setting.
    # NOTE(review): assumes `training.TrainingArguments` is (or mirrors) the
    # Hugging Face class - confirm.
    per_device_eval_batch_size=4,
    num_train_epochs=12,
    learning_rate=1e-4,
    weight_decay=1e-4,
    lr_scheduler_type="cosine",
    warmup_steps=2000,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=50,
    remove_unused_columns=False,  # **must** stay False for detection tasks
    fp16=False,  # consider bf16 instead if you're on A100 / H100
)

# Bundle the model, the train/val data locations and the arguments into one
# command object.
cmd = TrainingCommand(
    # Reuse the variable so this cannot drift from the checkpoint loaded above.
    model_checkpoint=model_checkpoint,
    config=config,
    train_img_dir=f"{base_dir}/train2017",
    train_ann_file=f"{base_dir}/annotations/instances_train2017_mixed.json",
    val_img_dir=f"{base_dir}/val2017",
    val_ann_file=f"{base_dir}/annotations/instances_val2017_mixed.json",
    training_args=training_args,
)

In [None]:
# Execute the command built in the previous cell.
# NOTE(review): presumably this launches the training run - confirm against
# TrainingCommand.invoke in the project's `training` module.
cmd.invoke()