# Train on COCO

In [None]:
import torch
# Release GPU memory that PyTorch's caching allocator holds but is not using,
# so the training run below starts with as much free VRAM as possible.
torch.cuda.empty_cache()

In [None]:
from ultralytics import YOLO

# Hyperparameters for this run, gathered in one place so they are easy to tweak.
train_args = {
    "data": "datasets/coco_subset/data.yaml",  # dataset config of the COCO subset
    "epochs": 30,
    "imgsz": 640,
    "batch": 16,
    "optimizer": "AdamW",
    "lr0": 0.001,      # initial learning rate
    "lrf": 0.1,        # final learning rate as a fraction of lr0
    "patience": 10,    # early-stopping patience, in epochs
    "cos_lr": True,    # cosine learning-rate schedule
    "device": "cuda",
    "amp": False,      # automatic mixed precision switched off
}

# Starting weights: the best checkpoint stored under runs/detect/train.
# NOTE(review): presumably produced by an earlier training run; whether it
# descends from COCO-pretrained weights cannot be told from this notebook.
model = YOLO("runs/detect/train/weights/best.pt")
model.train(**train_args)

New https://pypi.org/project/ultralytics/8.3.231 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.229  Python-3.10.0 torch-2.7.1+cu118 CUDA:0 (NVIDIA GeForce RTX 4050 Laptop GPU, 6140MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=False, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=datasets/coco_subset/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.1, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=runs/detect/train/weights/best.pt, momentum=0.937, mosaic=1.0, mul

In [None]:
from ultralytics import YOLO
# Load the checkpoint stored under runs/detect/train and evaluate it on the
# dataset described by datasets/coco_subset/data.yaml.
# NOTE(review): this is the same checkpoint file the training cell loads as its
# starting weights, and the training log shows exist_ok=False. Presumably the
# newly trained weights were written to a different run directory
# (e.g. runs/detect/train2) -- confirm this path is the checkpoint you
# actually want to evaluate.
model = YOLO("runs/detect/train/weights/best.pt")
model.val(data="datasets/coco_subset/data.yaml")

In [None]:
import os, shutil
from tqdm import tqdm

# ------------ SETTINGS -------------
# Source split: images and the label files that annotate them.
SRC_IMAGES = "datasets/coco_subset/images/val2017"
SRC_LABELS = "datasets/coco_subset/labels/val2017"

# Destination split; both folders are created right away if missing.
OUT_ROOT = "datasets/coco_subset"
OUT_IMAGES = os.path.join(OUT_ROOT, "images/val")
OUT_LABELS = os.path.join(OUT_ROOT, "labels/val")

for _out_dir in (OUT_IMAGES, OUT_LABELS):
    os.makedirs(_out_dir, exist_ok=True)

# The 12 source class ids to keep; every other class is dropped.
# NOTE(review): presumably indices in the source dataset's COCO class list --
# verify against the yaml the source labels were generated with.
TARGET_CLASSES = {0, 24, 25, 39, 41, 42, 44, 56, 63, 64, 67, 73}

# Renumber the kept ids to a contiguous 0-11 range, in ascending source-id order.
CLASS_ID_MAP = dict(zip(sorted(TARGET_CLASSES), range(len(TARGET_CLASSES))))

# ------------------------------------

def _remap_label_lines(lines, class_id_map):
    """Keep only annotations of mapped classes and rewrite their class ids.

    Args:
        lines: Iterable of raw label-file lines, each of the form
            "<class_id> <remaining fields...>".
        class_id_map: Mapping from source class id to new class id. Lines
            whose class id is not a key of this mapping are dropped.

    Returns:
        List of rewritten lines (fields joined by single spaces, no trailing
        newline). Empty when no line belongs to a mapped class.

    Raises:
        ValueError: If the first field of a non-blank line is not an integer.
    """
    remapped = []
    for line in lines:
        parts = line.split()
        if not parts:
            # Blank line (e.g. a stray newline at the end of the file):
            # there is no class id to parse, so skip it instead of crashing.
            continue

        cls_id = int(parts[0])
        if cls_id in class_id_map:
            parts[0] = str(class_id_map[cls_id])
            remapped.append(" ".join(parts))
    return remapped


kept = 0
# Counts label files whose image is missing as well as files that contain
# no object of a target class.
skipped = 0

for label_file in tqdm(os.listdir(SRC_LABELS)):
    if not label_file.endswith(".txt"):
        continue

    label_path = os.path.join(SRC_LABELS, label_file)
    # Swap only the extension; str.replace would also rewrite a ".txt" that
    # happens to occur earlier in the file name.
    image_name = os.path.splitext(label_file)[0] + ".jpg"
    image_path = os.path.join(SRC_IMAGES, image_name)

    if not os.path.exists(image_path):
        skipped += 1
        continue

    with open(label_path, "r") as f:
        new_lines = _remap_label_lines(f, CLASS_ID_MAP)

    # Only keep image if it contains at least 1 valid object
    if new_lines:
        shutil.copy(image_path, os.path.join(OUT_IMAGES, image_name))

        with open(os.path.join(OUT_LABELS, label_file), "w") as f:
            f.write("\n".join(new_lines))

        kept += 1
    else:
        skipped += 1

print("\nDONE")
print(f"Kept images: {kept}")
print(f"Skipped images: {skipped}")
print(f"New dataset at: {OUT_ROOT}")
