In [1]:
from pathlib import Path
import json
from tqdm import tqdm
import shutil
from collections import defaultdict

In [2]:
# Root folder of the raw DocLayNet core dataset (relative path, resolved
# against the current working directory when it is used).
root = Path("..") / "data" / "raw" / "DocLayNet_core"

In [7]:
# COCO category names that are kept during conversion; every other
# annotation category is dropped.
target_classes = [
    "Picture",
    "Caption",
    "Table",
]

# Only images whose `doc_category` is listed here are converted.
categories = [
    "scientific_articles",
    "patents",
]

In [8]:
def coco_to_yolo(coco_path, out_label_dir, src_img_dir, out_img_dir, limit=None):
    """Convert a COCO annotation file to YOLO label files and copy the images.

    For every image whose ``doc_category`` is in the module-level
    ``categories`` list, the annotations whose category name is in the
    module-level ``target_classes`` list are written to
    ``<out_label_dir>/<stem>.txt`` as ``class x_center y_center width height``
    lines (normalised by the image width/height), and
    ``<src_img_dir>/<stem>.png`` is copied to ``<out_img_dir>/<stem>.png``.
    "Table" annotations are relabelled as "Picture".

    Args:
        coco_path: Path to the COCO JSON file.
        out_label_dir: Directory that receives the YOLO ``.txt`` label files.
        src_img_dir: Directory holding the source ``.png`` images.
        out_img_dir: Directory that receives the copied images.
        limit: Maximum number of image/label pairs to export, or ``None`` for
            no cap.

    Returns:
        None. Results are written to ``out_label_dir`` and ``out_img_dir``.

    Notes:
        * Images without any kept annotation are skipped and do not count
          towards ``limit``.
        * Images whose source file is missing are reported and skipped
          *before* a label file is written, so no orphan labels are produced
          and missing files do not consume the ``limit`` budget.
        * Boxes are clipped to the image bounds; boxes with no remaining area
          are dropped.
    """
    out_label_dir = Path(out_label_dir)
    src_img_dir = Path(src_img_dir)
    out_img_dir = Path(out_img_dir)
    # Make the function usable on its own, even if the caller did not
    # create the output folders beforehand.
    out_label_dir.mkdir(parents=True, exist_ok=True)
    out_img_dir.mkdir(parents=True, exist_ok=True)

    with open(coco_path, 'r', encoding='utf-8') as f:
        coco = json.load(f)

    cat_map = {c['id']: c['name'] for c in coco['categories']}
    # Only the first two target classes get their own index because "Table"
    # is merged into "Picture" below.
    class_to_idx = {name: i for i, name in enumerate(target_classes[:2])}
    count = 0

    # Group annotations per image once so the image loop is a plain lookup.
    anns_by_image = defaultdict(list)
    for a in tqdm(coco['annotations'], desc='Appending to dict'):
        anns_by_image[a['image_id']].append(a)

    for img in tqdm(coco['images'], desc='Processing imgs'):
        if limit is not None and count >= limit:
            break
        if img["doc_category"] not in categories:
            continue

        file_name = Path(img["file_name"]).stem
        img_w, img_h = img['width'], img['height']
        yolo_lines = []

        # .get() avoids inserting an empty list for images without annotations.
        for ann in anns_by_image.get(img['id'], ()):
            cls_name = cat_map[ann['category_id']]
            if cls_name not in target_classes:
                continue
            if cls_name == "Table":
                cls_name = "Picture"

            # COCO boxes are [x_min, y_min, width, height] in pixels. Clip to
            # the page so the normalised YOLO values stay inside [0, 1].
            x, y, w, h = ann['bbox']
            x_min, y_min = max(0, x), max(0, y)
            x_max, y_max = min(img_w, x + w), min(img_h, y + h)
            box_w, box_h = x_max - x_min, y_max - y_min
            if box_w <= 0 or box_h <= 0:
                continue  # nothing of the box lies inside the image

            x_c = (x_min + box_w / 2) / img_w
            y_c = (y_min + box_h / 2) / img_h
            yolo_lines.append(
                f"{class_to_idx[cls_name]} {x_c:.6f} {y_c:.6f} "
                f"{box_w / img_w:.6f} {box_h / img_h:.6f}"
            )

        if not yolo_lines:
            continue

        src_img = src_img_dir / f"{file_name}.png"
        dst_img = out_img_dir / f"{file_name}.png"

        # Check the image first: a label without its image is useless for
        # training and must not count towards the limit.
        if not src_img.exists():
            print(f"Image not found for {file_name}")
            continue

        (out_label_dir / f"{file_name}.txt").write_text("\n".join(yolo_lines))
        shutil.copy2(src_img, dst_img)  # copy (not move) so the raw data stays intact
        count += 1

In [9]:
# Train split: COCO annotation file, source images, and YOLO output folders.
coco_path_train = root / "COCO" / "train.json"
src_img_dir_train = root / "PNG"
out_label_dir_train = root / "train" / "labels"
out_img_dir_train = root / "train" / "PNG"

# Create the output folders (including parents); existing folders are fine.
out_img_dir_train.mkdir(exist_ok=True, parents=True)
out_label_dir_train.mkdir(exist_ok=True, parents=True)

In [10]:
# Export at most 6000 labelled pages from the train annotations.
coco_to_yolo(
    coco_path=coco_path_train,
    out_label_dir=out_label_dir_train,
    src_img_dir=src_img_dir_train,
    out_img_dir=out_img_dir_train,
    limit=6000,
)

Appending to dict: 100%|██████████| 941123/941123 [00:02<00:00, 384765.36it/s] 
Processing imgs:  84%|████████▍ | 58375/69375 [00:16<00:03, 3495.16it/s]  


In [11]:
# Test split: COCO annotation file, source images, and YOLO output folders.
coco_path_test = root / "COCO" / "test.json"
src_img_dir_test = root / "PNG"
out_label_dir_test = root / "test" / "labels"
out_img_dir_test = root / "test" / "PNG"

# Create the output folders (including parents); existing folders are fine.
out_img_dir_test.mkdir(exist_ok=True, parents=True)
out_label_dir_test.mkdir(exist_ok=True, parents=True)

In [12]:
# Export at most 1200 labelled pages from the test annotations.
coco_to_yolo(
    coco_path=coco_path_test,
    out_label_dir=out_label_dir_test,
    src_img_dir=src_img_dir_test,
    out_img_dir=out_img_dir_test,
    limit=1200,
)

Appending to dict: 100%|██████████| 66531/66531 [00:00<00:00, 752636.41it/s]
Processing imgs: 100%|██████████| 4999/4999 [00:01<00:00, 3746.32it/s] 


In [13]:
coco_path_val = root / "COCO/val.json"

# Top up the test split with validation pages until it holds 1200 samples.
# The number still missing is derived from the label files already present in
# the test folder instead of a count copied by hand from an earlier run, so
# the cell stays correct when the data or the filters change.
test_target_size = 1200
n_existing_test_labels = len(list(out_label_dir_test.glob("*.txt")))
coco_to_yolo(
    coco_path_val,
    out_label_dir_test,
    src_img_dir_test,
    out_img_dir_test,
    limit=max(0, test_target_size - n_existing_test_labels),
)

Appending to dict: 100%|██████████| 99816/99816 [00:00<00:00, 3039992.51it/s]
Processing imgs:  88%|████████▊ | 5722/6489 [00:01<00:00, 3564.96it/s] 
