In [1]:
import os
from pycocotools.coco import COCO
import cv2

def crop_test_data(
    coco_json: str,
    img_dir: str,
    output_dir: str,
    margin: int = 10
):
    """Crop every annotated bounding box out of the images in a directory.

    Each ``.jpg`` / ``.jpeg`` / ``.png`` file in ``img_dir`` whose name matches
    a ``file_name`` entry in the COCO annotation file is loaded, and one JPEG
    crop per annotation is written to ``output_dir`` as
    ``<image stem>_ann<annotation id, zero-padded to 6 digits>.jpg``.
    Files that are missing from the JSON, images that cannot be decoded,
    annotations whose crop is empty, and crops that cannot be written are
    reported on stdout and skipped instead of aborting the run.

    Args:
        coco_json:  Path to the COCO-format JSON (images + annotations +
                    categories).
        img_dir:    Directory holding the images to crop; file names must
                    match the ``file_name`` values in the JSON.
        output_dir: Directory that receives the crops; created if missing.
        margin:     Extra pixels added on every side of each bbox before the
                    box is clamped to the image bounds.

    Raises:
        FileNotFoundError: If ``img_dir`` does not exist. This is raised by
            ``os.listdir`` before annotations are loaded or ``output_dir``
            is created.
    """
    image_exts = ('.jpg', '.jpeg', '.png')

    # List the directory first so a bad img_dir fails immediately, before the
    # slow annotation load. Sorting makes the processing order reproducible.
    candidates = sorted(
        fname for fname in os.listdir(img_dir)
        if fname.lower().endswith(image_exts)
    )

    os.makedirs(output_dir, exist_ok=True)
    coco = COCO(coco_json)

    # Map file_name -> image record so directory entries can be looked up.
    fname2info = {
        info['file_name']: info
        for info in coco.loadImgs(coco.getImgIds())
    }

    for fname in candidates:
        info = fname2info.get(fname)
        if info is None:
            print(f"⚠️ 在 JSON 中找不到 `{fname}`，跳过")
            continue

        anns = coco.loadAnns(coco.getAnnIds(imgIds=info['id']))

        img_path = os.path.join(img_dir, fname)
        img = cv2.imread(img_path)
        if img is None:
            print(f"⚠️ 无法加载图像 `{img_path}`，跳过")
            continue

        img_h, img_w = img.shape[:2]
        base, _ = os.path.splitext(fname)

        # One crop per annotation of this image.
        for ann in anns:
            x, y, w, h = ann['bbox']
            x1 = max(int(x - margin), 0)
            y1 = max(int(y - margin), 0)
            x2 = min(int(x + w + margin), img_w)
            y2 = min(int(y + h + margin), img_h)

            crop = img[y1:y2, x1:x2]
            if crop.size == 0:
                # A degenerate or out-of-image bbox yields an empty array,
                # which cv2.imwrite rejects with an exception.
                print(f"⚠️ 标注 {ann['id']} 的裁剪区域为空，跳过")
                continue

            out_name = f"{base}_ann{ann['id']:06d}.jpg"
            out_path = os.path.join(output_dir, out_name)
            # cv2.imwrite signals failure through its return value.
            if not cv2.imwrite(out_path, crop):
                print(f"⚠️ 无法写入 `{out_path}`，跳过")
                continue
            print(f"[Saved] {out_name}")

    print("✅ 全部裁剪完成！")

if __name__ == "__main__":
    # Run configuration for this cell; kept as module-level names.
    margin     = 10                                               # grow each bbox by 10px
    output_dir = "test_data_croped"                                 # where the crops are written
    img_dir    = "/root/data_preprocessing/box_labeled/test_data" # directory of test images
    coco_json  = "refcoco/instances.json"                                 # original COCO JSON

    crop_test_data(
        coco_json=coco_json,
        img_dir=img_dir,
        output_dir=output_dir,
        margin=margin,
    )

loading annotations into memory...
Done (t=3.70s)
creating index...
index created!
[Saved] COCO_train2014_000000098304_ann003007.jpg
[Saved] COCO_train2014_000000098304_ann099893.jpg
[Saved] COCO_train2014_000000098304_ann108703.jpg
[Saved] COCO_train2014_000000098304_ann115415.jpg
[Saved] COCO_train2014_000000098304_ann116865.jpg
[Saved] COCO_train2014_000000098304_ann120114.jpg
[Saved] COCO_train2014_000000098304_ann1490146.jpg
[Saved] COCO_train2014_000000098304_ann1511501.jpg
[Saved] COCO_train2014_000000098304_ann1513367.jpg
[Saved] COCO_train2014_000000098304_ann2099764.jpg
[Saved] COCO_train2014_000000098304_ann2222700.jpg
[Saved] COCO_train2014_000000098304_ann2222956.jpg
✅ 全部裁剪完成！
