In [1]:
import json
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from collections import defaultdict

In [2]:
# Load the COCO instance annotations for the val2017 split.
# The train2017 variant is kept below, disabled, for reference:
# with open("code/object_detection_coco/coco/annotations/instances_train2017.json", 'r', encoding='utf-8') as f:
#     train2017 = json.load(f)

val_annotations_path = "code/object_detection_coco/coco/annotations/instances_val2017.json"
with open(val_annotations_path, mode='r', encoding='utf-8') as annotations_file:
    val2017 = json.load(annotations_file)

In [None]:
# Category ids of the classes kept for the reduced dataset.
selected_ids = [46, 25, 40, 79, 61, 19, 5, 9, 75, 35]

# Keep the category records whose id is listed above; the result follows the
# order of val2017['categories'], not the order of selected_ids.
selected_categories = [
    category
    for category in val2017['categories']
    if category['id'] in selected_ids
]

selected_categories

[{'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'},
 {'supercategory': 'vehicle', 'id': 9, 'name': 'boat'},
 {'supercategory': 'animal', 'id': 19, 'name': 'horse'},
 {'supercategory': 'animal', 'id': 25, 'name': 'giraffe'},
 {'supercategory': 'sports', 'id': 35, 'name': 'skis'},
 {'supercategory': 'sports', 'id': 40, 'name': 'baseball glove'},
 {'supercategory': 'kitchen', 'id': 46, 'name': 'wine glass'},
 {'supercategory': 'food', 'id': 61, 'name': 'cake'},
 {'supercategory': 'electronic', 'id': 75, 'name': 'remote'},
 {'supercategory': 'appliance', 'id': 79, 'name': 'oven'}]

In [8]:
# Names of the selected categories, in the same order as selected_categories.
name = [category['name'] for category in selected_categories]
name

['airplane',
 'boat',
 'horse',
 'giraffe',
 'skis',
 'baseball glove',
 'wine glass',
 'cake',
 'remote',
 'oven']

In [9]:
# Set of the selected category ids, used for membership tests when filtering.
selected_categories_ids = set(category["id"] for category in selected_categories)
selected_categories_ids

{5, 9, 19, 25, 35, 40, 46, 61, 75, 79}

In [10]:
def filter_coco(coco, selected_category_ids):
    """Return a COCO-style dict restricted to the given category ids.

    Args:
        coco: Mapping with "images", "annotations" and "categories" lists.
            Each annotation must carry "category_id" and "image_id"; each
            image and category must carry "id".
        selected_category_ids: Iterable of category ids to keep. Any iterable
            is accepted (set, list, generator, ...); it is materialised into
            a set once so that one-shot iterables are not exhausted by the
            first membership test and lookups stay constant-time.

    Returns:
        A new dict with exactly the keys "images", "annotations" and
        "categories":
          * "annotations": annotations whose category is selected,
          * "images": images referenced by at least one kept annotation,
          * "categories": the selected category records.
        Relative order within each list follows `coco`. The list elements are
        the same objects as in `coco` (no copies are made). Other top-level
        keys of `coco` are not carried over.
    """
    wanted_ids = set(selected_category_ids)

    anns = [
        ann for ann in coco["annotations"]
        if ann["category_id"] in wanted_ids
    ]

    # Only images that still have at least one annotation are retained.
    img_ids = {ann["image_id"] for ann in anns}

    imgs = [
        img for img in coco["images"]
        if img["id"] in img_ids
    ]

    cats = [
        c for c in coco["categories"]
        if c["id"] in wanted_ids
    ]

    return {
        "images": imgs,
        "annotations": anns,
        "categories": cats
    }


In [11]:
# Reduce the loaded annotations to the selected categories.
# NOTE(review): the result is named `train2017_new`, but the input here is the
# val2017 split -- confirm the name is intentional before reusing it.
train2017_new = filter_coco(
    coco=val2017,
    selected_category_ids=selected_categories_ids,
)

In [12]:
# Number of images that contain at least one annotation of a selected category.
kept_images = train2017_new['images']
print(len(kept_images))

1125


In [None]:
annotations = train2017_new["annotations"]
categories = train2017_new["categories"]

# Map category_id -> category_name (built once; used to label the rows below).
cat_id_to_name = {c["id"]: c["name"] for c in categories}

# category_id -> set(image_id). A set is used so that an image holding several
# instances of the same category is counted only once.
images_per_category = defaultdict(set)

for ann in annotations:
    images_per_category[ann["category_id"]].add(ann["image_id"])

# One record per category that has at least one annotation.
data = [
    {
        "category_id": cat_id,
        "category_name": cat_id_to_name.get(cat_id, "unknown"),
        "num_images": len(img_ids),
    }
    for cat_id, img_ids in images_per_category.items()
]

# Explicit columns keep the frame well-formed (and sortable) even when there
# are no annotations at all; without them sort_values would raise KeyError.
df = pd.DataFrame(
    data, columns=["category_id", "category_name", "num_images"]
).sort_values(
    by="num_images", ascending=False
).reset_index(drop=True)

In [15]:
# Show the per-category image counts, sorted by num_images in descending order.
df

Unnamed: 0,category_id,category_name,num_images
0,75,remote,145
1,19,horse,128
2,61,cake,124
3,9,boat,121
4,35,skis,120
5,79,oven,115
6,46,wine glass,110
7,25,giraffe,101
8,40,baseball glove,100
9,5,airplane,97


In [16]:
# Category records kept in the filtered dataset.
train2017_new['categories']

[{'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'},
 {'supercategory': 'vehicle', 'id': 9, 'name': 'boat'},
 {'supercategory': 'animal', 'id': 19, 'name': 'horse'},
 {'supercategory': 'animal', 'id': 25, 'name': 'giraffe'},
 {'supercategory': 'sports', 'id': 35, 'name': 'skis'},
 {'supercategory': 'sports', 'id': 40, 'name': 'baseball glove'},
 {'supercategory': 'kitchen', 'id': 46, 'name': 'wine glass'},
 {'supercategory': 'food', 'id': 61, 'name': 'cake'},
 {'supercategory': 'electronic', 'id': 75, 'name': 'remote'},
 {'supercategory': 'appliance', 'id': 79, 'name': 'oven'}]

In [20]:
# Number of images in the filtered dataset (checked again right before saving).
len(train2017_new['images'])

1125

In [18]:
# Write the filtered annotations next to the original annotation files.
output_path = 'code/object_detection_coco/coco/annotations/instances_val2017_10cls.json'
with open(output_path, mode='w', encoding='utf-8') as out_file:
    json.dump(train2017_new, out_file, indent=4, ensure_ascii=False)

In [None]:
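# NOTE: disabled one-off step, kept for reference. It copies every image listed
# in the filtered annotation file into a separate folder; uncomment to re-run.
# import json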
# import os
# import shutil
# from tqdm import tqdm

# # paths
# json_path = "code/object_detection_coco/coco/annotations/instances_val2017_10cls.json"
# src_img_dir = "code/object_detection_coco/coco/val2017"
# dst_img_dir = "code/object_detection_coco/coco/val2017_10cls"

# os.makedirs(dst_img_dir, exist_ok=True)

# # load json
# with open(json_path, "r") as f:
#     coco = json.load(f)

# images = coco["images"]

# # copy images
# for img in tqdm(images):
#     file_name = img["file_name"]
#     src_path = os.path.join(src_img_dir, file_name)
#     dst_path = os.path.join(dst_img_dir, file_name)

#     if not os.path.exists(src_path):
#         print(f"[WARN] Missing image: {file_name}")
#         continue

#     shutil.copy2(src_path, dst_path)

100%|██████████| 1125/1125 [00:00<00:00, 1200.79it/s]
