In [1]:
# Install pycocotools, which provides the `pycocotools.coco.COCO` class imported below.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
%pip install pycocotools

Note: you may need to restart the kernel to use updated packages.


In [5]:
import os
import json
from pycocotools.coco import COCO
import cv2
from matplotlib import pyplot as plt


# Path configuration.
# NOTE(review): image_dir and annotation_file are absolute paths on one
# machine; they must be edited before this notebook can run elsewhere.
image_dir = r'/Users/patrick/Documents/final_project/ImageNet/val2017'
annotation_file = r'/Users/patrick/Documents/final_project/ImageNet/annotations/instances_val2017.json'
output_dir = './output'

# exist_ok=True makes creation idempotent and avoids the check-then-create
# race of a separate os.path.exists() test; re-running the cell is a no-op.
os.makedirs(output_dir, exist_ok=True)


def extract_coco_data(image_dir, annotation_file, output_dir):
    """Export per-image object annotations of a COCO dataset to a JSON file.

    For every image listed in the annotation file whose image file can be
    read from ``image_dir``, one record is collected::

        {
            'image_id': <image id>,
            'file_name': <image file name>,
            'objects': [{<category name>: <bbox>}, ...]
        }

    The list of records is written to ``<output_dir>/coco_data.json``.

    Args:
        image_dir: Directory containing the image files referenced by the
            annotation file.
        annotation_file: Path to a COCO-format instances annotation JSON.
        output_dir: Directory that receives ``coco_data.json``. It is created
            if it does not exist yet.

    Returns:
        None. The result is only written to disk.
    """
    # Create the destination up front so a bad output path fails before the
    # (slow) pass over all images rather than after it.
    os.makedirs(output_dir, exist_ok=True)

    # Load the COCO annotation index.
    coco = COCO(annotation_file)

    # One record per readable image.
    data = []

    for img_id in coco.getImgIds():
        img_info = coco.loadImgs(img_id)[0]
        img_file = os.path.join(image_dir, img_info['file_name'])

        # The decoded pixels are not needed; cv2.imread is used only to skip
        # images that are missing or cannot be decoded (it returns None).
        if cv2.imread(img_file) is None:
            print(f"Skipping unreadable image: {img_file}")
            continue

        # Fetch every annotation attached to this image.
        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_info['id']))

        # Each object is stored as a single-entry mapping: category name -> bbox.
        objects = [
            {coco.loadCats(ann['category_id'])[0]['name']: ann['bbox']}
            for ann in anns
        ]

        data.append({
            'image_id': img_info['id'],
            'file_name': img_info['file_name'],
            'objects': objects
        })

    # Persist the collected records as pretty-printed JSON.
    out_path = os.path.join(output_dir, 'coco_data.json')
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)


# Run the extraction with the paths configured above.
extract_coco_data(
    image_dir=image_dir,
    annotation_file=annotation_file,
    output_dir=output_dir,
)

loading annotations into memory...
Done (t=0.39s)
creating index...
index created!
