# pycocotools 을 이용한 COCO dataset 살펴보기

In [1]:
import sys
# True when the google.colab package is already loaded in this interpreter,
# which is used below to decide whether Google Drive has to be mounted.
IN_COLAB = 'google.colab' in sys.modules
print(IN_COLAB)

True


In [2]:
# Imported unconditionally so that `os` is available in every environment,
# not only when the Colab branch below runs.
import os

# Google Drive folder used as the working directory when running on Colab.
COLAB_PROJECT_DIR = '/content/drive/MyDrive/ColabNotebooks/2022_fall_bigdata/'

if IN_COLAB:
    # google.colab is only importable inside the Colab runtime, so keep this
    # import inside the branch.
    from google.colab import drive

    drive.mount('/content/drive')
    # Relative paths such as './data/...' in later cells resolve against this folder.
    os.chdir(COLAB_PROJECT_DIR)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
%matplotlib inline
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
# Default figure size as (width, height); plt.rcParams is the same rcParams
# object that the pylab namespace re-exports.
plt.rcParams['figure.figsize'] = (8.0, 10.0)

* Annotations
    * 2017 Train/Val annotations [241MB]: http://images.cocodataset.org/annotations/annotations_trainval2017.zip
* Images
    * 2017 Val images [5K/1GB]: http://images.cocodataset.org/zips/val2017.zip

In [4]:
!pwd

/content/drive/MyDrive/ColabNotebooks/2022_fall_bigdata


In [5]:
# Folder holding the COCO 2017 train/val annotation download, relative to the
# current working directory.
dataDir = './data/annotations_trainval2017'
# Directory with the annotation JSON files (trailing slash kept).
loc = dataDir + '/annotations/'

In [6]:
loc

'./data/annotations_trainval2017/annotations/'

In [7]:
# List the annotation files; IPython substitutes the Python variable `loc` for $loc.
!ls $loc

captions_val2017.json		person_keypoints_val2017.json
instances_val2017.json		person_keypoints_val2017_small2.json
person_keypoints_val2017_.json	person_keypoints_val2017_small.json


## Annotation - instances

In [8]:
# Dataset split whose instance annotations are opened.
dataType = 'val2017'
# Path of the instances_<split>.json file under <dataDir>/annotations.
annFile = '{root}/annotations/instances_{split}.json'.format(
    root=dataDir,
    split=dataType,
)

In [9]:
annFile

'./data/annotations_trainval2017/annotations/instances_val2017.json'

In [10]:
# Initialize the COCO API for instance annotations: reads annFile into memory
# and builds its lookup index (see the log lines printed by this cell).
coco=COCO(annFile)

loading annotations into memory...
Done (t=1.96s)
creating index...
index created!


In [11]:
coco

<pycocotools.coco.COCO at 0x7f60aed66090>

In [12]:
coco.getCatIds() # category ids (not list positions; the numbering has gaps, e.g. no 12 or 26)

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 27,
 28,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 67,
 70,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 84,
 85,
 86,
 87,
 88,
 89,
 90]

In [13]:
# Load the category records for every category id. Nothing is displayed here;
# each record is a dict with 'supercategory', 'id' and 'name' keys.
cats = coco.loadCats(coco.getCatIds())

In [14]:
cats

[{'supercategory': 'person', 'id': 1, 'name': 'person'},
 {'supercategory': 'vehicle', 'id': 2, 'name': 'bicycle'},
 {'supercategory': 'vehicle', 'id': 3, 'name': 'car'},
 {'supercategory': 'vehicle', 'id': 4, 'name': 'motorcycle'},
 {'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'},
 {'supercategory': 'vehicle', 'id': 6, 'name': 'bus'},
 {'supercategory': 'vehicle', 'id': 7, 'name': 'train'},
 {'supercategory': 'vehicle', 'id': 8, 'name': 'truck'},
 {'supercategory': 'vehicle', 'id': 9, 'name': 'boat'},
 {'supercategory': 'outdoor', 'id': 10, 'name': 'traffic light'},
 {'supercategory': 'outdoor', 'id': 11, 'name': 'fire hydrant'},
 {'supercategory': 'outdoor', 'id': 13, 'name': 'stop sign'},
 {'supercategory': 'outdoor', 'id': 14, 'name': 'parking meter'},
 {'supercategory': 'outdoor', 'id': 15, 'name': 'bench'},
 {'supercategory': 'animal', 'id': 16, 'name': 'bird'},
 {'supercategory': 'animal', 'id': 17, 'name': 'cat'},
 {'supercategory': 'animal', 'id': 18, 'name': 'dog'},

In [15]:
len(cats)

80

In [16]:
#cats

In [17]:
# List position 60 is not category id 60: the ids have gaps, so this record has id 67.
cats[60]

{'supercategory': 'furniture', 'id': 67, 'name': 'dining table'}

In [18]:
# The record's 'id' field (67 here) is the COCO category id, distinct from its list position (60).
cats[60]['id']

67

In [19]:
# for cat in cats:
#     print(cat['name'])

In [20]:
# Category display names, in the same order as `cats`.
nms = [record['name'] for record in cats]

In [21]:
print(nms)

['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [22]:
# Supercategory of each category, one entry per category (duplicates kept).
snms = [record['supercategory'] for record in cats]

In [23]:
set(snms)

{'accessory',
 'animal',
 'appliance',
 'electronic',
 'food',
 'furniture',
 'indoor',
 'kitchen',
 'outdoor',
 'person',
 'sports',
 'vehicle'}

In [24]:
# Distinct supercategory names. NOTE: this rebinds `nms`, which an earlier
# cell used for the list of category names.
nms = {record['supercategory'] for record in cats}

In [25]:
# Sort before joining: `nms` is a set of strings, whose iteration order can
# differ from one kernel session to the next, so an unsorted join would not
# print the same line on every run.
print('COCO supercategories: \n{}'.format(' '.join(sorted(nms))))

COCO supercategories: 
food outdoor person animal vehicle furniture indoor accessory electronic kitchen sports appliance


* References
    * https://cocodataset.org/#download
    * https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoDemo.ipynb