# Imports

In [4]:
import matplotlib.pyplot as plt
import PIL
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import json

Init Plugin
Init Graph Optimizer
Init Kernel


# Import Datasets

In [5]:
# import all COCO datasets

# file path
paths = {
    'train_anns' : os.path.join('data', 'annotations', 'instances_train2017.json'),
    'val_anns' : os.path.join('data', 'annotations', 'instances_val2017.json')
}

# Context managers close each annotation file as soon as it has been parsed,
# instead of leaving the handles open for the lifetime of the kernel.
# The encoding is pinned so parsing does not depend on the platform default.
with open(paths['train_anns'], encoding='utf-8') as train_anns_file:
    train_anns = json.load(train_anns_file)

with open(paths['val_anns'], encoding='utf-8') as val_anns_file:
    val_anns = json.load(val_anns_file)

In [6]:
# Show the top-level keys of the parsed validation annotations.
val_anns.keys()

dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])

In [9]:
# helper functions on selecting / viewing the label types

def pick_categories_by_name(categories_list):
    """Interactively pick categories into a new list.

    For every category the user is prompted with its name and must answer
    'y' (keep) or 'n' (skip). The answer is case-insensitive and surrounding
    whitespace is ignored; any other answer repeats the prompt for the same
    category.

    Args:
        categories_list: iterable of category dicts, each with a 'name' key.

    Returns:
        A new list holding the category dicts the user answered 'y' to,
        in their original order.
    """
    new_categories_list = []

    for category in categories_list:
        while True:
            # Normalise once so 'Y'/'N' behave the same as 'y'/'n'.
            answer = input(f"name: { category['name'] }, y/n: ").strip().lower()
            if answer == 'y':
                new_categories_list.append(category)
                break
            if answer == 'n':
                break
    return new_categories_list
            

"""
Print list of all categories names
"""
def print_categories_names(categories_list):
    """Print all category names on one line as a bracketed, quoted list.

    Output looks like ``['person', 'bicycle']``. An empty input prints
    ``[]`` instead of raising.

    Args:
        categories_list: sequence of category dicts, each with a 'name' key.
    """
    quoted_names = [f"'{category['name']}'" for category in categories_list]
    # join() copes with zero or one element, unlike indexing the last item.
    print('[' + ', '.join(quoted_names) + ']')

In [16]:
# Print the name of every category listed in the training annotations.
print_categories_names(train_anns['categories'])

['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


# List of Labels

In [44]:
# Parallel lists of the per-category fields, in the order the categories
# appear in the training annotations.
all_labels = [category['name'] for category in train_anns['categories']]
all_label_id = [category['id'] for category in train_anns['categories']]
all_superlabels = [category['supercategory'] for category in train_anns['categories']]

# Lookup tables keyed by label name.
labels_to_label_id = {
    category['name']: category['id']
    for category in train_anns['categories']
}
labels_to_superlabels = {
    category['name']: category['supercategory']
    for category in train_anns['categories']
}

# Save COCO Labels to local json file

In [49]:
# Persist both label lookup tables as JSON files in the working directory.
import json

# label name -> category id
with open("coco_categories.json", "w") as fp:
    fp.write(json.dumps(labels_to_label_id))

# label name -> supercategory
with open("coco_super_categories.json", "w") as fp:
    fp.write(json.dumps(labels_to_superlabels))

# Dict of image paths keyed by image id

In [31]:
# Image ids and image file paths for each split, in annotation-file order.
list_train_img_ids = [img['id'] for img in train_anns['images']]
list_train_img_paths = [
    os.path.join('data', 'train2017', img['file_name'])
    for img in train_anns['images']
]
list_val_img_ids = [img['id'] for img in val_anns['images']]
list_val_img_paths = [
    os.path.join('data', 'val2017', img['file_name'])
    for img in val_anns['images']
]

# { image id : path } lookup for each split.
dict_train_imgs = {
    img_id: img_path
    for img_id, img_path in zip(list_train_img_ids, list_train_img_paths)
}
dict_val_imgs = {
    img_id: img_path
    for img_id, img_path in zip(list_val_img_ids, list_val_img_paths)
}

In [33]:
# Display the validation { image id : path } mapping.
# NOTE(review): this renders the entire dict as cell output; consider
# previewing only a handful of entries to keep the notebook readable.
dict_val_imgs

{397133: 'data/val2017/000000397133.jpg',
 37777: 'data/val2017/000000037777.jpg',
 252219: 'data/val2017/000000252219.jpg',
 87038: 'data/val2017/000000087038.jpg',
 174482: 'data/val2017/000000174482.jpg',
 403385: 'data/val2017/000000403385.jpg',
 6818: 'data/val2017/000000006818.jpg',
 480985: 'data/val2017/000000480985.jpg',
 458054: 'data/val2017/000000458054.jpg',
 331352: 'data/val2017/000000331352.jpg',
 296649: 'data/val2017/000000296649.jpg',
 386912: 'data/val2017/000000386912.jpg',
 502136: 'data/val2017/000000502136.jpg',
 491497: 'data/val2017/000000491497.jpg',
 184791: 'data/val2017/000000184791.jpg',
 348881: 'data/val2017/000000348881.jpg',
 289393: 'data/val2017/000000289393.jpg',
 522713: 'data/val2017/000000522713.jpg',
 181666: 'data/val2017/000000181666.jpg',
 17627: 'data/val2017/000000017627.jpg',
 143931: 'data/val2017/000000143931.jpg',
 303818: 'data/val2017/000000303818.jpg',
 463730: 'data/val2017/000000463730.jpg',
 460347: 'data/val2017/000000460347.jpg

# Dicts of image annotations keyed by image id

In [41]:
# list of annotation image ids
list_train_anns_img_ids = [ anns['image_id'] for anns in train_anns['annotations'] ]
list_val_anns_img_ids = [ anns['image_id'] for anns in val_anns['annotations'] ]

# list of annotation bounding boxes
list_train_anns_bbox = [ anns['bbox'] for anns in train_anns['annotations'] ]
list_val_anns_bbox = [ anns['bbox'] for anns in val_anns['annotations'] ]

# list of annotation category ids
list_train_anns_category_ids = [ anns['category_id'] for anns in train_anns['annotations'] ]
list_val_anns_category_ids = [ anns['category_id'] for anns in val_anns['annotations'] ]


def _group_by_image_id(image_ids, values):
    """Collect every value under its image id, preserving annotation order.

    Args:
        image_ids: iterable of image ids, one per annotation.
        values: iterable of per-annotation values, parallel to image_ids.

    Returns:
        dict mapping each image id to the list of all values paired with it.
    """
    grouped = {}
    for image_id, value in zip(image_ids, values):
        grouped.setdefault(image_id, []).append(value)
    return grouped


# WARNING: dict(zip(...)) keeps a single value per key, so when an image id
# occurs in more than one annotation only the LAST annotation survives in the
# four dicts below. They are kept unchanged for backward compatibility; use
# the *_all dicts further down when every annotation of an image is needed.

# dict of category ids mapped to image ids (last annotation per image only)
dict_train_category_ids = dict(zip(list_train_anns_img_ids, list_train_anns_category_ids))
dict_val_category_ids = dict(zip(list_val_anns_img_ids, list_val_anns_category_ids))

# dict of bounding boxes mapped to image ids (last annotation per image only)
dict_train_bbox = dict(zip(list_train_anns_img_ids, list_train_anns_bbox))
dict_val_bbox = dict(zip(list_val_anns_img_ids, list_val_anns_bbox))

# lossless variants: { image id : [value of every annotation of that image] }
dict_train_category_ids_all = _group_by_image_id(list_train_anns_img_ids, list_train_anns_category_ids)
dict_val_category_ids_all = _group_by_image_id(list_val_anns_img_ids, list_val_anns_category_ids)
dict_train_bbox_all = _group_by_image_id(list_train_anns_img_ids, list_train_anns_bbox)
dict_val_bbox_all = _group_by_image_id(list_val_anns_img_ids, list_val_anns_bbox)
