# Inspect LabelAR COCO-formatted Dataset

Use this notebook to inspect the properties and bounding box labels of a dataset created by `./scripts/create_dataset.sh` (or `./scripts/create_dataset.py`).


In [6]:
from contextlib import redirect_stdout
import glob
import json
import os
from pathlib import Path
import subprocess
import sys
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
%matplotlib inline
from PIL import Image
from pycocotools.coco import COCO
from pycocotools import mask as maskUtils

In [7]:
# Ask git for the repository's top-level directory and switch the working
# directory to it, so later paths can be built relative to the repo root.
git_toplevel = subprocess.check_output(["git", "rev-parse", "--show-toplevel"])
root_dir = Path(git_toplevel.strip().decode("utf-8"))
os.chdir(root_dir)

In [None]:
## Configure paths for the labelAR-collected dataset

This should be a dataset that was collected with LabelAR and then converted to MS COCO format via the `./scripts/create_dataset.sh` script (which calls `./scripts/create_dataset.py`).

In [13]:
# Dataset name and split; together they select the data directory, the image
# folder, and the annotation file composed below.
ds_name = "uist-mugs-v2"
split = "val"

# All paths are built under root_dir.
data_dir = root_dir.joinpath("training", "data", f"{ds_name}-{split}")
img_path = data_dir.joinpath("images", f"{ds_name}_{split}")
ann_file = data_dir.joinpath("annotations", f"instances_{ds_name}_{split}.json")

# Load the annotation file through the pycocotools COCO API and echo the
# resolved locations.
coco_all = COCO(ann_file)
print("ann_file: ", ann_file)
print("img_path: ", img_path)

# Final expression: the top-level keys of the loaded dataset dict are shown as
# the cell's output.
coco_all.dataset.keys()

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
ann_file:  /home/gbiamby/school/labelar-det-demo/training/data/uist-mugs-v2-val/annotations/instances_uist-mugs-v2_val.json
img_path:  /home/gbiamby/school/labelar-det-demo/training/data/uist-mugs-v2-val/images/uist-mugs-v2_val


dict_keys(['images', 'annotations', 'categories'])

## Display some stats about the labelAR-collected COCO-formatted dataset:

In [14]:
# Summarize the dataset: the category list, the total image count, and the
# number of images associated with each category id.
coco_dataset = coco_all.dataset
cats = coco_dataset["categories"]
print("Cats: ", cats)
print("Total images: ", len(coco_dataset["images"]))

for cat in cats:
    # Look up the image ids for this single category, then load their records.
    img_ids = coco_all.getImgIds(catIds=[cat["id"]])
    imgs = coco_all.loadImgs(ids=img_ids)
    print(f"Found {len(imgs)} images for cat.id: {cat['id']}, cat.name: '{cat['name']}'")

Cats:  [{'supercategory': '', 'id': 0, 'name': 'background'}, {'supercategory': '', 'id': 1, 'name': 'blue'}, {'supercategory': '', 'id': 2, 'name': 'cal'}, {'supercategory': '', 'id': 3, 'name': 'dunder'}, {'supercategory': '', 'id': 4, 'name': 'flower'}, {'supercategory': '', 'id': 5, 'name': 'gold'}]
Total images:  179
Found 0 images for cat.id: 0, cat.name: 'background'
Found 78 images for cat.id: 1, cat.name: 'blue'
Found 77 images for cat.id: 2, cat.name: 'cal'
Found 72 images for cat.id: 3, cat.name: 'dunder'
Found 72 images for cat.id: 4, cat.name: 'flower'
Found 72 images for cat.id: 5, cat.name: 'gold'


## Inspect all bounding box labels for one category at a time

The output from the previous cell lists all the category ids and names. Enter one of the category names into the `cat_name` variable below and then run the cell to output all images that have that category annotated somewhere in the image. We use the pycocotools API to also highlight the bounding box(es) for `cat_name`.

In [17]:
# Configure options:
# `cat_name` is the category name looked up via `coco_all.getCatIds(catNms=[cat_name])`
# in the following cell. Set it to one of the category names printed by the stats
# cell above (that run listed "blue", "cal", "dunder", "flower" and "gold").
cat_name = "flower"

In [None]:
# Resolve the category id(s) for cat_name and the images that contain that category.
cat_ids = coco_all.getCatIds(catNms=[cat_name])
print("cat_ids: ", cat_ids)
if not cat_ids:
    # pycocotools treats an empty catIds filter as "no filter", so a misspelled
    # category name would otherwise silently display every image and annotation.
    known_names = [c["name"] for c in coco_all.dataset["categories"]]
    raise ValueError(f"Unknown category name {cat_name!r}; expected one of {known_names}")
coco_imgs = coco_all.loadImgs(ids=coco_all.getImgIds(catIds=cat_ids))

# Display and highlight all annotations for cat_name, one figure per image:
for coco_im in coco_imgs:
    # Restrict the annotations to this image and the selected category.
    ann_ids = coco_all.getAnnIds(imgIds=[coco_im['id']], catIds=cat_ids)
    anns = coco_all.loadAnns(ann_ids)

    # The context manager releases the image file once the figure has been rendered.
    with Image.open(img_path / coco_im['file_name']) as im:
        fig = plt.figure(figsize=(10, 10))  # change display size of image here
        plt.title(f"{coco_im['file_name']} - Image id: {coco_im['id']}")
        plt.axis('off')
        imshow(im)
        # display annotations
        coco_all.showAnns(anns)
        plt.show()

    # Close each figure as soon as it is shown; otherwise one open figure per
    # image accumulates for the whole loop (memory growth and matplotlib's
    # "too many open figures" warning).
    plt.close(fig)