In [1]:
import os
import json
import random
import matplotlib.pyplot as plt

import torch
from lvis import LVIS
from PIL import Image
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms import transforms
from torchvision.ops import box_convert

In [2]:
def append_categories(path_1, path_2, outPath):
    with open(path_1, "r") as f:
        eval = json.load(f)
    with open(path_2, "r") as f:
        metadata = json.load(f)

    eval["categories"] = metadata["categories"]
    with open(outPath, "w") as f:
        json.dump(eval, f)

In [3]:
def preprocess(json_path):
    with open(json_path, "r") as f:
        data = json.load(f)
    for i in range(len(data["annotations"])):
        data["annotations"][i]["category_id"] = data["annotations"][i]["_category_id"]
    with open(json_path, "w") as f:
        json.dump(data, f)

In [4]:
dataPath = os.path.join(os.getcwd(), "data", "EgoObjects")
imgPath = os.path.join(dataPath, "images")

In [5]:
evalPath = os.path.join(dataPath, "ego_objects_eval.json")
categoriesPath = os.path.join(dataPath, "ego_objects_metadata.json")
outPath = os.path.join(dataPath, "ego_objects_eval_all.json")

if not os.path.exists(outPath):
    append_categories(evalPath, categoriesPath, outPath)
    preprocess(outPath)

In [6]:
lvis = LVIS(outPath)

In [8]:
img_ids = lvis.imgs.keys()
img_ids = sorted(img_ids, key=lambda x: random.random())

for img_id in img_ids:
    ann_ids = lvis.get_ann_ids(img_ids = [img_id])
    target = lvis.load_anns(ann_ids)

    path = lvis.load_imgs([img_id])[0]
    path = str(path["group_id"]) + "_" + str(path["video_id"]) + "_" + str(path["frame_id"]) + ".jpg"
    img = Image.open(os.path.join(imgPath, path)).convert('RGB')
    img = transforms.ToTensor()(img) * 255
    img = img.type(torch.uint8)
    
    nums = len(target)
    boxes, labels = [], []
    for i in range(nums):
        boxes.append(target[i]['bbox'])
        labels.append(target[i]['category_id'])
    
    labels = torch.tensor(labels).type(torch.uint8)
    boxes = torch.tensor(boxes).type(torch.uint8)
    
    print(boxes)
    boxes = box_convert(boxes, 'xywh', 'xyxy')
    print(boxes)
    
    img = draw_bounding_boxes(img, boxes, labels, colors=None, width=1)
    plt.imshow(img)
    plt.show()
    break

tensor([[ 41, 185,  54, 155],
        [ 15, 242,  60, 150],
        [247,  75,  89,  56],
        [119,  83,  77,  45]], dtype=torch.uint8)
tensor([[ 41, 185,  95,  84],
        [ 15, 242,  75, 136],
        [247,  75,  80, 131],
        [119,  83, 196, 128]], dtype=torch.uint8)


ValueError: Boxes need to be in (xmin, ymin, xmax, ymax) format. Use torchvision.ops.box_convert to convert them