In [174]:
import json
import numpy as np
import os
import glob
from pathlib import Path
import cv2

In [175]:
### Convert from ActiveLoop Data Lake format to COCO

In [176]:
#import deeplake
#import hub

In [177]:
# Convert custom dataset to COCO

In [183]:
def _polygon_area(xs, ys):
    """Return the area enclosed by the polygon with vertices ``(xs[i], ys[i])``.

    Uses the shoelace formula; the result is non-negative whatever the
    winding order of the vertices.
    """
    n = len(xs)
    twice_area = sum(
        xs[i] * ys[(i + 1) % n] - xs[(i + 1) % n] * ys[i] for i in range(n)
    )
    return abs(twice_area) / 2


def convert_to_coco(dir_path: "str | os.PathLike", jsons: "Iterable[str | os.PathLike]") -> dict:
    """Build a COCO-style annotation dict from polygon annotation JSON files.

    Each JSON file is expected to be a mapping whose values carry a
    ``'filename'`` and a ``'regions'`` collection; every region must provide
    ``shape_attributes.all_points_x`` / ``all_points_y`` (the layout produced
    by VGG Image Annotator polygon exports). ``'regions'`` may be either a
    dict (its values are used, in insertion order) or a list.

    Args:
        dir_path: Directory that contains the images named in the JSON files.
        jsons: Paths of the annotation JSON files to merge.

    Returns:
        A dict with the keys ``"categories"``, ``"images"`` and
        ``"annotations"``. Image ids start at 1 and increase across all
        files. Every annotation is assigned the single ``"Ballon"`` category,
        a ``[x, y, width, height]`` bbox, one flattened polygon in
        ``"segmentation"`` and the polygon's area in ``"area"``.

    Raises:
        AssertionError: If an image listed in a JSON file cannot be read.
        ValueError: If one image has more regions than the annotation-id
            scheme can number without colliding with the next image.
        KeyError: If an entry or region lacks one of the expected keys.
    """
    # Annotation ids are image_id * max_objs + region index, so max_objs is
    # the largest number of regions a single image may carry.
    max_objs = 200
    ## Add all the known possible categories and their ids
    cat_dict = {"Ballon": 1}
    coco_ann = {
        "categories": [{"supercategory": "Things", "id": 1, "name": "Ballon"}],
        "images": [],
        "annotations": [],
    }
    idx = 0
    for file in jsons:
        # Parse first, then release the file handle before the (slow) image reads.
        with open(file, 'r') as f:
            ann = json.load(f)
        for content in ann.values():
            idx += 1
            filename = content['filename']
            img = cv2.imread(os.path.join(dir_path, filename))
            assert img is not None, f"Image {filename} is None"
            img_h, img_w = img.shape[:2]
            coco_ann["images"].append(
                {"file_name": filename, "height": img_h, "width": img_w, "id": idx}
            )

            regions = content['regions']
            if isinstance(regions, dict):
                regions = list(regions.values())
            if len(regions) > max_objs:
                # Going past max_objs would reuse ids that belong to the next image.
                raise ValueError(
                    f"Image {filename} has {len(regions)} regions; "
                    f"at most {max_objs} are supported per image"
                )

            for obj_idx, inst in enumerate(regions):
                all_xs = inst['shape_attributes']['all_points_x']
                all_ys = inst['shape_attributes']['all_points_y']
                minx, miny = min(all_xs), min(all_ys)
                box_w = max(all_xs) - minx
                box_h = max(all_ys) - miny
                # COCO polygons are flat lists: [x0, y0, x1, y1, ...].
                flat_polygon = [coord for point in zip(all_xs, all_ys) for coord in point]
                coco_ann["annotations"].append({
                    "segmentation": [flat_polygon],
                    # Area of the segmentation itself, not of its bounding box.
                    "area": _polygon_area(all_xs, all_ys),
                    "iscrowd": 0,
                    "image_id": idx,
                    "bbox": [minx, miny, box_w, box_h],
                    "category_id": cat_dict["Ballon"],
                    "id": idx * max_objs + obj_idx,
                })
    return coco_ann

In [184]:
# Root of the balloon dataset; it must contain the `train` and `val` folders.
# Set the BALLOON_DATASET_DIR environment variable to use another location.
# Joining with "" guarantees the trailing separator that the later
# `dir_path+'train'` / `dir_path+'val'` concatenations rely on.
dir_path = os.path.join(
    os.environ.get(
        "BALLOON_DATASET_DIR",
        "/home/asad/dev/Transformers-Tutorials/DETR/balloon_dataset/balloon/",
    ),
    "",
)

In [185]:
## Training COCO

In [186]:
# Collect every annotation JSON file found directly in the training folder.
path = Path(f"{dir_path}train")
jsons = [json_file for json_file in path.glob('*.json')]

In [187]:
# Convert the training annotations; images are read from the same folder.
train_dir = f"{dir_path}train"
converted_ann = convert_to_coco(train_dir, jsons)

In [188]:
# Report how many training images were converted.
train_summary = f'Length of Training images {len(converted_ann["images"])}'
print(train_summary)

Length of Training images 61


In [189]:
# Save the training annotations in the dataset root as `train_coco.json`.
train_coco_path = os.path.join(dir_path, 'train_coco.json')
with open(train_coco_path, 'w') as train_file:
    json.dump(converted_ann, train_file)

In [190]:
## Val COCO

In [191]:
# Collect every annotation JSON file found directly in the validation folder.
path = Path(f"{dir_path}val")
jsons = [json_file for json_file in path.glob('*.json')]

In [192]:
# Convert the validation annotations; images are read from the same folder.
val_dir = f"{dir_path}val"
converted_ann = convert_to_coco(val_dir, jsons)

In [193]:
# Report how many validation images were converted.
val_summary = f'Length of Val images {len(converted_ann["images"])}'
print(val_summary)

Length of Val images 13


In [194]:
# Save the validation annotations in the dataset root as `val_coco.json`.
val_coco_path = os.path.join(dir_path, 'val_coco.json')
with open(val_coco_path, 'w') as val_file:
    json.dump(converted_ann, val_file)

In [195]:
# Display the most recently converted annotations (the validation split at this
# point in the notebook) to inspect the generated COCO structure.
converted_ann

{'categories': [{'supercategory': 'Things', 'id': 1, 'name': 'Ballon'}],
 'images': [{'file_name': '24631331976_defa3bb61f_k.jpg',
   'height': 2048,
   'width': 1323,
   'id': 1},
  {'file_name': '16335852991_f55de7958d_k.jpg',
   'height': 2048,
   'width': 1536,
   'id': 2},
  {'file_name': '14898532020_ba6199dd22_k.jpg',
   'height': 1365,
   'width': 2048,
   'id': 3},
  {'file_name': '8053085540_a72bd21a64_k.jpg',
   'height': 1595,
   'width': 2048,
   'id': 4},
  {'file_name': '6810773040_3d81036d05_k.jpg',
   'height': 1536,
   'width': 2048,
   'id': 5},
  {'file_name': '5603212091_2dfe16ea72_b.jpg',
   'height': 1024,
   'width': 768,
   'id': 6},
  {'file_name': '5555705118_3390d70abe_b.jpg',
   'height': 1024,
   'width': 683,
   'id': 7},
  {'file_name': '4838031651_3e7b5ea5c7_b.jpg',
   'height': 675,
   'width': 1024,
   'id': 8},
  {'file_name': '4581425993_72b9b15fc0_b.jpg',
   'height': 1024,
   'width': 776,
   'id': 9},
  {'file_name': '3825919971_93fb1ec581_b.jpg'