In [112]:
from pycocotools.coco import COCO
from grcore.coco import ann, init_dataset_folder
from grcore.coco.dataset import CocoDataset
from grcore.coco.file_io import save_cocodict
import copy
from pathlib import Path
from grcore.common.utils import tqdm
from grcore.img import maskutils
import pandas as pd
from sklearn.model_selection import train_test_split
import shutil

In [113]:
# Which split of the original dataset this run converts.
dsettype = "train"

# Roots of the original dataset and of the converted copy.
orig_root_path = Path("/data/dataset/car-parts-segmentation/orig")
new_root_path = Path("/data/dataset/car-parts-segmentation/grformat")

# Annotation file that is read, and the "parts" annotation file that is written.
dsetpath = orig_root_path / f"annotations/{dsettype}.json"
newpath = new_root_path / f"annotations/{dsettype}_parts.json"

In [114]:
# Prepare the destination dataset root with the grcore helper before anything
# is written into it (presumably creates the expected sub-folders such as
# annotations/ and the image folders — TODO confirm against grcore).
init_dataset_folder(new_root_path)

In [115]:
# Load the original annotation file with pycocotools.
dset = COCO(str(dsetpath))

# Copy info: the "info" section is carried over as-is; it is deep-copied so the
# converted dataset does not share the dict with the loaded one.
newinfo = copy.deepcopy(dset.dataset["info"])

loading annotations into memory...
Done (t=0.05s)
creating index...
index created!


## Correct licenses

In [116]:
# Index deep copies of the license records by their original id; working on
# copies keeps the loaded dataset untouched.
licinfos = {lic["id"]: lic for lic in copy.deepcopy(dset.dataset["licenses"])}

# Renumber the licenses 1..N in ascending order of their original id.
# licmap: original license id -> new license id (used when fixing the images).
licmap = {oldid: newid for newid, oldid in enumerate(sorted(licinfos), 1)}

newlicinfos = []
for oldid, newid in licmap.items():
    licinfo = licinfos[oldid]
    licinfo["id"] = newid
    newlicinfos.append(licinfo)

## Correct categories

In [117]:
# catmap: original category id -> new category id (part categories only).
catmap = {}

# newid == 1 is the whole object ("car"); it has no counterpart in the
# original dataset.
newcats = [ann.build_cat_info(1, "car", "car")]

# oldid == 0 is background: it is dropped and therefore gets no catmap entry.
part_oldids = [oldid for oldid in sorted(dset.cats.keys()) if oldid != 0]

# Part categories are numbered from 2 upwards in ascending order of their
# original id, so the new ids have no gap where the background used to be.
for newid, oldid in enumerate(part_oldids, 2):
    # Work on a copy so the loaded dataset keeps its original category records
    # (same approach as the license cell).
    catinfo = copy.deepcopy(dset.cats[oldid])
    catinfo["id"] = newid
    catinfo["supercategory"] = "car"
    catmap[oldid] = newid
    newcats.append(catinfo)

## Correct images

In [118]:
# imgmap: original image id -> new image id (used when fixing the annotations).
imgmap = {}
newimgs = []

# Images are renumbered 1..N in ascending order of their original id.
for newid, oldid in enumerate(sorted(dset.imgs.keys()), 1):
    # Work on a copy: rewriting the loaded record in place would make this
    # cell non-idempotent (a second run would push the already-remapped
    # license through licmap again and would no longer see the original
    # relative path in "file_name").
    imginfo = copy.deepcopy(dset.imgs[oldid])
    imgmap[oldid] = newid
    imginfo["id"] = newid
    imginfo["license"] = licmap[imginfo["license"]]

    # "file_name" becomes the bare file name; the original relative path is
    # kept in "image_url" and the new location (relative to the new dataset
    # root) goes into "coco_url". Both are read by the image-copy step.
    path = Path(imginfo["file_name"])
    imginfo["file_name"] = path.name
    imginfo["coco_url"] = f"{dsettype}/{path.name}"
    imginfo["image_url"] = str(path)
    imginfo["dataset"] = "Car Parts"
    newimgs.append(imginfo)

### Copy images

In [119]:
# Copy every image from its original location ("image_url", relative to the
# original root) to its new location ("coco_url", relative to the new root).
for newimg in newimgs:
    src, dst = orig_root_path / newimg["image_url"], new_root_path / newimg["coco_url"]
    shutil.copy(src, dst)

## Correct annotations

In [120]:
newanns = []
for oldid in sorted(dset.anns.keys()):
    # Skip background annotations: original category 0 is dropped and has no
    # entry in catmap.
    if dset.anns[oldid]["category_id"] == 0:
        continue

    # Work on a copy: remapping "image_id" / "category_id" in place on the
    # loaded dataset would make a second run of this cell remap ids that were
    # already remapped (wrong ids or a KeyError).
    anninfo = copy.deepcopy(dset.anns[oldid])

    # Number only the annotations that are kept, so the ids run 1..N without
    # gaps where background annotations were skipped.
    anninfo["id"] = len(newanns) + 1
    anninfo["image_id"] = imgmap[anninfo["image_id"]]
    anninfo["category_id"] = catmap[anninfo["category_id"]]
    anninfo["is_occluded"] = False
    anninfo["occluders"] = []
    newanns.append(anninfo)

In [121]:
# Assemble the converted "parts" dataset from the corrected sections and write
# it to disk; it is loaded back as a CocoDataset in the next step.
newdset = ann.build_dset_info(
    newinfo,
    newlicinfos,
    newcats,
    newimgs,
    newanns,
)
save_cocodict(newdset, newpath)


Saving json: /data/dataset/car-parts-segmentation/grformat/annotations/train_parts.json


### Add spawn id and create wholes
We assume there is only one car instance per image.



In [122]:
# Reload the parts annotation file that was just written, this time through the
# project's CocoDataset wrapper (used below for querying annotations and masks).
coco = CocoDataset(newpath)

loading annotations into memory...
Loading json: /data/dataset/car-parts-segmentation/grformat/annotations/train_parts.json
Done 0.12s
creating index...
index created!


In [123]:
# Tabular overview of the converted categories, one row per category record.
print(pd.DataFrame(list(coco.cats.values())))

    id               name supercategory
0    1                car           car
1    2        back_bumper           car
2    3         back_glass           car
3    4     back_left_door           car
4    5    back_left_light           car
5    6    back_right_door           car
6    7   back_right_light           car
7    8       front_bumper           car
8    9        front_glass           car
9   10    front_left_door           car
10  11   front_left_light           car
11  12   front_right_door           car
12  13  front_right_light           car
13  14               hood           car
14  15        left_mirror           car
15  16       right_mirror           car
16  17           tailgate           car
17  18              trunk           car
18  19              wheel           car


In [124]:
# Check instances per image: every category except the wheel is expected to
# have at most one annotation per image; offending image ids are printed.
wheel_catid = 19  # "wheel" in the category table above
for imgid in coco.imgids:
    for catid in coco.catids:
        annids = coco.get_annids([catid], imgids=[imgid])
        if catid == wheel_catid or len(annids) <= 1:
            continue
        print(f"found more than 1 object instance on an image: {imgid}")


In [125]:
# One "whole car" annotation per image, built by merging the masks of all part
# annotations of that image.
wholeanns = []
for imgid in tqdm(coco.imgids):
    annids = coco.get_annids(imgids=[imgid])
    if annids:
        part_anninfos = coco.load_anninfos(annids)
        merged_rle = maskutils.merge([coco.ann_to_rle(part) for part in part_anninfos])

        # Correct part anninfos: every part belongs to the single car instance.
        # The records are updated in place; the parts dataset is saved back
        # in the next step.
        for part in part_anninfos:
            part["spawn_id"] = 1

        # Create whole anninfo
        # annid == imgid because: 1. imgids are sorted; 2. one instance per image
        # catid == 1 because "car" is the only whole-object category
        wholeanns.append(
            ann.build_ann_info(imgid, imgid, 1, merged_rle, is_occluded=False, occluders=[], spawn_id=1)
        )
    else:
        tqdm.write(f"no annotations for: {imgid}")

100%|██████████| 400/400 [00:00<00:00, 1559.65it/s]


In [126]:
# Save back the parts dataset held by the CocoDataset wrapper.
save_cocodict(coco.gt.dataset, newpath)

# The wholes dataset is a deep copy of the parts dataset in which only the
# annotations are replaced by the whole-car annotations.
wholesdset = copy.deepcopy(coco.gt.dataset)
wholesdset["annotations"] = wholeanns

# Written next to the parts file, as "<split>_wholes.json".
wholepath = newpath.with_name(f"{dsettype}_wholes.json")
save_cocodict(wholesdset, wholepath)

Saving json: /data/dataset/car-parts-segmentation/grformat/annotations/train_parts.json
Saving json: /data/dataset/car-parts-segmentation/grformat/annotations/train_wholes.json


### Select 20% random images and save as val data


In [127]:
# Only the train split is further divided into train / val.
if dsettype == "train":
    # Load wholes and parts together (wholes file first, parts file second).
    coco = CocoDataset(wholepath, newpath, from_gt=True)

    # Sections shared by the val datasets, copied so they can be reused freely.
    gt_dataset = coco.gt.dataset
    inforecord = copy.deepcopy(gt_dataset["info"])
    licrecords = copy.deepcopy(gt_dataset["licenses"])
    catrecords = copy.deepcopy(gt_dataset["categories"])

loading annotations into memory...
Loading json: /data/dataset/car-parts-segmentation/grformat/annotations/train_wholes.json
Done 0.02s
creating index...
index created!
loading annotations into memory...
Loading json: /data/dataset/car-parts-segmentation/grformat/annotations/train_parts.json
Done 0.06s
creating index...
index created!


In [128]:
def _remap_annrecords(annrecords, id_by_old_imgid):
    """Return deep copies of ``annrecords`` renumbered for the val dataset.

    Annotation ids become 1..N in the given order and every ``image_id`` is
    translated through ``id_by_old_imgid`` (old image id -> new image id).
    The input records are left untouched, so the loaded dataset is not
    modified and the cell can be re-run safely.
    """
    remapped = copy.deepcopy(annrecords)
    for id_, annrecord in enumerate(remapped, 1):
        annrecord["id"] = id_
        annrecord["image_id"] = id_by_old_imgid[annrecord["image_id"]]
    return remapped


if dsettype == "train":
    # Fixed seed so that a fresh "Restart & Run All" selects the same val images.
    val_split_seed = 42

    imgids = coco.imgids
    imgids_train, imgids_val = train_test_split(imgids, test_size=0.2, random_state=val_split_seed)
    # NOTE(review): imgids_train is not used below, and the train_* annotation
    # files written earlier still contain the val images. Confirm whether this
    # train/val overlap is intended.
    imgrecords = copy.deepcopy(coco.load_imginfos(imgids_val))

    # Change train to val in coco_url and renumber the val images 1..N.
    # A dedicated name is used so the image-id map built for the full dataset
    # (imgmap) is not overwritten.
    val_imgmap = {}
    for id_, imgrecord in enumerate(imgrecords, 1):
        imgrecord["coco_url"] = f"val/{imgrecord['file_name']}"
        val_imgmap[imgrecord["id"]] = id_
        imgrecord["id"] = id_

    # Whole-object annotations of the val images.
    annids = coco.get_annids(imgids=imgids_val)
    wholes_annrecords = _remap_annrecords(coco.load_anninfos(annids), val_imgmap)

    wholesdset = ann.build_dset_info(inforecord, licrecords, catrecords, imgrecords, wholes_annrecords)
    valwholepath = wholepath.with_name("val_wholes.json")
    save_cocodict(wholesdset, valwholepath)

    # Part annotations of the val images (from_gt=False selects the second
    # annotation file passed to CocoDataset, i.e. the parts file).
    annids = coco.get_annids(imgids=imgids_val, from_gt=False)
    parts_annrecords = _remap_annrecords(coco.load_anninfos(annids, from_gt=False), val_imgmap)

    partsdset = ann.build_dset_info(inforecord, licrecords, catrecords, imgrecords, parts_annrecords)
    valpartspath = wholepath.with_name("val_parts.json")
    save_cocodict(partsdset, valpartspath)

Saving json: /data/dataset/car-parts-segmentation/grformat/annotations/val_wholes.json
Saving json: /data/dataset/car-parts-segmentation/grformat/annotations/val_parts.json


### Save val images

In [129]:
if dsettype == "train":
    # Dataset root as reported by the CocoDataset wrapper; the val images are
    # copied from its "train" folder to the location given by "coco_url".
    valpath = coco.root_path
    for imgrecord in imgrecords:
        src, dst = valpath / "train" / imgrecord["file_name"], valpath / imgrecord["coco_url"]
        shutil.copy(src, dst)