In [5]:
import os
import shutil

import numpy as np
import yaml
from matplotlib import pyplot as plt
from PIL import Image
from pycocotools import mask as maskUtils
from pycocotools.coco import COCO
from upolygon import find_contours

In [6]:
# Dataset splits to convert; each one is read from "<split>_seg" in the source tree.
modes = ["train", "val", "test"]

# Start from a clean output tree so the cell can be re-run safely.
# shutil.rmtree raises if the removal fails, unlike a shelled-out `rm -rf`
# whose exit status was silently ignored.
if os.path.exists("./dataset"):
    shutil.rmtree("./dataset")

# os.makedirs creates the missing "./dataset" parent as well.
os.makedirs("./dataset/images")
os.makedirs("./dataset/labels")

# Build the {index: class name} map from the label-map file: one name per line,
# indexed by zero-based line number.
with open("/teamspace/studios/this_studio/endoscapes/seg_label_map.txt") as f:
    classes = {i: line.strip() for i, line in enumerate(f)}

# Dataset description that is later dumped to dataset.yaml; the conversion
# loop adds one "<split>: images/<split>" entry per processed split.
dataset = {
    "names": classes
}

# Convert every split from COCO RLE masks to YOLO-style polygon label files.
# For each image: symlink it into ./dataset/images/<split>/ and write
# ./dataset/labels/<split>/<image stem>.txt with one line per polygon:
#   "<category_id> x1 y1 x2 y2 ..."  (coordinates normalised to [0, 1]).
for mode in modes:
    print("Processing", mode)
    os.makedirs(f"./dataset/images/{mode}")
    os.makedirs(f"./dataset/labels/{mode}")
    src_dir = f"/teamspace/studios/this_studio/endoscapes/{mode}_seg"
    coco = COCO(f"{src_dir}/annotation_coco.json")
    dataset[mode] = f"images/{mode}"
    print("images len:", len(coco.imgs))

    cat_ids = coco.getCatIds()

    for img in coco.imgs.values():
        img_name = img['file_name']
        # Swap only the real extension; a plain str.replace(".jpg", ".txt")
        # leaves non-.jpg names untouched, so the label file would be written
        # under the image's own filename.
        txt_name = os.path.splitext(img_name)[0] + ".txt"

        anns_ids = coco.getAnnIds(imgIds=img['id'], catIds=cat_ids, iscrowd=None)
        anns = coco.loadAnns(anns_ids)

        # Symlink image in images folder
        os.symlink(f"{src_dir}/{img_name}", f"./dataset/images/{mode}/{img_name}")

        with open(f"./dataset/labels/{mode}/{txt_name}", "w") as f:
            for ann in anns:
                segmentation = ann['segmentation']
                # decode() takes a list of RLE objects and returns an array
                # with one channel per object; channel 0 is this annotation.
                mask = maskUtils.decode([segmentation])
                height, width = segmentation['size']  # [height, width]

                _labels, external, _internal = find_contours(mask[:, :, 0])

                # NOTE(review): the label uses the raw COCO category_id, while
                # the "names" map is indexed from 0 by line number - confirm
                # the two numbering schemes agree for this dataset.
                for external_path in external:
                    # A polygon needs at least 3 points; skip degenerate contours.
                    if len(external_path) // 2 < 3:
                        continue

                    # external_path is a flat [x1, y1, x2, y2, ...] sequence;
                    # zipping one iterator with itself pairs it up as (x, y)
                    # and drops a trailing unpaired value.
                    flat = iter(external_path)
                    coords = []
                    for x, y in zip(flat, flat):
                        coords.append(str(x / width))
                        coords.append(str(y / height))

                    # One label line per contour: concatenating disjoint
                    # contours into a single polygon would join them with
                    # spurious edges, and an annotation with no usable contour
                    # must not emit a line holding only the class id.
                    f.write(f"{ann['category_id']} {' '.join(coords)}\n")

# Serialise the dataset description dict to YAML and store it as dataset/dataset.yaml
with open("./dataset/dataset.yaml", "w") as yaml_file:
    yaml_text = yaml.dump(dataset)
    yaml_file.write(yaml_text)

Processing train
loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
images len: 343
Processing val
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
images len: 76
Processing test
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
images len: 74


In [7]:
# Recursively pack the generated dataset/ folder into dataset.zip (shell escape).
# NOTE(review): the images under dataset/images are symlinks; zip presumably
# archives the link targets' contents unless -y is passed - confirm this is intended.
!zip -r dataset.zip dataset/

  adding: dataset/ (stored 0%)
  adding: dataset/labels/ (stored 0%)
  adding: dataset/labels/val/ (stored 0%)
  adding: dataset/labels/val/153_22200.txt (deflated 85%)
  adding: dataset/labels/val/126_11550.txt (deflated 86%)
  adding: dataset/labels/val/142_34000.txt (deflated 86%)
  adding: dataset/labels/val/131_42625.txt (deflated 85%)
  adding: dataset/labels/val/126_13050.txt (deflated 86%)
  adding: dataset/labels/val/131_44875.txt (deflated 85%)
  adding: dataset/labels/val/153_25950.txt (deflated 85%)
  adding: dataset/labels/val/159_58550.txt (deflated 86%)
  adding: dataset/labels/val/153_20700.txt (stored 0%)
  adding: dataset/labels/val/142_34750.txt (deflated 86%)
  adding: dataset/labels/val/137_7875.txt (deflated 86%)
  adding: dataset/labels/val/153_32700.txt (deflated 84%)
  adding: dataset/labels/val/146_12525.txt (deflated 86%)
  adding: dataset/labels/val/131_47875.txt (deflated 85%)
  adding: dataset/labels/val/131_49375.txt (deflated 84%)
  adding: dataset/label