In [None]:
#------------------------------
# imports
#------------------------------
import os
import json
from tqdm.auto import tqdm
from glob import glob
from shutil import copy
#------------------------------
# GLOBALS
#------------------------------
# root folder of the BaDLAD dataset; every input path below is derived from it
BADLAD_DIR = "/kaggle/input/badlad"
train_f = os.path.join(BADLAD_DIR,"badlad-train-coco.json")
test_f  = os.path.join(BADLAD_DIR,"badlad-test-coco.json")

# the encoding is stated explicitly so that loading does not depend on the locale default
with open(train_f,encoding="utf-8") as json_data:
    train_json = json.load(json_data)
with open(test_f,encoding="utf-8") as json_data:
    test_json = json.load(json_data)

# every .png located two folder levels below the dataset root
# sorted: glob returns entries in arbitrary order, sorting keeps the listing reproducible
IMG_PATHS=sorted(glob(os.path.join(BADLAD_DIR,"*/*/*.png")))
# file names only, index-aligned with IMG_PATHS
IMG_IDENS=[os.path.basename(_path) for _path in IMG_PATHS]

#------------------------------
# helper functions
#------------------------------
def create_dir(base,ext):
    '''
        creates a folder named {ext} inside {base} if it does not exist yet
        (missing parent folders of {base} are created as well)
        args:
            base: the path to create the directory
            ext : the name of the directory to create
        returns:
            _path: the created directory path as string
        raises:
            FileExistsError: if the path already exists but is not a directory
    '''
    # forming the path
    _path=os.path.join(base,ext)
    # exist_ok avoids the check-then-create race and makes repeated calls safe
    os.makedirs(_path,exist_ok=True)
    return _path
#------------------------------
def get_img_path_from_filename(filename):
    '''
        looks up where an image lives on disk, given only its file name
        args:
            filename : the base name of the image (as stored in IMG_IDENS)
        returns:
            the entry of IMG_PATHS at the position of the first match in IMG_IDENS
        raises:
            ValueError: if the file name is not present in IMG_IDENS
    '''
    # IMG_IDENS and IMG_PATHS are index-aligned, so the position in one addresses the other
    return IMG_PATHS[IMG_IDENS.index(filename)]
#------------------------------
# per annotation-list cache: id(list) -> (list, length when indexed, {image_id: [annotations]})
_ANNOTATION_INDEX_CACHE={}
#------------------------------
def _annotations_by_image(annotations):
    '''
        groups COCO annotations by image_id; the grouping is built once per annotation list
        args:
            annotations: the list stored under the 'annotations' key of the COCO dictionary
        returns:
            dictionary mapping image_id to the list of its annotations (original order kept)
    '''
    cached=_ANNOTATION_INDEX_CACHE.get(id(annotations))
    # rebuild when the list was never indexed or its length changed since it was indexed
    if cached is None or cached[0] is not annotations or cached[1]!=len(annotations):
        lookup={}
        for ann in annotations:
            lookup.setdefault(ann['image_id'],[]).append(ann)
        # the list itself is stored too, so its id() cannot be reused by another object
        cached=(annotations,len(annotations),lookup)
        _ANNOTATION_INDEX_CACHE[id(annotations)]=cached
    return cached[2]
#------------------------------
def save_data(idx,data_json,gt_path,img_path):
    '''
        - creates a ground truth .txt file in yolov8-seg format from a provided COCO format JSON data
        - copies the image to the proper location
        
        the .txt file is overwritten on every call, so re-running the conversion
        does not duplicate label lines. Annotations whose 'segmentation' is empty
        or not a list of polygons are skipped. Only the first polygon of an
        annotation is written.
        
        args:
            idx      : the index of the data to use from data_json
            data_json: the COCO format annotation based json dictionary
            gt_path  : the path to save the .txt file
            img_path : the path to copy the image file
    '''
    image    = data_json['images'][idx]
    image_id = image['id']
    filename = image['file_name']
    height   = image['height']
    width    = image['width']
    
    # copy data
    src_img=get_img_path_from_filename(filename)
    dst_img=os.path.join(img_path,filename)
    copy(src_img,dst_img)
    
    # label name = image name with only the last extension replaced (stems may contain dots)
    gt_txt=os.path.join(gt_path,os.path.splitext(filename)[0]+".txt")
    # annotations of this image, found without rescanning the whole list for every image
    polygons=_annotations_by_image(data_json['annotations']).get(image_id,[])
    # "w": the file always reflects exactly the current annotations (empty file if there are none)
    with open(gt_txt,"w") as f:
        for polygon in polygons:
            segmentation=polygon['segmentation']
            # nothing to write when there is no polygon (empty list or non-list encoding)
            if not isinstance(segmentation,list) or not segmentation:
                continue
            # format-- <label> <x1> <y1> <x2> <y2>........
            cat=polygon['category_id']
            # normalize segment: even positions are x (by width), odd positions are y (by height)
            coords=[round(val/(width if pos%2==0 else height),3)
                    for pos,val in enumerate(segmentation[0])]
            line=" ".join([str(cat)]+[str(v) for v in coords])
            f.write(f"{line}\n")

In [None]:
#------------------------------
# structure--yolo-seg
#------------------------------
# output layout: badlad-yolo-seg/{train,val}/{images,labels} under the current working directory
ds_dir=create_dir(os.getcwd(),"badlad-yolo-seg")
#------------------------------
train_dir=create_dir(ds_dir,"train")
train_img_dir=create_dir(train_dir,"images")
train_lbl_dir=create_dir(train_dir,"labels")
#-------------------------------
val_dir=create_dir(ds_dir,"val")
val_img_dir=create_dir(val_dir,"images")
val_lbl_dir=create_dir(val_dir,"labels")
# save_data() takes only idx, data_json, gt_path and img_path; passing any other
# keyword (the former `use_poly=True`) raises TypeError on the first iteration
#-----------------train data-----------------------------
for idx in tqdm(range(len(train_json['images']))):
    save_data(idx=idx,
              data_json=train_json,
              gt_path=train_lbl_dir,
              img_path=train_img_dir)
#-----------------val data-----------------------------
# the test annotations are written to the "val" folders
for idx in tqdm(range(len(test_json['images']))):
    save_data(idx=idx,
              data_json=test_json,
              gt_path=val_lbl_dir,
              img_path=val_img_dir)
    

# GLOBAL CATEGORY LISTING

In [None]:
# show the "categories" entry of the training annotation dictionary
train_json["categories"]

In [None]:
# show the "categories" entry of the test annotation dictionary
test_json["categories"]