In [12]:
import json
from pprint import pprint
import pandas as pd
import os
import ast
import shutil

In [95]:
def reformat_labels(raw_data, name, path="images_2.0"):
    """Flatten an annotation dict into a one-row-per-polygon CSV with scaled coordinates.

    The annotations are joined to their category (on ``category_id``) and to their
    image record (on ``image_id``), the ``segmentation`` list of every annotation is
    exploded so that each entry gets its own row, and a ``scaled_segmentation``
    column is added in which even-indexed values are divided by the image ``width``
    and odd-indexed values by the image ``height``.

    Side effects:
        * writes ``{path}/category_info.csv`` (category table plus ``labelId``)
          unless that file already exists;
        * writes the flattened table to ``os.path.join(path, name)``.

    Args:
        raw_data: mapping with ``"annotations"``, ``"categories"`` and ``"images"``
            lists of dicts (COCO-style layout). Annotations and categories must
            both carry an ``id`` key.
        name: file name of the CSV to write inside ``path``.
        path: existing directory that receives the output files.

    Returns:
        None.
    """
    annotation_keys = ['category_id', 'id', 'image_id', 'iscrowd', 'segmentation', 'area']
    category_keys = ['id', 'name', 'name_readable']

    valid_df = pd.DataFrame(
        [{key: item[key] for key in item if key in annotation_keys} for item in raw_data["annotations"]]
    )
    category_df = pd.DataFrame(
        [{key: item[key] for key in item if key in category_keys} for item in raw_data["categories"]]
    )
    image_df = pd.DataFrame(raw_data["images"])

    # Remove duplicate category rows BEFORE renumbering the index; resetting first
    # would leave holes in labelId wherever a duplicate was dropped.
    category_df = category_df.sort_values(by="name").drop_duplicates().reset_index(drop=True)
    category_df["labelId"] = category_df.index

    # NOTE(review): the file is only written when absent, so it reflects whichever
    # split was processed first, while labelId below always comes from this call's
    # own category list - confirm that every split ships the same categories.
    if not os.path.exists(f"{path}/category_info.csv"):
        category_df.to_csv(f"{path}/category_info.csv", index=False)

    # Both frames have an "id" column, so the merge suffixes them: id_x is the
    # annotation id (kept), id_y duplicates category_id (dropped).
    temp = pd.merge(
        left=valid_df, right=category_df, how="left", left_on="category_id", right_on="id"
    ).drop(columns="id_y")
    # The image "id" duplicates image_id after the join, so it is dropped as well.
    final_df = pd.merge(
        left=temp, right=image_df, how="left", left_on="image_id", right_on="id"
    ).drop(columns="id")
    # Rename by label rather than by position so the result does not depend on the
    # key order of the annotation dicts.
    final_df = final_df.rename(columns={"id_x": "id"})

    # One row per entry of each annotation's segmentation list.
    final_df = final_df.explode('segmentation').reset_index(drop=True)

    # Presumably the values alternate x, y: even positions are scaled by width,
    # odd positions by height. Results are converted to built-in floats because
    # NumPy scalars inside a list are written as "np.float64(...)" on NumPy >= 2,
    # which ast.literal_eval cannot parse when the CSV is read back.
    final_df["scaled_segmentation"] = [
        [
            float(value / width) if position % 2 == 0 else float(value / height)
            for position, value in enumerate(polygon)
        ]
        for polygon, width, height in zip(
            final_df["segmentation"],
            final_df["width"].to_numpy(),
            final_df["height"].to_numpy(),
        )
    ]
    final_df.to_csv(os.path.join(path, name), index=False)

    

In [96]:
# Parse the annotation JSON of each split before any CSV is produced.
with open("images_2.0/train_annotations.json", mode="r") as file:
    train_raw = json.loads(file.read())

with open("images_2.0/validation_annotations.json", mode="r") as file:
    valid_raw = json.loads(file.read())

# Flatten each split into its CSV: training split first, then validation.
reformat_labels(raw_data=train_raw, name="train_annotations.csv")
reformat_labels(raw_data=valid_raw, name="validation_annotations.csv")

In [115]:
def write_yaml(category_df):
    """Build the text of a dataset YAML file that lists every class.

    The text starts with fixed ``path``/``train``/``val`` placeholder lines and a
    blank line, followed by a ``names:`` section holding one two-space-indented
    ``labelId: name`` entry per row of ``category_df``, in row order.

    Args:
        category_df: DataFrame providing ``labelId`` and ``name`` columns.

    Returns:
        The YAML text as a single string.
    """
    header_lines = ("path:xxxxxxxx", "train:xxxxxxx", "val:xxxxxxxxx", "", "names:")
    class_lines = [
        f"  {row['labelId']}: {row['name']}"
        for _, row in category_df.iterrows()
    ]
    return "\n".join(header_lines) + "\n" + "\n".join(class_lines)



In [114]:
# Render the stored category table into the dataset YAML file.
category_df = pd.read_csv("images_2.0/category_info.csv")
with open("images_2.0/data_info.yaml", mode="w") as f:
    yaml_text = write_yaml(category_df)
    f.write(yaml_text)

In [153]:
def create_YOLO_labels(total_df, path):
    """Write one label text file per image into ``path``.

    Rows of ``total_df`` are grouped by ``file_name``. For every image a file with
    the same base name and a ``.txt`` extension is written, containing one line per
    row: the row's ``labelId`` followed by the space-separated values of its
    ``scaled_segmentation``. Lines are joined with newlines, with no trailing
    newline. The name of each file is printed as it is written.

    Args:
        total_df: DataFrame with ``file_name``, ``labelId`` and
            ``scaled_segmentation`` (an iterable of numbers per row) columns.
        path: output directory; created, including missing parents, if needed.

    Returns:
        None.
    """
    # exist_ok avoids the isdir/mkdir race and also creates missing parent folders.
    os.makedirs(path, exist_ok=True)

    # A single grouping pass replaces filtering the whole frame once per image.
    # sort=False keeps images in order of first appearance. Rows whose file_name
    # is missing are skipped by groupby.
    for img, rows in total_df.groupby("file_name", sort=False):
        lines = [
            f"{label_id} {' '.join(map(str, polygon))}"
            for label_id, polygon in zip(rows["labelId"], rows["scaled_segmentation"])
        ]
        # Swap only the extension; a plain str.replace("jpg", "txt") would also
        # rewrite "jpg" inside the base name and ignore other image extensions.
        filename = os.path.join(path, os.path.splitext(img)[0] + ".txt")
        print("Writing to", filename)
        with open(filename, "w") as f:
            f.write("\n".join(lines))

In [154]:
# The CSVs hold each polygon as the text of a Python list, so that column is
# parsed back into a real list while loading.
segmentation_converters = {'scaled_segmentation': ast.literal_eval}

train_df = pd.read_csv(
    "images_2.0/train_annotations.csv",
    converters=segmentation_converters,
)
validation_df = pd.read_csv(
    "images_2.0/validation_annotations.csv",
    converters=segmentation_converters,
)

# Emit the per-image label files for each split.
create_YOLO_labels(total_df=train_df, path="images_2.0/train/labels")
create_YOLO_labels(total_df=validation_df, path="images_2.0/validation/labels")

Writing to images_2.0/train/labels/131094.txt
Writing to images_2.0/train/labels/131097.txt
Writing to images_2.0/train/labels/131098.txt
Writing to images_2.0/train/labels/131100.txt
Writing to images_2.0/train/labels/131101.txt
Writing to images_2.0/train/labels/131110.txt
Writing to images_2.0/train/labels/131148.txt
Writing to images_2.0/train/labels/131152.txt
Writing to images_2.0/train/labels/131154.txt
Writing to images_2.0/train/labels/131160.txt
Writing to images_2.0/train/labels/131177.txt
Writing to images_2.0/train/labels/131181.txt
Writing to images_2.0/train/labels/131201.txt
Writing to images_2.0/train/labels/131207.txt
Writing to images_2.0/train/labels/131212.txt
Writing to images_2.0/train/labels/131216.txt
Writing to images_2.0/train/labels/131226.txt
Writing to images_2.0/train/labels/131229.txt
Writing to images_2.0/train/labels/131257.txt
Writing to images_2.0/train/labels/131268.txt
Writing to images_2.0/train/labels/131273.txt
Writing to images_2.0/train/labels

In [8]:
import random

# Fixed seed for the subset draw so that re-running this cell selects the same
# files; an unseeded draw picks a different 10% on every run, and each of those
# runs adds more files to the tinytrain folder.
TINY_TRAIN_SEED = 42

# Keep only the .txt label files and sort them: os.listdir returns entries in
# arbitrary order, which would make even a seeded sample irreproducible, and the
# later stem extraction assumes a ".txt" suffix.
train_files = sorted(
    entry for entry in os.listdir("images_2.0/train/labels") if entry.endswith(".txt")
)
# A dedicated Random instance leaves the global random state untouched.
tiny_train_files = random.Random(TINY_TRAIN_SEED).sample(
    train_files, k=round(0.1 * len(train_files))
)

In [14]:
base = "images_2.0"

# Create tinytrain/images and tinytrain/labels (and the tinytrain parent).
# exist_ok=True keeps the cell re-runnable: a guard that checks a differently
# spelled folder ("tiny_train") than the one it creates ("tinytrain") raises
# FileExistsError on the second run.
for subdir in ("images", "labels"):
    os.makedirs(os.path.join(base, "tinytrain", subdir), exist_ok=True)

# Strip the extension properly instead of assuming it is exactly four characters.
tiny_train_names = [os.path.splitext(label_file)[0] for label_file in tiny_train_files]

# Copy each sampled image together with its label file; copy2 also preserves
# file metadata such as timestamps.
for name in tiny_train_names:
    shutil.copy2(
        os.path.join(base, "train/images", f"{name}.jpg"),
        os.path.join(base, "tinytrain/images", f"{name}.jpg")
    )
    shutil.copy2(
        os.path.join(base, "train/labels", f"{name}.txt"),
        os.path.join(base, "tinytrain/labels", f"{name}.txt")
    )
