In [1]:
import json
import yaml
import os
import shutil
from tqdm.auto import tqdm
import pandas as pd
from glob import glob
import bkh_pytorch_utils as bpu

In [2]:
# Detection class names. They are written to data.yaml as `names`, and their
# count is written as `nc`.
CLASSNAMES = [
    "marker",
]

# Root of the annotated source data: images are globbed from `images/*.jpg`
# and the matching annotation JSON files are looked up under `labels/`.
ANNOTATION_DATA_PATH = "../data/chexpert_data"

# Root of the generated YOLO dataset. NOTE: this value ends with "/", so paths
# built as f"{YOLO_DATA_PATH}/..." contain a doubled slash.
YOLO_DATA_PATH = "../data/yolo_data_finetuning/"

In [17]:
# Clear out files left over from a previous run. The pattern only reaches
# entries exactly two directory levels below the YOLO root whose names
# contain a ".", so the directory tree itself is left in place.
leftover_paths = glob(f"{YOLO_DATA_PATH}/*/*/*.*")
for file in leftover_paths:
    os.remove(file)

In [3]:
# Collect every *.jpg file in the annotation images folder, sorted so that
# the processing order is the same on every run.
all_imgages = sorted(glob(f"{ANNOTATION_DATA_PATH}/images/*.jpg"))

In [4]:
# One row per image; the single "ImagePath" column holds the globbed path.
all_imgages_df = pd.DataFrame(all_imgages, columns=["ImagePath"])
all_imgages_df.head()

Unnamed: 0,ImagePath
0,/research/projects/m253231_Bardia/Current/05__...
1,/research/projects/m253231_Bardia/Current/05__...
2,/research/projects/m253231_Bardia/Current/05__...
3,/research/projects/m253231_Bardia/Current/05__...
4,/research/projects/m253231_Bardia/Current/05__...


In [7]:
# Number of copies written for every image (and its label file, when one
# exists). Used only for fine-tuning, where each sample is upsampled 50x.
UPSAMPLE_FACTOR = 50


def _rectangles_to_yolo_lines(annotation, class_idx=0):
    """Convert the rectangle shapes of one annotation dict to YOLO label lines.

    Args:
        annotation: Parsed annotation JSON. Must provide ``imageHeight``,
            ``imageWidth`` and ``shapes``; every shape needs ``shape_type``
            and, for rectangles, two corner points in ``points``.
        class_idx: Class id written at the start of every line. Defaults to
            0 because there is only one class.

    Returns:
        A list of strings ``"<class> <x_center> <y_center> <width> <height>"``
        in which the four box values are divided by the image width/height.
        Shapes whose ``shape_type`` is not ``"rectangle"`` are skipped.
    """
    image_height = annotation["imageHeight"]
    image_width = annotation["imageWidth"]

    label_lines = []
    for shape in annotation["shapes"]:
        if shape["shape_type"] != "rectangle":
            continue

        x_a, y_a = shape["points"][0][0], shape["points"][0][1]
        x_b, y_b = shape["points"][1][0], shape["points"][1][1]

        # The two corners may be stored in either order: the centre is
        # symmetric in them and abs() keeps the extents positive.
        x_center = ((x_a + x_b) / 2) / image_width
        y_center = ((y_a + y_b) / 2) / image_height
        width = abs(x_b - x_a) / image_width
        height = abs(y_b - y_a) / image_height

        label_lines.append(
            " ".join(str(value) for value in (class_idx, x_center, y_center, width, height))
        )
    return label_lines


# Every image goes to the training split.
split = "train"
images_dir = f"{YOLO_DATA_PATH}/{split}/images"
labels_dir = f"{YOLO_DATA_PATH}/{split}/labels"

# Create the target folders up front so a fresh checkout (or a fresh
# Restart & Run All) does not fail on the first write.
os.makedirs(images_dir, exist_ok=True)
os.makedirs(labels_dir, exist_ok=True)

for image_path in tqdm(all_imgages_df["ImagePath"]):
    # File name without directory and extension, independent of the path
    # separator and of the extension length.
    filename = os.path.splitext(os.path.basename(image_path))[0]

    # Images without an annotation file are still copied, just without labels.
    label_text = None
    annotation_label_path = f"{ANNOTATION_DATA_PATH}/labels/{filename}.json"
    if os.path.exists(annotation_label_path):
        # Context manager so the JSON file handle is closed deterministically.
        with open(annotation_label_path) as annotation_file:
            annotation_label = json.load(annotation_file)
        # Join once; the same text is written to every upsampled copy.
        label_text = "\n".join(_rectangles_to_yolo_lines(annotation_label))

    # Writing the labels and copying the image with upsampling (only for finetuning).
    for k in range(UPSAMPLE_FACTOR):
        if label_text is not None:
            with open(f"{labels_dir}/{filename}_{k}.txt", "w") as f:
                f.write(label_text)

        # NOTE(review): shutil.copy does not re-encode, so the copies keep the
        # source bytes under a ".png" name. Confirm that the training loader
        # detects the format from the file content rather than the extension.
        shutil.copy(image_path, f"{images_dir}/{filename}_{k}.png")


  0%|          | 0/20 [00:00<?, ?it/s]

In [22]:
# Describe the dataset for training and save the description as data.yaml.
# Both the train and val entries point at the same train folder.
data = dict(
    train=f"{YOLO_DATA_PATH}/train",
    val=f"{YOLO_DATA_PATH}/train",
    nc=len(CLASSNAMES),
    names=CLASSNAMES,
)

# Render in block style first, then write the text out in one go.
yaml_text = yaml.dump(data, default_flow_style=False)
with open(f'{YOLO_DATA_PATH}/data.yaml', 'w') as f:
    f.write(yaml_text)