In [None]:
# Function imports
from utils.preprocessing import convert_to_yolo_format, get_image_size
import os
import shutil
import tqdm 

# Core configuration
# Input: folder holding the raw <id>.json / <id>.jpg pairs
root_data_folder = "data/train_data"
# Output: folder name used for the YOLO-formatted dataset
output_folder = "data/data_yolo"

# Text files listing the ids that belong to each split (train, validation)
train_ids_path, val_ids_path = "data/train.txt", "data/validation.txt"

In [None]:
# Root of the YOLO-formatted dataset.
# NOTE(review): output_folder is appended to root_data_folder, so the YOLO tree
# is created inside the raw-data folder - confirm this nesting is intended.
main_folder_path = os.path.join(root_data_folder, output_folder)

# <main>/labels and <main>/images
label_folder_path, image_folder_path = (
    os.path.join(main_folder_path, kind) for kind in ("labels", "images")
)

# Each of them gets a "train" and a "val" subfolder
labeltrain_folder_path, labelval_folder_path = (
    os.path.join(label_folder_path, split) for split in ("train", "val")
)
imagetrain_folder_path, imageval_folder_path = (
    os.path.join(image_folder_path, split) for split in ("train", "val")
)

# Every folder of the layout, parents listed before their children
folders_to_create = [
    main_folder_path,
    label_folder_path,
    labeltrain_folder_path,
    labelval_folder_path,
    image_folder_path,
    imagetrain_folder_path,
    imageval_folder_path,
]

# exist_ok=True makes this cell safe to re-run: existing folders are left alone
for folder in folders_to_create:
    os.makedirs(folder, exist_ok=True)

In [None]:
# Load the train and validation lists of ids

def _read_id_list(ids_path):
    """Return the ids stored in the text file ``ids_path``, one id per line.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped, so an empty or trailing blank line does not turn into an
    empty-string id.
    """
    with open(ids_path, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        return [file_id for file_id in stripped_lines if file_id]


# Train
train_ids = _read_id_list(train_ids_path)

# Val
val_ids = _read_id_list(val_ids_path)


In [None]:
# Function for reorganizing the data into YOLO format with YOLO-applicable labels

def data_yolo_reoorganize(directories, label_folder_path, image_folder_path, root_data_folder, option_image=True):
    """Write YOLO label files (and optionally copy images) for a list of ids.

    For each id in ``directories`` the function expects
    ``<root_data_folder>/<id>.json`` and ``<root_data_folder>/<id>.jpg``.
    The image size and the JSON path are handed to ``convert_to_yolo_format``
    and the lines it returns are written to ``<label_folder_path>/<id>.txt``.
    Ids for which either file is missing are skipped and reported in a single
    summary message once the loop has finished.

    Args:
        directories: Iterable of ids (file names without extension).
        label_folder_path: Folder receiving the ``<id>.txt`` label files.
        image_folder_path: Folder receiving the copied ``<id>.jpg`` images.
        root_data_folder: Folder containing the source ``.json``/``.jpg`` files.
        option_image: When truthy, also copy each image to ``image_folder_path``.

    Returns:
        None.
    """
    # Make the function usable on its own: create the targets if needed.
    os.makedirs(label_folder_path, exist_ok=True)
    if option_image:
        os.makedirs(image_folder_path, exist_ok=True)

    skipped_ids = []

    # Iterating through the tqdm wrapper advances the bar automatically and
    # closes it even if an exception interrupts the loop.
    for file_id in tqdm.tqdm(directories, desc='Processing Folders'):

        # Get json and image paths
        json_path = os.path.join(root_data_folder, f"{file_id}.json")
        image_path = os.path.join(root_data_folder, f"{file_id}.jpg")

        # os.path.join never returns None, so check for the files themselves.
        if not (os.path.isfile(json_path) and os.path.isfile(image_path)):
            skipped_ids.append(file_id)
            continue

        im_width, im_height = get_image_size(image_path)
        yolo_lines = convert_to_yolo_format(json_path, im_width, im_height)

        # writelines adds no separators: assumes each returned line already
        # ends with a newline - TODO confirm against convert_to_yolo_format.
        label_file_path = os.path.join(label_folder_path, f"{file_id}.txt")
        with open(label_file_path, "w") as f:
            f.writelines(yolo_lines)

        if option_image:
            # Copy the image to the image folder (always stored as <id>.jpg)
            image_output_file_path = os.path.join(image_folder_path, f"{file_id}.jpg")
            shutil.copy(image_path, image_output_file_path)

    # Report once, after the bar is finished, so the output stays readable.
    if skipped_ids:
        print(f"Skipped {len(skipped_ids)} id(s) with a missing .json or .jpg file: {skipped_ids}")

In [None]:
# Run the data reorganization and relabelling: train split first, then validation
data_yolo_reoorganize(
    directories=train_ids,
    label_folder_path=labeltrain_folder_path,
    image_folder_path=imagetrain_folder_path,
    root_data_folder=root_data_folder,
)
data_yolo_reoorganize(
    directories=val_ids,
    label_folder_path=labelval_folder_path,
    image_folder_path=imageval_folder_path,
    root_data_folder=root_data_folder,
)