In [56]:
import os
from config.class_labels import class_labels
from config.image_dimensions import image_dimensions

In [57]:
# Directories used by this notebook, given relative to the current working directory.
# Raw label files are read from path_to_label_directory; the converted files are
# written, under the same file names, to path_to_processed_label_directory.
path_to_label_directory = "../data/labels/"
path_to_processed_label_directory = "../data/processed-labels/"

In [58]:
# Sanity check: print whether each configured directory is present on disk
# (input directory first, then output directory).
for directory in (path_to_label_directory, path_to_processed_label_directory):
    print(os.path.exists(directory))

True
True


In [59]:
def process_string_partition(partition: str, precision: int = 5) -> str:
    """Convert one raw label line into a normalized ``class x y w h`` line.

    The line is treated as a whitespace-separated record: field 0 is a class
    name that is looked up in ``class_labels``, and fields 4-7 (zero-based)
    are read as the bounding-box corners ``x_min y_min x_max y_max``. The box
    is re-expressed as center and size, each divided by the matching entry
    of ``image_dimensions`` (``"width"`` for x values, ``"height"`` for y
    values).

    Args:
        partition: A single line of a raw label file, without its newline.
        precision: Number of decimal places used for the four output values.

    Returns:
        ``"<class_id> <x_center> <y_center> <width> <height>\\n"``, or an
        empty string when the line is blank or its class name is not a key
        of ``class_labels``.

    Raises:
        IndexError: If the line has a known class but fewer than eight fields.
        ValueError: If one of the four coordinate fields is not a number.
    """
    # split() without an argument splits on any run of whitespace, so repeated
    # spaces, tabs, or leading/trailing blanks cannot shift the field indices
    # (split(" ") would produce empty fields in those cases).
    fields = partition.split()

    # Blank line or unknown class: nothing to emit.
    if not fields or fields[0] not in class_labels:
        return ""

    # Convert class label from string -> its mapped value
    class_label = class_labels[fields[0]]

    # Explicit indexing (rather than slicing) keeps the IndexError for lines
    # that are too short.
    x_min, y_min, x_max, y_max = (float(fields[index]) for index in range(4, 8))

    image_width = image_dimensions["width"]
    image_height = image_dimensions["height"]

    # Center and size of the bounding box as ratios of the image dimensions
    x_center = ((x_min + x_max) / 2) / image_width
    y_center = ((y_min + y_max) / 2) / image_height
    bounding_box_width = (x_max - x_min) / image_width
    bounding_box_height = (y_max - y_min) / image_height

    return (
        f"{class_label} "
        f"{x_center:.{precision}f} {y_center:.{precision}f} "
        f"{bounding_box_width:.{precision}f} {bounding_box_height:.{precision}f}\n"
    )

In [60]:
# Maximum number of label files to convert; None converts every file.
# Set it to a small number (e.g. 5) for a quick trial run.
max_files = None

# Create the output directory if it is missing, so the writes below cannot
# fail with FileNotFoundError.
os.makedirs(path_to_processed_label_directory, exist_ok=True)

# Sort and load file names from the label directory (sorted for a reproducible order)
raw_files = sorted(os.listdir(path_to_label_directory))

# Only .txt files are treated as label files
label_files = [name for name in raw_files if name.endswith(".txt")]

# Loop over the (optionally capped) label files
for raw_file in label_files[:max_files]:

    # Create path to file
    filepath = os.path.join(path_to_label_directory, raw_file)

    # Converted lines are collected in a list and joined once at the end,
    # which avoids repeated string concatenation.
    processed_lines = []

    # Read the raw file line by line
    with open(filepath, "r") as file:
        for line_number, raw_line in enumerate(file, start=1):
            string = raw_line.rstrip("\n")

            # Skip empty lines
            if not string.strip():
                continue

            # Attempt to convert the line; a malformed line is reported with
            # its file name and line number and then skipped, so one bad
            # record does not abort the whole run.
            try:
                processed_lines.append(process_string_partition(string))
            except Exception as e:
                print(f"{raw_file}, line {line_number}: {type(e).__name__}: {e}")

    # Define path for new processed file (same file name, output directory)
    processed_filepath = os.path.join(path_to_processed_label_directory, raw_file)

    # Write the converted lines to the new file
    with open(processed_filepath, "w") as processed_file:
        processed_file.write("".join(processed_lines))