In [28]:
import os
import pandas as pd
import ast

In [29]:
# Project root, relative to the notebook's working directory
PATH = "../"

# Locations of the image tree, the label tree that mirrors it,
# and the CSV index describing the dataset
images_dir = f"{PATH}Dataset/images"
labels_dir = f"{PATH}Dataset/labels"
csv_path = f"{PATH}Data/CSVs/dataset_caltech_yolo.csv"

In [30]:
# Read the dataset index CSV into a DataFrame.
# Later cells read its 'path', 'bounding_boxes' and 'class' columns.
df = pd.read_csv(csv_path)

In [13]:
def _parse_bounding_boxes(raw):
    """Convert one ``bounding_boxes`` CSV cell into a list of boxes.

    Args:
        raw: The cell value. Normally a string holding a Python-literal list
            of lists; a missing cell arrives from pandas as NaN (a float).

    Returns:
        The parsed list of boxes, or an empty list for a missing cell.

    Raises:
        ValueError, SyntaxError: If the cell is not a valid Python literal.
    """
    # A missing cell is NaN; ast.literal_eval would raise on it, so treat it
    # as "no boxes" instead of aborting the whole run.
    if isinstance(raw, float) and pd.isna(raw):
        return []
    return ast.literal_eval(raw)


def write_yolo_labels(df, labels_dir):
    """Create the label folder tree and write one .txt file per annotated image.

    For every row, the sub-folder of ``row['path']`` is mirrored under
    ``labels_dir`` (also for rows without boxes). Rows whose bounding-box list
    is non-empty get a ``<image name>.txt`` file in that folder, with one line
    per box: the box items joined by single spaces.

    Args:
        df: DataFrame with 'path' and 'bounding_boxes' columns.
        labels_dir: Root folder under which label files are written.

    Returns:
        Number of label files written.
    """
    created_dirs = set()  # folders already ensured, to skip repeated makedirs calls
    files_written = 0

    # Only two columns are needed, so zip them instead of building a Series
    # per row with iterrows().
    for image_rel_path, raw_boxes in zip(df['path'], df['bounding_boxes']):
        # Label folder corresponding to the image's folder
        label_path = os.path.join(labels_dir, os.path.dirname(image_rel_path))
        if label_path not in created_dirs:
            os.makedirs(label_path, exist_ok=True)
            created_dirs.add(label_path)

        bounding_boxes = _parse_bounding_boxes(raw_boxes)
        if not bounding_boxes:
            # No boxes: the folder exists, but no label file is written
            continue

        # Label file shares the image's base name, with a .txt extension
        txt_filename = os.path.splitext(os.path.basename(image_rel_path))[0] + '.txt'
        txt_path = os.path.join(label_path, txt_filename)

        # One bounding box per line
        with open(txt_path, 'w', encoding='utf-8') as txt_file:
            txt_file.writelines(
                " ".join(str(item) for item in bbox) + '\n'
                for bbox in bounding_boxes
            )
        files_written += 1

    return files_written


# Assigned (not left as a bare expression) so the cell produces no output
label_files_written = write_yolo_labels(df, labels_dir)


# Test: verify the generated label files (test code written with ChatGPT)

In [42]:
def _expected_label_lines(raw_boxes):
    """Return the lines a label file should contain for one ``bounding_boxes`` cell.

    The cell is parsed with ``ast.literal_eval`` and each box is rendered as
    its items joined by single spaces. Parsing (instead of stripping brackets
    and commas from the raw text) keeps the check independent of how the CSV
    spaces or formats the list.

    Args:
        raw_boxes: The cell value; a string literal list of lists, or NaN for
            a missing cell.

    Returns:
        List of expected lines, without trailing newlines.
    """
    if isinstance(raw_boxes, float) and pd.isna(raw_boxes):
        return []
    return [" ".join(str(item) for item in bbox) for bbox in ast.literal_eval(raw_boxes)]


def verify_labels(df, images_dir, labels_dir):
    """Check every CSV row against the files on disk.

    Per row, the checks are: the image exists under ``images_dir``; the label
    folder exists under ``labels_dir``; and, unless the row's class is
    'empty', the label .txt file exists and its lines equal the expected ones.
    Each failed check prints a message. Every row is counted exactly once:
    successful only if none of its checks failed.

    Args:
        df: DataFrame with 'path', 'class' and 'bounding_boxes' columns.
        images_dir: Root folder of the images.
        labels_dir: Root folder of the label files.

    Returns:
        Tuple ``(success_count, fail_count)``; their sum is ``len(df)``.
    """
    success_count = 0
    fail_count = 0

    for image_rel_path, image_class, raw_boxes in zip(
        df['path'], df['class'], df['bounding_boxes']
    ):
        image_path = os.path.join(images_dir, image_rel_path)
        label_path = os.path.join(labels_dir, os.path.dirname(image_rel_path))
        txt_filename = os.path.splitext(os.path.basename(image_rel_path))[0] + '.txt'
        txt_path = os.path.join(label_path, txt_filename)

        row_ok = True

        # Test: image exists in the images directory
        if not os.path.exists(image_path):
            print(f"Image not found: {image_path}")
            row_ok = False

        # Test: label directory exists
        if not os.path.exists(label_path):
            print(f"Label directory not found: {label_path}")
            row_ok = False
        # Rows of class 'empty' need no label file
        elif image_class != 'empty':
            # Test: TXT file exists and holds the expected boxes
            if not os.path.exists(txt_path):
                print(f"TXT file not found: {txt_path}")
                row_ok = False
            else:
                with open(txt_path, 'r', encoding='utf-8') as txt_file:
                    actual_boxes = [line.strip() for line in txt_file]
                if _expected_label_lines(raw_boxes) != actual_boxes:
                    print(f"Bounding boxes mismatch for: {txt_path}")
                    row_ok = False

        # One outcome per row, so a row can never be both a success and a failure
        if row_ok:
            success_count += 1
        else:
            fail_count += 1

    return success_count, fail_count


success_count, fail_count = verify_labels(df, images_dir, labels_dir)

# Print test results
print(f"Tests completed. Total: {success_count + fail_count}, Successful: {success_count}, Failed: {fail_count}")

Tests completed. Total: 25292, Successful: 25292, Failed: 0
