In [16]:
import os
import shutil
import random
import pandas as pd
from sklearn.model_selection import train_test_split

# Input: the CSV holding the annotations.
annotations_file = '../../dataset/bounding/annotations.csv'

# Output: one folder per split (train / test / validation).
train_dir = '../../dataset/bounding/split/train/'
test_dir = '../../dataset/bounding/split/test/'
val_dir = '../../dataset/bounding/split/validation/'

# Make sure every split folder exists; exist_ok keeps re-running this cell harmless.
for split_dir in (train_dir, test_dir, val_dir):
    os.makedirs(split_dir, exist_ok=True)

In [17]:
# Load every annotation row from the CSV.
df = pd.read_csv(annotations_file)

# Split on distinct filenames rather than on rows, so that all rows sharing a
# filename end up in the same subset.
image_files = df['filename'].unique()

# First cut: 20% of the filenames are held out of the training set.
train_images, temp_images = train_test_split(
    image_files,
    test_size=0.2,
    random_state=42,
)

# Second cut: the held-out filenames are divided in half into validation and test.
val_images, test_images = train_test_split(
    temp_images,
    test_size=0.5,
    random_state=42,
)

In [18]:
# Function to save the labels in YOLO format on their respective directories
def save_yolo_format(df, image_list, dest_dir, class_id=0):
    """Write one YOLO-format label file per image into ``dest_dir``.

    For every entry of ``image_list`` a text file named after the image (its
    extension replaced by ``.txt``) is created or overwritten. Each annotation
    row of that image becomes one line
    ``<class_id> <x_center> <y_center> <width> <height>``, where the box
    centre and size are divided by the row's ``width`` / ``height`` columns
    (presumably the image size in pixels - confirm against the CSV) and
    printed with six decimals.

    Args:
        df: DataFrame with the columns ``filename``, ``xmin``, ``ymin``,
            ``xmax``, ``ymax``, ``width`` and ``height``.
        image_list: Iterable of filenames to export. A filename without any
            row in ``df`` still gets an (empty) label file.
        dest_dir: Directory receiving the ``.txt`` files; created if missing.
        class_id: Class index written at the start of every line. Defaults to
            0, the value that was previously hard-coded.

    Raises:
        KeyError: If ``df`` lacks one of the required columns.
    """
    # Materialise once: the list is walked twice (isin + the write loop), which
    # would silently exhaust a generator.
    image_list = list(image_list)

    # Writing into a folder that does not exist yet used to fail on open();
    # the guard keeps an empty dest_dir (current directory) working.
    if dest_dir:
        os.makedirs(dest_dir, exist_ok=True)

    # Group the relevant rows a single time instead of scanning the whole
    # frame once per image. Row order inside each group is the frame's order.
    box_columns = ['xmin', 'ymin', 'xmax', 'ymax', 'width', 'height']
    selected = df[df['filename'].isin(image_list)]
    boxes_by_image = {
        name: list(group[box_columns].itertuples(index=False, name=None))
        for name, group in selected.groupby('filename', sort=False)
    }

    for image in image_list:
        txt_filename = os.path.join(dest_dir, os.path.splitext(image)[0] + '.txt')

        with open(txt_filename, 'w') as f:
            # Images without annotations fall back to (), leaving an empty file.
            for xmin, ymin, xmax, ymax, img_w, img_h in boxes_by_image.get(image, ()):
                x_center = (xmin + xmax) / 2 / img_w
                y_center = (ymin + ymax) / 2 / img_h
                width = (xmax - xmin) / img_w
                height = (ymax - ymin) / img_h

                f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

In [21]:
# Export the label files split by split (train, then test, then validation).
for split_images, split_dir in (
    (train_images, train_dir),
    (test_images, test_dir),
    (val_images, val_dir),
):
    save_yolo_format(df, split_images, split_dir)

# Report how many images ended up in each split.
n_train = len(train_images)
n_test = len(test_images)
n_val = len(val_images)
print(f"Number of train images: {n_train}")
print(f"Number of test images: {n_test}")
print(f"Number of validation images: {n_val}")

Number of train images: 2010
Number of test images: 252
Number of validation images: 251
