In [1]:
import os
import random
import shutil
import yaml

In [3]:
# Source locations: annotated images and their label .txt files
images_dir, labels_dir = "annotated_images/", "labels/"

# Destination layout: dataset/{images,labels}/{train,val}/
train_images_dir, val_images_dir = "dataset/images/train/", "dataset/images/val/"
train_labels_dir, val_labels_dir = "dataset/labels/train/", "dataset/labels/val/"

In [4]:
# Ensure all four split directories exist; exist_ok=True makes re-running this cell harmless
for split_dir in (train_images_dir, val_images_dir, train_labels_dir, val_labels_dir):
    os.makedirs(split_dir, exist_ok=True)

In [5]:
# Collect the .jpg files to split.
# os.listdir returns entries in arbitrary order, so sort first; combined with a
# fixed-seed RNG this makes the shuffle (and therefore the train/val split)
# identical on every run. Without it, re-running the notebook could copy the
# same image into both the train and val directories.
SPLIT_SEED = 42
all_images = sorted(f for f in os.listdir(images_dir) if f.endswith('.jpg'))
# A local Random instance keeps the global `random` state untouched
random.Random(SPLIT_SEED).shuffle(all_images)

In [6]:
# 80/20 partition of the shuffled list: the first `train_split` entries are used
# for training, the remainder for validation
train_split = int(0.8 * len(all_images))
train_images, val_images = all_images[:train_split], all_images[train_split:]

In [7]:
def copy_files(images, src_images_dir, src_labels_dir, dest_images_dir, dest_labels_dir):
    """Copy images and their matching label files into destination directories.

    For each image name, the image is copied from ``src_images_dir`` to
    ``dest_images_dir``. The label file is looked up in ``src_labels_dir`` under
    the image's base name with a ``.txt`` extension and copied to
    ``dest_labels_dir`` only if it exists; an image without a label file is
    still copied.

    Args:
        images: Iterable of image file names (not full paths).
        src_images_dir: Directory the images are read from.
        src_labels_dir: Directory the label ``.txt`` files are read from.
        dest_images_dir: Directory the images are copied into.
        dest_labels_dir: Directory the label files are copied into.

    Returns:
        None.

    Raises:
        FileNotFoundError: If a listed image does not exist in ``src_images_dir``.
    """
    # Create the destinations here so the function does not depend on another
    # cell having been run first
    os.makedirs(dest_images_dir, exist_ok=True)
    os.makedirs(dest_labels_dir, exist_ok=True)

    for image_name in images:
        # Copy image file
        shutil.copy(
            os.path.join(src_images_dir, image_name),
            os.path.join(dest_images_dir, image_name),
        )

        # Copy label file (same base name, .txt extension) when one exists
        label_name = os.path.splitext(image_name)[0] + ".txt"
        src_label_path = os.path.join(src_labels_dir, label_name)
        if os.path.exists(src_label_path):
            shutil.copy(src_label_path, os.path.join(dest_labels_dir, label_name))

In [8]:
# Populate the training split first, then the validation split, from the same sources
for split_images, split_images_dir, split_labels_dir in (
    (train_images, train_images_dir, train_labels_dir),
    (val_images, val_images_dir, val_labels_dir),
):
    copy_files(split_images, images_dir, labels_dir, split_images_dir, split_labels_dir)

In [9]:
# Configuration that will be written to data.yaml
data_config = dict(
    train=os.path.abspath(train_images_dir),  # absolute path to the training images
    val=os.path.abspath(val_images_dir),      # absolute path to the validation images
    nc=1,                                     # number of classes
    names=['face'],                           # class name
)

In [10]:
# Serialize the configuration to data.yaml in the current working directory
with open("data.yaml", "w") as config_file:
    yaml.dump(data_config, stream=config_file)

print("Dataset split and data.yaml file created successfully.")

Dataset split and data.yaml file created successfully.
