In [26]:
import os
import random
import shutil
import json
from ultralytics import YOLO

In [27]:
def copy_files(files, source_dir, labels_dir, output_dir, lst_names_file):
    """Copy images and their label files into ``output_dir`` and list the image names.

    For every name in ``files`` the image is copied from ``source_dir`` to
    ``output_dir/images``. If ``labels_dir/<base name>.txt`` exists it is copied
    to ``output_dir/labels``. Destination files that already exist are never
    overwritten. Each image name is written to ``lst_names_file``, one per line;
    that file is truncated on every call.

    Args:
        files: Iterable of image file names (not paths) located in ``source_dir``.
        source_dir: Directory containing the images.
        labels_dir: Directory containing the ``<base name>.txt`` label files.
        output_dir: Destination root; ``images`` and ``labels`` sub-directories
            are created inside it when missing.
        lst_names_file: Path of the text file that receives the image names.

    Returns:
        list[str]: The image names that were processed, in iteration order.
        (The original built this list but discarded it.)

    Raises:
        OSError: If an image listed in ``files`` cannot be read or a destination
            cannot be written (for example ``FileNotFoundError`` from ``shutil.copy``).
    """
    images_out_dir = os.path.join(output_dir, 'images')
    labels_out_dir = os.path.join(output_dir, 'labels')

    # Make the function usable on its own: previously it crashed unless the
    # caller had already created the destination tree.
    os.makedirs(images_out_dir, exist_ok=True)
    os.makedirs(labels_out_dir, exist_ok=True)
    list_parent = os.path.dirname(lst_names_file)
    if list_parent:  # '' means "current directory", which needs no creation
        os.makedirs(list_parent, exist_ok=True)

    lst_with_names = []
    with open(lst_names_file, 'w') as list_file:
        for file_name in files:
            base_name, _ext = os.path.splitext(file_name)
            source_image_path = os.path.join(source_dir, file_name)
            label_file = os.path.join(labels_dir, base_name + '.txt')
            output_image_path = os.path.join(images_out_dir, file_name)
            output_label_path = os.path.join(labels_out_dir, base_name + '.txt')

            # Copy image file (existing copies are kept as they are)
            if not os.path.exists(output_image_path):
                shutil.copy(source_image_path, output_image_path)
                print(f'Copied {file_name} to {output_image_path}')
            else:
                print(f'File {file_name} already exists in {output_image_path}')

            # Copy label file; an image without a label is still copied and
            # listed, but the gap is now reported instead of silently ignored.
            if os.path.exists(label_file):
                if not os.path.exists(output_label_path):
                    shutil.copy(label_file, output_label_path)
                    print(f'Copied {base_name}.txt to {output_label_path}')
                else:
                    print(f'Label file {base_name}.txt already exists in {output_label_path}')
            else:
                print(f'No label file found for {file_name} in {labels_dir}')

            lst_with_names.append(file_name)
            list_file.write(file_name + '\n')

    return lst_with_names


In [28]:
def split_train_val(source_dir, train_dir, val_dir, labels_dir, val_ratio=0.4, seed=None):
    """Randomly split the files of ``source_dir`` into a training and a validation set.

    Every regular file directly inside ``source_dir`` is treated as an image.
    The shuffled list is cut so that roughly ``val_ratio`` of the files go to
    ``val_dir`` and the rest to ``train_dir``; the files and their labels are
    then copied with ``copy_files``, which also writes ``train_dir/train.txt``
    and ``val_dir/val.txt``.

    Args:
        source_dir: Directory containing the images to split.
        train_dir: Output root for the training set.
        val_dir: Output root for the validation set.
        labels_dir: Directory with the ``<base name>.txt`` label files.
        val_ratio: Fraction of the files assigned to the validation set (0..1).
        seed: Optional seed for a reproducible split. ``None`` (default) keeps
            the previous behaviour of shuffling with the global ``random`` state.
            Because existing copies are never removed, re-running with a
            *different* shuffle can leave the same image in both output sets;
            pass a fixed seed (or start from empty output directories) to avoid it.

    Returns:
        None

    Raises:
        ValueError: If ``val_ratio`` is outside the range 0..1.
    """
    if not 0 <= val_ratio <= 1:
        raise ValueError(f"val_ratio must be between 0 and 1, got {val_ratio!r}")

    # Creating directories
    for split_dir in (train_dir, val_dir):
        for sub_dir in ('labels', 'images'):
            os.makedirs(os.path.join(split_dir, sub_dir), exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting first makes the
    # shuffle result depend only on the seed, not on the filesystem.
    image_files = sorted(
        f for f in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, f))
    )
    if seed is None:
        random.shuffle(image_files)  # honours any random.seed() set by the caller
    else:
        random.Random(seed).shuffle(image_files)  # leaves the global RNG untouched

    # The epsilon guards against float error: 10 * (1 - 0.8) evaluates to
    # 1.9999999999999996 and would otherwise be truncated to 1 instead of 2.
    split_index = int(len(image_files) * (1 - val_ratio) + 1e-9)
    train = image_files[:split_index]
    print(f"Numb of images in training set: {len(train)}")
    val = image_files[split_index:]
    print(f"Numb of images in validation set: {len(val)}")

    # Copy training and validation files
    copy_files(train, source_dir, labels_dir, train_dir, os.path.join(train_dir, 'train.txt'))
    copy_files(val, source_dir, labels_dir, val_dir, os.path.join(val_dir, 'val.txt'))
    print(f"Copied {len(train)} files to {train_dir}")
    print(f"Copied {len(val)} files to {val_dir}")

# Example usage
# Absolute locations of the source images and of their label files.
source_dir = '/data/datasets/model_validation/val_yolo/images'
labels_dir = '/data/datasets/model_validation/val_yolo/labels'
# 'train_dir' and 'val_dir' are relative paths, so the output trees are created
# under the kernel's current working directory. val_ratio is not passed, so the
# function's default is used.
split_train_val(source_dir, 'train_dir', 'val_dir', labels_dir)


Numb of images in training set: 1808
Numb of images in validation set: 1206
Copied frame_001181.PNG to train_dir/images/frame_001181.PNG
Copied frame_001181.txt to train_dir/labels/frame_001181.txt
Copied frame_000073.PNG to train_dir/images/frame_000073.PNG
Copied frame_000073.txt to train_dir/labels/frame_000073.txt
Copied frame_000425.PNG to train_dir/images/frame_000425.PNG
Copied frame_000425.txt to train_dir/labels/frame_000425.txt
Copied frame_002693.PNG to train_dir/images/frame_002693.PNG
Copied frame_002693.txt to train_dir/labels/frame_002693.txt
Copied frame_002775.PNG to train_dir/images/frame_002775.PNG
Copied frame_002775.txt to train_dir/labels/frame_002775.txt
Copied frame_000790.PNG to train_dir/images/frame_000790.PNG
Copied frame_000790.txt to train_dir/labels/frame_000790.txt
Copied frame_002403.PNG to train_dir/images/frame_002403.PNG
Copied frame_002403.txt to train_dir/labels/frame_002403.txt
Copied frame_000564.PNG to train_dir/images/frame_000564.PNG
Copied fr