In [3]:
import os
import shutil
from glob import glob
import numpy as np
import random

In [2]:
def delete_files_with_prefix(directory, prefix):
    """
    Delete every file directly inside ``directory`` whose name starts with ``prefix``.

    Sub-directories are never removed, even when their name matches the
    prefix; only direct children of ``directory`` are inspected.
    Note that an empty prefix matches every entry.

    Parameters:
    - directory: Path to the directory where files should be deleted.
    - prefix: The prefix to match for file deletion.

    Returns:
    - A list with the names (not full paths) of the deleted files, in
      ``os.listdir`` order. The list is empty when ``directory`` does not
      exist, in which case a message is printed instead of raising.
    """
    deleted_files = []
    if not os.path.isdir(directory):
        print(f"The directory {directory} does not exist.")
        return deleted_files

    for filename in os.listdir(directory):
        if not filename.startswith(prefix):
            continue
        file_path = os.path.join(directory, filename)
        # os.remove() raises on a directory, which used to abort the loop
        # after some files had already been deleted. Symlinks (even to
        # directories) are still removable and keep their old behaviour.
        if os.path.isdir(file_path) and not os.path.islink(file_path):
            continue
        os.remove(file_path)
        deleted_files.append(filename)

    return deleted_files

# delete_files_with_prefix('../data/labels', 'screenshot3')
# delete_files_with_prefix('../data/images', 'screenshot3')
# delete_files_with_prefix('../data/labels', 'screenshot5')
# delete_files_with_prefix('../data/images', 'screenshot5')
# delete_files_with_prefix('../data/labels', 'screenshot8')
# delete_files_with_prefix('../data/images', 'screenshot8')
# delete_files_with_prefix('../SLONI_label', 'screenshot3')
# delete_files_with_prefix('../SLONI_label', 'screenshot5')
# delete_files_with_prefix('../SLONI_label', 'screenshot8')


['screenshot8_11_02__16_45.png',
 'screenshot8_12_02__07_00.png',
 'screenshot8_12_02__11_15.png',
 'screenshot8_08_02__07_45.png',
 'screenshot8_12_02__08_45.png',
 'screenshot8_11_02__09_00.png',
 'screenshot8_11_02__12_30.png',
 'screenshot8_08_02__09_45.png',
 'screenshot8_11_02__14_45.png',
 'screenshot8_08_02__11_30.png',
 'screenshot8_10_02__09_00.png',
 'screenshot8_08_02__13_00.png',
 'screenshot8_08_02__10_00.png',
 'screenshot8_10_02__08_30.png',
 'screenshot8_08_02__06_30.png',
 'screenshot8_09_02__10_15.png',
 'screenshot8_09_02__09_00.png',
 'screenshot8_09_02__09_30.png',
 'screenshot8_10_02__07_15.png',
 'screenshot8_11_02__10_00.png',
 'screenshot8_11_02__15_30.png',
 'screenshot8_09_02__09_45.png',
 'screenshot8_11_02__13_30.png',
 'screenshot8_10_02__08_45.png',
 'screenshot8_11_02__07_15.png',
 'screenshot8_11_02__17_15.png',
 'screenshot8_07_02__06_30.png',
 'screenshot8_11_02__13_00.png',
 'screenshot8_10_02__06_45.png',
 'screenshot8_12_02__09_15.png',
 'screensh

In [6]:
# Dataset root; data_split reads its 'images' and 'labels' sub-folders.
PATH_DIR = '../data'
# Output root that receives images/{train,val,test} and labels/{train,val,test}.
TARGET = '../target'
# Train / validation / test fractions. Only the first two are read by
# data_split; the test set receives whatever remains.
RATIOS = [0.6, 0.2, 0.2]
# File extensions (without the leading dot) used to build image/label filenames.
IMAGE_EXTENSION = 'png'
LABEL_EXTENSION = 'txt'
def data_split(path_dir, ratios, target, seed=None, unlabeled_keep_prob=0.2,
               image_extension=None, label_extension=None):
    """
    Randomly split an image/label dataset into train, val and test folders.

    Images are read from ``<path_dir>/images`` and labels from
    ``<path_dir>/labels``; a label belongs to an image when both share the
    same file stem. Output goes to ``<target>/images/{train,val,test}`` and
    ``<target>/labels/{train,val,test}``.

    WARNING: an existing ``target`` directory is deleted before the new split
    is written.

    Parameters:
    - path_dir: Dataset root containing the ``images`` and ``labels`` folders.
    - ratios: Sequence whose first two entries are the train and validation
      fractions; the test set receives whatever remains.
    - target: Output root (removed and re-created on every call).
    - seed: Optional seed. When given, both the shuffle and the sampling of
      unlabeled images are reproducible. When None, the global ``np.random``
      and ``random`` state is used.
    - unlabeled_keep_prob: Probability of copying an image that has no label
      file (such images are copied without a label).
    - image_extension, label_extension: Extensions without the leading dot.
      When None they fall back to the module-level IMAGE_EXTENSION and
      LABEL_EXTENSION.

    Returns:
    - None. The result is the directory tree written under ``target``.
    """
    if image_extension is None:
        image_extension = IMAGE_EXTENSION
    if label_extension is None:
        label_extension = LABEL_EXTENSION

    image_dir = os.path.join(path_dir, 'images')
    label_dir = os.path.join(path_dir, 'labels')

    # Keep only entries with the configured image extension. Stray files such
    # as '.DS_Store' or '*.jpg' would otherwise be renamed to '<stem>.<ext>'
    # and make shutil.copy fail on a path that does not exist.
    # Sorting makes the pre-shuffle order independent of os.listdir().
    image_suffix = f".{image_extension}"
    stems = sorted(
        stem
        for stem, ext in map(os.path.splitext, os.listdir(image_dir))
        if ext == image_suffix
    )

    if seed is None:
        shuffle, draw = np.random.shuffle, random.random
    else:
        rng = random.Random(seed)
        shuffle, draw = rng.shuffle, rng.random
    shuffle(stems)

    train_end = int(ratios[0] * len(stems))
    val_end = int((ratios[0] + ratios[1]) * len(stems))
    subsets = {
        'train': stems[:train_end],
        'val': stems[train_end:val_end],
        'test': stems[val_end:],
    }

    # Start from a clean output tree so files from an earlier split cannot
    # leak into this one.
    if os.path.exists(target):
        shutil.rmtree(target)

    for subset, subset_stems in subsets.items():
        subset_image_dir = os.path.join(target, 'images', subset)
        subset_label_dir = os.path.join(target, 'labels', subset)
        os.makedirs(subset_image_dir, exist_ok=False)
        os.makedirs(subset_label_dir, exist_ok=False)

        for stem in subset_stems:
            image_name = f"{stem}{image_suffix}"
            label_name = f"{stem}.{label_extension}"
            image_path = os.path.join(image_dir, image_name)
            label_path = os.path.join(label_dir, label_name)

            if os.path.exists(label_path):
                shutil.copy(image_path, os.path.join(subset_image_dir, image_name))
                shutil.copy(label_path, os.path.join(subset_label_dir, label_name))
            elif draw() < unlabeled_keep_prob:
                # Unlabeled image: keep only a random fraction of them.
                shutil.copy(image_path, os.path.join(subset_image_dir, image_name))

# Build the split of PATH_DIR under TARGET.
# NOTE: data_split deletes any existing TARGET directory before writing.
data_split(PATH_DIR, RATIOS, TARGET)

# More data: split a second dataset (../data2) into the same target directory

In [8]:
# Second dataset root; data_split reads its 'images' and 'labels' sub-folders.
PATH_DIR = '../data2'
# Output root. The data_split defined in this cell adds to it without
# deleting what is already there.
TARGET = '../target'
# Train / validation / test fractions. Only the first two are read by
# data_split; the test set receives whatever remains.
RATIOS = [0.6, 0.2, 0.2]
# File extensions (without the leading dot) used to build image/label filenames.
IMAGE_EXTENSION = 'png'
LABEL_EXTENSION = 'txt'
def data_split(path_dir, ratios, target, seed=None,
               image_extension=None, label_extension=None):
    """
    Randomly split the labeled images of a dataset and add them to ``target``.

    Images are read from ``<path_dir>/images`` and labels from
    ``<path_dir>/labels``; a label belongs to an image when both share the
    same file stem. Only images that have a label file are copied. Output
    goes to ``<target>/images/{train,val,test}`` and
    ``<target>/labels/{train,val,test}``.

    Unlike a fresh split, ``target`` is NOT cleared: missing folders are
    created and existing content is kept, so this can be used to merge an
    additional dataset into an already populated target.

    Parameters:
    - path_dir: Dataset root containing the ``images`` and ``labels`` folders.
    - ratios: Sequence whose first two entries are the train and validation
      fractions; the test set receives whatever remains.
    - target: Output root (created if needed, never deleted).
    - seed: Optional seed for a reproducible shuffle. When None, the global
      ``np.random`` state is used.
    - image_extension, label_extension: Extensions without the leading dot.
      When None they fall back to the module-level IMAGE_EXTENSION and
      LABEL_EXTENSION.

    Returns:
    - None. The result is the set of files written under ``target``.
    """
    if image_extension is None:
        image_extension = IMAGE_EXTENSION
    if label_extension is None:
        label_extension = LABEL_EXTENSION

    image_dir = os.path.join(path_dir, 'images')
    label_dir = os.path.join(path_dir, 'labels')

    # Keep only entries with the configured image extension. A file such as
    # 'x.jpg' that happens to have a matching label would otherwise be looked
    # up as 'x.<ext>' and make shutil.copy fail.
    # Sorting makes the pre-shuffle order independent of os.listdir().
    image_suffix = f".{image_extension}"
    stems = sorted(
        stem
        for stem, ext in map(os.path.splitext, os.listdir(image_dir))
        if ext == image_suffix
    )

    if seed is None:
        np.random.shuffle(stems)
    else:
        random.Random(seed).shuffle(stems)

    train_end = int(ratios[0] * len(stems))
    val_end = int((ratios[0] + ratios[1]) * len(stems))
    subsets = {
        'train': stems[:train_end],
        'val': stems[train_end:val_end],
        'test': stems[val_end:],
    }

    for subset, subset_stems in subsets.items():
        subset_image_dir = os.path.join(target, 'images', subset)
        subset_label_dir = os.path.join(target, 'labels', subset)
        # exist_ok=True: the target tree may already hold an earlier split.
        os.makedirs(subset_image_dir, exist_ok=True)
        os.makedirs(subset_label_dir, exist_ok=True)

        for stem in subset_stems:
            image_name = f"{stem}{image_suffix}"
            label_name = f"{stem}.{label_extension}"
            label_path = os.path.join(label_dir, label_name)

            # Images without a label file are skipped entirely.
            if os.path.exists(label_path):
                shutil.copy(os.path.join(image_dir, image_name),
                            os.path.join(subset_image_dir, image_name))
                shutil.copy(label_path,
                            os.path.join(subset_label_dir, label_name))

# Split PATH_DIR and add the result to TARGET; the data_split defined in this
# cell keeps whatever TARGET already contains.
data_split(PATH_DIR, RATIOS, TARGET)