In [41]:
# Run this notebook after the full set of images and labels has been generated; it splits them into train and validation sets.
import os
from sklearn.model_selection import train_test_split
import shutil

In [42]:
def get_files_no_ext(directory):
    """
    Collect the unique file names (extension stripped) found under a directory.

    The directory is walked recursively, so files in subdirectories are
    included. Names that differ only by extension (e.g. ``a.png`` and
    ``a.txt``) collapse into a single entry.

    Parameters:
    - directory (str or path-like): Root directory to scan.

    Returns:
    - list: Sorted list of unique file names without their final extension.
      An empty list is returned (and a message printed) when `directory`
      is not an existing directory.
    """
    # os.walk ignores listing errors by default, so a missing directory
    # would silently produce an empty result; check for it explicitly.
    if not os.path.isdir(directory):
        print(f"Directory not found: {directory}")
        return []

    unique_names = {
        os.path.splitext(file_name)[0]
        for _, _, file_names in os.walk(directory)
        for file_name in file_names
    }

    # Sort so the result does not depend on set iteration order; a seeded
    # split downstream is only reproducible if its input order is stable.
    return sorted(unique_names)

In [43]:
def train_val_split_list(data_list, test_size=0.2, random_state=None):
    """
    Split a list into a training part and a validation part.

    Thin wrapper around `train_test_split` that reports a failed split
    instead of raising.

    Parameters:
    - data_list (list): Items to split.
    - test_size (float or int, optional): Passed through to `train_test_split`
      as the size of the validation part. Defaults to 0.2.
    - random_state (int or None, optional): Passed through to `train_test_split`
      as the seed for the shuffle. Defaults to None.

    Returns:
    - tuple: `(train_data, val_data)` on success, or `(None, None)` when
      `train_test_split` raises ValueError (the error is printed).
    """
    try:
        train_part, val_part = train_test_split(
            data_list,
            test_size=test_size,
            random_state=random_state,
        )
    except ValueError as err:
        print(f"Error in train-test split: {err}")
        return None, None
    else:
        return train_part, val_part

In [44]:
def copy_specific_files(source_directory, destination_directory, files_to_copy, ext):
    """
    Copy a chosen set of files, identified by base name, into a directory.

    Each entry of `files_to_copy` has `ext` appended to form the file name,
    which is then copied (with metadata, via `shutil.copy2`) from
    `source_directory` to `destination_directory`. The destination directory
    is created if needed.

    A file that cannot be copied is reported and skipped; the remaining
    files are still processed. Errors are printed rather than raised.

    Parameters:
    - source_directory (str): Directory holding the files to copy.
    - destination_directory (str): Directory to copy into.
    - files_to_copy (iterable of str): File names without extension.
    - ext (str): Extension to append to each name, including the dot.

    Returns:
    - None
    """
    count = 0
    failed = 0
    try:
        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(destination_directory, exist_ok=True)

        for file_to_copy in files_to_copy:
            # Handle errors per file so one bad entry does not abort the
            # rest of the batch or suppress the summary below.
            try:
                file_name_with_ext = file_to_copy + ext
                source_path = os.path.join(source_directory, file_name_with_ext)
                destination_path = os.path.join(destination_directory, file_name_with_ext)
                shutil.copy2(source_path, destination_path)
            except FileNotFoundError as e:
                print(f"Error: {e}")
                failed += 1
            except Exception as e:
                print(f"An error occurred: {e}")
                failed += 1
            else:
                count += 1

        print(f"{count} {ext} files copied to {destination_directory} successfully.")
        if failed:
            print(f"{failed} {ext} files could not be copied to {destination_directory}.")

    # Outer handlers cover directory creation and iteration failures, keeping
    # the original contract that this function prints instead of raising.
    except FileNotFoundError as e:
        print(f"Error: {e}")
    except Exception as e:
        print(f"An error occurred: {e}")


In [45]:
def structure_dirs(images_dir, labels_dir, files_to_copy, out_dir,
                   image_ext='.png', label_ext='.txt'):
    """
    Copy the image and label file for each name into `out_dir`.

    Images go to `<out_dir>/images` and labels to `<out_dir>/labels`; both
    copies are delegated to `copy_specific_files`.

    Parameters:
    - images_dir (str): Directory holding the source images.
    - labels_dir (str): Directory holding the source label files.
    - files_to_copy (iterable of str): File names without extension.
    - out_dir (str or path-like): Root of the output split (e.g. train or val).
    - image_ext (str, optional): Image file extension. Defaults to '.png'.
    - label_ext (str, optional): Label file extension. Defaults to '.txt'.

    Returns:
    - None
    """
    # os.path.join handles trailing separators and path-like objects,
    # which plain string concatenation does not.
    imgs_out = os.path.join(out_dir, "images")
    labels_out = os.path.join(out_dir, "labels")

    copy_specific_files(images_dir, imgs_out, files_to_copy, image_ext)
    copy_specific_files(labels_dir, labels_out, files_to_copy, label_ext)

In [46]:
# Source dataset: images and labels live in sibling folders under root_dir.
root_dir = "C:/Users/Aaron/Desktop/uchicago-aviansolar-detect-track/custom/full_set_v2/"
images_dir = root_dir + "images"
labels_dir = root_dir + "labels"

# Destination folders for the split dataset.
out_root = "C:/Users/Aaron/Desktop/uchicago-aviansolar-detect-track/custom/final_v2"
train_out = out_root + "/train"
val_out = out_root + "/val"

# Fixed seed so that re-running the notebook reproduces the same split.
SPLIT_SEED = 42

# Only names that have BOTH an image and a label can be copied as a pair.
# Scanning the two folders separately also keeps stray files elsewhere
# under root_dir out of the sample list.
image_stems = set(get_files_no_ext(images_dir))
label_stems = set(get_files_no_ext(labels_dir))
unpaired = image_stems ^ label_stems
if unpaired:
    print(f'Skipping {len(unpaired)} names without a matching image/label pair')

# Sort before splitting: set order is not guaranteed to be stable between
# runs, and the seed only gives the same split for the same input order.
uni_imgs = sorted(image_stems & label_stems)
train, val = train_val_split_list(uni_imgs, random_state=SPLIT_SEED)

# train_val_split_list returns (None, None) when the split fails
# (it has already printed the reason), so do not try to copy anything.
if train is None or val is None:
    print('Split failed; no files were copied.')
else:
    print(f'Train images: {len(train)} \nValidation images: {len(val)}')

    structure_dirs(images_dir, labels_dir, train, train_out)
    structure_dirs(images_dir, labels_dir, val, val_out)


Train images: 4061 
Validation images: 1016
4061 .png files copied to C:/Users/Aaron/Desktop/uchicago-aviansolar-detect-track/custom/final_v2/train/images successfully.
4061 .txt files copied to C:/Users/Aaron/Desktop/uchicago-aviansolar-detect-track/custom/final_v2/train/labels successfully.
1016 .png files copied to C:/Users/Aaron/Desktop/uchicago-aviansolar-detect-track/custom/final_v2/val/images successfully.
1016 .txt files copied to C:/Users/Aaron/Desktop/uchicago-aviansolar-detect-track/custom/final_v2/val/labels successfully.
