In [3]:
import os
import random
import warnings

# Directory that receives the newly generated split list files
output_base_dir = "/home/scott/Projects/DissertationProject/data/splits"

# Existing split list files that serve as the input pool
test_file_path = "/home/scott/Projects/DissertationProject/data/raw/splits/test_split_1.txt"
train_file_path = "/home/scott/Projects/DissertationProject/data/raw/splits/train_split_1.txt"

# Load the directory names listed in a text file
def read_directories(file_path):
    """Return the lines of ``file_path`` as a list of strings.

    Line terminators are removed. Blank lines in the file are kept as
    empty strings, and an empty file yields an empty list.
    """
    with open(file_path, 'r') as handle:
        contents = handle.read()
    return contents.splitlines()

# Function to split directories based on specific counts
def split_directories(directories, train_count_0, train_count_1,
                      test_count_0, test_count_1, seed=None):
    """Randomly partition labelled directory names into train and test lists.

    Entries are grouped by suffix: names ending in ``_0`` form one class and
    names ending in ``_1`` the other; any other entry is ignored. Each group
    is shuffled, its first ``train_count_*`` entries go to the train list and
    the following ``test_count_*`` entries go to the test list, so an entry
    is never placed in both lists.

    Args:
        directories: List of directory-name strings. It is not modified.
        train_count_0: Number of ``_0`` entries for the train list.
        train_count_1: Number of ``_1`` entries for the train list.
        test_count_0: Number of ``_0`` entries for the test list.
        test_count_1: Number of ``_1`` entries for the test list.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            drives every shuffle so the result is reproducible. When
            ``None`` (the default) the module-level ``random`` generator is
            used.

    Returns:
        A ``(train_dirs, test_dirs)`` tuple of lists, each shuffled.

    Warns:
        UserWarning: If a class has fewer entries than
            ``train_count + test_count``; the returned lists are then
            shorter than requested.
    """
    rng = random if seed is None else random.Random(seed)

    directories_0 = [d for d in directories if d.endswith('_0')]
    directories_1 = [d for d in directories if d.endswith('_1')]

    # Slicing past the end of a list silently yields fewer items, which can
    # leave the test list without any entry of a class. Make that visible.
    requested = (
        ('_0', directories_0, train_count_0 + test_count_0),
        ('_1', directories_1, train_count_1 + test_count_1),
    )
    for suffix, pool, wanted in requested:
        if wanted > len(pool):
            warnings.warn(
                f"Requested {wanted} '{suffix}' directories but only "
                f"{len(pool)} are available; the split will be smaller "
                f"than specified.",
                stacklevel=2,
            )

    rng.shuffle(directories_0)
    rng.shuffle(directories_1)

    train_dirs = directories_0[:train_count_0] + directories_1[:train_count_1]
    test_dirs = (
        directories_0[train_count_0:train_count_0 + test_count_0]
        + directories_1[train_count_1:train_count_1 + test_count_1]
    )

    # Shuffle again so the two classes are interleaved in the output.
    rng.shuffle(train_dirs)
    rng.shuffle(test_dirs)

    return train_dirs, test_dirs

# Function to write directories to a text file
def write_directories_to_file(directories, file_path):
    """Write each directory name to ``file_path``, one per line.

    The parent directory of ``file_path`` is created if it does not exist.
    An existing file at ``file_path`` is overwritten.

    Args:
        directories: Iterable of directory-name strings.
        file_path: Destination path. May be a bare file name, in which case
            the file is written relative to the current working directory.
    """
    parent_dir = os.path.dirname(file_path)
    # os.path.dirname returns '' for a bare file name and os.makedirs('')
    # raises FileNotFoundError, so only create the parent when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(file_path, 'w') as file:
        file.writelines(f"{directory}\n" for directory in directories)

# Read directories from train and test split files
train_directories = read_directories(train_file_path)
test_directories = read_directories(test_file_path)

# Combine all directories for shuffling and splitting
all_directories = train_directories + test_directories

# Define split specifications:
#   name -> (train_count_0, train_count_1, test_count_0, test_count_1)
# Every specification partitions the same pool, so the per-class totals
# (train + test) must agree across rows: 278 for class 0 and 134 for class 1.
split_specifications = {
    "50_50": (139, 67, 139, 67),
    # Class-1 train count is 80 (80 + 54 = 134). The previous value of 180
    # summed to 234, which disagrees with the other rows and would push the
    # test slice past the end of a 134-entry class-1 pool, leaving it empty.
    "60_40": (166, 80, 112, 54),
    "90_10": (250, 120, 28, 14),
}

# Loop through each split specification and create the splits
for split_name, (train_count_0, train_count_1, test_count_0, test_count_1) in split_specifications.items():
    train_dirs, test_dirs = split_directories(all_directories, train_count_0, train_count_1, test_count_0, test_count_1)

    # Create output file paths
    train_file_output_path = os.path.join(output_base_dir, f'train_split_{split_name}.txt')
    test_file_output_path = os.path.join(output_base_dir, f'test_split_{split_name}.txt')

    # Write directories to text files
    write_directories_to_file(train_dirs, train_file_output_path)
    write_directories_to_file(test_dirs, test_file_output_path)

print("Splits and output text files created successfully.")


Splits and output text files created successfully.
