In [None]:
import os
import shutil
import random

# Dataset locations (adjust to match the local directory layout)
base_dir = "/home/locutus/Downloads/CNN_Project_Group_17/Data"  # Raw images, one sub-folder per class
output_dir = "/home/locutus/Downloads/CNN_Project_Group_17/dataset"  # Root of the generated split

# Fractions of each class sent to the train / validation / test splits
train_ratio, val_ratio, test_ratio = 0.70, 0.15, 0.15

# Balance the classes by capping every class at the same image count
min_images = 14630  # Count of the smaller class (animal_faces)

# Class name -> number of images to use; keys match the sub-folder names under base_dir
classes = dict.fromkeys(("Animals", "Humans"), min_images)

# Build the output folder tree: one folder per (split, class) pair plus the single-prediction folder
def create_dirs():
    """Create the output directory layout under ``output_dir``.

    Makes ``<output_dir>/<split>/<class_name>`` for each of the train, val and
    test splits and each key of ``classes``, followed by
    ``<output_dir>/single_prediction``. Directories that already exist are
    left as they are.
    """
    split_class_dirs = [
        os.path.join(output_dir, split, class_name)
        for split in ("train", "val", "test")
        for class_name in classes
    ]
    for path in split_class_dirs + [os.path.join(output_dir, "single_prediction")]:
        os.makedirs(path, exist_ok=True)

# Helper that renames the selected files inside their folder without overwriting anything
def _rename_with_class_prefix(folder, names, prefix):
    """Rename *names* inside *folder* to ``<prefix>_<n><ext>`` (n starting at 1).

    Args:
        folder: Directory that contains every entry of *names*.
        names: File names to rename, in the order that determines numbering.
        prefix: Text placed before the running number in each new name.

    Returns:
        The list of new file names, in the same order as *names*.

    Raises:
        FileExistsError: If a new name (or the temporary name used on the way
            to it) is already taken by an entry of *folder* that would be
            overwritten. Nothing is renamed in that case.
    """
    targets = [
        f"{prefix}_{index}{os.path.splitext(name)[1]}"
        for index, name in enumerate(names, start=1)
    ]
    staging = [f".staging_{target}" for target in targets]

    # Validate up front so a clash never leaves the folder half-renamed.
    existing = set(os.listdir(folder))
    clashes = (existing - set(names)).intersection(targets) | existing.intersection(staging)
    if clashes:
        raise FileExistsError(
            f"Renaming in {folder} would overwrite existing entries, "
            f"e.g. {sorted(clashes)[:5]}"
        )

    # Two passes: os.rename silently replaces an existing file on POSIX, so a
    # selected file must never be renamed straight onto a name that another
    # selected file still holds. Going through unique staging names avoids that.
    for name, temp in zip(names, staging):
        os.rename(os.path.join(folder, name), os.path.join(folder, temp))
    for temp, target in zip(staging, targets):
        os.rename(os.path.join(folder, temp), os.path.join(folder, target))
    return targets


# Function to split, rename, and move files
def split_data():
    """Sample, rename and distribute the images of every class.

    For each ``class_name -> total_images`` entry of ``classes``:

    1. Randomly pick ``total_images`` regular files from
       ``<base_dir>/<class_name>`` (sub-directories are ignored).
    2. Rename them in place to ``<class_name.lower()>_<n><ext>``.
    3. Copy the last renamed file to
       ``<output_dir>/single_prediction/<class_name.lower()>.jpg``; that file
       stays in the source folder and is excluded from the splits.
    4. Move the remaining files into the train, val and test folders under
       ``output_dir``. Train and val sizes are ``int(ratio * remaining)``;
       test receives whatever is left, so no file is dropped by rounding.

    The source folders are modified: selected files are renamed and moved out.

    Raises:
        ValueError: If a class folder holds fewer files than requested, or
            the requested count is smaller than 1.
        FileExistsError: If renaming would overwrite an existing entry of the
            source folder.
    """
    for class_name, total_images in classes.items():
        source_folder = os.path.join(base_dir, class_name)

        # Only regular files are candidates; sorting makes the sample
        # reproducible when the caller seeds the random module.
        candidates = sorted(
            entry
            for entry in os.listdir(source_folder)
            if os.path.isfile(os.path.join(source_folder, entry))
        )
        if not 1 <= total_images <= len(candidates):
            raise ValueError(
                f"Cannot draw {total_images} images for class {class_name!r}: "
                f"{source_folder} contains {len(candidates)} files"
            )

        # Use the per-class count so sampling agrees with the split sizes below
        selected = random.sample(candidates, total_images)
        renamed_images = _rename_with_class_prefix(
            source_folder, selected, class_name.lower()
        )

        # Reserve one image for the single-prediction folder.
        # NOTE: the copy is always named "<class>.jpg", whatever the real
        # extension of the picked file is.
        single_img = renamed_images.pop()
        shutil.copy(
            os.path.join(source_folder, single_img),
            os.path.join(output_dir, "single_prediction", f"{class_name.lower()}.jpg"),
        )

        # Compute split sizes after reserving the single image
        remaining = total_images - 1
        train_count = int(train_ratio * remaining)
        val_count = int(val_ratio * remaining)

        # Assign images to respective splits (test takes the remainder)
        train_files = renamed_images[:train_count]
        val_files = renamed_images[train_count:train_count + val_count]
        test_files = renamed_images[train_count + val_count:]

        # Move files to respective directories
        move_files(train_files, source_folder, os.path.join(output_dir, "train", class_name))
        move_files(val_files, source_folder, os.path.join(output_dir, "val", class_name))
        move_files(test_files, source_folder, os.path.join(output_dir, "test", class_name))

# Helper that relocates a batch of files from one folder to another
def move_files(files, src_folder, dest_folder):
    """Move every name in *files* from *src_folder* into *dest_folder*.

    Each file keeps its name; the names are joined onto both folders to form
    the source and destination paths handed to ``shutil.move``.
    """
    for filename in files:
        origin = os.path.join(src_folder, filename)
        target = os.path.join(dest_folder, filename)
        shutil.move(origin, target)

# Execute the data preparation: build the output folder tree first, then
# sample, rename and distribute the images. The order matters because
# split_data() writes into the folders that create_dirs() makes.
# Note that split_data() renames files inside base_dir and moves them out,
# so the source folders are modified by this step.
create_dirs()
split_data()

# Reached only when both steps above finished without raising
print("Data splitting complete. Class balancing applied. Folder structure created successfully!")

