# In [15]:
#Importing the necessary Libraries
import os
import shutil
import random

# Defining the paths to my dataset folders
image_folder = r'C:\Users\Dipeolu Ayomide\OneDrive\Desktop\facemask_detection\data_preparation\images'
annotation_folder = r"C:\Users\Dipeolu Ayomide\OneDrive\Desktop\facemask_detection\data_preparation\annotations"

# Defining the destination folders for training and validation sets
train_folder = r"C:\Users\Dipeolu Ayomide\OneDrive\Desktop\facemask_detection\data_preparation\train"
validation_folder = r"C:\Users\Dipeolu Ayomide\OneDrive\Desktop\facemask_detection\data_preparation\validation"

# Creating the class folders in the destination folders
# (exist_ok=True makes this step safe to repeat when the cell is re-run)
class_names = ["no_mask", "with_mask", "mask_weared_incorrect"]
for class_name in class_names:
    os.makedirs(os.path.join(train_folder, class_name), exist_ok=True)
    os.makedirs(os.path.join(validation_folder, class_name), exist_ok=True)

# Listing all image files in the image folder.
# os.listdir() returns names in arbitrary order, so the list is sorted to give
# the seeded shuffle below the same starting point on every run.
image_files = sorted(file for file in os.listdir(image_folder) if file.endswith(".png"))

# Setting the percentage of data for training
split_ratio = 0.7

# Seed for the shuffle. A fixed seed makes the split reproducible: the
# destination folders are never emptied, so an unseeded re-run would place some
# images in both the training and the validation folders.
split_seed = 42

# Randomly shuffling the image files (private RNG, so the global `random`
# state used elsewhere is left untouched)
random.Random(split_seed).shuffle(image_files)

# Splitting the data into training and validation sets:
# the first `split_ratio` share goes to training, the remainder to validation
split_index = int(len(image_files) * split_ratio)
train_files = image_files[:split_index]
validation_files = image_files[split_index:]

# Function to get class name from XML file
def get_class_name(annotation_file):
    """Return the destination class folder name for an annotation file.

    The XML is parsed and every element text and attribute value is compared
    *exactly* against the known labels, so a label that merely appears inside
    a file name or path stored in the annotation (for example
    ``<filename>without_mask_1.png</filename>``) is not mistaken for a class.

    When several labels are present the first match in this order wins:
    ``without_mask`` -> ``"no_mask"``, ``with_mask`` -> ``"with_mask"``,
    ``mask_weared_incorrect`` -> ``"mask_weared_incorrect"``.

    Args:
        annotation_file: Path to the XML annotation file.

    Returns:
        The class folder name, or ``None`` when no known label is found.

    Raises:
        OSError: If the annotation file cannot be opened (for example
            ``FileNotFoundError`` when it does not exist).
    """
    # Imported locally so this function is self-contained
    import xml.etree.ElementTree as ET

    # (label used in the annotation, class folder name), in priority order
    label_to_class = (
        ("without_mask", "no_mask"),
        ("with_mask", "with_mask"),
        ("mask_weared_incorrect", "mask_weared_incorrect"),
    )

    # Read bytes so the XML parser can honour the encoding declared in the
    # file instead of relying on the platform's default text encoding.
    with open(annotation_file, 'rb') as file:
        raw_content = file.read()

    try:
        root = ET.fromstring(raw_content)
    except ET.ParseError:
        # Malformed XML: fall back to a plain substring scan of the text so
        # such files are still classified rather than raising a new error.
        xml_content = raw_content.decode("utf-8", errors="replace")
        found = {label for label, _ in label_to_class if label in xml_content}
    else:
        values = set()
        for element in root.iter():
            if element.text:
                values.add(element.text.strip())
            values.update(value.strip() for value in element.attrib.values())
        found = {label for label, _ in label_to_class if label in values}

    for label, class_name in label_to_class:
        if label in found:
            return class_name
    return None

# Copying images to their respective class folders in the training and validation sets
def _copy_images_to_class_folders(file_names, split_folder):
    """Copy each labelled image into ``split_folder/<class name>/``.

    Args:
        file_names: Image file names (not full paths) found in ``image_folder``.
        split_folder: Root folder of the split (training or validation); it is
            expected to already contain one sub-folder per class name.

    Images whose annotation file is missing, or whose annotation yields no
    known class, are skipped.
    """
    for image_file in file_names:
        # Swap only the final extension; str.replace(".png", ".xml") would
        # also rewrite a ".png" occurring earlier in the file name.
        image_name, image_extension = os.path.splitext(image_file)
        annotation_file = os.path.join(annotation_folder, image_name + ".xml")

        # An image without an annotation cannot be classified; skip it instead
        # of aborting halfway through and leaving a partially copied dataset.
        if not os.path.isfile(annotation_file):
            print(f"Skipping {image_file}: annotation file not found ({annotation_file})")
            continue

        class_name = get_class_name(annotation_file)
        if not class_name:
            continue

        destination_folder = os.path.join(split_folder, class_name)
        image_destination = os.path.join(destination_folder, image_file)
        if os.path.exists(image_destination):
            # Renaming the image to avoid overwriting.
            # NOTE(review): only one "_duplicate" copy is kept per image; if it
            # already exists it is overwritten by the copy below.
            image_destination = os.path.join(destination_folder, f"{image_name}_duplicate{image_extension}")
        shutil.copy(os.path.join(image_folder, image_file), image_destination)


_copy_images_to_class_folders(train_files, train_folder)
_copy_images_to_class_folders(validation_files, validation_folder)

print("Dataset splitting and class organization complete.")


# Output: Dataset splitting and class organization complete.
