In [None]:
# Delete images (and their annotation files) that are duplicated in another folder

import os

def sort_and_delete_images(folder1, folder2, image_extensions=('.jpg', '.png')):
    """
    Deletes from `folder1` every image that also exists in `folder2`.

    `folder1` is walked recursively. For each sub-folder that also exists at
    the same relative path under `folder2`, any image whose file name is
    present in the `folder2` sub-folder is removed from `folder1`, together
    with the same-named `.txt` annotation file next to it (if there is one).
    Files are matched by name only; their contents are not compared.
    Nothing is ever deleted from `folder2`.

    Parameters:
    - folder1 (str): The path to the first folder (obj_train_folder). Files are
      deleted from here.
    - folder2 (str): The path to the second folder (comparison folder).
    - image_extensions (str or iterable of str): File name suffixes treated as
      images. Matching is case-insensitive, so '.jpg' also covers '.JPG'.

    Raises:
    - ValueError: If `folder1` and `folder2` resolve to the same directory.
    """
    # Comparing a folder with itself would make every image "match" and wipe
    # the whole dataset, so refuse up front.
    resolved1 = os.path.normcase(os.path.realpath(folder1))
    resolved2 = os.path.normcase(os.path.realpath(folder2))
    if resolved1 == resolved2:
        raise ValueError(
            f"folder1 and folder2 refer to the same directory: {folder1}"
        )

    # A bare string would otherwise be iterated character by character.
    if isinstance(image_extensions, str):
        image_extensions = (image_extensions,)
    # str.endswith needs a tuple; lower-casing makes the suffix test
    # case-insensitive when paired with file.lower() below.
    extensions = tuple(ext.lower() for ext in image_extensions)

    print("Starting to process...")

    # Walk through all sub-folders in folder1
    for subdir, _, files in os.walk(folder1):
        # Find the corresponding sub-folder in folder2
        relative_path = os.path.relpath(subdir, folder1)
        folder2_subdir = os.path.join(folder2, relative_path)

        if not os.path.isdir(folder2_subdir):
            continue  # Skip if the sub-folder doesn't exist in folder2

        for file in files:
            if not file.lower().endswith(extensions):
                continue  # Not an image

            # Only a regular file with the same name counts as a duplicate.
            if not os.path.isfile(os.path.join(folder2_subdir, file)):
                continue

            # Delete the image from folder1
            image_path_folder1 = os.path.join(subdir, file)
            os.remove(image_path_folder1)
            print(f"Deleted image: {image_path_folder1}")

            # Delete the corresponding annotation text file from folder1
            annotation_file = os.path.splitext(file)[0] + '.txt'
            annotation_path = os.path.join(subdir, annotation_file)
            if os.path.isfile(annotation_path):
                os.remove(annotation_path)
                print(f"Deleted annotation: {annotation_path}")

# Remove from the Job_23 training data every image (and its .txt annotation)
# that is also present in the Job_23_ssim training data.
folder1, folder2 = (
    r"D:\FlagDetectionDatasets\ExportedDatasetsReduced\Job_23\obj_train_data",
    r"D:\FlagDetectionDatasets\ExportedDatasetsReduced\Job_23_ssim\obj_train_data",
)
sort_and_delete_images(folder1, folder2)
