In [2]:
# Splice_Datasets_Delete_Alternate_Pairs.ipynb

import os
import csv
import re


def extract_base_name(file_name):
    """
    Return the leading "job_<digits>_<digits>" prefix of a file name.

    The prefix is what identifies files that belong to the same frame, e.g.
    "Job_7_000000_scaled_2_4.PNG" yields "Job_7_000000". The pattern is
    anchored at the start of the name and matched case-insensitively; the
    returned text keeps the casing found in the file name.

    Args:
        file_name (str): The file name to inspect.

    Returns:
        str or None: The matched prefix, or None when the name does not start
        with such a prefix.
    """
    found = re.match(r"(job_\d+_\d+)", file_name, re.IGNORECASE)
    if found is None:
        return None
    return found.group(1)
    
def _collect_pairs(folder):
    """
    List the image/annotation pairs present in a folder.

    An image (.png/.jpg/.jpeg, any casing) counts as paired only when a file
    named "<image stem>.txt" exists in the same folder.

    Args:
        folder (str): Path of the folder to scan.

    Returns:
        list[tuple[str, str]]: (image_name, annotation_name) tuples ordered by
        image name.
    """
    names = os.listdir(folder)
    # A set keeps the pairing check O(1) per image instead of scanning a list.
    annotations = {name for name in names if name.lower().endswith('.txt')}

    pairs = []
    for name in sorted(names):
        if not name.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        annotation = f"{os.path.splitext(name)[0]}.txt"
        if annotation in annotations:
            pairs.append((name, annotation))
    return pairs


def _delete_matching_pair(reference_image, candidates, folder, folder_label):
    """
    Delete the first pair in `candidates` that shares `reference_image`'s base name.

    Args:
        reference_image (str): Image file name whose base name is looked up.
        candidates (list[tuple[str, str]]): Remaining pairs of `folder`; the
            deleted pair is removed from this list in place.
        folder (str): Folder that holds the candidate files.
        folder_label (str): Name used for the folder in console messages.
    """
    base_name = extract_base_name(reference_image)

    match = None
    # Without a recognisable base name there is nothing to match on. Comparing
    # None == None would pair up unrelated files and delete them.
    if base_name is not None:
        match = next(
            (pair for pair in candidates if extract_base_name(pair[0]) == base_name),
            None,
        )

    if match is None:
        print(f"No match found in {folder_label} for: {reference_image}")
        return

    try:
        os.remove(os.path.join(folder, match[0]))
        os.remove(os.path.join(folder, match[1]))
    except OSError as e:
        # Best effort: report the failure and carry on. The pair stays in
        # `candidates`, exactly as it did before this helper was factored out.
        print(f"Error deleting pair from {folder_label}: {e}")
    else:
        print(f"Deleted from {folder_label}: {match[0]} and {match[1]}")
        candidates.remove(match)


def splice_folders(folderToSplice1, folderToSplice2):
    """
    Splices two folders by alternating deletion of matching image-annotation pairs.

    Each round takes the next pair from folder 1 and deletes the first pair in
    folder 2 with the same base name (see `extract_base_name`); then takes the
    next remaining pair from folder 2 and deletes the first pair in folder 1
    with the same base name. Rounds repeat until either folder has no pairs
    left to process. Files are deleted from disk; progress and errors are
    printed to stdout.

    Files whose name has no recognisable base name are never used as a match
    key; they are reported with the "No match found" message instead.

    Args:
        folderToSplice1 (str): Path to the first folder.
        folderToSplice2 (str): Path to the second folder.

    Raises:
        OSError: If either folder cannot be listed (e.g. it does not exist).
    """
    paired_files1 = _collect_pairs(folderToSplice1)
    paired_files2 = _collect_pairs(folderToSplice2)

    # Alternate deletion of matching pairs
    while paired_files1 and paired_files2:
        # Folder 1's next pair survives; its counterpart in folder 2 is deleted.
        pair1 = paired_files1.pop(0)
        _delete_matching_pair(pair1[0], paired_files2, folderToSplice2, "folderToSplice2")

        # Folder 2's next pair survives; its counterpart in folder 1 is deleted.
        if paired_files2:
            pair2 = paired_files2.pop(0)
            _delete_matching_pair(pair2[0], paired_files1, folderToSplice1, "folderToSplice1")

    print("Splicing complete.")
                  
#base_path_tgt = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected'

# Splice pairs in final selected dataset to merge with augmented sets 
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_7'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_7_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_11'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_11_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_12'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_12_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_13'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_13_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_14'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_14_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_15'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_15_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_16'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_16_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_17'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_17_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_18'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_18_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_21'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_21_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_22'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_22_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_23'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_23_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_23_filter'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_23_filter_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_24'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_24_Aug'

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_24_filter'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_24_filter_Aug'

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_25'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_25_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_27'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_27_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_28'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_28_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_29'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_29_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_30'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_30_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_31'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_31_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_32'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_32_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_36'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_36_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_37'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_37_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_41'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_41_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_43'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_43_Aug'

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_42'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_42_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_48'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_48_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_50'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_50_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_51'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_51_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_52'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_52_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_56'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_56_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_57'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_57_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_54'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_54_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

## 55 using only 2_5
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_55'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_55_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_59'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_59_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_60'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_60_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_61'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_61_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_70'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_70_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_73'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_73_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_74'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_74_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_75'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_75_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_76'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_76_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_77'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_77_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_78'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_78_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_87'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_87_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_88'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_88_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_89'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_89_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_95'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_95_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_104'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_104_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_105'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_105_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_106'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_106_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_108'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_108_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_114'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_114_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_115'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_115_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_116'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_116_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_117'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_117_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_118'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_118_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_119'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_119_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_121'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_121_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_123'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_123_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_125'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_125_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_126'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_126_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_130'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_130_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_143'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_143_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_145'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_145_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

## ========================== Splice the augmented illumination sets ====================== #
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_7_Aug_Illum'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_7_Aug'
#splice_folders(folderToSplice1, folderToSplice2)


Deleted from folderToSplice2: Job_121_000000_scaled_3_0.PNG and Job_121_000000_scaled_3_0.txt
Deleted from folderToSplice1: Job_121_000008.PNG and Job_121_000008.txt
Deleted from folderToSplice2: Job_121_000016_scaled_3_0.PNG and Job_121_000016_scaled_3_0.txt
Deleted from folderToSplice1: Job_121_000024.PNG and Job_121_000024.txt
Deleted from folderToSplice2: Job_121_000032_scaled_3_0.PNG and Job_121_000032_scaled_3_0.txt
Deleted from folderToSplice1: Job_121_000040.PNG and Job_121_000040.txt
Deleted from folderToSplice2: Job_121_000048_scaled_3_0.PNG and Job_121_000048_scaled_3_0.txt
Deleted from folderToSplice1: Job_121_000056.PNG and Job_121_000056.txt
Deleted from folderToSplice2: Job_121_000064_scaled_3_0.PNG and Job_121_000064_scaled_3_0.txt
Deleted from folderToSplice1: Job_121_000072.PNG and Job_121_000072.txt
Deleted from folderToSplice2: Job_121_000080_scaled_3_0.PNG and Job_121_000080_scaled_3_0.txt
Deleted from folderToSplice1: Job_121_000088.PNG and Job_121_000088.txt
Dele