# In [2]:
# Step 3 a_Step_x_ReduceDataset_by_pattern.jpynb
# Dataset preparation - delete patterns or sequences of files to reduce the dataset size (mitigate against overfitting, reduce training time, etc.).

import os
import csv
import re

def _reduce_pairs_by_pattern(path, patternInt, folder, csv_file):
    """
    Keep one image-annotation pair out of every ``patternInt`` and delete the rest.

    Shared implementation behind delete_by_pattern and delete_by_pattern_flat.
    Images (.png/.jpg/.jpeg, case-insensitive) are sorted by name and paired with
    the annotation named ``<image stem>.txt``; images without such a file are
    ignored. Pairs at sorted positions 0, patternInt, 2*patternInt, ... are kept
    and every other pair is removed from disk. One summary row
    ``[folder, pairs_before, pairs_deleted, pairs_after]`` is appended to csv_file.

    Args:
        path (str): Directory that directly contains the image/annotation files.
        patternInt (int): Keep 1 pair in every patternInt pairs (must be >= 1).
        folder (str): Folder label used in log messages and in the CSV row.
        csv_file (str): Path of the CSV summary file (opened in append mode).
    """
    # A pattern of 0 would make the modulo below raise ZeroDivisionError.
    if patternInt < 1:
        print(f"Error: patternInt must be >= 1, got {patternInt}.")
        return

    # Ensure the folder exists (isdir also rejects a plain file of that name)
    if not os.path.isdir(path):
        print(f"Error: The folder {path} does not exist.")
        return

    files = os.listdir(path)
    image_files = sorted(f for f in files if f.lower().endswith(('.png', '.jpg', '.jpeg')))
    # A set gives O(1) membership tests while pairing images with annotations.
    annotation_names = {f for f in files if f.lower().endswith('.txt')}

    paired_files = []
    for image_file in image_files:
        annotation_file = f"{os.path.splitext(image_file)[0]}.txt"
        if annotation_file in annotation_names:
            paired_files.append((image_file, annotation_file))

    total_pairs_before = len(paired_files)
    kept_count = 0
    deleted_count = 0
    failed_count = 0
    intact_after_failure = 0

    for i, (image_file, annotation_file) in enumerate(paired_files):
        if i % patternInt == 0:
            kept_count += 1
            continue

        image_path = os.path.join(path, image_file)
        annotation_path = os.path.join(path, annotation_file)
        try:
            os.remove(image_path)
            os.remove(annotation_path)
        except OSError as e:
            failed_count += 1
            print(f"Error deleting files {image_file} or {annotation_file}: {e}")
            # Only a pair with both files still on disk counts as remaining;
            # a half-deleted pair is no longer a usable pair.
            if os.path.isfile(image_path) and os.path.isfile(annotation_path):
                intact_after_failure += 1
        else:
            deleted_count += 1

    # Pairs that are still complete on disk: the kept ones plus any whose
    # deletion failed before either file was removed.
    total_pairs_after = kept_count + intact_after_failure

    # Write the summary to the CSV file
    try:
        with open(csv_file, mode='a', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([folder, total_pairs_before, deleted_count, total_pairs_after])
        print(f"Summary written to {csv_file}")
    except OSError as e:
        print(f"Error writing to CSV file: {e}")

    print(f"Process complete. Kept {kept_count} images, deleted {deleted_count} image/annotation pairs from {folder}")
    if failed_count:
        print(f"Warning: {failed_count} image/annotation pairs could not be deleted from {folder}")


def delete_by_pattern(whichfolder, patternInt, folder,
                      sub_folder_train='obj_train_data',
                      csv_file="dataset_reduced_by_pattern.csv"):
    """
    Thin a dataset folder by keeping 1 image-annotation pair in every patternInt.

    Files are read from ``<whichfolder>/<folder>/<sub_folder_train>``, sorted by
    name, and for each run of patternInt pairs the first is kept and the
    following ``patternInt - 1`` are deleted. A summary row is appended to
    csv_file. Invalid input (patternInt < 1, missing folder) is reported with a
    message and nothing is deleted.

    Args:
        whichfolder (str): The base directory containing the folders.
        patternInt (int): Keep 1 pair in every patternInt pairs (must be >= 1).
        folder (str): The specific folder to process.
        sub_folder_train (str): The sub-folder inside the folder to process.
        csv_file (str): Path to the CSV file to append the summary to.
    """
    print(f"Starting 1/{patternInt} pattern for {folder}")
    path = os.path.join(whichfolder, folder, sub_folder_train)
    _reduce_pairs_by_pattern(path, patternInt, folder, csv_file)


def delete_by_pattern_flat(whichfolder, patternInt, folder,
                           csv_file="dataset_reduced_by_pattern.csv"):
    """
    Same as delete_by_pattern, for folders whose files sit directly inside them.

    Files are read from ``<whichfolder>/<folder>`` (no training sub-folder),
    sorted by name, and for each run of patternInt pairs the first is kept and
    the following ``patternInt - 1`` are deleted. A summary row is appended to
    csv_file. Invalid input (patternInt < 1, missing folder) is reported with a
    message and nothing is deleted.

    Args:
        whichfolder (str): The base directory containing the folders.
        patternInt (int): Keep 1 pair in every patternInt pairs (must be >= 1).
        folder (str): The specific folder to process.
        csv_file (str): Path to the CSV file to append the summary to.
    """
    print(f"Starting 1/{patternInt} pattern for {folder}")
    path = os.path.join(whichfolder, folder)
    _reduce_pairs_by_pattern(path, patternInt, folder, csv_file)


def delete_every_nth_flat(whichfolder, folder, deleteEveryInt):
    """
    Deletes every nth image-annotation pair from a folder after sorting by name.

    Images (.png/.jpg/.jpeg, case-insensitive) in ``<whichfolder>/<folder>`` are
    paired with the annotation named ``<image stem>.txt``; images without one
    are ignored. Counting pairs from 1 in sorted order, the pairs at positions
    deleteEveryInt, 2*deleteEveryInt, ... are removed from disk. Note that
    deleteEveryInt == 1 therefore deletes every pair.

    Invalid input (deleteEveryInt < 1, missing folder) is reported with a
    message and nothing is deleted.

    Args:
        whichfolder (str): The base directory containing the folders.
        folder (str): The specific folder to process.
        deleteEveryInt (int): Delete every nth file pair (must be >= 1).
    """
    print(f"Starting deletion of every {deleteEveryInt}th file pair in {folder}")

    # A step of 0 would make the modulo below raise ZeroDivisionError.
    if deleteEveryInt < 1:
        print(f"Error: deleteEveryInt must be >= 1, got {deleteEveryInt}.")
        return

    # Build the path to the target folder
    path = os.path.join(whichfolder, folder)

    # Ensure the folder exists (isdir also rejects a plain file of that name)
    if not os.path.isdir(path):
        print(f"Error: The folder {path} does not exist.")
        return

    files = os.listdir(path)
    image_files = sorted(f for f in files if f.lower().endswith(('.png', '.jpg', '.jpeg')))
    # A set gives O(1) membership tests while pairing images with annotations.
    annotation_names = {f for f in files if f.lower().endswith('.txt')}

    # Ensure only paired files are considered
    paired_files = []
    for img in image_files:
        annotation = f"{os.path.splitext(img)[0]}.txt"
        if annotation in annotation_names:
            paired_files.append((img, annotation))

    deleted_count = 0

    for position, (image_file, annotation_file) in enumerate(paired_files, start=1):
        if position % deleteEveryInt != 0:
            continue
        try:
            os.remove(os.path.join(path, image_file))
            os.remove(os.path.join(path, annotation_file))
        except OSError as e:
            print(f"Error deleting pair {image_file} and {annotation_file}: {e}")
        else:
            print(f"Deleted: {image_file} and {annotation_file}")
            deleted_count += 1

    print(f"Process complete. Deleted {deleted_count} file pairs from {folder}.")


# Delete segments of file pairs - for splicing 
def segment_and_delete_files(source_folder, numSegments, segmentToDelete):
    """
    Splits the sorted image-annotation pairs into contiguous segments and deletes one segment.

    Images (.png/.jpg/.jpeg, case-insensitive) are paired with the annotation
    named ``<image stem>.txt``; images without one are ignored. The sorted pairs
    are divided into numSegments contiguous runs of near-equal size: when the
    pair count is not divisible by numSegments, the first
    ``total_pairs % numSegments`` segments each receive one extra pair. Every
    pair in the chosen segment is removed from disk.

    Invalid input (segment index out of range, missing folder, no pairs) is
    reported with a message and nothing is deleted.

    Args:
        source_folder (str): Path to the folder containing image and annotation pairs.
        numSegments (int): Number of segments to divide the pairs into.
        segmentToDelete (int): The segment index to delete (1-based).
    """
    if segmentToDelete < 1 or segmentToDelete > numSegments:
        print(f"Invalid segmentToDelete value: {segmentToDelete}. Must be between 1 and {numSegments}.")
        return

    # Report a missing folder instead of letting os.listdir raise.
    if not os.path.isdir(source_folder):
        print(f"Error: The folder {source_folder} does not exist.")
        return

    files = os.listdir(source_folder)
    image_files = sorted(f for f in files if f.lower().endswith(('.png', '.jpg', '.jpeg')))
    # A set gives O(1) membership tests while pairing images with annotations.
    annotation_names = {f for f in files if f.lower().endswith('.txt')}

    # Ensure only paired files are considered
    paired_files = []
    for img in image_files:
        annotation = f"{os.path.splitext(img)[0]}.txt"
        if annotation in annotation_names:
            paired_files.append((img, annotation))

    total_pairs = len(paired_files)
    if total_pairs == 0:
        print(f"No valid image-annotation pairs found in {source_folder}.")
        return

    # Compute the bounds of the requested segment directly. Segments before it
    # contribute segment_size pairs each, plus one extra for each of the first
    # remaining_pairs segments.
    segment_size, remaining_pairs = divmod(total_pairs, numSegments)
    segment_index = segmentToDelete - 1
    start_idx = segment_index * segment_size + min(segment_index, remaining_pairs)
    end_idx = start_idx + segment_size + (1 if segment_index < remaining_pairs else 0)

    # Delete the specified segment
    failed_count = 0
    for image_file, annotation_file in paired_files[start_idx:end_idx]:
        try:
            os.remove(os.path.join(source_folder, image_file))
            os.remove(os.path.join(source_folder, annotation_file))
        except OSError as e:
            failed_count += 1
            print(f"Error deleting pair {image_file} and {annotation_file}: {e}")
        else:
            print(f"Deleted: {image_file} and {annotation_file}")

    # Only claim full success when every deletion actually succeeded.
    if failed_count:
        print(f"Finished segment {segmentToDelete} from {source_folder}, but {failed_count} pairs could not be deleted.")
    else:
        print(f"Deleted all pairs in segment {segmentToDelete} from {source_folder}.")

#================================================================================================================================================#
def extract_base_name(file_name):
    """
    Returns the leading "job_<digits>_<digits>" portion of a file name.

    The match is case-insensitive and anchored at the start of the name, and
    the matched text is returned with its original casing, e.g.
    "Job_7_000000_scaled_2_4.PNG" -> "Job_7_000000".

    Args:
        file_name (str): The file name to extract the base name from.

    Returns:
        str: The extracted base name, or None when the name does not start
        with the expected prefix.
    """
    prefix_pattern = re.compile(r"(job_\d+_\d+)", re.IGNORECASE)
    found = prefix_pattern.match(file_name)
    if found is None:
        return None
    return found.group(1)
    
def _paired_files_in(folder_path):
    """Return the name-sorted (image, annotation) file-name pairs found in folder_path."""
    files = os.listdir(folder_path)
    # A set gives O(1) membership tests while pairing images with annotations.
    annotation_names = {f for f in files if f.lower().endswith('.txt')}
    pairs = []
    for img in sorted(f for f in files if f.lower().endswith(('.png', '.jpg', '.jpeg'))):
        annotation = f"{os.path.splitext(img)[0]}.txt"
        if annotation in annotation_names:
            pairs.append((img, annotation))
    return pairs


def _delete_counterpart(kept_pair, candidates, folder_path, label):
    """Delete from folder_path the first pair in candidates sharing kept_pair's base name."""
    base_name = extract_base_name(kept_pair[0])
    if base_name is None:
        # Without this guard, None == None would "match" an unrelated file in
        # the other folder whose name also fails the pattern, and delete it.
        print(f"Skipping {kept_pair[0]}: no base name could be extracted")
        return

    match = next(
        (pair for pair in candidates if extract_base_name(pair[0]) == base_name),
        None,
    )
    if match is None:
        print(f"No match found in {label} for: {kept_pair[0]}")
        return

    try:
        os.remove(os.path.join(folder_path, match[0]))
        os.remove(os.path.join(folder_path, match[1]))
    except OSError as e:
        # The pair stays in candidates, exactly as before this helper existed.
        print(f"Error deleting pair from {label}: {e}")
        return

    print(f"Deleted from {label}: {match[0]} and {match[1]}")
    candidates.remove(match)


def splice_folders(folderToSplice1, folderToSplice2):
    """
    Splices two folders by alternating deletion of matching image-annotation pairs.

    Both folders are scanned for image files (.png/.jpg/.jpeg) that have an
    annotation named ``<image stem>.txt``. Pairs are matched across folders by
    the base name returned by extract_base_name. The loop alternates:

      1. Take the next pair from folder 1 (it is kept) and delete the first
         pair in folder 2 with the same base name.
      2. Take the next remaining pair from folder 2 (it is kept) and delete
         the first pair in folder 1 with the same base name.

    This repeats until either folder has no unprocessed pairs left; pairs still
    unprocessed in the other folder are left untouched. Files whose name yields
    no base name are skipped and never used for matching. A missing folder is
    reported with a message and nothing is deleted.

    Args:
        folderToSplice1 (str): Path to the first folder.
        folderToSplice2 (str): Path to the second folder.
    """
    # Report a missing folder instead of letting os.listdir raise.
    for folder_path in (folderToSplice1, folderToSplice2):
        if not os.path.isdir(folder_path):
            print(f"Error: The folder {folder_path} does not exist.")
            return

    paired_files1 = _paired_files_in(folderToSplice1)
    paired_files2 = _paired_files_in(folderToSplice2)

    # Alternate deletion of matching pairs
    while paired_files1 and paired_files2:
        # Keep the next pair of folder 1; drop its counterpart from folder 2.
        pair1 = paired_files1.pop(0)
        _delete_counterpart(pair1, paired_files2, folderToSplice2, "folderToSplice2")

        # Keep the next pair of folder 2; drop its counterpart from folder 1.
        if paired_files2:
            pair2 = paired_files2.pop(0)
            _delete_counterpart(pair2, paired_files1, folderToSplice1, "folderToSplice1")

    print("Splicing complete.")

                          
# Delete files by pattern in the selected set, e.g. delete every 2nd file in the set.
# base_path_tgt is the base directory passed as the first argument to the
# (commented-out) delete_* calls below; the alternative target is left
# commented out so it can be swapped in.
#base_path_tgt = 'D:/FlagDetectionDatasets/ExportedDatasetsReduced'
base_path_tgt = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected'
#delete_every_nth_flat(base_path_tgt, 'Job_21', 3)
#delete_every_nth_flat(base_path_tgt, 'Job_22', 3)
#delete_every_nth_flat(base_path_tgt, 'Job_23', 3)
#delete_every_nth_flat(base_path_tgt, 'Job_23_filter', 3)
#delete_every_nth_flat(base_path_tgt, 'Job_28', 3)
#delete_every_nth_flat(base_path_tgt, 'Job_29', 3)
#delete_every_nth_flat(base_path_tgt, 'Job_30', 3)
#delete_every_nth_flat(base_path_tgt, 'Job_31', 3)
#delete_every_nth_flat(base_path_tgt, 'Job_32', 3)
#delete_every_nth_flat(base_path_tgt, 'Job_36', 3)
#delete_every_nth_flat(base_path_tgt, 'Job_41', 2)
#delete_every_nth_flat(base_path_tgt, 'Job_43', 2)
#delete_every_nth_flat(base_path_tgt, 'Job_51', 2)
#delete_every_nth_flat(base_path_tgt, 'Job_51_Aug', 2)
#delete_every_nth_flat(base_path_tgt, 'Job_52_Aug', 2)

#delete_every_nth_flat(base_path_tgt, 'Job_54', 3) # ran twice 
#delete_every_nth_flat(base_path_tgt, 'Job_54', 3) 
#delete_every_nth_flat(base_path_tgt, 'Job_60', 3) 
#delete_every_nth_flat(base_path_tgt, 'Job_61', 3) 
#delete_every_nth_flat(base_path_tgt, 'Job_71', 2) 
#delete_every_nth_flat(base_path_tgt, 'Job_72', 2) 
#delete_every_nth_flat(base_path_tgt, 'Job_78', 2) 
#delete_every_nth_flat(base_path_tgt, 'Job_87', 2) 
#delete_every_nth_flat(base_path_tgt, 'Job_88', 2) 
#delete_every_nth_flat(base_path_tgt, 'Job_89', 2) 
#delete_every_nth_flat(base_path_tgt, 'Job_104', 2) 
#delete_every_nth_flat(base_path_tgt, 'Job_105', 2) 
#delete_every_nth_flat(base_path_tgt, 'Job_106', 2) 
#delete_every_nth_flat(base_path_tgt, 'Job_118', 2) 
#delete_every_nth_flat(base_path_tgt, 'Job_130', 2) 


# Splice pairs in final selected dataset to merge with augmented sets 
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_7'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_7_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_11'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_11_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_12'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_12_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_13'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_13_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_14'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_14_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_15'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_15_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_16'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_16_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_17'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_17_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_18'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_18_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_21'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_21_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_22'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_22_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_23'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_23_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_23_filter'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_23_filter_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_24'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_24_Aug'

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_24_filter'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_24_filter_Aug'

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_25'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_25_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_27'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_27_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_28'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_28_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_29'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_29_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_30'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_30_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_31'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_31_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_32'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_32_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_36'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_36_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_37'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_37_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_41'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_41_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_43'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_43_Aug'

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_42'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_42_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_48'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_48_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_50'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_50_Aug'
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_51'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_51_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_52'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_52_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_56'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_56_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_57'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_57_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_54'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_54_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

## 55 using only 2_5
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_55'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_55_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_59'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_59_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_60'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_60_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_61'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_61_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_70'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_70_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_73'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_73_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_74'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_74_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_75'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_75_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_76'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_76_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_77'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_77_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_78'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_78_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_87'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_87_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_88'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_88_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_89'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_89_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_95'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_95_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_104'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_104_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_105'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_105_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_106'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_106_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_108'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_108_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_114'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_114_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_115'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_115_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_116'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_116_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_117'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_117_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_118'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_118_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_119'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_119_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_121'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_121_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_123'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_123_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_125'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_125_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_126'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_126_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_130'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_130_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_143'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_143_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_145'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_145_Aug'
#splice_folders(folderToSplice1, folderToSplice2)

## ========================== Splice the augmented illumination sets ====================== #
#folderToSplice1 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_7_Aug_Illum'
#folderToSplice2 = 'D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_7_Aug'
#splice_folders(folderToSplice1, folderToSplice2)


##==========================================================================================#

# Delete whole segments of files - superseded with a splicing function that is more efficient 
#source_folder='D:/FlagDetectionDatasets/ExportedDatasetsSelected/Job_7_Aug_Copy'
#segment_and_delete_files(source_folder, numSegments, segmentToDelete):

#delete_by_pattern(base_path_tgt, 3, 'Job_106') 
#delete_by_pattern(base_path_tgt, 5, 'Job_108') 
#delete_by_pattern(base_path_tgt, 4, 'Job_70') 

#delete_by_pattern(base_path_tgt, 2, 'Job_7') # frame step of 5 left frames 0, 5, 10, ...; this reduces them to 0, 10, 20, ...
#delete_by_pattern(base_path_tgt, 2, 'Job_11') # frame step of 5 left frames 1, 6, 11, ...; this reduces them to 1, 11, 21, ...
#delete_by_pattern(base_path_tgt, 4, 'Job_65') 

#delete_by_pattern(base_path_tgt, 3, 'Job_116')  # DID TWICE 
#delete_by_pattern(base_path_tgt, 3, 'Job_114') 
#Starting 1/3 pattern for Job_114
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 96 images, deleted 192 image/annotation pairs from Job_114

#Starting 1/3 pattern for Job_116
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 176 images, deleted 352 image/annotation pairs from Job_116

# Delete sequence - keep 1, delete 4 -  keep 1/5
#delete_by_pattern(base_path_tgt,  folder="Job_15", sub_folder_train = r'obj_train_data',5)
#delete_by_pattern(base_path_tgt,  folder="Job_16", sub_folder_train = r'obj_train_data',5)
#delete_by_pattern(base_path_tgt,  folder="Job_17", sub_folder_train = r'obj_train_data',5)
#delete_by_pattern(base_path_tgt,  folder="Job_18", sub_folder_train = r'obj_train_data',5)

# Keep sequence is keep 1, delete 4, keep 1, delete 4... 
#delete_by_pattern(base_path_tgt, 5, 'Job_29')
#delete_by_pattern(base_path_tgt, 5, 'Job_41')
#delete_by_pattern(base_path_tgt, 4, 'Job_121_filter') 
#delete_by_pattern(base_path_tgt, 4, 'Job_121_a') # kept 2 deleted 8 


#delete_by_pattern_flat(base_path_tgt, 2, 'Job_48') 
#delete_by_pattern_flat(base_path_tgt, 2, 'Job_128') 

#base_path_tgt = 'D:/FlagDetectionDatasets/Augmentation/scaled/augmented'
#base_path_tgt = 'D:/FlagDetectionDatasets/Augmentation'
#delete_by_pattern_flat(base_path_tgt, 10, 'Job_30') 
#base_path_tgt = 'D:/FlagDetectionDatasets/Augmentation'
#delete_by_pattern_flat(base_path_tgt, 10, 'Switch_flag_into') 

#delete_by_pattern_flat(base_path_tgt, 5, 'Job_123') 
#base_path_tgt = 'D:/FlagDetectionDatasets/Augmentation/scaled/augmented'
#delete_by_pattern_flat(base_path_tgt, 5, 'Job_120') 
#delete_by_pattern_flat(base_path_tgt, 5, 'Job_119') 
#delete_by_pattern_flat(base_path_tgt, 5, 'Job_118') 

#delete_by_pattern(base_path_tgt, 5, 'Job_115') 
#Starting 1/5 pattern for Job_115
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 63 images, deleted 249 image/annotation pairs from Job_115

# delete_by_pattern(base_path_tgt, 5, 'Job_126') # RAN TWICE 
#delete_by_pattern(base_path_tgt, 5, 'Job_128')
#delete_by_pattern(base_path_tgt, 5, 'Job_130')
#delete_by_pattern(base_path_tgt, 5, 'Job_131')  # did this twice 
#delete_by_pattern(base_path_tgt, 5, 'Job_142') # did this twice 
#delete_by_pattern(base_path_tgt, 5, 'Job_143') ## REDO 
#delete_by_pattern(base_path_tgt, 5, 'Job_147')  # Did this twice so 1/10 
#delete_by_pattern(base_path_tgt, 5, 'Job_160')  # WAS IN 5S ALREADY 
#delete_by_pattern(base_path_tgt, 5, 'Job_98')  # WAS IN 5S ALREADY 
#delete_by_pattern(base_path_tgt, 2, 'Job_98')  # then keep 1 in 2 so end result is 1 / 10 

## Example Log from above

#Starting 1/5 pattern for Job_143
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 27 images, deleted 105 image/annotation pairs from Job_143

#Starting 1/5 pattern for Job_126
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 156 images, deleted 624 image/annotation pairs from Job_126
#Starting 1/5 pattern for Job_128
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 107 images, deleted 425 image/annotation pairs from Job_128
#Starting 1/5 pattern for Job_130
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 111 images, deleted 441 image/annotation pairs from Job_130
#Starting 1/5 pattern for Job_131
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 108 images, deleted 432 image/annotation pairs from Job_131
#Starting 1/5 pattern for Job_142
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 106 images, deleted 422 image/annotation pairs from Job_142
#Starting 1/5 pattern for Job_160
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 59 images, deleted 234 image/annotation pairs from Job_160
#Starting 1/5 pattern for Job_160
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 59 images, deleted 234 image/annotation pairs from Job_160

# REDO *********************************************
# Starting 1/5 pattern for Job_143
# Error: The folder D:/FlagDetectionDatasets/ExportedDatasetsReduced\Job_143\obj_train_data does not exist.

# Starting 1/5 pattern for Job_147
# Summary written to dataset_reduced_by_pattern.csv
# Process complete. Kept 106 images, deleted 422 image/annotation pairs from Job_147


# Keep 1, delete 9 pattern  - keep 1/10
#delete_by_pattern(base_path_tgt, 10, 'Job_30') ## CHECK PATTERN CORRECT?
#delete_by_pattern(base_path_tgt, 10, 'Job_31')
#delete_by_pattern(base_path_tgt, 10, 'Job_32')

#delete_by_pattern(base_path_tgt, 8, 'Job_72')  
#delete_by_pattern(base_path_tgt, 10, 'Job_73')  

#delete_by_pattern(base_path_tgt, 10, 'Job_73')
#Starting 1/10 pattern for Job_73
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 88 images, deleted 788 image/annotation pairs from Job_73

# Delete - skipped for Job_21
# delete_by_pattern(base_path_tgt,  2, 'Job_21')  # Keep all, as the frame step of 5 was already effective

# Keep 1/3 sequence 
#delete_by_pattern(base_path_tgt, 3, 'Job_118')

#delete_by_pattern(base_path_tgt, 3, 'Job_117')  

# Keep 1/4 sequence (some jobs below use a 1/10 pattern instead)
#delete_by_pattern(base_path_tgt, 4, 'Job_22')
#delete_by_pattern(base_path_tgt, 10, 'Job_25')
#delete_by_pattern_flat(base_path_tgt, 10, 'Job_27')  
#delete_by_pattern_flat(base_path_tgt, 10, 'Job_30')  
#delete_by_pattern_flat(base_path_tgt, 10, 'Job_31')  
#delete_by_pattern_flat(base_path_tgt, 10, 'Job_43')  
#delete_by_pattern_flat(base_path_tgt, 10, 'Job_51')  
#delete_by_pattern_flat(base_path_tgt, 10, 'Job_60')  

#delete_by_pattern(base_path_tgt, 4, 'Job_55')
#delete_by_pattern(base_path_tgt, 4, 'Job_56')
#delete_by_pattern(base_path_tgt, 4, 'Job_57')
#delete_by_pattern(base_path_tgt, 4, 'Job_59')
#delete_by_pattern(base_path_tgt, 4, 'Job_125')

#delete_by_pattern(base_path_tgt, 5, 'Job_78')  # COULD DELETE 1/5 AGAIN
#delete_by_pattern_flat(base_path_tgt, 10, 'Job_74')
#delete_by_pattern_flat(base_path_tgt, 10, 'Job_75')
#delete_by_pattern_flat(base_path_tgt, 10, 'Job_77')
#delete_by_pattern(base_path_tgt, 10, 'Job_88')
#delete_by_pattern(base_path_tgt, 5, 'Job_95')  # handheld
#delete_by_pattern(base_path_tgt, 10, 'Job_36')  

#delete_by_pattern(base_path_tgt, 5, 'Job_89')
#delete_by_pattern(base_path_tgt, 5, 'Job_88')

#delete_by_pattern_flat(base_path_tgt, 10, 'Job_98')  

#delete_by_pattern_flat(base_path_tgt, 5, 'Job_109') 
#delete_by_pattern_flat(base_path_tgt, 5, 'Job_105')  

# Example output: 
# Starting 1/4 pattern for Job_22
# Summary written to dataset_reduced_by_pattern.csv
# Process complete. Kept 569 images, deleted 1705 image/annotation pairs from Job_22
#Starting 1/4 pattern for Job_125
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 96 images, deleted 288 image/annotation pairs from Job_125

# Example for Job_15
# Starting amt: 400. Deleted:  320 Remaining: 80
#sub_folder_train = r'obj_train_data'

## CHECK 
#Starting 1/10 pattern for Job_36
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 191 images, deleted 1710 image/annotation pairs from Job_36

#Starting 1/5 pattern for Job_120
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 58 images, deleted 230 image/annotation pairs from Job_120

#Starting 1/5 pattern for Job_120
#Summary written to dataset_reduced_by_pattern.csv
#Process complete. Kept 76 images, deleted 301 image/annotation pairs from Job_120


Deleted from folderToSplice2: Job_121_000000_scaled_3_0.PNG and Job_121_000000_scaled_3_0.txt
Deleted from folderToSplice1: Job_121_000008.PNG and Job_121_000008.txt
Deleted from folderToSplice2: Job_121_000016_scaled_3_0.PNG and Job_121_000016_scaled_3_0.txt
Deleted from folderToSplice1: Job_121_000024.PNG and Job_121_000024.txt
Deleted from folderToSplice2: Job_121_000032_scaled_3_0.PNG and Job_121_000032_scaled_3_0.txt
Deleted from folderToSplice1: Job_121_000040.PNG and Job_121_000040.txt
Deleted from folderToSplice2: Job_121_000048_scaled_3_0.PNG and Job_121_000048_scaled_3_0.txt
Deleted from folderToSplice1: Job_121_000056.PNG and Job_121_000056.txt
Deleted from folderToSplice2: Job_121_000064_scaled_3_0.PNG and Job_121_000064_scaled_3_0.txt
Deleted from folderToSplice1: Job_121_000072.PNG and Job_121_000072.txt
Deleted from folderToSplice2: Job_121_000080_scaled_3_0.PNG and Job_121_000080_scaled_3_0.txt
Deleted from folderToSplice1: Job_121_000088.PNG and Job_121_000088.txt
Dele