In [11]:
# Find images whose annotations contain minor classes, so those classes can be removed.
# Minor class IDs: 7 = non_beach_flag, 8 = other_beach_flag, 9 = surf_kite, 10 = wind_sock
import os
import shutil

# Option to only find the classes and view the resulting list in a text file
def find_images_with_class_ids(parent_folder, class_ids, output_file="images_with_class_ids.txt"):
    """
    Find images with specified class IDs in a folder of folders and save the results to a file.

    Every ``.txt`` file under ``parent_folder`` is treated as an annotation file
    whose lines start with a class ID. An annotation file matches when at least
    one of its non-blank lines starts with one of ``class_ids``. Each match is
    recorded as the annotation path without its ``.txt`` extension, one per line,
    in ``output_file``.

    Files that cannot be read are reported on stdout and skipped.

    Args:
        parent_folder (str): Path to the parent folder containing subfolders.
        class_ids (list): List of class IDs to search for.
        output_file (str): Path to save the list of images with the specified class IDs.
    """
    print ("Checking for minor classes...")
    class_ids = set(map(str, class_ids))  # Convert class IDs to strings for comparison
    matching_images = []

    for root, _, files in os.walk(parent_folder):
        for file in files:
            if not file.lower().endswith('.txt'):  # Only annotation files are scanned
                continue

            annotation_path = os.path.join(root, file)
            image_name = os.path.splitext(file)[0]  # Get image name without extension

            try:
                with open(annotation_path, 'r') as ann_file:
                    for line in ann_file:
                        tokens = line.split()
                        # A blank line has no class ID; skipping it avoids an
                        # IndexError that would abandon the whole file.
                        if not tokens:
                            continue
                        if tokens[0] in class_ids:
                            matching_images.append(os.path.join(root, image_name))
                            break  # One hit is enough for this file
            except (OSError, UnicodeError) as e:
                print(f"Error reading {annotation_path}: {e}")

    # Save results to a file
    with open(output_file, 'w') as out_file:
        out_file.write('\n'.join(matching_images))

    print(f"Found {len(matching_images)} images with specified class IDs.")
    print(f"Results saved to {output_file}")

import os

# Remove classes from an individual annotation file, with an option to back up the original file first
def remove_minor_classes_from_file(file_path, class_ids_to_remove, backup=True):
    """
    Remove specified class IDs from a single YOLO annotation file.

    Lines whose first whitespace-separated token is one of
    ``class_ids_to_remove`` are dropped; all other lines, including blank ones,
    are kept unchanged. The file is rewritten only when at least one line was
    dropped. With ``backup`` enabled, the original content is first saved next
    to the file as ``<file_path>.bak``.

    I/O and decoding errors are reported on stdout rather than raised.

    Args:
        file_path (str): Path to the annotation file to process.
        class_ids_to_remove (list): List of class IDs to remove.
        backup (bool): Whether to create a backup of the original annotation file.
    """
    class_ids_to_remove = set(map(str, class_ids_to_remove))  # Convert to strings for matching

    try:
        # Read the original annotation
        with open(file_path, 'r') as ann_file:
            lines = ann_file.readlines()

        # Filter out lines with minor classes
        filtered_lines = []
        for line in lines:
            tokens = line.split()
            # Blank lines carry no class ID: keep them instead of indexing
            # into an empty token list (which used to abort the whole file).
            if tokens and tokens[0] in class_ids_to_remove:
                continue
            filtered_lines.append(line)

        if filtered_lines != lines:  # Only write if changes are made
            if backup:
                # Create a backup of the original file
                backup_path = file_path + ".bak"
                with open(backup_path, 'w') as backup_file:
                    backup_file.writelines(lines)
                print(f"Backup created at: {backup_path}")

            # Write the filtered annotations back to the file
            with open(file_path, 'w') as ann_file:
                ann_file.writelines(filtered_lines)

            print(f"Modified file: {file_path}")
        else:
            print(f"No changes made to: {file_path} (no minor classes found)")

    except (OSError, UnicodeError) as e:
        print(f"Error processing {file_path}: {e}")


# All in one: find the affected annotation files, back them up, and remove the matching annotations
def backup_and_process_annotations(source_folder, backup_folder, class_ids_to_remove):
    """
    Backup annotation files that contain specified class IDs and remove those class IDs.

    Walks ``source_folder`` recursively. Every ``.txt`` file with at least one
    line whose first whitespace-separated token is in ``class_ids_to_remove``
    is first copied to ``backup_folder`` (mirroring its path relative to
    ``source_folder``) and then rewritten without those lines. Blank lines are
    kept unchanged. Files without a target class are left untouched and are
    not backed up.

    I/O and decoding errors on a file are reported on stdout and the walk
    continues with the next file.

    Args:
        source_folder (str): Path to the folder of folders containing annotation files.
        backup_folder (str): Path to the backup folder where annotations with specified classes will be copied.
        class_ids_to_remove (list): List of class IDs to remove.
    """
    class_ids_to_remove = set(map(str, class_ids_to_remove))  # Convert to strings for matching
    backup_root = os.path.normcase(os.path.abspath(backup_folder))

    for root, dirs, files in os.walk(source_folder):
        # If the backup folder lives inside the source tree, do not descend
        # into it: processing the backups would strip the very lines they
        # are meant to preserve.
        dirs[:] = [
            d for d in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, d))) != backup_root
        ]

        for file in files:
            if not file.lower().endswith('.txt'):
                continue

            file_path = os.path.join(root, file)
            try:
                # Read the original annotation
                with open(file_path, 'r') as ann_file:
                    lines = ann_file.readlines()

                # Keep every line that does not start with a target class ID.
                # Blank lines have no tokens and are always kept.
                filtered_lines = []
                for line in lines:
                    tokens = line.split()
                    if tokens and tokens[0] in class_ids_to_remove:
                        continue
                    filtered_lines.append(line)

                # Only lines with a target class are ever dropped, so equal
                # lengths mean the file holds none of them.
                if len(filtered_lines) == len(lines):
                    continue

                # Backup the annotation file before modifying it
                relative_path = os.path.relpath(file_path, source_folder)
                backup_path = os.path.join(backup_folder, relative_path)
                os.makedirs(os.path.dirname(backup_path), exist_ok=True)
                shutil.copy(file_path, backup_path)
                print(f"Backed up: {file_path} to {backup_path}")

                # Write back the filtered annotations
                with open(file_path, 'w') as ann_file:
                    ann_file.writelines(filtered_lines)

            except (OSError, UnicodeError) as e:
                print(f"Error processing {file_path}: {e}")

    # Report completion once, after the whole tree has been processed.
    print("DOne ")

# Driver section: configuration for the three utilities above.
# Only the final call is live; the calls for functions 1 and 2 are kept
# commented out for manual use.
# Root of the dataset to scan (used by the find-only function).
parent_folder =  "D:/FlagDetectionDatasets/ExportedDatasetsSelectedMLROBIN"
class_ids_to_remove = [7, 8, 9, 10]  # classes to remove
#find_images_with_class_ids(parent_folder, class_ids_to_remove, output_file="images_with_class_ids.txt")

# Try on a single file. Class IDs: 7 = non_beach_flag, 8 = other_beach_flag, 9 = surf_kite, 10 = wind_sock
file_path = r"D:/FlagDetectionDatasets/ExportedDatasetsSelectedMLROBIN/train/Job_158_000002.txt"
# Destination for backups of annotation files that get modified by the all-in-one call below.
move_bk_to ="D:/FlagDetectionDatasets/ExportedDatasetsSelectedML_Backupofannotationsremoved/val"
#remove_minor_classes_from_file(file_path, class_ids_to_remove, backup=True)

# All in one: search a folder of folders, back up the affected annotation
# files, then rewrite them without the minor classes.
# NOTE: this call runs on execution and modifies files under source_folder in place.
source_folder = r"D:/FlagDetectionDatasets/ExportedDatasetsSelectedMLROBIN/val"
class_ids_to_remove = [7, 8, 9, 10]  # Minor classes to remove (same list as assigned above)
backup_and_process_annotations(source_folder, move_bk_to, class_ids_to_remove)



DOne 
