#### Augment Images with Rare Classes

In [12]:
import os
import glob
import cv2
import shutil
import yaml
import numpy as np
import pandas as pd
from tqdm import tqdm
import albumentations as A
from albumentations import ToTensorV2

# Config
# Dataset layout: <project_dir>/<dataset_name>/train/{images,labels}
project_dir = "YOLO11_model"
dataset_name = "datasetv3"
dataset_yaml = dataset_name + ".yaml"
train_dir = os.path.join(project_dir, dataset_name, "train")
images_dir = os.path.join(train_dir, "images")
# Build the labels path explicitly: a str.replace("images", "labels") on the
# whole path would also rewrite any parent directory containing "images".
labels_dir = os.path.join(train_dir, "labels")
out_images_dir = images_dir + "_aug"
out_labels_dir = labels_dir + "_aug"

# Start from empty output directories so files left over from a previous run
# never mix with the freshly generated set.
for out_dir in (out_images_dir, out_labels_dir):
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
        print(f"Emptying directory: {out_dir}")
    os.makedirs(out_dir)

AUGS_PER_IMAGE = 3       # how many augmented versions to create
IMBALANCE_THRESHOLD = 200  # classes below this count are "rare"

# Load Class Names
# The dataset YAML sits directly under the project directory; its "names"
# entry is indexed by class id further down in this cell.
yaml_path = os.path.join(project_dir, dataset_yaml)
with open(yaml_path, "r") as cfg_file:
    data_cfg = yaml.safe_load(cfg_file)

class_names = data_cfg["names"]

# Copy Original Images and Labels
# The output directories receive the untouched originals first; augmented
# samples are added to the same directories later.
print("Copying originals...")
# Cover both extensions the augmentation loop accepts, so PNG originals are
# not missing from the output set.
for ext in (".jpg", ".png"):
    for img_path in glob.glob(os.path.join(images_dir, "*" + ext)):
        # Derive the label path from the file stem rather than string-replacing
        # on the whole path, which would also touch parent directory names.
        stem = os.path.splitext(os.path.basename(img_path))[0]
        lbl_path = os.path.join(labels_dir, stem + ".txt")

        shutil.copy(img_path, out_images_dir)

        # An image without a label file is still copied, just without a label.
        if os.path.exists(lbl_path):
            shutil.copy(lbl_path, out_labels_dir)

print("Original images and labels copied.")

# Count Class Frequencies
# Every non-blank line of a label file is one annotation whose first field is
# the integer class id.
label_files = glob.glob(os.path.join(labels_dir, "*.txt"))

class_counts = {}

for lf in label_files:
    with open(lf, "r") as f:
        for line in f:
            fields = line.split()
            # Blank lines (e.g. a trailing newline) carry no annotation and
            # would otherwise raise IndexError on fields[0].
            if not fields:
                continue
            cls = int(fields[0])
            class_counts[cls] = class_counts.get(cls, 0) + 1

class_counts_names = {class_names[key]: val for key, val in class_counts.items()}
print("Class counts:", class_counts_names)

# Identify rare classes
# Only classes that occur at least once can be rare here: a class with zero
# annotations never enters class_counts.
rare_classes = {cls for cls, c in class_counts.items() if c < IMBALANCE_THRESHOLD}
# Sort by class id so the printed list does not depend on set iteration order.
rare_classes_names = [class_names[class_id] for class_id in sorted(rare_classes)]
print("Rare classes:", rare_classes_names)

# Define Augmentation Pipeline
# Small geometric jitter: slight shift, zoom, rotation and shear.
affine_jitter = A.Affine(
    translate_percent={"x": (-0.03, 0.03), "y": (-0.03, 0.03)},
    scale=(0.85, 1.15),
    rotate=(-5, 5),
    shear=(-4, 4),
    p=0.7,
)

# Boxes are passed in YOLO format; class ids travel alongside them in the
# "class_labels" field.
yolo_bbox_params = A.BboxParams(format="yolo", label_fields=["class_labels"])

# Order: flips, affine jitter, colour changes, then noise/blur.
augmentation_steps = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    affine_jitter,
    A.RandomBrightnessContrast(p=0.5),
    A.HueSaturationValue(p=0.5),
    A.GaussNoise(p=0.15),
    A.MotionBlur(p=0.1),
]

transform = A.Compose(augmentation_steps, bbox_params=yolo_bbox_params)

# Process Images with Rare Classes
def _clip_yolo_box(x, y, w, h):
    """Clip a normalised YOLO box (centre x, centre y, width, height) to [0, 1].

    Returns the clipped box as ``[xc, yc, w, h]``, or ``None`` when the box has
    no positive width or height left after clipping.
    """
    xmin = max(0.0, x - w / 2)
    xmax = min(1.0, x + w / 2)
    ymin = max(0.0, y - h / 2)
    ymax = min(1.0, y + h / 2)
    if xmax <= xmin or ymax <= ymin:
        return None
    return [(xmin + xmax) / 2, (ymin + ymax) / 2, xmax - xmin, ymax - ymin]


for label_path in tqdm(label_files):

    # Read labels; blank lines are dropped so they cannot crash the r[0]
    # lookup below.
    with open(label_path, "r") as f:
        rows = [fields for fields in (line.split() for line in f) if fields]

    # Skip if empty or no rare classes
    classes_in_image = {int(r[0]) for r in rows}
    if not (classes_in_image & rare_classes):
        continue

    # Load image: same stem as the label file, .jpg first, .png as fallback
    base = os.path.splitext(os.path.basename(label_path))[0]
    img_path = os.path.join(images_dir, base + ".jpg")
    if not os.path.exists(img_path):
        img_path = os.path.join(images_dir, base + ".png")
    img = cv2.imread(img_path)
    if img is None:
        # tqdm.write keeps the progress bar intact while reporting the skip
        tqdm.write(f"Skipping {label_path}: no readable .jpg/.png image found")
        continue

    # Prepare bboxes + labels (each row must be: class x_center y_center w h)
    bboxes = []
    class_labels = []
    for r in rows:
        cls, x, y, w, h = r
        box = _clip_yolo_box(float(x), float(y), float(w), float(h))
        if box is None:
            # Nothing of this box lies inside the image; a zero-area box is
            # not a usable annotation for the transform.
            continue
        bboxes.append(box)
        class_labels.append(int(cls))

    # Dropping degenerate boxes may have removed every rare-class annotation.
    if not (set(class_labels) & rare_classes):
        continue

    # Generate augmented samples
    for i in range(AUGS_PER_IMAGE):
        augmented = transform(image=img, bboxes=bboxes, class_labels=class_labels)

        aug_stem = f"{base}_aug{i}"
        aug_img_path = os.path.join(out_images_dir, aug_stem + ".jpg")

        # Save augmented image. cv2.imwrite reports failure through its return
        # value, so check it to avoid writing a label file with no image.
        if not cv2.imwrite(aug_img_path, augmented["image"]):
            tqdm.write(f"Could not write {aug_img_path}; skipping its label file")
            continue

        # Save augmented labels
        with open(os.path.join(out_labels_dir, aug_stem + ".txt"), "w") as f:
            for cls, bbox in zip(augmented["class_labels"], augmented["bboxes"]):
                x, y, w, h = bbox[:4]
                f.write(f"{int(cls)} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")

print(f"Augmented images saved to {out_images_dir}")
print(f"Augmented labels saved to {out_labels_dir}")

Emptying directory: YOLO11_model\datasetv3\train\images_aug
Emptying directory: YOLO11_model\datasetv3\train\labels_aug
Copying originals...
Original images and labels copied.
Class counts: {'Eriocaulon Septangulare': 784, 'P. Amplifolius': 5728, 'M. Spicatum': 174, 'P. Praelongus': 675, 'Algae': 1189, 'Unknown Plant': 307, 'Vallisneria Americana': 157, 'P. Robbinsii': 1507, 'P. Perfoliatus': 255, 'Snail': 50, 'Leaves': 29, 'Clam Shell': 960, 'P. Zosteriformis': 367, 'Najas': 96, 'M. Tenellum': 24, 'Elodea Canadensis': 197, 'P. Gramineus': 154, 'I. Lacustris': 1}
Rare classes: ['Elodea Canadensis', 'I. Lacustris', 'Leaves', 'M. Spicatum', 'M. Tenellum', 'Najas', 'P. Gramineus', 'Snail', 'Vallisneria Americana']


100%|██████████████████████████████████████████████████████████████████████████████| 1840/1840 [00:24<00:00, 74.15it/s]

Augmented images saved to YOLO11_model\datasetv3\train\images_aug
Augmented labels saved to YOLO11_model\datasetv3\train\labels_aug



