In [6]:
# Listing - augmentation - 
# Merge cropped files back to the 'parent' file based on annotations

#import cv2
import os
import shutil
from PIL import Image, ImageDraw
import cv2
from pathlib import Path
import re  #  regular expression 

# --- Folder configuration ---
# Parent (original) images together with their annotation files
origin_folder = r'D:\FlagDetectionDatasets\ExportedDatasetsReduced\Augmentation\FlagsWithYearOCR'
# Augmented crops that get pasted back into the parent images
augmented_folder = r"D:\FlagDetectionDatasets\ExportedDatasetsReduced\Augmentation\FlagsWithYearOCR\OCRNewYear"
# Output folder: merged images plus a copy of the parent annotation per image
merged_folder = r"D:\FlagDetectionDatasets\ExportedDatasetsReduced\Augmentation\FlagsWithYearOCR\OCRNewYearMergedBack"
# File extension of the annotation (label) files
annotation_extension = ".txt"

# Ensure merged folder exists (exist_ok makes re-running the cell safe)
os.makedirs(merged_folder, exist_ok=True)

# Helper to convert JPEG to PNG
def convert_to_png(image_path):
    """Re-encode the image at ``image_path`` as PNG and delete the source file.

    The PNG is written next to the source, under the same name with a
    ``.png`` extension.

    Args:
        image_path: Path of the image file to convert.

    Returns:
        Path of the PNG file. If ``image_path`` already carries a ``.png``
        extension (any letter case) nothing is done and it is returned
        unchanged.

    Raises:
        OSError: If the source cannot be decoded or the PNG cannot be
            written. The source file is left in place in both cases.
    """
    base, ext = os.path.splitext(image_path)
    if ext.lower() == ".png":
        # Source and target would be the same file (always on
        # case-insensitive file systems); write-then-remove would delete it.
        return image_path

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports an unreadable file by returning None, not by raising.
        raise OSError(f"Could not read image: {image_path}")

    png_path = base + ".png"
    # cv2.imwrite reports failure through its return value. Only remove the
    # source once the PNG is known to have been written, otherwise the image
    # would be lost.
    if not cv2.imwrite(png_path, img):
        raise OSError(f"Could not write image: {png_path}")

    os.remove(image_path)
    return png_path

# Load and sort all augmented files
augmented_files = sorted(os.listdir(augmented_folder))

# Only these extensions are treated as images. Everything else found in the
# folder (label files, sub-directories, ...) is ignored, so it cannot end up
# in a group and later come back as an unreadable image.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff", ".webp")
JPEG_EXTENSIONS = (".jpg", ".jpeg")

# Group augmented files by their origin base name
# (maps e.g. "frame_000000" -> list of augmented image paths, in sorted order)
grouped_files = {}
seen_paths = set()
for augmented_file in augmented_files:
    augmented_path = os.path.join(augmented_folder, augmented_file)

    # Skip directories and entries that vanished since the listing was taken
    if not os.path.isfile(augmented_path):
        continue
    if not augmented_file.lower().endswith(IMAGE_EXTENSIONS):
        continue

    # Convert JPEG to PNG if necessary (this replaces the file on disk)
    if augmented_file.lower().endswith(JPEG_EXTENSIONS):
        augmented_path = convert_to_png(augmented_path)
        augmented_file = os.path.basename(augmented_path)

    # Extract origin filename: the leading "frame_<digits>" part
    match = re.match(r"(frame_\d+)", augmented_file)
    if not match:
        continue  # Skip files not matching the naming pattern
    origin_base = match.group(1)

    # A JPEG converted onto an already existing PNG of the same name shows up
    # a second time later in the listing; keep each path only once.
    if augmented_path in seen_paths:
        continue
    seen_paths.add(augmented_path)

    # Add to grouped files
    grouped_files.setdefault(origin_base, []).append(augmented_path)

# Process each group of augmented files

# Class id (first token of an annotation line) of the blue flag class
BLUE_FLAG_CLASS_ID = "2"


def _read_class_boxes(annotation_path, class_id):
    """Return the numeric fields of every annotation line of class ``class_id``.

    Each returned entry is the list of floats that follow the class id on the
    line; the merge step expects them to be x_center, y_center, width, height.
    """
    boxes = []
    with open(annotation_path, "r") as file:
        for line in file:
            fields = line.split()
            # Compare the whole first token: a prefix test on the line would
            # also accept other ids that merely start with the same digit
            # (e.g. "20" or "21" for class "2").
            if fields and fields[0] == class_id:
                boxes.append(list(map(float, fields[1:])))
    return boxes


def _paste_into_box(canvas, patch, box):
    """Resize ``patch`` to the pixel size of ``box`` and write it into ``canvas``.

    ``box`` is (x_center, y_center, width, height), each a fraction of the
    canvas width/height. ``canvas`` is modified in place. Any part of the box
    lying outside the canvas is dropped. Returns True if pixels were written.
    """
    img_h, img_w = canvas.shape[:2]
    x_center, y_center, box_width, box_height = box
    x1 = int((x_center - box_width / 2) * img_w)
    y1 = int((y_center - box_height / 2) * img_h)
    x2 = int((x_center + box_width / 2) * img_w)
    y2 = int((y_center + box_height / 2) * img_h)
    if x2 <= x1 or y2 <= y1:
        return False  # degenerate box; cv2.resize rejects an empty target size

    resized = cv2.resize(patch, (x2 - x1, y2 - y1))

    # Clip to the canvas. Without this, a box reaching past the border gives a
    # slice smaller than the patch (assignment fails), and a negative start
    # index would be interpreted as counting from the far edge.
    left, top = max(x1, 0), max(y1, 0)
    right, bottom = min(x2, img_w), min(y2, img_h)
    if right <= left or bottom <= top:
        return False  # box lies completely outside the image

    canvas[top:bottom, left:right] = resized[top - y1:bottom - y1, left - x1:right - x1]
    return True


for origin_base, augmented_paths in grouped_files.items():
    origin_file = f"{origin_base}.PNG"
    origin_path = os.path.join(origin_folder, origin_file)

    # Check if the origin file exists
    if not os.path.exists(origin_path):
        print(f"Origin file {origin_file} not found. Skipping group {origin_base}...")
        continue

    # Load origin image (cv2.imread returns None when the file cannot be decoded)
    origin_img = cv2.imread(origin_path)
    if origin_img is None:
        print(f"Origin file {origin_file} could not be read. Skipping group {origin_base}...")
        continue

    # Find bounding box for class 2 (blue flag class) in annotation file
    annotation_file = os.path.join(origin_folder, f"{origin_base}{annotation_extension}")
    if not os.path.exists(annotation_file):
        print(f"Annotation file {annotation_file} not found. Skipping group {origin_base}...")
        continue

    # Parse annotation file to get class 2 bounding boxes
    blue_flag_boxes = _read_class_boxes(annotation_file, BLUE_FLAG_CLASS_ID)

    if not blue_flag_boxes:
        print(f"No blue flag annotations found for {origin_file}. Skipping group {origin_base}...")
        continue

    # Merge each augmented file into the origin file
    for idx, augmented_path in enumerate(augmented_paths):
        augmented_img = cv2.imread(augmented_path)
        if augmented_img is None:
            print(f"Augmented file {augmented_path} could not be read. Skipping...")
            continue

        # Work on a copy so every merged image starts from the untouched origin
        merged_img = origin_img.copy()

        # The same augmented image is pasted into every blue flag box
        for box in blue_flag_boxes:
            _paste_into_box(merged_img, augmented_img, box)

        # Determine the merged filename: the first result keeps the origin
        # name, later ones get the position in the group as a suffix
        if idx == 0:
            merged_filename = f"{origin_base}.PNG"
        else:
            merged_filename = f"{origin_base}_{idx}.PNG"

        # Save the merged file; cv2.imwrite signals failure via its return value
        merged_path = os.path.join(merged_folder, merged_filename)
        if not cv2.imwrite(merged_path, merged_img):
            print(f"Failed to write {merged_path}. Skipping...")
            continue

        # Copy and rename annotation file so it matches the merged image name
        new_annotation_path = os.path.join(
            merged_folder, f"{os.path.splitext(merged_filename)[0]}{annotation_extension}"
        )
        shutil.copy(annotation_file, new_annotation_path)

        print(f"Merged and saved: {merged_path}")
        print(f"Copied annotation to: {new_annotation_path}")


Merged and saved: D:\FlagDetectionDatasets\ExportedDatasetsReduced\Augmentation\FlagsWithYearOCR\OCRNewYearMergedBack\frame_000000.PNG
Copied annotation to: D:\FlagDetectionDatasets\ExportedDatasetsReduced\Augmentation\FlagsWithYearOCR\OCRNewYearMergedBack\frame_000000.txt
Merged and saved: D:\FlagDetectionDatasets\ExportedDatasetsReduced\Augmentation\FlagsWithYearOCR\OCRNewYearMergedBack\frame_000000_1.PNG
Copied annotation to: D:\FlagDetectionDatasets\ExportedDatasetsReduced\Augmentation\FlagsWithYearOCR\OCRNewYearMergedBack\frame_000000_1.txt
Merged and saved: D:\FlagDetectionDatasets\ExportedDatasetsReduced\Augmentation\FlagsWithYearOCR\OCRNewYearMergedBack\frame_000000_2.PNG
Copied annotation to: D:\FlagDetectionDatasets\ExportedDatasetsReduced\Augmentation\FlagsWithYearOCR\OCRNewYearMergedBack\frame_000000_2.txt
Merged and saved: D:\FlagDetectionDatasets\ExportedDatasetsReduced\Augmentation\FlagsWithYearOCR\OCRNewYearMergedBack\frame_000000_3.PNG
Copied annotation to: D:\FlagDete