# In [None]:
import xml.etree.ElementTree as ET
from PIL import Image
import os

def find_folders_with_keyword(base_dir, keyword):
    """Return the paths of base_dir's immediate subdirectories whose name contains keyword.

    Entries are visited in the order os.listdir reports them; regular files
    are ignored even when their name matches.
    """
    matches = []
    for entry in os.listdir(base_dir):
        # Cheap substring test first, so the filesystem is only queried for name matches.
        if keyword not in entry:
            continue
        candidate = os.path.join(base_dir, entry)
        if os.path.isdir(candidate):
            matches.append(candidate)
    return matches

def parse_annotation(xml_file):
    """Parse the XML file to get bounding boxes.

    Every ``object`` element found anywhere in the document contributes one
    box, read from the ``xmin``/``ymin``/``xmax``/``ymax`` children of its
    ``bndbox`` descendant.

    Coordinates written as decimals (e.g. ``"48.0"``) are accepted and
    truncated toward zero, so such files no longer abort parsing with a
    ``ValueError``.

    Args:
        xml_file: Path (or file object) accepted by ``ET.parse``.

    Returns:
        A list of ``(xmin, ymin, xmax, ymax)`` integer tuples in document order;
        empty when the file contains no ``object`` elements.
    """
    def _coord(member, tag):
        # Read one bndbox coordinate of an object element as an int.
        text = member.find(f'.//bndbox/{tag}').text
        try:
            return int(text)
        except ValueError:
            # int() rejects "48.0"; go through float for decimal coordinates.
            return int(float(text))

    root = ET.parse(xml_file).getroot()
    return [
        tuple(_coord(member, tag) for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
        for member in root.findall('.//object')
    ]

def crop_and_save(image_path, boxes, save_dir, image_name):
    """Crop the image based on bounding boxes and save them.

    Each box produces one file named ``{image_name}_object{i}.jpg`` inside
    ``save_dir``, with ``i`` counting from 1 in the order the boxes are given.

    Args:
        image_path: Path of the source image to open.
        boxes: Iterable of ``(xmin, ymin, xmax, ymax)`` pixel boxes.
        save_dir: Directory the crops are written to (must already exist).
        image_name: Base name used as the prefix of every output file.
    """
    # Modes Pillow's JPEG writer can encode directly. Anything else (for
    # example RGBA, LA or P images loaded from PNG files) makes save() raise
    # OSError, so those crops are converted to RGB first.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    # The context manager releases the underlying file handle once all crops
    # are written, instead of leaving it open for every processed image.
    with Image.open(image_path) as image:
        for i, (xmin, ymin, xmax, ymax) in enumerate(boxes, start=1):
            cropped_image = image.crop((xmin, ymin, xmax, ymax))
            if cropped_image.mode not in jpeg_modes:
                cropped_image = cropped_image.convert("RGB")
            save_path = os.path.join(save_dir, f"{image_name}_object{i}.jpg")
            cropped_image.save(save_path)
            print(f"Saved: {save_path}")

# Input and output locations, relative to the current working directory
raw_images_base_dir = 'raw-images'
annotations_base_dir = 'annotations'
cropped_images_dir = 'cropped-images-visual'

# Make sure the output directory is present before any crop is written
os.makedirs(cropped_images_dir, exist_ok=True)

# Walk every 'rgb' folder of raw images; the matching annotations live in a
# folder of the same name under annotations_base_dir.
for raw_folder in find_folders_with_keyword(raw_images_base_dir, 'rgb'):
    folder_name = os.path.basename(raw_folder)
    annotations_folder = os.path.join(annotations_base_dir, folder_name)
    for image_name in os.listdir(raw_folder):
        base_name, ext = os.path.splitext(image_name)
        if ext.lower() in ('.jpg', '.png'):
            image_path = os.path.join(raw_folder, image_name)
            annotation_path = os.path.join(annotations_folder, base_name + '.xml')
            # Images without an XML sidecar are reported and left untouched.
            if not os.path.exists(annotation_path):
                print(f"Annotation file does not exist for {image_name}, skipping...")
                continue
            boxes = parse_annotation(annotation_path)
            crop_and_save(image_path, boxes, cropped_images_dir, base_name)
