<a href="https://colab.research.google.com/github/MGJamJam/calamari_kurrent_model/blob/main/ColabNotebooks/LineSegmentationPageXML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import os
from PIL import Image, ImageDraw
import numpy as np
import xml.etree.ElementTree as ET

def parse_points(points_str):
    """Convert a PAGE XML ``points`` string into a list of ``(x, y)`` int pairs.

    The string is split on whitespace into points and every point is split on
    a comma into its two integer coordinates, so ``"1,2 3,4"`` becomes
    ``[(1, 2), (3, 4)]``.
    """
    pairs = (token.split(',') for token in points_str.split())
    return [(int(x_text), int(y_text)) for x_text, y_text in pairs]

def draw_rectangle(image, points):
    """Outline the axis-aligned bounding box of ``points`` on ``image`` in red.

    The image is drawn on in place; nothing is returned.
    """
    pen = ImageDraw.Draw(image)
    # Split the (x, y) pairs into their horizontal and vertical components
    xs = [px for px, _ in points]
    ys = [py for _, py in points]
    left, right = min(xs), max(xs)
    top, bottom = min(ys), max(ys)
    pen.rectangle([left, top, right, bottom], outline="red", width=3)

def crop_polygon(image, polygon):
    """Cut the region enclosed by ``polygon`` out of ``image``.

    Args:
        image: PIL image; it is not modified. Images without an alpha channel
            (e.g. RGB or L) are accepted and converted to RGBA.
        polygon: Sequence of ``(x, y)`` vertices in image pixel coordinates.

    Returns:
        A new RGBA image cropped to the bounding box of the polygon. Pixels
        inside the polygon (outline included) are fully opaque, the remaining
        pixels of the bounding box are fully transparent. If the polygon
        covers no pixel of the image, the full-size image is returned fully
        transparent.
    """
    # Mask is 255 inside the polygon and on its outline, 0 everywhere else
    mask = Image.new('L', image.size, 0)
    ImageDraw.Draw(mask).polygon(polygon, outline=255, fill=255)

    # Take the bounding box from the mask itself so that it depends only on
    # the polygon and never on the colour channels of the page image.
    bbox = mask.getbbox()
    region, region_mask = image, mask
    if bbox:
        # Crop before converting so only the line region is copied, not the
        # whole page for every text line.
        region = image.crop(bbox)
        region_mask = mask.crop(bbox)

    # convert() returns a new image, so the caller's image stays untouched
    result = region.convert('RGBA')
    result.putalpha(region_mask)
    return result


def process_pagexml_draw_polygons(xml_path, output_dir):
    """Draw a red box around every TextLine of a PAGE XML file and save the result.

    The page image is the file named by the ``imageFilename`` attribute of the
    ``Page`` element and is looked up in the directory of ``xml_path``. The
    annotated copy is written to ``output_dir`` as ``annotated_<image name>``.

    Args:
        xml_path: Path to the PAGE XML file.
        output_dir: Directory that receives the annotated image.

    Raises:
        ValueError: If the document contains no ``Page`` element.
    """
    # Parse the PAGE XML file
    root = ET.parse(xml_path).getroot()

    # ElementTree spells namespaced tags as "{uri}local". Reuse the root's
    # "{uri}" prefix for all lookups, or no prefix at all when the document
    # does not declare a namespace.
    ns = root.tag[:root.tag.index('}') + 1] if root.tag.startswith('{') else ''

    # Find the Page element and get the image filename
    page_elem = root.find(f'.//{ns}Page')
    if page_elem is None:
        raise ValueError(f"No Page element found in {xml_path}")
    image_filename = page_elem.attrib['imageFilename']

    # Load the corresponding image (assumed to sit next to the XML file);
    # the context manager releases the file handle once the pixels are copied
    image_path = os.path.join(os.path.dirname(xml_path), image_filename)
    with Image.open(image_path) as source:
        image = source.convert("RGBA")

    # Iterate through all TextLine elements
    for textline in root.findall(f'.//{ns}TextLine'):
        textline_id = textline.attrib.get('id', None)

        # Log the ID for debugging
        print(f"Processing TextLine with ID: {textline_id}")

        # Get the coordinates for the TextLine; skip lines that carry none
        coords_elem = textline.find(f'.//{ns}Coords')
        if coords_elem is None or 'points' not in coords_elem.attrib:
            print(f"Warning: No Coords points found for TextLine {textline_id}. Skipping this element.")
            continue
        points = parse_points(coords_elem.attrib['points'])

        # Draw a rectangle around the TextLine
        draw_rectangle(image, points)

    # Save the image with the drawn rectangles
    output_image_path = os.path.join(output_dir, f"annotated_{os.path.basename(image_path)}")
    image.save(output_image_path)
    print(f"Annotated image saved as: {output_image_path}")

def process_pagexml_crop_polygons(xml_path, output_dir):
    """Save every TextLine of a PAGE XML file as its own cropped PNG image.

    The page image is the file named by the ``imageFilename`` attribute of the
    ``Page`` element and is looked up in the directory of ``xml_path``. Each
    TextLine that has an ``id`` is cut out along its ``Coords`` polygon and
    written to ``output_dir`` as ``<image name without extension>_<id>.png``.
    TextLines without an ``id`` or without coordinates are skipped with a
    warning.

    Args:
        xml_path: Path to the PAGE XML file.
        output_dir: Directory that receives the cropped line images.

    Raises:
        ValueError: If the document contains no ``Page`` element.
    """
    # Parse the PAGE XML file
    root = ET.parse(xml_path).getroot()

    # ElementTree spells namespaced tags as "{uri}local". Reuse the root's
    # "{uri}" prefix for all lookups, or no prefix at all when the document
    # does not declare a namespace.
    ns = root.tag[:root.tag.index('}') + 1] if root.tag.startswith('{') else ''

    # Find the Page element and get the image filename
    page_elem = root.find(f'.//{ns}Page')
    if page_elem is None:
        raise ValueError(f"No Page element found in {xml_path}")
    image_filename = page_elem.attrib['imageFilename']

    # str.strip('.png') would remove any of the characters '.', 'p', 'n', 'g'
    # from BOTH ends of the name (e.g. "page.png" -> "age"); splitext removes
    # exactly the extension.
    image_stem = os.path.splitext(image_filename)[0]

    # Load the corresponding image (assumed to sit next to the XML file);
    # the context manager releases the file handle once the pixels are copied
    image_path = os.path.join(os.path.dirname(xml_path), image_filename)
    with Image.open(image_path) as source:
        image = source.convert("RGBA")

    # Iterate through all TextLine elements
    for textline in root.findall(f'.//{ns}TextLine'):
        textline_id = textline.attrib.get('id', None)

        # Log the ID for debugging
        print(f"Processing TextLine with ID: {textline_id}")

        # The ID is part of the output filename, so lines without one cannot
        # be saved; check before doing the cropping work.
        if textline_id is None:
            print("Warning: No ID found for TextLine. Skipping this element.")
            continue

        # Get the coordinates for the TextLine; skip lines that carry none
        coords_elem = textline.find(f'.//{ns}Coords')
        if coords_elem is None or 'points' not in coords_elem.attrib:
            print(f"Warning: No Coords points found for TextLine {textline_id}. Skipping this element.")
            continue
        points = parse_points(coords_elem.attrib['points'])

        # Crop the image to the polygon defined in the coords of the TextLine
        cropped_image = crop_polygon(image, points)

        # New files are saved as <filename_textline_id>.png
        output_path = os.path.join(output_dir, f"{image_stem}_{textline_id}.png")
        cropped_image.save(output_path)
        print(f"Saved: {output_path}")

In [24]:
# Input PAGE XML document and the directory that receives the generated images
xml_path = "bayerische-gesandtschaft-paepstlicher-stuhl-180-1824.xml"
output_dir = "."

# Create the output directory if it is missing
os.makedirs(output_dir, exist_ok=True)

# Run both passes over the PAGE XML: first the annotated overview image with
# a rectangle per TextLine, then one cropped image per TextLine
for pagexml_step in (process_pagexml_draw_polygons, process_pagexml_crop_polygons):
    pagexml_step(xml_path, output_dir)

Processing TextLine with ID: l
Processing TextLine with ID: tr_1_tl_1
Processing TextLine with ID: tr_1_tl_2
Processing TextLine with ID: tr_1_tl_3
Processing TextLine with ID: tr_1_tl_4
Processing TextLine with ID: tr_1_tl_5
Processing TextLine with ID: tr_1_tl_6
Processing TextLine with ID: tr_1_tl_7
Processing TextLine with ID: tr_1_tl_8
Processing TextLine with ID: tr_1_tl_9
Processing TextLine with ID: tr_1_tl_10
Processing TextLine with ID: tr_1_tl_11
Processing TextLine with ID: tr_1_tl_12
Processing TextLine with ID: tr_1_tl_13
Processing TextLine with ID: tr_1_tl_14
Processing TextLine with ID: tr_1_tl_15
Processing TextLine with ID: tr_1_tl_16
Processing TextLine with ID: tr_1_tl_17
Processing TextLine with ID: tr_1_tl_18
Processing TextLine with ID: tr_1_tl_19
Processing TextLine with ID: tr_1_tl_20
Processing TextLine with ID: tr_1_tl_21
Processing TextLine with ID: tr_1_tl_22
Annotated image saved as: ./annotated_bayerische-gesandtschaft-paepstlicher-stuhl-180-1824.png
Pro