### Polygon JSON Visualisation

In [2]:

#!/usr/bin/env python3
"""
Polygon Visualiser
Author: Your Name
Description:
    Reads a polygon JSON file and an image.
    Draws bounding boxes for each cell based on polygon points.
    Annotates each box with its row and column IDs.
"""

import os
import json
import cv2
import numpy as np


class PolygonVisualizer:
    """
    Load cell polygons from a JSON file and draw them on an image.

    Instance fields:
        polygon_path: path of the polygon JSON file.
        image_path: path of the image the polygons are drawn on.
        cells: list of cell entries filled in by ``load_polygon_data``.
    """

    def __init__(self, polygon_path: str, image_path: str):
        self.polygon_path = polygon_path
        self.image_path = image_path
        self.cells = []

    def load_polygon_data(self):
        """
        Load polygon data from the JSON file into ``self.cells``.

        The top-level JSON value must be a list. ``draw_visualisation``
        later reads the "points", "row" and "col" keys of every entry.

        Raises:
            FileNotFoundError: if the polygon file does not exist.
            ValueError: if the file is not valid JSON (the original
                ``json.JSONDecodeError`` is attached as ``__cause__``) or
                if the top-level value is not a list.
        """
        if not os.path.exists(self.polygon_path):
            raise FileNotFoundError(f"Polygon file not found: {self.polygon_path}")

        with open(self.polygon_path, "r", encoding="utf-8") as file:
            # Keep the try body to the single call that can fail to decode.
            try:
                data = json.load(file)
            except json.JSONDecodeError as e:
                raise ValueError(f"Invalid JSON format: {e}") from e

        if not isinstance(data, list):
            raise ValueError("Polygon JSON must be a list of objects.")
        self.cells = data

    def draw_visualisation(self):
        """
        Draw every cell polygon and its "r:<row> c:<col>" label on the image.

        Cells with fewer than four points (including a missing or null
        "points" value) are skipped.

        Returns:
            The image loaded by ``cv2.imread`` with the polygons and labels
            drawn onto it.

        Raises:
            FileNotFoundError: if the image file does not exist.
            ValueError: if OpenCV cannot decode the image.
        """
        if not os.path.exists(self.image_path):
            raise FileNotFoundError(f"Image file not found: {self.image_path}")

        image = cv2.imread(self.image_path)
        if image is None:
            raise ValueError("Failed to load image. Check file format and path.")

        for cell in self.cells:
            # "points" may be absent or explicitly null in the JSON.
            points = cell.get("points") or []
            if len(points) < 4:
                continue

            # Convert points to the (N, 1, 2) int32 layout used for drawing.
            pts = np.array(points, dtype=np.int32).reshape((-1, 1, 2))
            cv2.polylines(image, [pts], True, (0, 255, 0), 2)

            # Anchor the label at the first polygon point. Take the already
            # truncated integer coordinates so float values in the JSON do
            # not reach putText, which needs an integer origin.
            x, y = (int(v) for v in pts[0, 0])
            label = f"r:{cell.get('row')} c:{cell.get('col')}"
            cv2.putText(image, label, (x + 10, y + 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 2)

        return image


def main():
    """
    Run the polygon visualisation demo on the example files.

    Loads the example polygon JSON, draws it on the example image and
    writes the result to ``polygon_visualisation_output.jpg`` in the
    current working directory. Any failure is reported on stdout instead
    of being propagated.

    Example usage:
        python polygon_visualiser.py
    """
    print("Starting polygon visualisation demo...\n")

    # Example files (replace with your paths)
    polygon_file = "../data/labels/polygons/NL-HaNA_2.10.50_45_0110.jpg.polygons.json"  # JSON file with polygon data
    image_file = "../data/images/NL-HaNA_2.10.50_45_0110.jpg"  # Image file to draw on

    try:
        visualizer = PolygonVisualizer(polygon_file, image_file)
        visualizer.load_polygon_data()
        result_image = visualizer.draw_visualisation()

        # Optional interactive display (disabled):
        # cv2.imshow("Polygon Visualisation", result_image)
        # cv2.waitKey(0)
        # cv2.destroyAllWindows()

        # Save the output. cv2.imwrite signals failure through its return
        # value, so check it before claiming success.
        output_path = "polygon_visualisation_output.jpg"
        if not cv2.imwrite(output_path, result_image):
            raise OSError(f"Could not write output image: {output_path}")
        print(f"✅ Visualisation saved to {output_path}")

    except Exception as e:
        # Top-level boundary of the demo: report the problem and return.
        error_type = type(e).__name__
        print("❌ [ERROR] Failed to process polygon visualisation.")
        print(f"   → Error Type: {error_type}")
        print(f"   → Details: {e}")
        print("   → Suggested Fix: Check file paths and JSON structure.\n")

# Run the demo only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


Starting polygon visualisation demo...

✅ Visualisation saved to polygon_visualisation_output.jpg


### PageXML Visualisation

In [5]:

#!/usr/bin/env python3
"""
PageXML Visualiser
Author: Your Name
Description:
    Reads a PageXML file and its associated image.
    Draws bounding boxes for each TableCell based on Coords points.
    Annotates each box with row, col, rowSpan, and colSpan.
"""

import os
import cv2
import numpy as np
import xml.etree.ElementTree as ET


class PageXMLVisualizer:
    """
    Parse the TableCell elements of a PageXML file and draw them on an image.

    Instance fields:
        pagexml_path: path of the PageXML file.
        image_path: path of the image the cells are drawn on.
        cells: list of dicts with the keys "points", "row", "col",
            "rowSpan" and "colSpan", filled in by ``parse_pagexml``.
    """

    def __init__(self, pagexml_path: str, image_path: str):
        self.pagexml_path = pagexml_path
        self.image_path = image_path
        self.cells = []

    @staticmethod
    def _local_name(element) -> str:
        """Return the element's tag without its ``{namespace}`` prefix."""
        tag = element.tag
        if not isinstance(tag, str):
            # Comment / processing-instruction nodes have a non-string tag.
            return ""
        return tag.rsplit("}", 1)[-1]

    @staticmethod
    def _parse_points(coords_points: str) -> list:
        """Turn an "x1,y1 x2,y2 ..." string into a list of (int, int) tuples."""
        return [
            (int(float(x)), int(float(y)))
            for x, y in (pt.split(",") for pt in coords_points.split())
        ]

    def parse_pagexml(self):
        """
        Parse the PageXML file and store its TableCell data in ``self.cells``.

        Elements are matched by local tag name, so the file is read
        regardless of which PAGE schema version (namespace URI) it declares.
        Each call replaces ``self.cells``; calling it twice does not
        duplicate cells. A cell without a ``Coords`` child, or whose
        ``Coords`` has no ``points`` attribute, gets an empty point list.
        Missing row/col/rowSpan/colSpan attributes become "".

        Raises:
            FileNotFoundError: if the PageXML file does not exist.
            xml.etree.ElementTree.ParseError: if the file is not
                well-formed XML.
            ValueError: if a point is not of the form "x,y" with numeric
                parts.
        """
        if not os.path.exists(self.pagexml_path):
            raise FileNotFoundError(f"PageXML file not found: {self.pagexml_path}")

        root = ET.parse(self.pagexml_path).getroot()

        cells = []
        for cell in root.iter():
            if self._local_name(cell) != "TableCell":
                continue

            # First direct Coords child, whatever namespace it lives in.
            coords_elem = next(
                (child for child in cell if self._local_name(child) == "Coords"),
                None,
            )
            # Default to "" so a Coords element without a points attribute
            # yields no points instead of failing on None.
            coords_points = coords_elem.get("points", "") if coords_elem is not None else ""

            cells.append({
                "points": self._parse_points(coords_points),
                "row": cell.get("row", ""),
                "col": cell.get("col", ""),
                "rowSpan": cell.get("rowSpan", ""),
                "colSpan": cell.get("colSpan", ""),
            })

        self.cells = cells

    def draw_visualisation(self):
        """
        Draw every parsed cell polygon and its attribute label on the image.

        Cells with fewer than four points are skipped. The label has the
        form "r:<row> c:<col> rs:<rowSpan> cs:<colSpan>" and is placed near
        the first polygon point.

        Returns:
            The image loaded by ``cv2.imread`` with the polygons and labels
            drawn onto it.

        Raises:
            FileNotFoundError: if the image file does not exist.
            ValueError: if OpenCV cannot decode the image.
        """
        if not os.path.exists(self.image_path):
            raise FileNotFoundError(f"Image file not found: {self.image_path}")

        image = cv2.imread(self.image_path)
        if image is None:
            raise ValueError("Failed to load image. Check file format and path.")

        for cell in self.cells:
            points = cell["points"]
            if len(points) < 4:
                continue

            # Convert points to the (N, 1, 2) int32 layout used for drawing.
            pts = np.array(points, dtype=np.int32).reshape((-1, 1, 2))
            cv2.polylines(image, [pts], True, (0, 255, 0), 2)

            # Anchor the label at the first polygon point.
            x, y = points[0]
            label = f"r:{cell['row']} c:{cell['col']} rs:{cell['rowSpan']} cs:{cell['colSpan']}"
            cv2.putText(image, label, (x + 5, y + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)

        return image

In [6]:
# Example files (replace with your paths)
image_filename = "../data/images/NL-HaNA_2.10.50_45_0110.jpg"
pagexml_file = "../data/tables/pagexml/NL-HaNA_2.10.50_45_0110.jpg.xml"

# Parse the table cells and draw them on the page image.
visualizer = PageXMLVisualizer(pagexml_file, image_filename)
visualizer.parse_pagexml()
result_image = visualizer.draw_visualisation()

# Optional interactive display (disabled):
# cv2.imshow("PageXML Visualisation", result_image)
# cv2.waitKey()
# cv2.destroyAllWindows()

# Save the visualisation.
# NOTE(review): this is the same file name the polygon demo's main() writes,
# so running both overwrites one result with the other — consider a
# PageXML-specific name.
output_path = "polygon_visualisation_output.jpg"
# cv2.imwrite signals failure through its return value; do not report
# success unless the file was actually written.
if not cv2.imwrite(output_path, result_image):
    raise OSError(f"Could not write output image: {output_path}")
print(f"✅ Visualisation saved to {output_path}")

✅ Visualisation saved to polygon_visualisation_output.jpg
