In [1]:
# Set project root
import os
import sys

# The project root is the parent of the current working directory;
# register it once so that `src.*` packages become importable.
project_root = os.path.abspath(os.pardir)
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

In [2]:
import json
from pathlib import Path
from typing import Callable

import matplotlib.pyplot as plt
import numpy as np

from src.geometry import (
    compute_distance_from_homography,
    derive_metric_homography,
    recover_pose_from_homography_v2,
)
from src.matching import template_match
from src.measurement_data import Template, load_measurements_from_yaml
from src.utils import load_calibration_json

In [3]:
class WebVisualizer:
    """
    A lightweight visualizer for 3D poses of planar templates and camera position,
    generating an HTML file with Three.js for interactive viewing.
    """

    def __init__(self) -> None:
        """
        Initialize the web visualizer.

        Args:
            template_size (tuple): (width, height) of each planar template in world units.
        """

        self.meshes = []       # List of mesh dicts for templates
        self.lines = []        # List of line-set dicts for camera frustum & axes

    def create_template_quad(
        self,
        R: np.ndarray,
        t: np.ndarray,
        width: float,
        height: float,
        color: tuple[float, float, float] = (0.7, 0.7, 0.9),
        texture_path: str | None = None
    ) -> dict:
        """
        Create a mesh dict representing a template quad at pose (R, t).

        Args:
            R (np.ndarray): 3x3 rotation matrix (camera to template).
            t (np.ndarray): 3-element translation vector.
            width (float): Width of the template in world units.
            height (float): Height of the template in world units.
            color (tuple): RGB tuple for face color.
            texture_path (str): Optional path to texture image.
            
        Returns:
            Dict with 'vertices', 'triangles', 'color', and optional 'texture'.
        """
        # Local quad vertices centered at origin
        local = np.array([
            [-width / 2, -height / 2, 0],
            [ width / 2, -height / 2, 0],
            [ width / 2,  height / 2, 0],
            [-width / 2,  height / 2, 0]
        ])  # shape (4,3)

        # Transform to world coords
        verts = (R @ local.T).T + np.asarray(t).reshape(1, 3)

        mesh = {
            'name': 'template',
            'vertices': verts.tolist(),
            'triangles': [[0,1,2], [0,2,3]],
            'color': list(color)
        }
        if texture_path:
            mesh['texture'] = texture_path

        return mesh

    def create_camera_frustum(self, focal_length: float = 1.0, image_size: tuple = (640, 480), scale: float = 0.3, color: tuple = (1.0, 0.0, 0.0)):
        """
        Create a line-set dict representing the camera frustum.

        Args:
            focal_length (float): focal length in pixels.
            image_size (tuple): (width, height) in pixels.
            scale (float): overall scale for near/far planes.
            color (tuple): RGB tuple for line color.

        Returns:
            Dict with 'points', 'lines', 'color'.
        """
        w, h = image_size

        # Define near and far planes based on focal length and scale
        near, far = 0.1 * scale, 2.0 * scale
        near_x = near * (w / 2) / focal_length
        near_y = near * (h / 2) / focal_length
        far_x  = far * (w / 2) / focal_length
        far_y  = far * (h / 2) / focal_length

        # Define frustum points and connecting lines
        points = [
            [0, 0, 0],
            [-near_x, -near_y, -near], [near_x, -near_y, -near],
            [near_x, near_y, -near], [-near_x, near_y, -near],
            [-far_x, -far_y, -far], [far_x, -far_y, -far],
            [far_x, far_y, -far], [ -far_x, far_y, -far]
        ]

        lines = [
            [0, 1], [0, 2], [0, 3], [0, 4],
            [1, 2], [2, 3], [3, 4], [4, 1],
            [1, 5], [2, 6], [3, 7], [4, 8],
            [5, 6], [6, 7], [7, 8], [8, 5]
        ]

        return {'name':'camera_frustum', 'points': points, 'lines': lines, 'color': list(color)}
    
    def add_scene_results(
        self,
        scene_results: dict,
        K: np.ndarray,
        recover_pose_fn: Callable[[np.ndarray, np.ndarray], tuple[np.ndarray,np.ndarray]],
        template_metadata: dict[str, Template]
    ) -> None:
        """
        Turn your analyze_scene output into meshes.

        Args:
            scene_results (dict): results of scene analysis.
            K (np.ndarray): camera intrinsics matrix
            recover_pose_fn (callable): function H, K â†’ (R, t)
            template_metadata (Template): Template instance containing width, height, and texture paths.
        """
        # Create a mesh for each template found in the scene
        for templ_id, info in scene_results.items():
            H = np.array(info['homography'])

            # Recover the camera pose from the homography
            R, t = recover_pose_fn(H, K)

            # TODO change of coordinate system?

            # Get the template metadata
            template = template_metadata[templ_id]
            width = template.width
            height = template.height
            texture_path = template.path

            # Create the mesh for this template
            mesh = self.create_template_quad(R, t, width, height, texture_path=texture_path)
            self.meshes.append(mesh)

    def visualize(self, camera_params: dict = None, filename: str = 'template_visualization.html'):
        """
        Generate the HTML file with Three.js embedding of meshes and lines.

        Args:
            camera_params (dict): dict with 'focal_length', 'image_size', 'scale'.
            filename (str): output HTML file path. Defaults to 'template_visualization.html'.
        """
        if camera_params is None:
            camera_params = {'focal_length': 500.0, 'image_size': (640, 480), 'scale': 0.5}
        frustum = self.create_camera_frustum(**camera_params)
        self.lines.append(frustum)

        # Prepare JSON data
        html = self._generate_html(self.meshes, self.lines)
        Path(filename).write_text(html)
        print(f"HTML visualization saved to {filename}")

    def _generate_html(self, meshes: list[dict], lines: list[dict]) -> str:
        """
        Inject meshes and lines JSON into the HTML template.

        Args:
            meshes: list of mesh dicts.
            lines: list of line-set dicts.

        Returns:
            Complete HTML content as a string.
        """
        tpl_path = Path(os.path.join(project_root, 'assets', 'visualization_template.html.tpl'))
        tpl = tpl_path.read_text()
        return tpl.replace('{ meshes_json }', json.dumps(meshes)) \
                  .replace('{ lines_json }', json.dumps(lines))

## **Analysis**

In [4]:
# Read the measurement data and list every scene it describes
data = load_measurements_from_yaml("../assets/measurements.yaml")
scenes = data.get_all_scenes()

# Read the camera calibration: intrinsics, distortion coefficients, image size
K, dist_coeffs, image_size = load_calibration_json("../assets/camera_calibration.json")

In [5]:
def analyze_scene(scene_id, scenes, K, measurements=None):
    """
    Match every template of a scene and compare predicted with measured distances.

    Steps:

    1. Load the scene and the corresponding templates.
    2. For each template, perform template matching against the scene image.
    3. Convert the pixel-pixel homography into a metric-pixel homography.
    4. Compute the distance from the center of each template to the camera.
    5. Compare that distance with the measured one.

    Args:
        scene_id: identifier of the scene to analyze.
        scenes: mapping from scene id to the template ids present in that scene.
        K: camera intrinsics matrix passed on to the distance computation.
        measurements: object providing ``get_scene(scene_id)`` and
            ``get_template(template_id)``. Defaults to the notebook-level
            ``data`` object, so existing three-argument calls keep working.

    Returns:
        Dict keyed by template id. Each value holds 'homography' (the metric
        homography as nested lists), 'distance_pred', 'distance_true',
        'error' (absolute difference) and 'error_percent' (relative to the
        true distance).
    """
    # Fall back to the notebook global only when no source was handed in
    source = data if measurements is None else measurements

    # Load scene and templates
    scene = source.get_scene(scene_id)
    templates = [source.get_template(t_id) for t_id in scenes[scene_id]]

    # The scene image is the same for every template, so resolve its path once
    scene_path = os.path.join(project_root, scene.path)

    # Iterate over templates
    results = {}
    for template in templates:
        template_path = os.path.join(project_root, template.path)

        # Compute pixel-pixel homography; the other returned values are not needed here
        H, _mask, _t_shape, _reprojection_error = template_match(
            template_path, scene_path, extract_method='SIFT', match_method='BF', plot=False
        )

        # First two dimensions of the image array, i.e. (rows, columns)
        template_size_px = plt.imread(template_path).shape[:2]

        # Derive the metric-pixel homography; the metric size is given in the
        # same (height, width) order as the pixel size above
        H_metric = derive_metric_homography(
            H_px=H,
            template_size_px=template_size_px,
            template_size_metric=(template.height, template.width)
        )

        # Template center point, ordered (x, y)
        template_center_mm = np.array([template.width / 2, template.height / 2])

        # Compute the distance from the camera to the template center
        distance_pred = compute_distance_from_homography(
            H_mm2img=H_metric,
            K=K,
            point_mm=template_center_mm
        )

        # True distance and error
        distance_true = scene.get_distance('Camera', template.id).distance
        error = np.abs(distance_pred - distance_true)
        error_percent = (error / distance_true) * 100

        results[template.id] = {
            'homography': H_metric.tolist(),
            'distance_pred': distance_pred,
            'distance_true': distance_true,
            'error': error,
            'error_percent': error_percent,
        }

    return results

In [6]:
# Choose the scene to evaluate and run the matching/distance analysis on it
scene_id = 'S12'
results = analyze_scene(scene_id=scene_id, scenes=scenes, K=K)

In [7]:
# Look up the metadata of every template that belongs to the analyzed scene
metadata = dict((t_id, data.get_template(t_id)) for t_id in scenes[scene_id])

# Build the visualizer and turn each analysis result into a template quad
# TODO check recover pose function!
visualizer = WebVisualizer()
visualizer.add_scene_results(results, K, recover_pose_from_homography_v2, metadata)

# Frustum settings: K[0, 0] as focal length plus the calibrated image size
camera_params = {'focal_length': K[0, 0], 'image_size': image_size, 'scale': 0.5}

# Write the interactive HTML scene
visualizer.visualize(camera_params=camera_params, filename='template_visualization.html')

HTML visualization saved to template_visualization.html
