# Step 4: Assessment (Granular) with Bounding Boxes

This notebook demonstrates the **granular assessment** approach with **bounding box extraction** for evaluating extraction confidence using Amazon Bedrock.

**Key Features:**
- Multiple focused inferences instead of single large inference
- **Spatial localization with bounding boxes**
- **Visual annotation of extracted fields**
- Prompt caching for cost optimization
- Parallel processing for reduced latency
- Better handling of complex documents with many attributes

**Inputs:**
- Document object with extraction results from Step 3
- Assessment configuration with bounding boxes enabled
- Document classes with confidence thresholds

**Outputs:**
- Document with enhanced assessment results including geometry data
- Detailed confidence scores and reasoning for each attribute
- **Bounding box coordinates for spatial localization**
- **Visual annotation of document pages with extracted fields**
- Performance metrics and JSON structure examples

## 0. Package Installation

First, let's ensure we have the latest version of the IDP common package with bounding box support:

In [None]:
# Relative path prefix used by the install command below to locate
# lib/idp_common_pkg
ROOTDIR = "../.."

# Let's make sure that modules are autoreloaded
# (autoreload mode 2 reloads all modules before running user code)
%load_ext autoreload
%autoreload 2

# First uninstall existing package (to ensure we get the latest version)
%pip uninstall -y idp_common

# Install the IDP common package with all components in development mode
# (-e = editable install; {ROOTDIR} is expanded from the variable set above)
%pip install -q -e "{ROOTDIR}/lib/idp_common_pkg[dev, all]"

# Check installed version
# (grep keeps only the Version and Location lines of the pip output)
%pip show idp_common | grep -E "Version|Location"


## 1. Load Libraries and Data

In [None]:
import os
import json
import time
import logging
import boto3
import yaml
from pathlib import Path

# Import IDP libraries
from idp_common.models import Document, Status
from idp_common import assessment

# Import visualization libraries
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import io
import numpy as np

# Logging setup: the root logger is configured at WARNING, and only the two
# loggers named below are raised to INFO.
logging.basicConfig(level=logging.WARNING)
for _verbose_logger_name in (
    "idp_common.assessment.service",
    "idp_common.bedrock.client",
):
    logging.getLogger(_verbose_logger_name).setLevel(logging.INFO)

# Both confirmation lines are emitted by one print call, one per line.
print(
    "Libraries imported successfully",
    "Assessment with bounding box support logging enabled",
    sep="\n",
)


In [None]:
# Load document from previous step
extraction_data_dir = Path(".data/step3_extraction")

# Load document object from JSON
document_path = extraction_data_dir / "document.json"
with open(document_path, "r", encoding="utf-8") as f:
    document = Document.from_json(f.read())

# Load configuration - use bounding box enabled config
config_dir = Path("config")
CONFIG = {}

config_files = ["assessment_with_bounding_boxes.yaml", "classes.yaml"]

# Files are merged in list order with dict.update, so top-level keys from a
# later file replace same-named keys from an earlier one.
for config_file in config_files:
    config_path = config_dir / config_file
    if config_path.exists():
        with open(config_path, "r", encoding="utf-8") as f:
            # yaml.safe_load returns None for an empty file; fall back to an
            # empty dict so CONFIG.update does not raise TypeError.
            file_config = yaml.safe_load(f) or {}
        CONFIG.update(file_config)
        print(f"Loaded {config_file}")
    else:
        print(f"Warning: {config_file} not found")

# Load environment info
env_path = extraction_data_dir / "environment.json"
with open(env_path, "r", encoding="utf-8") as f:
    env_info = json.load(f)

# Set environment variables
os.environ["AWS_REGION"] = env_info["region"]
os.environ["METRIC_NAMESPACE"] = "IDP-Modular-Pipeline"

print(f"Document loaded: {document.id}")
print(f"Document status: {document.status.value}")
print(f"Number of sections: {len(document.sections) if document.sections else 0}")
print(f"Configuration sections: {list(CONFIG.keys())}")


## 2. Configure Assessment Service with Bounding Boxes

In [None]:
# Build the assessment service from the merged configuration
assessment_service = assessment.AssessmentService(config=CONFIG)
print(f"Assessment service initialized: {type(assessment_service).__name__}")

# Pull out the assessment settings that are reported below
assessment_config = CONFIG.get("assessment", {})
bbox_config = assessment_config.get("bounding_boxes", {})
bbox_enabled = bbox_config.get("enabled", False)
_bbox_status = "✅ YES" if bbox_enabled else "❌ NO"

print("\n=== Bounding Box Configuration ===")
print(f"Bounding boxes enabled in config: {_bbox_status}")

# Each entry pairs the printed label with the key read from assessment_config
for _label, _setting_key in (
    ("Model", "model"),
    ("Temperature", "temperature"),
    ("Default Confidence Threshold", "default_confidence_threshold"),
):
    print(f"{_label}: {assessment_config.get(_setting_key)}")


## 3. Helper Functions

In [None]:
def parse_s3_uri(uri):
    """Split an S3 URI into its bucket name and object key.

    Args:
        uri: A string such as ``"s3://bucket/path/to/object"``. A value
            without the ``s3://`` scheme is treated as ``bucket/key``.

    Returns:
        A ``(bucket, key)`` tuple. ``key`` is an empty string when the URI
        names only a bucket.
    """
    scheme = "s3://"
    # Strip only a leading scheme: str.replace would also delete any "s3://"
    # that happens to appear inside the object key.
    path = uri[len(scheme):] if uri.startswith(scheme) else uri
    # Everything before the first "/" is the bucket; the rest is the key.
    bucket, _, key = path.partition("/")
    return bucket, key


def load_json_from_s3(uri):
    """Download the S3 object at ``uri`` and return its parsed JSON content.

    The object body is read in full, decoded as UTF-8 and passed to
    ``json.loads``.
    """
    s3 = boto3.client("s3")
    bucket_name, object_key = parse_s3_uri(uri)
    s3_object = s3.get_object(Bucket=bucket_name, Key=object_key)
    raw_text = s3_object["Body"].read().decode("utf-8")
    return json.loads(raw_text)


def load_image_from_s3(uri):
    """Download the S3 object at ``uri`` and open it as a PIL image.

    The object body is read fully into memory and wrapped in a ``BytesIO``
    buffer before being handed to ``Image.open``.
    """
    s3 = boto3.client("s3")
    bucket_name, object_key = parse_s3_uri(uri)
    s3_object = s3.get_object(Bucket=bucket_name, Key=object_key)
    image_buffer = io.BytesIO(s3_object["Body"].read())
    return Image.open(image_buffer)


print("Helper functions defined")


## 4. Run Assessment with Bounding Boxes

In [None]:
print("Running assessment with bounding box extraction...")

if document.sections:
    assessment_results = []

    # Only sections that carry a non-empty extraction result URI are assessed
    sections_with_extractions = [
        candidate
        for candidate in document.sections
        if getattr(candidate, "extraction_result_uri", None)
    ]
    n = min(1, len(sections_with_extractions))  # Process 1 section for demo

    print(f"Found {len(sections_with_extractions)} sections with extraction results")
    print(f"Processing {n} section with bounding box assessment...")

    for position, section in enumerate(sections_with_extractions[:n], start=1):
        print(
            f"\n--- Section {position}: {section.section_id} ({section.classification}) ---"
        )

        # Time the assessment call; the service returns the updated document,
        # which replaces the previous one.
        started_at = time.time()
        document = assessment_service.process_document_section(
            document=document, section_id=section.section_id
        )
        processing_time = time.time() - started_at

        print(f"✅ Assessment completed in {processing_time:.2f} seconds")

        # Keep a per-section record for the summary at the end of the notebook
        assessment_results.append(
            dict(
                section_id=section.section_id,
                classification=section.classification,
                processing_time=processing_time,
                extraction_result_uri=section.extraction_result_uri,
            )
        )

    print(f"\n🎉 Assessment complete for {n} section(s)")
else:
    print("No sections found in document")


## 5. Display Raw JSON Output with Geometry

In [None]:
print("\n=== Raw JSON Assessment Output with Bounding Boxes ===")

# Upper bound on how many attributes are echoed in the JSON preview
MAX_ATTRIBUTES_SHOWN = 40

if document.sections:
    sections_with_extractions = [
        s
        for s in document.sections
        if hasattr(s, "extraction_result_uri") and s.extraction_result_uri
    ]

    if sections_with_extractions:
        # Only the first section with extraction results is displayed
        section = sections_with_extractions[0]
        print(f"Section: {section.section_id} ({section.classification})")

        try:
            # Load assessment results
            extraction_data = load_json_from_s3(section.extraction_result_uri)
            explainability_info = extraction_data.get("explainability_info", [])

            if explainability_info:
                print("\n🔍 explainability_info JSON structure:")
                # explainability_info may be a list (first element is used)
                # or a single mapping
                assessment_data = (
                    explainability_info[0]
                    if isinstance(explainability_info, list)
                    else explainability_info
                )

                # Preview at most MAX_ATTRIBUTES_SHOWN attributes and report
                # exactly how many were left out.
                sample_data = dict(
                    list(assessment_data.items())[:MAX_ATTRIBUTES_SHOWN]
                )
                hidden_count = len(assessment_data) - len(sample_data)
                if hidden_count > 0:
                    sample_data["..."] = f"and {hidden_count} more attributes"

                # Count geometry across ALL attributes, not just the previewed
                # ones, so the summary below is accurate.
                geometry_count = sum(
                    1
                    for attr_data in assessment_data.values()
                    if isinstance(attr_data, dict) and "geometry" in attr_data
                )

                print(json.dumps(sample_data, indent=2))

                print("\n📊 Summary:")
                print(f"  • Total attributes: {len(assessment_data)}")
                print(f"  • Attributes with geometry: {geometry_count}")

                # Show geometry example (first attribute that has one)
                for attr_name, attr_data in assessment_data.items():
                    if isinstance(attr_data, dict) and "geometry" in attr_data:
                        print(f"\n🎯 Geometry Example ({attr_name}):")
                        geom_data = {
                            "confidence": attr_data.get("confidence"),
                            "confidence_threshold": attr_data.get(
                                "confidence_threshold"
                            ),
                            "geometry": attr_data.get("geometry"),
                        }
                        print(json.dumps(geom_data, indent=2))

                        # Explain coordinates: values are printed as
                        # percentages by multiplying by 100
                        if geom_data["geometry"]:
                            bbox = geom_data["geometry"][0]["boundingBox"]
                            page = geom_data["geometry"][0]["page"]
                            print("\n📐 Coordinate meaning:")
                            print(
                                f"  • Page {page}: top={bbox['top'] * 100:.1f}%, left={bbox['left'] * 100:.1f}%"
                            )
                            print(
                                f"  • Size: {bbox['width'] * 100:.1f}% wide, {bbox['height'] * 100:.1f}% tall"
                            )
                        break
            else:
                print("No explainability_info found")
        except Exception as e:
            # Notebook-level boundary: report the failure instead of aborting
            print(f"Error loading results: {e}")


## 6. Visualize Bounding Boxes on Document

In [None]:
def draw_bounding_boxes(
    image, bbox_data_list, page_num, title="Document with Bounding Boxes"
):
    """Show ``image`` with one labelled rectangle per bounding box entry.

    Args:
        image: Image object exposing ``size`` as ``(width, height)``.
        bbox_data_list: Dicts with ``attr_name``, ``confidence``,
            ``threshold`` and ``geometry`` (holding a ``boundingBox`` with
            ``left``/``top``/``width``/``height``). The box values are
            multiplied by the image size to obtain pixel coordinates.
        page_num: Page number shown in the figure title.
        title: Text placed before the page number in the title.

    Returns:
        The number of entries in ``bbox_data_list``.
    """
    fig, ax = plt.subplots(1, 1, figsize=(12, 16))
    ax.imshow(image)
    ax.set_title(f"{title} - Page {page_num}", fontsize=14, fontweight="bold")
    ax.axis("off")

    page_width, page_height = image.size
    seen_colors = set()

    for entry in bbox_data_list:
        field_name = entry["attr_name"]
        score = entry["confidence"]
        cutoff = entry["threshold"]
        box = entry["geometry"]["boundingBox"]

        # Scale the box by the image dimensions to get pixel coordinates
        x_px = box["left"] * page_width
        y_px = box["top"] * page_height
        w_px = box["width"] * page_width
        h_px = box["height"] * page_height

        # Green when the score meets its threshold; otherwise orange from
        # 0.7 upwards and red below that
        if score >= cutoff:
            box_color = "green"
        else:
            box_color = "orange" if score >= 0.7 else "red"
        seen_colors.add(box_color)

        # Outline of the field
        ax.add_patch(
            patches.Rectangle(
                (x_px, y_px),
                w_px,
                h_px,
                linewidth=2,
                edgecolor=box_color,
                facecolor="none",
                alpha=0.8,
            )
        )

        # Caption slightly above the box, clamped to the top edge
        ax.text(
            x_px,
            max(0, y_px - 10),
            f"{field_name} ({score:.2f})",
            fontsize=9,
            color=box_color,
            fontweight="bold",
            bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.8),
        )

    # Legend lists only the colors actually drawn, in a fixed order
    if seen_colors:
        legend_spec = (
            ("green", "High Confidence"),
            ("orange", "Medium Confidence"),
            ("red", "Low Confidence"),
        )
        legend_handles = [
            patches.Patch(color=color_name, label=caption)
            for color_name, caption in legend_spec
            if color_name in seen_colors
        ]
        ax.legend(handles=legend_handles, loc="upper right")

    plt.tight_layout()
    plt.show()
    return len(bbox_data_list)


print("Bounding box visualization function defined")


In [None]:
# Create visualizations
print("\n=== Creating Bounding Box Visualizations ===")

if document.sections:
    sections_with_extractions = [
        s
        for s in document.sections
        if hasattr(s, "extraction_result_uri") and s.extraction_result_uri
    ]

    if sections_with_extractions:
        # Only the first section with extraction results is visualized
        section = sections_with_extractions[0]

        try:
            # Load assessment results
            extraction_data = load_json_from_s3(section.extraction_result_uri)
            explainability_info = extraction_data.get("explainability_info", [])

            if explainability_info:
                # Accept both shapes of explainability_info (a list whose
                # first element is the mapping, or the mapping itself) so
                # this cell agrees with the raw JSON display cell.
                assessment_data = (
                    explainability_info[0]
                    if isinstance(explainability_info, list)
                    else explainability_info
                )

                # Collect geometry data by page
                geometry_by_page = {}
                for attr_name, attr_data in assessment_data.items():
                    if isinstance(attr_data, dict) and "geometry" in attr_data:
                        # A null geometry value is treated as "no boxes"
                        # rather than aborting the whole visualization.
                        for geom in attr_data["geometry"] or []:
                            geometry_by_page.setdefault(geom["page"], []).append(
                                {
                                    "attr_name": attr_name,
                                    "confidence": attr_data.get("confidence", 0),
                                    "threshold": attr_data.get(
                                        "confidence_threshold", 0.9
                                    ),
                                    "geometry": geom,
                                }
                            )

                # Create visualizations for each page
                for page_num, bbox_list in geometry_by_page.items():
                    print(
                        f"\n--- Visualizing Page {page_num} ({len(bbox_list)} bounding boxes) ---"
                    )

                    # document.pages is looked up by the page number as a string
                    page_id = str(page_num)
                    if page_id in document.pages:
                        page = document.pages[page_id]

                        try:
                            image = load_image_from_s3(page.image_uri)
                            print(
                                f"Image loaded: {image.size[0]}x{image.size[1]} pixels"
                            )

                            bbox_count = draw_bounding_boxes(
                                image, bbox_list, page_num, section.classification
                            )
                            print(f"✅ Visualized {bbox_count} bounding boxes")

                        except Exception as e:
                            # A failure on one page must not stop the others
                            print(f"❌ Visualization error: {e}")
                    else:
                        print(f"⚠️  Page {page_num} not found")

                if not geometry_by_page:
                    print("📍 No geometry data found for visualization")
                    print("This might indicate:")
                    print("  - Bounding boxes disabled in config")
                    print("  - LLM did not return bbox data")

            else:
                print("No explainability_info available")
        except Exception as e:
            # Notebook-level boundary: report the failure instead of aborting
            print(f"Error processing section: {e}")


## 7. Summary

In [None]:
# Final recap of what this notebook ran
print("=== Assessment with Bounding Boxes Complete ===")
print(f"✅ Document: {document.id}")

# assessment_results only exists if the assessment cell found sections
_sections_processed = (
    len(assessment_results) if "assessment_results" in locals() else 0
)
print(f"✅ Sections processed: {_sections_processed}")
print(f"✅ Bounding boxes enabled: {bbox_config.get('enabled', False)}")
print(f"✅ Model used: {assessment_config.get('model')}")

print("\n📋 Features Demonstrated:")
for _feature in (
    "Enhanced confidence assessment with spatial localization",
    "Raw JSON output showing explainability_info with geometry",
    "Visual bounding box annotation on document pages",
    "Color-coded confidence visualization",
    "UI-compatible geometry format output",
    "Integration with existing assessment workflow",
):
    print(f"  • {_feature}")

print("\n🎉 Bounding box integration successfully demonstrated!")
