# 🧠 Prompt Routing Engine - Demo

In [None]:
from IPython.display import Image, display
import os

# Document type to demo; one of: 'form', 'report', 'assessment', 'correspondence'
doc_type = "correspondence"

# Path of the image that belongs to the selected document type
img_path = f"../notebooks/assets/{doc_type}_output_view_full.png"

# Render the image inline, then confirm which document type was loaded
display(Image(img_path))
print(f"Loaded document image for: {doc_type.capitalize()}")


Loaded document image for: Correspondence

In [None]:
import yaml

# Read the YAML prompt file that matches the selected document type
prompt_path = f"../prompts/{doc_type}_prompt.yaml"
with open(prompt_path) as prompt_file:
    prompt_yaml = yaml.safe_load(prompt_file)

# The template text is stored under the top-level "prompt" key
prompt = prompt_yaml["prompt"]

# Header, separator rule and template, one per line
print("Loaded Prompt Template:", "="*50, prompt, sep="\n")


Loaded Prompt Template:
==================================================
You are an expert document classifier. Your task is to analyze a 2-page document...

In [None]:
from routing_engine.rules import extract_text_and_layout
import json

# Read the parsed Textract-like layout JSON for the selected document type
json_path = f"../data/textract_outputs/{doc_type}.json"
with open(json_path) as layout_file:
    textract_data = json.load(layout_file)

# Only the text lines are needed here; the second return value is discarded
text_lines, _ = extract_text_and_layout(textract_data)
doc_length = len(text_lines)

print(f"📝 Document Length (line count): {doc_length}")

# Routing rule for the Bedrock call:
# - correspondence documents with fewer than 10 lines, or
# - report documents with 10 or more lines
should_call_bedrock = (
    (doc_type == "correspondence" and doc_length < 10)
    or (doc_type == "report" and doc_length >= 10)
)

print(f"🚦 Should call Bedrock: {should_call_bedrock}")

# Report the routing decision; this cell only prints, it does not make the call
if not should_call_bedrock:
    print("❌ Skipping Bedrock call due to document length criteria.")
else:
    print("✅ Proceeding with Bedrock call...")
    # (the Sonnet call cell follows this)


📝 Document Length (line count): 8
🚦 Should call Bedrock: True
✅ Proceeding with Bedrock call...

In [None]:
# Simulated call to AWS Bedrock Claude Sonnet (pseudocode placeholder)
# In real setup, you would use boto3 client with bedrock-runtime and pass modelId

def call_bedrock_claude(prompt, image_bytes=None):
    """Mock stand-in for an AWS Bedrock Claude Sonnet request.

    Prints a short trace containing the prompt and returns a fixed response.
    No network call is made and ``image_bytes`` is accepted but not used.

    Args:
        prompt: Prompt text that would be sent to the model.
        image_bytes: Optional raw image bytes; ignored by this placeholder.

    Returns:
        dict with a single ``"completion"`` key holding a canned answer.
    """
    # Trace what a real call would send
    for line in ("Calling AWS Bedrock Claude Sonnet...", "Prompt sent:", prompt):
        print(line)

    # A real implementation would encode the image and send it with the prompt;
    # for now a hard-coded completion is returned.
    mock_completion = "Category: [\"Correspondence\", \"20-03-2024\"]"
    return {"completion": mock_completion}

# Read the document image as raw bytes. The context manager closes the file
# handle right away instead of leaving it open until garbage collection.
with open(img_path, "rb") as img_file:
    image_bytes = img_file.read()

# Run the (mock) model call and show the completion text it returned
response = call_bedrock_claude(prompt, image_bytes=image_bytes)
print("\nModel Output:")
print(response["completion"])


In [None]:
# NOTE: this import rebinds the name `Image`, which the first cell imported
# from IPython.display; re-running that cell after this one will fail.
from PIL import Image
import base64  # imported here so the cell works on a fresh kernel (Restart & Run All)
import io

# Resize image if it exceeds 8000x8000 pixels
MAX_DIM = 8000

with open(img_path, "rb") as img_file:
    image = Image.open(img_file)
    width, height = image.size
    print(f"🖼️ Original image size: {width}x{height}")

    if width > MAX_DIM or height > MAX_DIM:
        # Scale both sides by the same factor so the aspect ratio is preserved
        resize_ratio = min(MAX_DIM / width, MAX_DIM / height)
        # Clamp to 1 px so an extreme aspect ratio cannot produce a 0-sized side
        new_size = (
            max(1, int(width * resize_ratio)),
            max(1, int(height * resize_ratio)),
        )
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
        image = image.resize(new_size, Image.LANCZOS)
        print(f"⚠️ Image resized to: {new_size[0]}x{new_size[1]}")
    else:
        print("✅ Image is within size limits.")

    # The JPEG writer rejects alpha/palette modes (e.g. RGBA or P, common for
    # PNG sources), so convert anything it cannot store to RGB first.
    if image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        image = image.convert("RGB")

    # Convert image to base64 (encoded as JPEG in memory, nothing is written to disk)
    buffer = io.BytesIO()
    image.save(buffer, format="JPEG")
    img_b64 = base64.b64encode(buffer.getvalue()).decode("utf-8")

# List of base64-encoded images
images = [img_b64]


🖼️ Original image size: 1200x900
✅ Image is within size limits.

In [None]:
import base64
import json
import mimetypes

# Simulated system prompt and content prompt
system_prompt = (
    "You are a professional document analyst specializing in extracting meaningful insights "
    "from images and text. Your responses should be concise, structured, and highly accurate."
)

# Base64-encode the image file exactly as stored on disk.
# NOTE(review): this replaces the `img_b64` / `images` values built by the
# resize cell above with the unresized file contents - confirm that is intended.
with open(img_path, "rb") as img_file:
    img_b64 = base64.b64encode(img_file.read()).decode("utf-8")

images = [img_b64]  # list of images in base64

# The declared media type must describe the bytes that are actually sent. The
# file is sent untouched, so derive the type from its extension (".png" ->
# "image/png") and fall back to the previous hard-coded value when it is unknown.
image_media_type = mimetypes.guess_type(img_path)[0] or "image/jpeg"

# Claude Sonnet prompt content
content_prompt = prompt  # loaded in previous cell

# Construct messages for Claude: one image part per image, followed by the text prompt
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": image_media_type,
                    "data": img,
                },
            }
            for img in images
        ] + [{"type": "text", "text": content_prompt}],
    }
]

# Final body to send to AWS Bedrock Sonnet model
body = json.dumps(
    {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 1500,
        "system": system_prompt,
        "top_p": 0.7,
        "temperature": 0.7,
        "messages": messages,
        "stop_sequences": ["### END"],
    }
)


def _preview_payload(raw_body, keep=40):
    """Return the JSON payload pretty-printed, with image data shortened.

    Only the returned preview text is shortened; ``raw_body`` is not modified.

    Args:
        raw_body: JSON string of the request body.
        keep: Number of leading base64 characters to keep per image.

    Returns:
        Indented JSON string suitable for printing.
    """
    preview = json.loads(raw_body)
    for message in preview["messages"]:
        for part in message["content"]:
            if part.get("type") == "image":
                data = part["source"]["data"]
                if len(data) > keep:
                    part["source"]["data"] = f"{data[:keep]}... ({len(data)} base64 chars total)"
    return json.dumps(preview, indent=2)


# Show the payload without dumping the whole base64 image into the notebook output
print("Prepared payload for Bedrock Claude Sonnet:")
print(_preview_payload(body))


{
  "anthropic_version": "bedrock-2023-05-31",
  "max_tokens": 1500,
  ...

In [None]:
import re
from datetime import datetime

def post_process_model_output(output_str):
    """Parse the category and optional date out of a model completion.

    The completion is expected to contain a fragment such as
    ``Category: ["Correspondence", "20-03-2024"]``. The first list item is the
    category; the second, when present and a valid ``DD-MM-YYYY`` date, is the
    date.

    Args:
        output_str: Completion text returned by the model.

    Returns:
        dict with three keys:
            - "category": str, the parsed category, or "Other" when the input
              is not a string, has no ``Category: [...]`` fragment, or the
              first list item is empty.
            - "date": str in ``DD-MM-YYYY`` form, or None when missing/invalid.
            - "is_other": int, 1 when the category is "Other"
              (case-insensitive), otherwise 0.
    """
    fallback = {"category": "Other", "date": None, "is_other": 1}

    # Best effort: report unusable input and fall back instead of raising
    if not isinstance(output_str, str):
        print("Error during post-processing:", f"expected str, got {type(output_str).__name__}")
        return fallback

    # Extract list from output string
    match = re.search(r'Category:\s*\[(.*?)\]', output_str)
    if match is None:
        return fallback

    # Drop the quotes around list items, then split into trimmed parts
    raw = match.group(1).replace('"', '').replace("'", "")
    parts = [part.strip() for part in raw.split(",")]

    # str.split always yields at least one item, so an empty list ("[]") shows
    # up as an empty first item; treat that as "Other" rather than returning "".
    category = parts[0] or "Other"
    is_other = 1 if category.lower() == "other" else 0

    # Try to extract a date; anything that is not DD-MM-YYYY is reported as None
    extracted_date = None
    if len(parts) > 1:
        try:
            extracted_date = datetime.strptime(parts[1], "%d-%m-%Y").strftime("%d-%m-%Y")
        except ValueError:
            extracted_date = None

    return {
        "category": category,
        "date": extracted_date,
        "is_other": is_other
    }

# Example usage: parse the completion returned by the model call cell
output_str = response["completion"]
result = post_process_model_output(output_str)

# Blank line, header, then the parsed dict
print("\nPost-Processed Result:", result, sep="\n")


{
  'category': 'Correspondence',
  'date': '20-03-2024',
  'is_other': 0
}