In [None]:
%pip install anthropic IPython

In [None]:
import boto3
import json
import base64
from anthropic import AnthropicBedrock
from IPython.display import Image, JSON
from io import BytesIO
from botocore.config import Config

# Give slow model responses plenty of time before botocore aborts the read (value is in seconds).
config = Config(read_timeout=1000)

# Default session: no region is passed here, so boto3 resolves it from the environment / AWS config.
session = boto3.Session()

# Bedrock runtime client shared by every request in this notebook.
bedrock = session.client(
    config=config,
    service_name='bedrock-runtime',
)

In [None]:
def get_base64_from_bytes(image_bytes):
    """Return the base64 text representation of raw file bytes.

    Args:
        image_bytes: A bytes-like object (e.g. ``bytes`` or ``bytearray``).
            ``None`` is treated as empty input and yields ``""``.

    Returns:
        The base64 encoding of the input, decoded to a ``str``.
    """
    # Route the input through an in-memory buffer to obtain an immutable bytes copy.
    with BytesIO(image_bytes) as buffer:
        raw = buffer.getvalue()
    return str(base64.b64encode(raw), "utf-8")

def transcribe_documents(user_prompt, system_prompt=None, tools_use=None, tool_choice=None, image_bytes=None,
                         media_type="image/jpeg", model_id="anthropic.claude-3-sonnet-20240229-v1:0"):
    """Send a prompt, optionally with an image and tools, to Claude through the Bedrock runtime client.

    Args:
        user_prompt: Text of the user message.
        system_prompt: Optional system prompt; left out of the request when ``None``.
        tools_use: Optional list of tool definitions; left out of the request when ``None``.
        tool_choice: Optional tool-choice directive; left out of the request when ``None``.
        image_bytes: Optional raw image bytes. When given, the image is sent
            base64-encoded ahead of the text in the same user message.
        media_type: Media type declared for the image (defaults to ``"image/jpeg"``).
        model_id: Bedrock model identifier to invoke.

    Returns:
        The raw response returned by ``bedrock.invoke_model``.
    """
    content = []
    if image_bytes is not None:
        # Only attach an image block when there is an image; an empty block is not a usable input.
        content.append({
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": media_type,
                "data": get_base64_from_bytes(image_bytes),
            },
        })
    content.append({"type": "text", "text": user_prompt})

    body = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 4200,
        "temperature": 0,
        "top_k": 250,
        "top_p": 0.999,
        "messages": [{"role": "user", "content": content}],
    }

    # Optional fields are omitted entirely instead of being serialised as JSON null.
    optional_fields = {
        "system": system_prompt,
        "tools": tools_use,
        "tool_choice": tool_choice,
    }
    body.update({key: value for key, value in optional_fields.items() if value is not None})

    return bedrock.invoke_model(body=json.dumps(body), modelId=model_id)

# Approach
## Transcribe documents
- Document JSON schema with `"$id": "/schemas/document"` and instructions in the system prompt
- Function calling performs document transcription by referencing the document JSON schema through the construct `"$ref":"/schemas/document"`
- Document JSON schema contains `dependentSchemas` construct to extend `/schemas/document` with extra properties (`"inference"` and `"source"` for inference analysis) when a given property `"value"` is present (for each document field).
- Each document field definition must declare a `"value"` property so that `dependentSchemas` applies the inference analysis to it:
`"field": { "properties":{"value":{"type":"string"}}, "description": "…" }`
- Additionally, the system prompt should contain the instruction `Apply dependentSchemas to all <document/> fields` to enforce `dependentSchemas`
- Instruct Claude to use a specific tool in the response to the user’s query by specifying the tool in the tool_choice: `tool_choice = {"type": "tool", "name": "transcribe_documents"}`


In [None]:
# Tool whose input schema refers to the document schema published in the system prompt below.
tools_use = [{
            "name": "transcribe_documents",
            "description": "Extract all <document/> fields with the highest accuracy following <instructions/>",
            "input_schema": {
                "type": "object",
                "properties": {
                    "documents": {
                        "type": "array",
                        "items": {
                            "$ref":"/schemas/document"
                        }
                    }
                }
            }
        }
        ]
# Force the model to answer through the transcription tool.
tool_choice = {"type": "tool", "name": "transcribe_documents"}

# The schema below is kept valid JSON (every key named, no trailing commas) and the
# <document> block is closed with a proper end tag.
system_prompt = """
<instructions>
  - Ensure to escape quotes in the JSON response
  - Return "" for missing field values
  - Apply dependentSchemas to all <document/> fields
</instructions>

<document>
{
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$id": "/schemas/document",
    "type": "object",
    "description": "A document with the fields to transcribe",
    "properties": {
        "doc_type": { "properties":{"value":{"type":"string"}}, "description": "Type of Document: Receipt" },
        "receipt_number": { "properties":{"value":{"type":"string"}}, "description": "The receipt number or other identifier number" },
        "doc_amount_total": { "properties":{"value":{"type":"number"}}, "description": "The total receipt amount" },
        "currency": { "properties":{"value":{"type":"string"}}, "description": "AUD/USD/CAD" },
        "vendor_business_number": { "properties":{"value":{"type":"string"}}, "description": "Vendor's business identification number e.g. ABN" },
        "vendor_name": { "properties":{"value":{"type":"string"}}, "description": "Business name issueing the receipt" },
        "vendor_address": { "properties":{"value":{"type":"string"}}, "description": "Vendor's site address" },
        "vendor_phone": { "properties":{"value":{"type":"string"}}, "description": "Vendor's phone number" },
        "payment_method": { "properties":{"value":{"type":"string"}}, "description": "The payment type, e.g. EFTPOS, Card" },
        "date_issued": { "properties":{"value":{"format": "YYYY-MM-DDThh:mm:ss"}}, "description": "Date document was issued"},
        "line_items_amount_total": { "properties":{"value":{"type":"number"}}, "description": "Calculated sum of line item's line_amount fields" },
        "line_items": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "line_description": { "properties":{"value":{"type":"string"}}, "description": "Line item description" },
                    "line_quantity": { "properties":{"value":{"type":"number"}}, "description": "Item quantity" },
                    "line_unit_price": { "properties":{"value":{"type":"number"}}, "description": "Item price per unit" },
                    "line_amount": { "properties":{"value":{"type":"number"}}, "description": "Line item $ amount (currency)" }
                }
            }
        }
    },
    "dependentSchemas": {
        "value": {
            "properties": {
                "inference": { "type": "integer", "description": "0=EXPLICIT|1=DERIVED|2=MISSING|3=OTHER" },
                "source": { "type": "string", "description": "Source field only if and for explicit and derived fields" }
            }
        }
    }
}
</document>
"""

regular_prompt = """
Describe documents
"""

# Read the sample image inside a context manager so the file handle is closed promptly.
with open("20240528_194949.jpg", 'rb') as image_file:
    image_bytes = bytearray(image_file.read())
response = transcribe_documents(regular_prompt, system_prompt, tools_use, tool_choice, image_bytes)

response_body = json.loads(response.get('body').read())

display(response_body)
# tool_choice forces a tool call, so the first content block is expected to carry the tool input.
display(JSON(response_body['content'][0]['input'], root='content'))


## Classify documents
- JSON schema for document classification is defined in the tool definition
- The enum keyword in the JSON schema can be used to restrict a value to a fixed set of values.
- The system prompt can be omitted, because the schema is carried by the tool definition itself
- Instruct Claude to use a specific tool in the response to the user’s query by specifying the tool in the tool_choice: `tool_choice = {"type": "tool", "name": "classify_documents"}`


In [None]:
# Tool definition that carries the whole classification schema, so no system prompt is needed.
tools_use = [{
            "name": "classify_documents",
            "description": "Perform document classification",
            "input_schema": {
                "type": "object",
                "properties": {
                    "documents": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "type": { "description": "Type of document", "enum": ["receipt", "invoice"] },
                                "country_code": { "description": "Country code e.g. AU/US/UK" },
                                "document_number": { "description": "The document number or other identifier number" }

                            }
                        }
                    }
                }
            }
        }]
# Force the model to answer through the classification tool.
tool_choice = {"type": "tool", "name": "classify_documents"}

regular_prompt = """
Classify documents
"""

# Read the sample image inside a context manager so the file handle is closed promptly.
with open("20240528_194949.jpg", 'rb') as image_file:
    image_bytes = bytearray(image_file.read())
response = transcribe_documents(regular_prompt, None, tools_use, tool_choice, image_bytes)

response_body = json.loads(response.get('body').read())

display(response_body)
# tool_choice forces a tool call, so the first content block is expected to carry the tool input.
display(JSON(response_body['content'][0]['input'], root='content'))
