In [1]:
import os
from dotenv import load_dotenv
import boto3
import json
import base64

# Load variables from a .env file into the process environment so the
# os.getenv() lookups for the AWS credentials in the next cell can find them.
load_dotenv()

True

In [2]:
# Region and credentials for the Bedrock runtime client. The key pair is read
# from the environment (populated by load_dotenv) rather than hard-coded.
AWS_REGION_NAME = 'us-west-2'
aws_access_key_id, aws_secret_access_key = (
    os.getenv(variable) for variable in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')
)

# Client used for model inference calls (converse).
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime.html
bedrock = boto3.client(
    'bedrock-runtime',
    region_name=AWS_REGION_NAME,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

In [3]:
# Troubleshooting: use a separate client with service_name 'bedrock' (not 'bedrock-runtime') to list the available foundation models.
# https://docs.aws.amazon.com/bedrock/latest/APIReference/welcome.html
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock.html

# client = boto3.client(
#     service_name='bedrock',
#     aws_access_key_id=aws_access_key_id,
#     aws_secret_access_key=aws_secret_access_key,
#     region_name=AWS_REGION_NAME
# )

# summ = client.list_foundation_models()['modelSummaries']
# [model for model in summ if 'Sonnet' in model['modelName']]

In [4]:
# [m for m in dir(bedrock) if not m.startswith('_')]
# help(bedrock.converse)

In [5]:
# Disabled connectivity check: sends a one-line greeting through the Converse
# API and prints the reply message. Flip the condition to True to run it.
if False:
    initial_message = {
        'role': 'user',
        'content': [{'text': 'How are you today?'}],
    }
    message_list = [initial_message]

    response = bedrock.converse(
        messages=message_list,
        modelId='anthropic.claude-3-sonnet-20240229-v1:0',
        inferenceConfig={'maxTokens': 2000, 'temperature': 0},
    )

    # The assistant's reply sits under output -> message in the response.
    response_message = response['output']['message']
    print(json.dumps(response_message, indent=4))

In [6]:
# Tool definition passed to bedrock.converse via toolConfig. The model is asked
# to "call" summarize_document, so its reply arrives as structured JSON that
# follows the input schema below: a nested object of key statistics plus a
# free-text executive summary.
tool_list = [
    {
        "toolSpec": {
            "name": "summarize_document",
            "description": "Summarize document content.",
            "inputSchema": {
                "json": {
                    "type": "object",
                    "properties": {
                        "key_statistics": {
                            "type": "object",
                            "properties": {
                                "revenue": {
                                    "type": "string",
                                    "description": "The annual revenue."
                                },
                                "historical_revenue_growth": {
                                    "type": "string",
                                    "description": "Historical revenue growth."
                                },
                                "projected_revenue_growth": {
                                    "type": "string",
                                    "description": "Projected revenue growth."
                                },
                                # snake_case like the sibling keys; the key
                                # previously contained a space ("profit margins").
                                "profit_margins": {
                                    "type": "string",
                                    "description": "Profit margins."
                                },
                            }
                        },
                        "executive_summary": {
                            "type": "string",
                            "description": "Executive summary."
                        },
                    },
                    # Both top-level fields must be present in the tool input.
                    "required": [
                        "key_statistics",
                        "executive_summary",
                    ]
                }
            }
        }
    }
]


In [7]:
# Load the source PDF and ask the model to summarize it through the
# summarize_document tool defined in tool_list.
filename = "IT Consulting in the US.pdf"

# Read through a context manager so the file handle is closed as soon as the
# bytes are in memory instead of being left open until garbage collection.
with open(filename, 'rb') as pdf_file:
    content = pdf_file.read()

# Fail fast on an empty file rather than sending an empty document to the service.
if not content:
    raise ValueError(f"{filename!r} is empty; nothing to summarize")

# Not used by the request below, which sends the raw bytes in `content`.
encoded = base64.b64encode(content)

prompt = """You are an expert in extracting market and financial data from documents."""

# Single user turn: the instruction text followed by the PDF attachment.
initial_message = {
    "role": "user",
    "content": [
        {
            "text": prompt,
        },
        {
            "document": {
                "format": "pdf",
                "name": 'document',
                "source": {
                    "bytes": content
                }
            }
        }
    ],
}

response = bedrock.converse(
    modelId="anthropic.claude-3-sonnet-20240229-v1:0",
    # modelId="meta.llama3-1-405b-instruct-v1:0",
    messages=[initial_message],
    inferenceConfig={
        "maxTokens": 4000,
        "temperature": 0
    },
    toolConfig={
        "tools": tool_list,
        # Force the model to answer via summarize_document so the reply is
        # structured tool input rather than free text.
        "toolChoice": {
            "tool": {
                "name": "summarize_document"
            }
        }
    }
)

KeyError: 'properties'

In [11]:
# Print the model's reply and pull out the structured tool input.
# Check the response shape first: this cell has failed with a bare
# KeyError: 'output' when `response` was not a successful converse result.
if 'output' not in response:
    raise KeyError(
        "'output' missing from response; re-run the bedrock.converse cell "
        "successfully before running this cell"
    )

response_message = response['output']['message']
print(json.dumps(response_message, indent=4))

# Search for the first toolUse block rather than assuming it is content[0];
# the content list may hold other block types ahead of the tool call.
core_response = next(
    (
        block['toolUse']['input']
        for block in response_message['content']
        if 'toolUse' in block
    ),
    None,
)
if core_response is None:
    raise KeyError("no 'toolUse' block found in the model response content")

print(json.dumps(core_response, indent=2))

KeyError: 'output'