In [1]:
%pip install --upgrade -r requirements.txt


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


Basic Bedrock Integration

In [2]:
import boto3
import json
import os
from dotenv import load_dotenv

from payi import Payi
from payi.lib.instrument import payi_instrument

load_dotenv()

# Read the API KEYs from the environment, replace the default values (the second argument) with your own keys if needed
payi_api_key = os.getenv("PAYI_API_KEY", "YOUR_PAYI_API_KEY")

payi_client = Payi(
    api_key=payi_api_key
)

use_case_name = "bedrock_use_case"

payi_client.use_cases.definitions.create(name=use_case_name, description="Bedrock use case")
payi_instrument(config={"proxy": True, "use_case_name": use_case_name})

# Substitute the region for your regional deployment
region_name = "us-west-2"

bedrock = boto3.client(
    'bedrock-runtime',
    region_name=region_name,
    )

request_dict = {
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 512,
    "temperature": 0.5,
    "messages": [
        {
            "role": "user",
            "content": [{"type": "text", "text": "this is a test"}],
        }
    ],
}

# Convert the request to JSON
request_body = json.dumps(request_dict)
model_id = 'anthropic.claude-3-haiku-20240307-v1:0'

# Invoke the model with the request.
invoke_response = bedrock.invoke_model(
    modelId=model_id,
    body=request_body,
    )

# Decode the response body.
response = invoke_response["body"].read()

response_json = json.loads(response)
print(json.dumps(response_json, indent=4))

xproxy_result = response_json['xproxy_result']
print("xproxy_result:")
print(json.dumps(xproxy_result, indent=4))


{
    "id": "msg_bdrk_01VeLZUkMMq2yoAv56kpX2Hx",
    "type": "message",
    "role": "assistant",
    "model": "claude-3-haiku-20240307",
    "content": [
        {
            "type": "text",
            "text": "Okay, I'm ready for your test. What would you like me to do?"
        }
    ],
    "stop_reason": "end_turn",
    "stop_sequence": null,
    "usage": {
        "input_tokens": 11,
        "output_tokens": 21
    },
    "xproxy_result": {
        "request_id": "5673972",
        "experience_id": "3950c164-fedb-49f3-8ebc-60464e8241f9",
        "use_case_id": "3950c164-fedb-49f3-8ebc-60464e8241f9",
        "resource_id": "75",
        "cost": {
            "currency": "usd",
            "input": {
                "base": 2.75e-06
            },
            "output": {
                "base": 2.625e-05
            },
            "total": {
                "base": 2.9e-05
            }
        }
    }
}
xproxy_result:
{
    "request_id": "5673972",
    "experience_id": "3950c164-fe

Invoke stream invocation with pay-i as the proxy.

In [3]:
response = bedrock.invoke_model_with_response_stream(
    body=request_body,
    modelId=model_id, 
)

message = ""
input_tokens = None
output_tokens = None
invoke_id = None

stream = response.get('body')

for event in stream:
    chunk = event.get('chunk')
    if not chunk:
        continue

    decode = json.loads(chunk.get('bytes').decode())

    match decode['type']:
        case "message_start":
            input_tokens = decode['message']['usage']['input_tokens']
            invoke_id = decode['message']['id']
        case "content_block_start":
            message += decode['content_block']['text']
        case "content_block_delta":
            message += decode['delta']['text']
        case "message_delta":
            output_tokens = decode['usage']['output_tokens']
        case "content_block_stop" | "message_stop":
            ...

print(message)

Understood, this is a test. I'm ready to assist you with any questions or tasks you may have.


Converse stream invocation with pay-i as the proxy.

In [4]:
converse_request_dict=[
    {
        "role": "user",
        "content": [
            {
                "text": "this is a test"
            }
        ]
    }
]
converse_request_inference_config={
    "temperature": 0.5,
    "maxTokens": 512,
}

converse_response = bedrock.converse_stream(
    modelId=model_id,
    messages=converse_request_dict,
    inferenceConfig=converse_request_inference_config
)

stream = converse_response['stream']

if stream:
    for event in stream:
        # print(f'{json.dumps(event, indent=2)}')

        # decode = json.loads(event)
        
        if 'contentBlockDelta' in event:
            message += event['contentBlockDelta']['delta']['text']
        elif 'metadata' in event:
            input_tokens = event['metadata']['usage']['inputTokens']
            output_tokens = event['metadata']['usage']['outputTokens']

print(message)


Understood, this is a test. I'm ready to assist you with any questions or tasks you may have.Okay, this is a test. I'm ready to assist you with any questions or tasks you may have.


Use the Pay-i SDK context manager track_context() to send a request with request tags

In [5]:
from payi.lib.instrument import track_context

with track_context(request_tags=["x", "y"]):
    invoke_response = bedrock.invoke_model(
        modelId=model_id,
        body=request_body,
    )

response = invoke_response["body"].read()
response_json = json.loads(response)
print(json.dumps(response_json, indent=4))

xproxy_result = response_json['xproxy_result']
print("xproxy_result:")
print(json.dumps(xproxy_result, indent=4))

{
    "id": "msg_bdrk_01QUvbFgLu3ZzrLjE5QHuSco",
    "type": "message",
    "role": "assistant",
    "model": "claude-3-haiku-20240307",
    "content": [
        {
            "type": "text",
            "text": "Okay, this is a test. I'm ready to assist you with whatever you need."
        }
    ],
    "stop_reason": "end_turn",
    "stop_sequence": null,
    "usage": {
        "input_tokens": 11,
        "output_tokens": 22
    },
    "xproxy_result": {
        "request_id": "5673975",
        "request_tags": [
            "x",
            "y"
        ],
        "experience_id": "3950c164-fedb-49f3-8ebc-60464e8241f9",
        "use_case_id": "3950c164-fedb-49f3-8ebc-60464e8241f9",
        "resource_id": "75",
        "cost": {
            "currency": "usd",
            "input": {
                "base": 2.75e-06
            },
            "output": {
                "base": 2.75e-05
            },
            "total": {
                "base": 3.025e-05
            }
        }
    }
}

Create a limit and make a request with that limit

In [6]:
# Create a limit
limit_response = payi_client.limits.create(
    # As long as the limit configuration remains the same across creates, the same limit name can be used repeatedly
    limit_name='Bedrock quickstart allow limit',
    max=12.50, #$12.50 USD
    limit_type="Allow",
    limit_tags=["example_limit"]
)

limit_name = limit_response.limit.limit_name
limit_id = limit_response.limit.limit_id

print("Limit Created")
print(f"Limit Name: {limit_name}")
print(f"Limit ID: {limit_id}")

with track_context(request_tags=["x", "y"], limit_ids=[limit_id]):
    invoke_response = bedrock.invoke_model(
        modelId=model_id,
        body=request_body,
    )

response = invoke_response["body"].read()
response_json = json.loads(response)
print(json.dumps(response_json, indent=4))

xproxy_result = response_json['xproxy_result']
print(json.dumps(xproxy_result, indent=4))

Limit Created
Limit Name: Bedrock quickstart allow limit
Limit ID: 2579
{
    "id": "msg_bdrk_019GBrjqGF8cxUGXga7MJm4c",
    "type": "message",
    "role": "assistant",
    "model": "claude-3-haiku-20240307",
    "content": [
        {
            "type": "text",
            "text": "Okay, this is a test. I'm ready to assist you with any questions or tasks you may have."
        }
    ],
    "stop_reason": "end_turn",
    "stop_sequence": null,
    "usage": {
        "input_tokens": 11,
        "output_tokens": 26
    },
    "xproxy_result": {
        "request_id": "5673976",
        "request_tags": [
            "x",
            "y"
        ],
        "limits": {
            "2579": {
                "state": "ok"
            }
        },
        "experience_id": "3950c164-fedb-49f3-8ebc-60464e8241f9",
        "use_case_id": "3950c164-fedb-49f3-8ebc-60464e8241f9",
        "resource_id": "75",
        "cost": {
            "currency": "usd",
            "input": {
                "base

See limit status

In [7]:
invoke_response = payi_client.limits.retrieve(limit_id=limit_id)
print(f"Limit Name: {invoke_response.limit.limit_name}")
print(f"Limit ID: {invoke_response.limit.limit_id}")
print(f"Limit Creation Timestamp: {invoke_response.limit.limit_creation_timestamp}")
print(f"Limit Tags: {invoke_response.limit.limit_tags}")
print(f"Limit Input Base Cost: {invoke_response.limit.totals.cost.input.base}")
print(f"Limit Output Base Cost: {invoke_response.limit.totals.cost.output.base}")
print(f"Limit Total Base Cost: {invoke_response.limit.totals.cost.output.base}")

Limit Name: Bedrock quickstart allow limit
Limit ID: 2579
Limit Creation Timestamp: 2025-05-01 16:39:38.999504+00:00
Limit Tags: ['example_limit']
Limit Input Base Cost: 2.75e-06
Limit Output Base Cost: 3.25e-05
Limit Total Base Cost: 3.25e-05


Make an ingest call with pre-computed token values

In [None]:
from payi.lib.helpers import PayiCategories
invoke_response = payi_client.ingest.units(
    category=PayiCategories.aws_bedrock,
    resource=model_id,
    units={ "text": { "input": 50, "output": 100 } },
    limit_ids=[limit_id],
    request_tags=["a", "b"]
)

print(f"Ingest request ID: {invoke_response.request_id}")
print(f"Input Base Cost: {invoke_response.xproxy_result.cost.input.base}")
print(f"Output Base Cost: {invoke_response.xproxy_result.cost.output.base}")
print(f"Total Base Cost: {invoke_response.xproxy_result.cost.total.base}")

Ingest request ID: 5673977
Input Base Cost: 1.25e-05
Output Base Cost: 0.000125
Total Base Cost: 0.0001375


Reset a limit back to zero tracked cost

In [9]:
invoke_response = payi_client.limits.reset(limit_id=limit_id)
print(invoke_response.message)
print("State prior to reset: ")
print(f"Limit Name: {invoke_response.limit_history.limit_name}")
print(f"Limit ID: {invoke_response.limit_history.limit_id}")
print(f"Limit Tags: {invoke_response.limit_history.limit_tags}")
print(f"Limit Reset Timestamp: {invoke_response.limit_history.limit_reset_timestamp}")
print(f"Limit Input Base Cost: {invoke_response.limit_history.totals.cost.input.base}")
print(f"Limit Output Base Cost: {invoke_response.limit_history.totals.cost.output.base}")
print(f"Limit Total Base Cost: {invoke_response.limit_history.totals.cost.total.base}")

print("\nState after reset:")
invoke_response = payi_client.limits.retrieve(limit_id=limit_id)
print(f"Limit Name: {invoke_response.limit.limit_name}")
print(f"Limit ID: {invoke_response.limit.limit_id}")
print(f"Limit Creation Timestamp: {invoke_response.limit.limit_creation_timestamp}")
print(f"Limit Tags: {invoke_response.limit.limit_tags}")
print(f"Limit Input Base Cost: {invoke_response.limit.totals.cost.input.base}")
print(f"Limit Output Base Cost: {invoke_response.limit.totals.cost.output.base}")
print(f"Limit Total Base Cost: {invoke_response.limit.totals.cost.total.base}")

Limit has been successfully reset.
State prior to reset: 
Limit Name: Bedrock quickstart allow limit
Limit ID: 2579
Limit Tags: ['example_limit']
Limit Reset Timestamp: 2025-05-01 16:39:40.503597+00:00
Limit Input Base Cost: 1.525e-05
Limit Output Base Cost: 0.0001575
Limit Total Base Cost: 0.00017275

State after reset:
Limit Name: Bedrock quickstart allow limit
Limit ID: 2579
Limit Creation Timestamp: 2025-05-01 16:39:38.999504+00:00
Limit Tags: ['example_limit']
Limit Input Base Cost: 0.0
Limit Output Base Cost: 0.0
Limit Total Base Cost: 0.0


Create a small blocking limit that will prevent calls from happening that exceed the maximum, then capture the output.

In [10]:
limit_response = payi_client.limits.create(
    #As long as the limit configuration remains the same across creates, the same limit name can be used repeatedly
    limit_name='Bedrock quickstart block limit',
    max=0.00000001, 
    limit_type="block",
    limit_tags=["limit_block_example"]
)
block_limit = limit_response.limit.limit_id

print("Limit Created")
print(f"Limit Name: {limit_response.limit.limit_name}")
print(f"Limit ID: {limit_response.limit.limit_id}")

try:
    with track_context(request_tags=["x", "y"], limit_ids=[block_limit]):
        longer_request_1_dict = {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 512,
            "temperature": 0.5,
            "messages": [
                {
                    "role": "user",
                    "content": [{"type": "text", "text": "provide me a list of toys for children 5 and under"}],
                }
            ],
        }
        longer_request_1_body =json.dumps(longer_request_1_dict)

        invoke_response = bedrock.invoke_model(
            modelId=model_id,
            body=longer_request_1_body,
        )

        response = invoke_response["body"].read()
        response_json = json.loads(response)
        print(json.dumps(response_json, indent=4))

        longer_request_2_dict = {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 512,
            "temperature": 0.5,
            "messages": [
                {
                    "role": "user",
                    "content": [{"type": "text", "text": "tell me a short story about a toy"}],
                }
            ],
        }
        longer_request_2_body =json.dumps(longer_request_2_dict)

        invoke_response = bedrock.invoke_model(
            modelId=model_id,
            body=longer_request_2_body,
        )

        # This will note execute as invoke_model call will raise an exception due to the blocking limit returning with a 4xx HTTP status code
        response = invoke_response["body"].read()
        response_json = json.loads(response)
        print(json.dumps(response_json, indent=4))

except Exception as e:
    print(json.dumps(e.response, indent=4))

Limit Created
Limit Name: Bedrock quickstart block limit
Limit ID: 2580
{
    "id": "msg_bdrk_0142Ad3YDZTatL6HQi7y12xG",
    "type": "message",
    "role": "assistant",
    "model": "claude-3-haiku-20240307",
    "content": [
        {
            "type": "text",
            "text": "Here is a list of toys that are suitable for children 5 and under:\n\n1. Building blocks (e.g., Lego, Duplo)\n2. Puzzles (jigsaw, shape sorters)\n3. Play dough and clay\n4. Art supplies (crayons, markers, paints, coloring books)\n5. Stuffed animals and dolls\n6. Ride-on toys (tricycles, scooters, wagons)\n7. Musical instruments (toy drums, xylophones, maracas)\n8. Picture books and storybooks\n9. Pretend play sets (kitchen sets, tool benches, dress-up clothes)\n10. Balls (bouncy balls, rubber balls, playground balls)\n11. Outdoor toys (sandbox toys, sidewalk chalk, bubble wands)\n12. Wooden toys (train sets, shape sorters, stacking cups)\n13. Educational toys (counting blocks, alphabet puzzles, shape sorte

Create a use case definition and send a request with it. Pay-i will auto generate a use case id that can be specified later.

In [11]:
# Create an use case definition
use_case_name="bedrock_quickstart_use_case"
use_case_response = payi_client.use_cases.definitions.create(
    name=use_case_name,
    description="An example of a use case"
)

# Make a request using the limit, request tags, and use case
with track_context(request_tags=["x", "y"], limit_ids=[limit_id], use_case_name=use_case_name):
    invoke_response = bedrock.invoke_model(
        modelId=model_id,
        body=request_body,
    )

response = invoke_response["body"].read()
response_json = json.loads(response)
print(json.dumps(response_json, indent=4))

xproxy_result = response_json['xproxy_result']
use_case_id = xproxy_result['use_case_id']
print("xproxy_result:")
print(json.dumps(xproxy_result, indent=4))

{
    "id": "msg_bdrk_01QmqWP662aWEuv1sJfAWbFG",
    "type": "message",
    "role": "assistant",
    "model": "claude-3-haiku-20240307",
    "content": [
        {
            "type": "text",
            "text": "Okay, this is a test. I'm ready to assist you with whatever you need."
        }
    ],
    "stop_reason": "end_turn",
    "stop_sequence": null,
    "usage": {
        "input_tokens": 11,
        "output_tokens": 22
    },
    "xproxy_result": {
        "request_id": "5673980",
        "request_tags": [
            "x",
            "y"
        ],
        "limits": {
            "2579": {
                "state": "ok"
            }
        },
        "experience_id": "a83a60d0-7902-4afe-b80e-88b80b58580d",
        "use_case_id": "a83a60d0-7902-4afe-b80e-88b80b58580d",
        "resource_id": "75",
        "cost": {
            "currency": "usd",
            "input": {
                "base": 2.75e-06
            },
            "output": {
                "base": 2.75e-05
      

Send a request with a limit and user ID

In [12]:
# Make a request using the limit, request tags, and user id
# user id can be any string value
with track_context(request_tags=["x", "y"], limit_ids=[limit_id], user_id="example_user_id"):
    invoke_response = bedrock.invoke_model(
        modelId=model_id,
        body=request_body,
    )

response = invoke_response["body"].read()
response_json = json.loads(response)
print(json.dumps(response_json, indent=4))

xproxy_result = response_json['xproxy_result']
print(json.dumps(xproxy_result, indent=4))

{
    "id": "msg_bdrk_01F3pTHHP1NBPn48jKMuicAT",
    "type": "message",
    "role": "assistant",
    "model": "claude-3-haiku-20240307",
    "content": [
        {
            "type": "text",
            "text": "Okay, this is a test response."
        }
    ],
    "stop_reason": "end_turn",
    "stop_sequence": null,
    "usage": {
        "input_tokens": 11,
        "output_tokens": 12
    },
    "xproxy_result": {
        "request_id": "5673981",
        "request_tags": [
            "x",
            "y"
        ],
        "limits": {
            "2579": {
                "state": "ok"
            }
        },
        "experience_id": "3950c164-fedb-49f3-8ebc-60464e8241f9",
        "use_case_id": "3950c164-fedb-49f3-8ebc-60464e8241f9",
        "resource_id": "75",
        "user_id": "example_user_id",
        "cost": {
            "currency": "usd",
            "input": {
                "base": 2.75e-06
            },
            "output": {
                "base": 1.5e-05
        

List and then delete all limits

In [13]:
invoke_response = payi_client.limits.list()
for limit in invoke_response.items:
    print("Deleting limit with id:" + limit.limit_id)
    payi_client.limits.delete(limit.limit_id)

Deleting limit with id:2579
Deleting limit with id:2580
