In [None]:
# Install or upgrade the packages listed in requirements.txt using the %pip magic.
%pip install --upgrade -r requirements.txt

Basic Azure OpenAI Integration

In [None]:
from payi.lib.helpers import payi_azure_openai_url, PayiHeaderNames
from openai import AzureOpenAI
from dotenv import load_dotenv
import json
import os

# Load variables from a local .env file (if present) into the process environment
load_dotenv()

# Read the Azure OpenAI API key from the environment; replace the default value
# (the second argument) with your own key if needed
azure_openai_key = os.getenv("AZURE_OPENAI_API_KEY", "YOUR_AZURE_OPENAI_API_KEY")

# Replace with the API version of your Azure OpenAI deployment.
# AZURE_OPENAI_VERSION is the correctly spelled variable; the misspelled
# AZURE_OPENAI_VERION is still honored as a fallback so existing .env files keep working.
azure_openapi_version = os.getenv(
    "AZURE_OPENAI_VERSION",
    os.getenv("AZURE_OPENAI_VERION", "2024-02-15-preview"),
)

# Replace with your deployed Azure OpenAI endpoint URI
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT", "YOUR_AZURE_OPENAI_ENDPOINT")

# Replace with your Azure OpenAI Deployment Name, e.g. "test-4o"
azure_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT", "YOUR_AZURE_OPENAI_DEPLOYMENT")

# Replace with your Azure OpenAI Model Name, e.g. "gpt-4o-2024-05-13"
# Pay-i requires the model name to understand the mapping from deployment to model
azure_model = os.getenv("AZURE_OPENAI_MODEL", "YOUR_AZURE_OPENAI_MODEL")

# Read the Pay-i API key from the environment; replace the default value
# (the second argument) with your own key if needed
payi_api_key = os.getenv("PAYI_API_KEY", "YOUR_PAYI_API_KEY")

# Replace with one of the following values: "global", "datazone", or "region" depending on your azure deployment type
azure_deployement_type = None

# Headers sent with every request made by the client: the Pay-i key, the real
# Azure endpoint, and the model name used for pricing the deployment
payi_headers = {
    PayiHeaderNames.api_key: payi_api_key,
    PayiHeaderNames.provider_base_uri: azure_endpoint,
    PayiHeaderNames.price_as_resource: azure_model,
}

# The resource scope header is only added when a deployment type was configured above
if azure_deployement_type is not None:
    payi_headers[PayiHeaderNames.resource_scope] = azure_deployement_type

# The client targets the URL returned by payi_azure_openai_url() instead of
# azure_endpoint; the Azure endpoint itself travels in payi_headers.
payi_proxy_url = payi_azure_openai_url()

oai_client = AzureOpenAI(
    azure_endpoint=payi_proxy_url,
    api_version=azure_openapi_version,
    api_key=azure_openai_key,
    default_headers=payi_headers,
)

# Send a single-turn chat completion; `model` is the Azure deployment name
test_messages = [{"role": "user", "content": "Say 'this is a test'"}]
response = oai_client.chat.completions.create(
    messages=test_messages,
    model=azure_deployment,
)

# Show the text of the first choice
completion = response.choices[0].message.content
print(completion)

# Pretty-print the xproxy_result data attached to the response
xproxy_result = response.xproxy_result
print(json.dumps(xproxy_result, indent=4))

Handle streaming calls. xproxy_result is returned as part of the last chunk.

In [None]:
# Same endpoint as a regular call, but with stream=True the result is iterated chunk by chunk
stream = oai_client.chat.completions.create(
    stream=True,
    messages=[{"role": "user", "content": "tell me a very short story"}],
    model=azure_deployment,
)
for chunk in stream:
    # Print incremental text only when the chunk has a choice with content
    choices = chunk.choices
    if choices:
        delta_text = choices[0].delta.content
        if delta_text is not None:
            print(delta_text, end="")
    # xproxy_result is looked up among the chunk's extra (non-schema) fields
    extra_fields = chunk.model_extra
    if 'xproxy_result' in extra_fields:
        print()
        print(json.dumps(extra_fields['xproxy_result'], indent=4))

Use the Pay-i SDK to send a request with a request tag

In [None]:
from payi.lib.helpers import create_headers

# NOTE(review): the heading for this cell mentions a request tag, but this call
# passes no extra_headers, so nothing built by create_headers is sent — confirm intent.
test_messages = [{"role": "user", "content": "Say 'this is a test'"}]
response = oai_client.chat.completions.create(
    messages=test_messages,
    model=azure_deployment,
)

# Show the text of the first choice
completion = response.choices[0].message.content
print(completion)

# Pretty-print the xproxy_result data attached to the response
xproxy_result = response.xproxy_result
print(json.dumps(xproxy_result, indent=4))

Create a limit and make a request with that limit

In [None]:
from payi import Payi

# Client for calling the Pay-i API directly (limits, ingest, use cases)
payi_client = Payi(
    api_key=payi_api_key
)

# Create a limit
limit_response = payi_client.limits.create(
    # As long as the limit configuration remains the same across creates, the same limit name can be used repeatedly
    limit_name='Azure OpenAI quickstart allow limit',
    max=12.50, # $12.50 USD
    # Lowercase to match the "block" value used by the blocking-limit cell
    limit_type="allow",
    limit_tags=["example_limit"]
)

# Keep the name and ID so later cells can reference this limit
limit_name = limit_response.limit.limit_name
limit_id = limit_response.limit.limit_id

print("Limit Created")
print(f"Limit Name: {limit_name}")
print(f"Limit ID: {limit_id}")

# Build the per-request headers that reference the limit created above
limit_headers = create_headers(limit_ids=[limit_id])

# Send the request with those headers attached
response = oai_client.chat.completions.create(
    messages=[{"role": "user", "content": "Say 'this is a test'"}],
    model=azure_deployment,
    extra_headers=limit_headers,
)

# Show the text of the first choice
completion = response.choices[0].message.content
print(completion)

# Pretty-print the xproxy_result data attached to the response
xproxy_result = response.xproxy_result
print(json.dumps(xproxy_result, indent=4))

See limit status

In [None]:
# Retrieve the limit by ID and print its identifying fields and accumulated base costs
response = payi_client.limits.retrieve(limit_id=limit_id)
print(f"Limit Name: {response.limit.limit_name}")
print(f"Limit ID: {response.limit.limit_id}")
print(f"Limit Creation Timestamp: {response.limit.limit_creation_timestamp}")
print(f"Limit Tags: {response.limit.limit_tags}")
print(f"Limit Input Base Cost: {response.limit.totals.cost.input.base}")
print(f"Limit Output Base Cost: {response.limit.totals.cost.output.base}")
# The total line reads cost.total (it previously repeated the output cost)
print(f"Limit Total Base Cost: {response.limit.totals.cost.total.base}")

Make an ingest call with pre-computed token values

In [None]:
# Hard-coded example unit counts submitted to Pay-i instead of making a model call
precomputed_units = {"text": {"input": 50, "output": 100}}
example_properties = {"key1": "a", "key2": "b"}

response = payi_client.ingest.units(
    category="system.openai",
    resource="gpt-4o",
    units=precomputed_units,
    limit_ids=[limit_id],
    properties=example_properties,
)

print(f"Ingest request ID: {response.request_id}")

# Base costs reported back for the ingested units
ingest_cost = response.xproxy_result.cost
print(f"Input Base Cost: {ingest_cost.input.base}")
print(f"Output Base Cost: {ingest_cost.output.base}")
print(f"Total Base Cost: {ingest_cost.total.base}")

Reset a limit back to zero tracked cost

In [None]:
# Reset the limit; the response carries a message plus a limit_history snapshot
response = payi_client.limits.reset(limit_id=limit_id)
print(response.message)
print("State prior to reset: ")

# Identity fields of the snapshot
history = response.limit_history
print(f"Limit Name: {history.limit_name}")
print(f"Limit ID: {history.limit_id}")
print(f"Limit Tags: {history.limit_tags}")
print(f"Limit Reset Timestamp: {history.limit_reset_timestamp}")

# Base costs recorded in the snapshot
history_cost = history.totals.cost
print(f"Limit Input Base Cost: {history_cost.input.base}")
print(f"Limit Output Base Cost: {history_cost.output.base}")
print(f"Limit Total Base Cost: {history_cost.total.base}")

# Retrieve the limit again and print its current identity fields and base costs
print("\nState after reset:")
response = payi_client.limits.retrieve(limit_id=limit_id)
print(f"Limit Name: {response.limit.limit_name}")
print(f"Limit ID: {response.limit.limit_id}")
print(f"Limit Creation Timestamp: {response.limit.limit_creation_timestamp}")
print(f"Limit Tags: {response.limit.limit_tags}")
# The input line reads cost.input.base (it previously printed the whole cost object)
print(f"Limit Input Base Cost: {response.limit.totals.cost.input.base}")
print(f"Limit Output Base Cost: {response.limit.totals.cost.output.base}")
print(f"Limit Total Base Cost: {response.limit.totals.cost.total.base}")

Create a small blocking limit that will prevent calls from happening that exceed the maximum, then capture the output.

In [None]:
# Create a "block" limit with a tiny maximum.
# As long as the limit configuration remains the same across creates, the same limit name can be used repeatedly
limit_response = payi_client.limits.create(
    limit_tags=["limit_block_example"],
    limit_type="block",
    max=0.0000001,
    limit_name='Azure OpenAI quickstart block limit',
)

# Keep the ID so the requests below can reference this limit
blocking_limit = limit_response.limit
block_limit = blocking_limit.limit_id

print("Limit Created")
print(f"Limit Name: {blocking_limit.limit_name}")
print(f"Limit ID: {block_limit}")

# Send two requests tied to block_limit; an exception from either one is handled below
try:
    response = oai_client.chat.completions.create(
        model=azure_deployment,
        messages=[{"role": "user", "content": "provide me a list of toys for children 5 and under"}],
        extra_headers=create_headers(
            limit_ids=[block_limit]
        )
    )

    completion = response.choices[0].message.content
    print(completion)

    response = oai_client.chat.completions.create(
        model=azure_deployment,
        messages=[{"role": "user", "content": "tell me a short story about a toy"}],
        extra_headers=create_headers(
            limit_ids=[block_limit]
        )
    )
except Exception as e:
    # Only exceptions exposing a `body` can be shown as JSON. Anything else
    # (e.g. a connection failure or a NameError) is re-raised unchanged rather
    # than being masked by an AttributeError on `e.body`.
    error_body = getattr(e, "body", None)
    if error_body is None:
        raise
    print(json.dumps(error_body, indent=4))

Create a use case definition and send a request with the use case. Pay-i will automatically generate a use case ID that can be specified in later requests.

In [None]:
# Register a use case definition under a fixed name
use_case_name="azure_openai_quickstart_use_case"
use_case_response = payi_client.use_cases.definitions.create(
    description="An example of a use case",
    name=use_case_name,
)

# Build headers that reference both the earlier limit and the use case name
use_case_headers = create_headers(
    limit_ids=[limit_id],
    use_case_name=use_case_name,
)

# Send the request with those headers attached
response = oai_client.chat.completions.create(
    messages=[{"role": "user", "content": "Say 'this is a test'"}],
    model=azure_deployment,
    extra_headers=use_case_headers,
)

# Show the text of the first choice
completion = response.choices[0].message.content
print(completion)

# Keep the use case ID found in xproxy_result, then pretty-print the whole result
xproxy_result = response.xproxy_result
use_case_id = xproxy_result['use_case_id']
print(json.dumps(xproxy_result, indent=4))

Send a request with a limit and user ID

In [None]:
# Build headers that reference the earlier limit and a user id
# (the user id can be any string value)
user_headers = create_headers(
    user_id="example_user_id",
    limit_ids=[limit_id],
)

# Send the request with those headers attached
response = oai_client.chat.completions.create(
    messages=[{"role": "user", "content": "Say 'this is a test'"}],
    model=azure_deployment,
    extra_headers=user_headers,
)

# Show the text of the first choice
completion = response.choices[0].message.content
print(completion)

# Pretty-print the xproxy_result data attached to the response
xproxy_result = response.xproxy_result
print(json.dumps(xproxy_result, indent=4))

List and then delete all limits

In [None]:
# Walk the items of the limits listing and delete each one by ID
response = payi_client.limits.list()
for limit in response.items:
    stale_limit_id = limit.limit_id
    print("Deleting limit with id:" + stale_limit_id)
    payi_client.limits.delete(stale_limit_id)