In [None]:
# Install (or upgrade) the packages listed in requirements.txt into the kernel's environment
%pip install --upgrade -r requirements.txt

Create a Pay-i client and a limit

In [None]:
from payi import Payi
import os 

# API keys are looked up in the environment. The second argument of each lookup
# is a placeholder default; replace it with your own key if needed.
payi_api_key = os.environ.get("PAYI_API_KEY", "YOUR_PAYI_API_KEY")
openai_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_KEY")

payi_client = Payi(api_key=payi_api_key)

# Create a limit. The same limit name can be used repeatedly across creates
# as long as the limit configuration remains the same.
limit_response = payi_client.limits.create(
    limit_name='Langchain quickstart allow limit',
    max=12.50,  # $12.50 USD
    limit_type="Allow",
    limit_tags=["example_limit"],
)

# Keep the name and id of the created limit for the cells that follow
limit_name, limit_id = (
    limit_response.limit.limit_name,
    limit_response.limit.limit_id,
)

print(
    "Limit Created",
    f"Limit Name: {limit_name}",
    f"Limit ID: {limit_id}",
    sep="\n",
)



Define the callback to be invoked when the LLM call ends

In [None]:
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.prompts import ChatPromptTemplate

class PayiHandler(BaseCallbackHandler):
    """Callback handler that sends an LLM call's token usage to Pay-i.

    Args:
        client: Object exposing ``ingest.units(...)``; this notebook passes a
            ``Payi`` client.
        params: Mapping of ingest settings. ``'category'`` and ``'resource'``
            are required; ``'limit_ids'`` and ``'request_tags'`` are optional
            and are only forwarded when present. A shallow copy is stored, so
            adding or removing keys on the caller's mapping afterwards does
            not affect the handler.
    """

    def __init__(self, client, params):
        self.name = "custom_handler"
        self.client = client
        self.params = dict(params)

    def on_llm_end(self, response, **kwargs):
        """Ingest the prompt/completion token counts found in ``response``.

        Does nothing when ``response.llm_output`` is empty or has no
        ``'token_usage'`` entry. Prints a notice and returns when both counts
        are missing or zero. Errors raised while building or sending the
        ingest request are printed rather than propagated, so a reporting
        failure does not break the LLM call.
        """
        llm_output = response.llm_output
        if not llm_output or 'token_usage' not in llm_output:
            return

        # Treat a None usage dict or None counts as zero instead of raising
        # from inside the callback.
        token_usage = llm_output['token_usage'] or {}
        prompt_tokens = token_usage.get('prompt_tokens') or 0
        completion_tokens = token_usage.get('completion_tokens') or 0

        if not (prompt_tokens > 0 or completion_tokens > 0):
            print(f"{self.name}: no token usage in LLM output", response)
            return

        try:
            # limit_ids and request_tags are optional: leave them out of
            # params to skip them. Keys that are present are forwarded as-is.
            optional_args = {
                key: self.params[key]
                for key in ('limit_ids', 'request_tags')
                if key in self.params
            }
            result = self.client.ingest.units(
                category=self.params['category'],
                resource=self.params['resource'],
                units={"text": {"input": prompt_tokens, "output": completion_tokens}},
                **optional_args,
            )
            print(f'ingest result: {result.model_dump_json(indent=4)}')
        except Exception as e:
            print(f"{self.name}: error sending usage info", e)

Configure the parameters for calling OpenAI and for ingesting the token counts into Pay-i

In [None]:
from payi.lib.helpers import PayiCategories

# Settings shared by the OpenAI call (model name) and the Pay-i ingest call
params = dict(
    category=PayiCategories.openai,
    resource='gpt-3.5-turbo',
    limit_ids=[limit_id],
    request_tags=['x', 'y'],
)

Make an OpenAI request and register the callback handler

In [None]:
from langchain_openai import ChatOpenAI

# Handler that reports token usage using the parameters configured earlier
handler = PayiHandler(client=payi_client, params=params)

# Chat model with the handler registered through its callbacks
model = ChatOpenAI(model=params['resource'], api_key=openai_key, callbacks=[handler])

# Single-message prompt, piped into the model to form the sequence
prompt = ChatPromptTemplate.from_messages(["say this only: hi"])
chain = prompt | model

# Run the sequence; the prompt has no input variables, so the input is empty
response = chain.invoke({})

print(response.to_json())