This board showcases what the default LLM monitoring template looks like.

It logs synthetic data in the format used by the weave.monitoring.openai integration.

See the openai_monitoring notebook for instructions on logging real OpenAI API calls.

In [1]:
import uuid
from datetime import timedelta
import weave
# NOTE(review): this runs before the remaining weave imports below; presumably it
# switches weave to a development frontend — confirm whether the ordering matters
# and whether the call should stay in a notebook that is shared as an example.
weave.use_frontend_devmode()
from weave import ops_arrow
from weave.monitoring import monitor

In [2]:
from weave.syndata_mon import random_predictions

In [3]:
preds = random_predictions(10)

# Convert synthetic data into the format used by the weave.monitoring.openai integration.

# Placeholder values written into the synthetic spans. The API keys are truncated
# display strings (note the literal "..."), not usable credentials.
DEFAULT_API_KEY = 'sk-U4...yK7z'
NEW_API_KEY = 'sk-U9...a22c'
DEFAULT_MODEL = 'gpt-3.5-turbo-0613'
GPT4_MODEL = 'gpt-4-0613'
# Latency of gpt-4 calls is scaled by this factor relative to the synthetic latency.
GPT4_LATENCY_MULT = 3
SPAN_NAME = 'openai.api_resources.chat_completion.type.create'

# Map each model_version in the synthetic data to an (api_key, model) pair.
# Versions are sorted and then walked in reverse, i.e. treated as most-recent-first:
#   - the second and third most recent versions use a different api key
#   - the second most recent version additionally uses gpt-4
# This makes it so that there's a new API key that has appeared in our logs recently,
# and that key has started using gpt-4, which makes a cost spike.
versions = sorted(preds.column('model_version').unique())
version_map = {}
for i, v in enumerate(reversed(versions)):
    api_key = NEW_API_KEY if i in (1, 2) else DEFAULT_API_KEY
    model = GPT4_MODEL if i == 1 else DEFAULT_MODEL
    version_map[v] = (api_key, model)

# Build one span record per synthetic prediction.
spans = []
for pred in preds:
    api_key, model = version_map[pred['model_version']]
    latency_mult = GPT4_LATENCY_MULT if model == GPT4_MODEL else 1
    timestamp = pred['timestamp']
    prompt_tokens = pred['prompt_tokens']
    completion_tokens = pred['completion_tokens']

    span = monitor.Span(
        SPAN_NAME,
        inputs={
            'messages': [
                {"role": "user", "content": pred['prompt']}
            ]
        },
        output={
            'id': 'chatcmpl-%s' % uuid.uuid4(),
            'object': 'chat.completion',
            # The OpenAI API reports `created` as an integer Unix timestamp (seconds),
            # so truncate the float returned by datetime.timestamp().
            'created': int(timestamp.timestamp()),
            'model': model,
            'choices': [
                {
                    'index': 0,
                    'message': {
                        'role': 'assistant',
                        'content': pred['completion']
                    },
                    'finish_reason': 'stop'
                }
            ],
        },
        attributes={
            'api_key': api_key,
            'username': pred['username']
        },
        summary={
            'prompt_tokens': prompt_tokens,
            'completion_tokens': completion_tokens,
            'total_tokens': prompt_tokens + completion_tokens
        })
    # Backdate the span to the synthetic timestamp instead of the time it was built.
    span.start_time = timestamp
    span.end_time = timestamp + timedelta(seconds=pred['latency'] * latency_mult)
    # Each record carries an explicit top-level 'timestamp' alongside the span fields.
    spans.append({'timestamp': timestamp, **span.asdict()})

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 101/101 [00:00<00:00, 470.22it/s]


In [4]:
# Convert the collected span dicts with ops_arrow.to_arrow, then save the
# result as a local table named 'oai_data'
spans_arrow = ops_arrow.to_arrow(spans)
oai_data = weave.save(spans_arrow, 'oai_data')

In [5]:
# Evaluate the saved table as the cell's last expression so the notebook displays it
oai_data

In [6]:
# Visualize the saved data with the llm monitoring template:
# resolve the template's board against oai_data and display it
from weave.panels_py import panel_llm_monitor
resolve_board = panel_llm_monitor.board.raw_resolve_fn
board = resolve_board(oai_data)
board

ERROR:segment:error uploading: HTTPSConnectionPool(host='api.segment.io', port=443): Max retries exceeded with url: /v1/batch (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x1695b45e0>: Failed to establish a new connection: [Errno 8] nodename nor servname provided, or not known'))
