In [None]:
# Key indicators:
# - Percentage of requests taking longer than 3 seconds
# - Average time to response
# Usage:
    # Chart of usage throughout the day (with an option for a custom range)
# GPU memory load: chart throughout the day (with an option for a custom range)
# Parallel requests
# Number of tokens per second in and out (current) (aggregated per minute) (with options for custom aggregations and averages)

In [1]:
import datetime
import os

import matplotlib.pyplot as plt
import pandas as pd
import ubiops
from dateutil import parser
# UbiOps API tokens are secrets and must never be stored in the notebook itself.
# They are read from the environment instead; each value is expected to include
# the "Token " prefix, e.g.  export UBIOPS_API_TOKEN_POC="Token <hex>".
# NOTE: any token that was previously committed with this notebook should be
# revoked and re-issued.
API_TOKEN_chat = os.environ.get("UBIOPS_API_TOKEN_CHAT")
API_TOKEN_poc = os.environ.get("UBIOPS_API_TOKEN_POC")
if not API_TOKEN_poc:
    # The client configuration authenticates with API_TOKEN_poc, so fail here
    # with a clear message rather than with an authorization error later on.
    raise RuntimeError(
        "Environment variable UBIOPS_API_TOKEN_POC is not set; "
        "export it as 'Token <your-token>' before running this notebook."
    )

# Base URL of the UbiOps API and the default project used by the cells below.
HOST = "https://api.demo.vlam.ai/v2.1"
PROJECT_NAME = 'chat'

# Custom time range selection (example: one day)
start_date = parser.parse("2025-07-22 09:00:00")
end_date = parser.parse("2025-07-23 09:00:00")

In [2]:
# Build the client configuration: target host plus the Authorization header value.
configuration = ubiops.Configuration()
configuration.host = HOST
configuration.api_key["Authorization"] = API_TOKEN_poc

# `api` is the handle every later cell uses to call the UbiOps API.
api_client = ubiops.ApiClient(configuration)
api = ubiops.CoreApi(api_client)

In [3]:
# Lookup table of the deployment metrics used in this notebook.
# Maps metric name -> {"unit": display unit, "description": human-readable meaning}.
# get_deployment_metric() uses it to label results and to print metric details.
DEPLOYMENT_METRICS = {
    "deployments.credits": {"unit": "credits (float)", "description": "Usage of Credits"},
    "deployments.instances": {"unit": "instances (float)", "description": "Average number of active deployment instances"},
    "deployments.input_volume": {"unit": "bytes (int)", "description": "Volume of incoming data in bytes"},
    "deployments.output_volume": {"unit": "bytes (int)", "description": "Volume of outgoing data in bytes"},
    "deployments.request_duration": {"unit": "seconds (float)", "description": "Average time in seconds for a request to complete"},
    "deployments.memory_utilization": {"unit": "bytes (int)", "description": "Peak memory used during a request"},
    "deployments.requests": {"unit": "requests (int)", "description": "Number of requests made to the object"},
    "deployments.failed_requests": {"unit": "requests (int)", "description": "Number of failed requests made to the object"},
    "deployments.express_queue_size": {"unit": "items (int)", "description": "Average number of queued express requests"},
    "deployments.batch_queue_size": {"unit": "items (int)", "description": "Average number of queued batch requests"},
    # Queue *time* metrics are durations, so their unit is seconds (not items).
    "deployments.express_queue_time": {"unit": "seconds (float)", "description": "Average time in seconds for an express request to start processing"},
    "deployments.batch_queue_time": {"unit": "seconds (float)", "description": "Average time in seconds for a batch request to start processing"},
    "deployments.network_in": {"unit": "bytes (int)", "description": "Inbound network traffic for a deployment version"},
    "deployments.network_out": {"unit": "bytes (int)", "description": "Outbound network traffic for a deployment version"},
    "deployments.instance_start_time": {"unit": "seconds (float)", "description": "Average duration from instance creation to start time"},
}
def get_deployment_metric(project_name, metric_name, start_date, end_date, aggregation_period=60, unit=None, description=False, core_api=None):
    """
    Fetch a deployment metric from UbiOps and return it as a DataFrame.

    Args:
        project_name (str): Name of the UbiOps project to query.
        metric_name (str): Name of the metric (e.g., 'deployments.credits').
        start_date (datetime): Start of the time range.
        end_date (datetime): End of the time range.
        aggregation_period (int): Aggregation period in seconds (default: 60).
        unit (str, optional): Unit of the metric. If None, it is looked up in
            DEPLOYMENT_METRICS and falls back to "unknown".
        description (bool, optional): If True, print the metric's description and unit.
        core_api (optional): Object exposing ``time_series_data_list(...)``.
            Defaults to the notebook-level ``api`` client when omitted.

    Returns:
        pd.DataFrame: DataFrame with columns ['timestamp', 'value', 'unit'],
        one row per returned data point. The columns are present even when
        the API returns no data points.
    """
    metric_info = DEPLOYMENT_METRICS.get(metric_name, {})
    if unit is None:
        unit = metric_info.get("unit", "unknown")
    if description:
        print(f"Metric: {metric_name}")
        print(f"Unit: {unit}")
        print(f"Description: {metric_info.get('description', 'No description available.')}")
        print()

    # Resolve the client lazily so the global `api` is only required when no
    # explicit client is passed in.
    client = api if core_api is None else core_api
    response = client.time_series_data_list(
        project_name=project_name,
        metric=metric_name,
        start_date=start_date.strftime("%Y-%m-%d %H:%M:%S.%f"),
        end_date=end_date.strftime("%Y-%m-%d %H:%M:%S.%f"),
        aggregation_period=aggregation_period
    )

    # A missing/empty list of data points yields an empty (but well-formed) frame.
    data_points = response.to_dict()['data_points'] or []
    records = [
        {
            # start_date may arrive as a string or as a datetime object;
            # normalise both through an ISO string before parsing.
            "timestamp": pd.to_datetime(dp["start_date"] if isinstance(dp["start_date"], str) else dp["start_date"].isoformat()),
            "value": dp["value"],
            "unit": unit
        }
        for dp in data_points
    ]
    # Passing `columns` keeps the documented schema when `records` is empty,
    # so callers can always index df["timestamp"] / df["value"].
    return pd.DataFrame(records, columns=["timestamp", "value", "unit"])

In [4]:
# Query the custom "completion_tokens" metric of the "poc" project, restricted
# to a single deployment version through the `labels` filter.
pipelines_requests_rate_response = api.time_series_data_list(
    project_name="poc",
    metric="custom.completion_tokens",
    labels="deployment_version_id:07736fa1-9999-44c1-9dbd-7de83f43663f",
    aggregation_period=3600,  # in seconds: one data point per hour
    start_date="2025-07-13 00:00:00.00",
    end_date="2025-07-23 09:00:19.65",  # NOTE(review): the range is reportedly capped at 365 days - confirm
)

In [5]:
# Display the raw time-series response (aggregation period and its data points).
pipelines_requests_rate_response

{'aggregation_period': 3600,
 'data_points': [{'end_date': datetime.datetime(2025, 7, 14, 7, 0, tzinfo=tzutc()),
                  'start_date': datetime.datetime(2025, 7, 14, 6, 0, tzinfo=tzutc()),
                  'value': 50.0},
                 {'end_date': datetime.datetime(2025, 7, 14, 8, 0, tzinfo=tzutc()),
                  'start_date': datetime.datetime(2025, 7, 14, 7, 0, tzinfo=tzutc()),
                  'value': 49.5},
                 {'end_date': datetime.datetime(2025, 7, 14, 9, 0, tzinfo=tzutc()),
                  'start_date': datetime.datetime(2025, 7, 14, 8, 0, tzinfo=tzutc()),
                  'value': 173.02083333},
                 {'end_date': datetime.datetime(2025, 7, 14, 10, 0, tzinfo=tzutc()),
                  'start_date': datetime.datetime(2025, 7, 14, 9, 0, tzinfo=tzutc()),
                  'value': 143.66666667},
                 {'end_date': datetime.datetime(2025, 7, 14, 12, 0, tzinfo=tzutc()),
                  'start_date': datetime.datetime(202

In [44]:
# Fetch the instances of the project and show the raw result entries.
active_instances = api.project_instances_list(project_name=PROJECT_NAME)
active_instances.to_dict()["results"]

[{'id': '50c39711-325a-487a-baa0-e42633d2b8a8',
  'status': 'running',
  'time_created': datetime.datetime(2025, 7, 22, 7, 20, 23, 325536, tzinfo=tzutc()),
  'time_updated': datetime.datetime(2025, 7, 22, 7, 21, 44, 927080, tzinfo=tzutc()),
  'instance_type': {'id': '32f9a8ba-b187-4200-953b-28870dc5b64c',
   'name': '65536mb_32vcpu_2xl40s',
   'display_name': '65536 MB + 32 vCPU + 2 x NVIDIA L40s'},
  'deployment': 'gemma-3-openai',
  'version': 'v1'},
 {'id': '386ec9a1-92e7-42c1-ac05-2f94a1d8f386',
  'status': 'running',
  'time_created': datetime.datetime(2025, 7, 22, 7, 20, 21, 457796, tzinfo=tzutc()),
  'time_updated': datetime.datetime(2025, 7, 22, 7, 21, 44, 83536, tzinfo=tzutc()),
  'instance_type': {'id': '32f9a8ba-b187-4200-953b-28870dc5b64c',
   'name': '65536mb_32vcpu_2xl40s',
   'display_name': '65536 MB + 32 vCPU + 2 x NVIDIA L40s'},
  'deployment': 'llama-3-3-4bit-awq-openai',
  'version': 'v1'},
 {'id': 'afb92fa4-837a-471c-9234-b4a07f44a4d6',
  'status': 'running',
  'ti

In [59]:
# Credit usage of the "poc" project over the selected time range,
# aggregated per hour (aggregation_period is given in seconds).
get_deployment_metric(
    project_name="poc",
    metric_name="deployments.credits",
    start_date=start_date,
    end_date=end_date,
    aggregation_period=3600,
    description=True,
)


Metric: deployments.credits
Unit: credits (float)
Description: Usage of Credits



Unnamed: 0,timestamp,value,unit
0,2025-07-22 09:00:00+00:00,0.15,credits (float)
1,2025-07-22 10:00:00+00:00,0.15,credits (float)
2,2025-07-22 11:00:00+00:00,0.15,credits (float)
3,2025-07-22 12:00:00+00:00,0.15,credits (float)
4,2025-07-22 13:00:00+00:00,0.146574,credits (float)
5,2025-07-22 14:00:00+00:00,0.15,credits (float)
6,2025-07-22 15:00:00+00:00,0.15,credits (float)
7,2025-07-22 16:00:00+00:00,0.15,credits (float)
8,2025-07-22 17:00:00+00:00,0.149074,credits (float)
9,2025-07-22 18:00:00+00:00,0.15,credits (float)


In [57]:
# List all versions of one deployment in the project and try to fetch the
# token metrics for each of them.
deployment_name = "gemma-3-openai"
deployments = api.deployment_versions_list(project_name=PROJECT_NAME, deployment_name=deployment_name)
print(deployments)

for version in deployments:
    version_name = version.to_dict()['version']
    # Try to fetch tokens_in and tokens_out metrics.
    # NOTE: get_deployment_metric() has no label filter, so the same
    # project-level series is requested for every version. These metric names
    # are also not listed in DEPLOYMENT_METRICS, so their unit prints as "unknown".
    for metric in ['deployments.tokens_in', 'deployments.tokens_out']:
        try:
            df = get_deployment_metric(
                project_name=PROJECT_NAME,  # required argument
                metric_name=metric,
                start_date=start_date,
                end_date=end_date,
                aggregation_period=60,
                description=True
            )
            print(f"Deployment: {deployment_name}, Version: {version_name}, Metric: {metric}")
            print(df.head())
        except Exception as e:
            # Best effort: a metric that cannot be fetched is reported and
            # the loop continues with the next one.
            print(f"Could not fetch {metric} for {deployment_name} {version_name}: {e}")

[{'active_revision': '8e101cb8-e6be-4717-b186-64ee941fd3df',
 'creation_date': datetime.datetime(2025, 4, 3, 9, 52, 46, 985287, tzinfo=tzutc()),
 'default': True,
 'deployment': 'gemma-3-openai',
 'description': '',
 'environment': 'python3-12-gemma-3-openai-v1-mymvgxsh',
 'environment_display_name': 'Python3.12 - gemma-3-openai - v1 - mymvgxsh',
 'id': 'dbb68df2-0593-4007-af1d-2cb270e3af7c',
 'instance_processes': 10,
 'instance_type': 'deprecated',
 'labels': {'type': 'openwebui'},
 'last_updated': datetime.datetime(2025, 7, 1, 12, 58, 52, 845133, tzinfo=tzutc()),
 'latest_revision': '8e101cb8-e6be-4717-b186-64ee941fd3df',
 'maximum_idle_time': 10,
 'maximum_instances': 1,
 'maximum_queue_size_batch': 100000,
 'maximum_queue_size_express': 100,
 'minimum_instances': 1,
 'ports': [],
 'request_retention_mode': 'metadata',
 'request_retention_time': 2419200,
 'restart_request_interruption': False,
 'scaling_strategy': 'default',
 'static_ip': False,
 'status': 'available',
 'version': 

In [None]:

# Fetch key metrics for the configured project and time range.
# get_deployment_metric() is the helper defined earlier in this notebook; the
# previously used `fetch_metric` name is not defined anywhere in the notebook.
avg_duration_df = get_deployment_metric(PROJECT_NAME, "deployments.request_duration", start_date, end_date)
total_requests_df = get_deployment_metric(PROJECT_NAME, "deployments.requests", start_date, end_date)
# gpu_mem_df = get_deployment_metric(PROJECT_NAME, "deployments.memory_utilization", start_date, end_date)
concurrency_df = get_deployment_metric(PROJECT_NAME, "deployments.instances", start_date, end_date)

print(avg_duration_df)

# Example visualization on a 2x2 grid; panel 2 is reserved for the
# (currently disabled) GPU memory plot and therefore stays empty.
plt.figure(figsize=(15, 10))

plt.subplot(2, 2, 1)
plt.title("Average Request Duration (s)")
plt.plot(avg_duration_df["timestamp"], avg_duration_df["value"])

# plt.subplot(2, 2, 2)
# plt.title("GPU Memory Usage (bytes)")
# plt.plot(gpu_mem_df["timestamp"], gpu_mem_df["value"])

# NOTE(review): this panel plots "deployments.instances" (average number of
# active instances) under the title "Request Concurrency" - confirm the label.
plt.subplot(2, 2, 3)
plt.title("Request Concurrency")
plt.plot(concurrency_df["timestamp"], concurrency_df["value"])

plt.subplot(2, 2, 4)
plt.title("Requests per Interval")
plt.plot(total_requests_df["timestamp"], total_requests_df["value"])

plt.tight_layout()
plt.show()
