54 changes: 31 additions & 23 deletions alphatrion/tracing/prometheus_exporter.py
@@ -68,87 +68,87 @@ def _init_metrics(self):
self.llm_tokens_total = Counter(
"llm_tokens_total",
"Total LLM tokens consumed",
["team_id", "user_id", "experiment_id", "model", "token_type"],
["org_id", "team_id", "user_id", "experiment_id", "model", "token_type"],
registry=self.registry,
)

self.llm_input_tokens_total = Counter(
"llm_input_tokens_total",
"Total LLM input tokens consumed",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

self.llm_output_tokens_total = Counter(
"llm_output_tokens_total",
"Total LLM output tokens consumed",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

self.llm_cache_read_input_tokens_total = Counter(
"llm_cache_read_input_tokens_total",
"Total LLM cache read input tokens",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

self.llm_cache_creation_input_tokens_total = Counter(
"llm_cache_creation_input_tokens_total",
"Total LLM cache creation input tokens",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

# Cost metrics
self.llm_cost_total = Counter(
"llm_cost_total",
"Total LLM cost in USD",
["team_id", "user_id", "experiment_id", "model", "cost_type"],
["org_id", "team_id", "user_id", "experiment_id", "model", "cost_type"],
registry=self.registry,
)

self.llm_input_cost_total = Counter(
"llm_input_cost_total",
"Total LLM input cost in USD",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

self.llm_output_cost_total = Counter(
"llm_output_cost_total",
"Total LLM output cost in USD",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

self.llm_cache_read_cost_total = Counter(
"llm_cache_read_cost_total",
"Total LLM cache read cost in USD",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

self.llm_cache_creation_cost_total = Counter(
"llm_cache_creation_cost_total",
"Total LLM cache creation cost in USD",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

# Request metrics
self.llm_requests_total = Counter(
"llm_requests_total",
"Total number of LLM requests",
["team_id", "user_id", "experiment_id", "model", "status"],
["org_id", "team_id", "user_id", "experiment_id", "model", "status"],
registry=self.registry,
)

# Latency metrics
self.llm_request_duration_seconds = Histogram(
"llm_request_duration_seconds",
"LLM request duration in seconds",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
buckets=[0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0],
registry=self.registry,
)
@@ -189,6 +189,7 @@ def _process_span(self, span: ReadableSpan):
return

attributes = {k: str(v) for k, v in span.attributes.items()}
org_id = attributes.get("org_id", "unknown")
team_id = attributes.get("team_id", "unknown")
user_id = attributes.get("user_id", "unknown")
experiment_id = attributes.get("experiment_id", "unknown")
@@ -205,7 +206,7 @@ def _process_span(self, span: ReadableSpan):
self.llm_errors_total.labels(error_type=error_type).inc()

self._process_llm_span(
span, attributes, team_id, user_id, experiment_id, duration, status
span, attributes, org_id, team_id, user_id, experiment_id, duration, status
)

except Exception as e:
@@ -236,6 +237,7 @@ def _process_llm_span(
self,
span: ReadableSpan,
attributes: dict[str, str],
org_id: str,
team_id: str,
user_id: str,
experiment_id: str,
@@ -260,6 +262,7 @@ def _process_llm_span(

if total_tokens > 0:
self.llm_tokens_total.labels(
org_id=org_id,
team_id=team_id,
user_id=user_id,
experiment_id=experiment_id,
@@ -269,9 +272,10 @@

if input_tokens > 0:
self.llm_input_tokens_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(input_tokens)
self.llm_tokens_total.labels(
org_id=org_id,
team_id=team_id,
user_id=user_id,
experiment_id=experiment_id,
@@ -281,9 +285,10 @@

if output_tokens > 0:
self.llm_output_tokens_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(output_tokens)
self.llm_tokens_total.labels(
org_id=org_id,
team_id=team_id,
user_id=user_id,
experiment_id=experiment_id,
@@ -293,9 +298,10 @@

if cache_read_input_tokens > 0:
self.llm_cache_read_input_tokens_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(cache_read_input_tokens)
self.llm_tokens_total.labels(
org_id=org_id,
team_id=team_id,
user_id=user_id,
experiment_id=experiment_id,
@@ -305,9 +311,10 @@

if cache_creation_input_tokens > 0:
self.llm_cache_creation_input_tokens_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(cache_creation_input_tokens)
self.llm_tokens_total.labels(
org_id=org_id,
team_id=team_id,
user_id=user_id,
experiment_id=experiment_id,
@@ -329,6 +336,7 @@ def _process_llm_span(

if total_cost > 0:
self.llm_cost_total.labels(
org_id=org_id,
team_id=team_id,
user_id=user_id,
experiment_id=experiment_id,
@@ -338,35 +346,35 @@ def _process_llm_span(

if input_cost > 0:
self.llm_input_cost_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(input_cost)

if output_cost > 0:
self.llm_output_cost_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(output_cost)

if cache_read_cost > 0:
self.llm_cache_read_cost_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(cache_read_cost)

if cache_creation_cost > 0:
self.llm_cache_creation_cost_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(cache_creation_cost)

except (ValueError, TypeError) as e:
logger.debug(f"No cost data available for span: {e}")

# Request count
self.llm_requests_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model, status=status
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model, status=status
).inc()

# Duration
self.llm_request_duration_seconds.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).observe(duration)

def _push_metrics(self):
52 changes: 36 additions & 16 deletions docs/prometheus-integration.md
@@ -81,44 +81,44 @@ async with experiment.CraftExperiment.start(name="my_experiment") as exp:
### LLM Token Metrics

- **`llm_tokens_total`** - Total LLM tokens consumed
- Labels: `team_id`, `user_id`, `experiment_id`, `model`, `token_type` (input/output/cache_read_input/cache_creation_input/total)
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`, `token_type` (input/output/cache_read_input/cache_creation_input/total)

- **`llm_input_tokens_total`** - Total input tokens
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

- **`llm_output_tokens_total`** - Total output tokens
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

- **`llm_cache_read_input_tokens_total`** - Total cache read input tokens
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

- **`llm_cache_creation_input_tokens_total`** - Total cache creation input tokens
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

### LLM Cost Metrics (USD)

- **`llm_cost_total`** - Total LLM cost in USD
- Labels: `team_id`, `user_id`, `experiment_id`, `model`, `cost_type` (total)
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`, `cost_type` (total)

- **`llm_input_cost_total`** - Total input token cost in USD
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

- **`llm_output_cost_total`** - Total output token cost in USD
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

- **`llm_cache_read_cost_total`** - Total cache read cost in USD
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

- **`llm_cache_creation_cost_total`** - Total cache creation cost in USD
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

### LLM Request Metrics

- **`llm_requests_total`** - Total number of LLM requests
- Labels: `team_id`, `user_id`, `experiment_id`, `model`, `status`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`, `status`

- **`llm_request_duration_seconds`** - LLM request duration histogram
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`
- Buckets: 0.1s, 0.5s, 1s, 2s, 5s, 10s, 30s, 60s

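The label values above are resolved from span attributes, with any missing key falling back to `unknown`. Below is a minimal sketch of tagging a span so the exporter can attribute usage, assuming the standard OpenTelemetry Python API; the span name and label values are hypothetical, while the attribute keys match what the exporter reads:

```python
from opentelemetry import trace

tracer = trace.get_tracer(__name__)

# Attribute keys match the exporter's lookups; values are hypothetical.
with tracer.start_as_current_span("llm_call") as span:
    span.set_attribute("org_id", "org123")
    span.set_attribute("team_id", "team42")
    span.set_attribute("user_id", "user7")
    span.set_attribute("experiment_id", "exp-001")
    # Token and cost attributes are typically attached by the
    # instrumentation layer during the LLM request itself.
```
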
### Error Tracking
@@ -228,6 +228,18 @@ The platform dashboard provides a comprehensive view combining operational health
Create your own panels with queries like:

```promql
# Cost by organization
sum by (org_id) (llm_cost_total{cost_type="total"})

# Cost by team within org
sum by (org_id, team_id) (llm_cost_total{cost_type="total"})

# Top 10 users by cost
topk(10, sum by (user_id) (llm_cost_total{cost_type="total"}))

# Specific user's cost
sum(llm_cost_total{user_id="user123", cost_type="total"})

# Token usage by experiment
sum by (experiment_id) (llm_tokens_total{token_type="total"})
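
# Org-level token throughput over the last hour (an added example,
# assuming the labels documented above)
sum by (org_id) (rate(llm_tokens_total{token_type="total"}[1h]))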

@@ -238,7 +250,7 @@
rate(llm_requests_total{team_id="YOUR_TEAM_ID"}[5m])

# Average latency
rate(llm_duration_seconds_sum[5m]) / rate(llm_duration_seconds_count[5m])
rate(llm_request_duration_seconds_sum[5m]) / rate(llm_request_duration_seconds_count[5m])

# Success rate
sum(rate(llm_requests_total{status="OK"}[5m])) / sum(rate(llm_requests_total[5m]))
@@ -258,6 +270,9 @@ sum by (team_id) (llm_errors_total)
# Count unique experiments (derived metric)
count(sum by (experiment_id) (llm_requests_total))

# Per-user cost within a specific org
sum by (user_id) (llm_cost_total{org_id="org123", cost_type="total"})

# Count unique teams (derived metric)
count(sum by (team_id) (llm_requests_total))
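
# Unique active users per org (an added example, derived the same way)
count by (org_id) (sum by (org_id, user_id) (llm_requests_total))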

@@ -306,17 +321,22 @@ tracer_provider.add_span_processor(BatchSpanProcessor(prometheus_exporter))

The implementation balances observability with Prometheus performance. Metrics are aggregated by:

- `team_id` - Organization/team level (low cardinality)
- `org_id` - Organization level (very low cardinality)
- `team_id` - Team level within org (low cardinality)
- `user_id` - User level for per-user cost tracking (medium cardinality)
- `experiment_id` - Experiment level (medium-high cardinality)
- `model` - AI model being used (low cardinality)
- Other minimal dimensions (`status`, `token_type`)

**Cardinality Considerations:**
- `user_id` is included to enable per-user cost tracking and billing
- In high-user environments (1000+ users), consider aggregating costs by team in Prometheus and using ClickHouse for detailed per-user breakdowns
- `org_id` enables multi-tenant deployments and per-organization billing
- `team_id` allows per-team cost and usage tracking within organizations
- `user_id` enables per-user cost tracking and billing within teams
- In high-user environments (1000+ users per org), consider aggregating costs by team/org in Prometheus and using ClickHouse for detailed per-user breakdowns (see the sketch below)
- Labels like `run_id`, `span_kind`, and `semantic_kind` are intentionally excluded

**Label Hierarchy:** `org_id` > `team_id` > `user_id` > `experiment_id`

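As a back-of-the-envelope illustration of the cardinality concern above (a sketch; all sizes are hypothetical):

```python
# Worst-case series count for a single counter such as llm_input_tokens_total:
# every label combination that ever appears becomes its own time series.
orgs = 10
teams_per_org = 5
users_per_team = 20
experiments = 50
models = 4

series = orgs * teams_per_org * users_per_team * experiments * models
print(series)  # 200000 series for one metric at these (hypothetical) sizes
```
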
For detailed trace analysis and span classification, use the ClickHouse trace store which is optimized for high-cardinality data.

## Troubleshooting