54 changes: 31 additions & 23 deletions alphatrion/tracing/prometheus_exporter.py
@@ -68,87 +68,87 @@ def _init_metrics(self):
self.llm_tokens_total = Counter(
"llm_tokens_total",
"Total LLM tokens consumed",
["team_id", "user_id", "experiment_id", "model", "token_type"],
["org_id", "team_id", "user_id", "experiment_id", "model", "token_type"],
registry=self.registry,
)

self.llm_input_tokens_total = Counter(
"llm_input_tokens_total",
"Total LLM input tokens consumed",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

self.llm_output_tokens_total = Counter(
"llm_output_tokens_total",
"Total LLM output tokens consumed",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

self.llm_cache_read_input_tokens_total = Counter(
"llm_cache_read_input_tokens_total",
"Total LLM cache read input tokens",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

self.llm_cache_creation_input_tokens_total = Counter(
"llm_cache_creation_input_tokens_total",
"Total LLM cache creation input tokens",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

# Cost metrics
self.llm_cost_total = Counter(
"llm_cost_total",
"Total LLM cost in USD",
["team_id", "user_id", "experiment_id", "model", "cost_type"],
["org_id", "team_id", "user_id", "experiment_id", "model", "cost_type"],
registry=self.registry,
)

self.llm_input_cost_total = Counter(
"llm_input_cost_total",
"Total LLM input cost in USD",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

self.llm_output_cost_total = Counter(
"llm_output_cost_total",
"Total LLM output cost in USD",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

self.llm_cache_read_cost_total = Counter(
"llm_cache_read_cost_total",
"Total LLM cache read cost in USD",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

self.llm_cache_creation_cost_total = Counter(
"llm_cache_creation_cost_total",
"Total LLM cache creation cost in USD",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
registry=self.registry,
)

# Request metrics
self.llm_requests_total = Counter(
"llm_requests_total",
"Total number of LLM requests",
["team_id", "user_id", "experiment_id", "model", "status"],
["org_id", "team_id", "user_id", "experiment_id", "model", "status"],
registry=self.registry,
)

# Latency metrics
self.llm_request_duration_seconds = Histogram(
"llm_request_duration_seconds",
"LLM request duration in seconds",
["team_id", "user_id", "experiment_id", "model"],
["org_id", "team_id", "user_id", "experiment_id", "model"],
buckets=[0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0],
registry=self.registry,
)
@@ -189,6 +189,7 @@ def _process_span(self, span: ReadableSpan):
return

attributes = {k: str(v) for k, v in span.attributes.items()}
org_id = attributes.get("org_id", "unknown")
team_id = attributes.get("team_id", "unknown")
user_id = attributes.get("user_id", "unknown")
experiment_id = attributes.get("experiment_id", "unknown")
@@ -205,7 +206,7 @@ def _process_span(self, span: ReadableSpan):
self.llm_errors_total.labels(error_type=error_type).inc()

self._process_llm_span(
span, attributes, team_id, user_id, experiment_id, duration, status
span, attributes, org_id, team_id, user_id, experiment_id, duration, status
)

except Exception as e:
@@ -236,6 +237,7 @@ def _process_llm_span(
self,
span: ReadableSpan,
attributes: dict[str, str],
org_id: str,
team_id: str,
user_id: str,
experiment_id: str,
@@ -260,6 +262,7 @@ def _process_llm_span(

if total_tokens > 0:
self.llm_tokens_total.labels(
org_id=org_id,
team_id=team_id,
user_id=user_id,
experiment_id=experiment_id,
@@ -269,9 +272,10 @@

if input_tokens > 0:
self.llm_input_tokens_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(input_tokens)
self.llm_tokens_total.labels(
org_id=org_id,
team_id=team_id,
user_id=user_id,
experiment_id=experiment_id,
@@ -281,9 +285,10 @@

if output_tokens > 0:
self.llm_output_tokens_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(output_tokens)
self.llm_tokens_total.labels(
org_id=org_id,
team_id=team_id,
user_id=user_id,
experiment_id=experiment_id,
@@ -293,9 +298,10 @@

if cache_read_input_tokens > 0:
self.llm_cache_read_input_tokens_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(cache_read_input_tokens)
self.llm_tokens_total.labels(
org_id=org_id,
team_id=team_id,
user_id=user_id,
experiment_id=experiment_id,
@@ -305,9 +311,10 @@

if cache_creation_input_tokens > 0:
self.llm_cache_creation_input_tokens_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(cache_creation_input_tokens)
self.llm_tokens_total.labels(
org_id=org_id,
team_id=team_id,
user_id=user_id,
experiment_id=experiment_id,
@@ -329,6 +336,7 @@ def _process_llm_span(

if total_cost > 0:
self.llm_cost_total.labels(
org_id=org_id,
team_id=team_id,
user_id=user_id,
experiment_id=experiment_id,
@@ -338,35 +346,35 @@ def _process_llm_span(

if input_cost > 0:
self.llm_input_cost_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(input_cost)

if output_cost > 0:
self.llm_output_cost_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(output_cost)

if cache_read_cost > 0:
self.llm_cache_read_cost_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(cache_read_cost)

if cache_creation_cost > 0:
self.llm_cache_creation_cost_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).inc(cache_creation_cost)

except (ValueError, TypeError) as e:
logger.debug(f"No cost data available for span: {e}")

# Request count
self.llm_requests_total.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model, status=status
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model, status=status
).inc()

# Duration
self.llm_request_duration_seconds.labels(
team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
org_id=org_id, team_id=team_id, user_id=user_id, experiment_id=experiment_id, model=model
).observe(duration)

def _push_metrics(self):
52 changes: 36 additions & 16 deletions docs/prometheus-integration.md
@@ -81,44 +81,44 @@ async with experiment.CraftExperiment.start(name="my_experiment") as exp:
### LLM Token Metrics

- **`llm_tokens_total`** - Total LLM tokens consumed
- Labels: `team_id`, `user_id`, `experiment_id`, `model`, `token_type` (input/output/cache_read_input/cache_creation_input/total)
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`, `token_type` (input/output/cache_read_input/cache_creation_input/total)

- **`llm_input_tokens_total`** - Total input tokens
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

- **`llm_output_tokens_total`** - Total output tokens
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

- **`llm_cache_read_input_tokens_total`** - Total cache read input tokens
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

- **`llm_cache_creation_input_tokens_total`** - Total cache creation input tokens
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

### LLM Cost Metrics (USD)

- **`llm_cost_total`** - Total LLM cost in USD
- Labels: `team_id`, `user_id`, `experiment_id`, `model`, `cost_type` (total)
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`, `cost_type` (total)

- **`llm_input_cost_total`** - Total input token cost in USD
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

- **`llm_output_cost_total`** - Total output token cost in USD
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

- **`llm_cache_read_cost_total`** - Total cache read cost in USD
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

- **`llm_cache_creation_cost_total`** - Total cache creation cost in USD
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`

### LLM Request Metrics

- **`llm_requests_total`** - Total number of LLM requests
- Labels: `team_id`, `user_id`, `experiment_id`, `model`, `status`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`, `status`

- **`llm_request_duration_seconds`** - LLM request duration histogram
- Labels: `team_id`, `user_id`, `experiment_id`, `model`
- Labels: `org_id`, `team_id`, `user_id`, `experiment_id`, `model`
- Buckets: 0.1s, 0.5s, 1s, 2s, 5s, 10s, 30s, 60s

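The label values above are resolved from span attributes, with any missing key falling back to `unknown`. Below is a minimal sketch of tagging a span so the exporter can attribute usage, assuming the standard OpenTelemetry Python API; the span name and label values are hypothetical, while the attribute keys match what the exporter reads:

```python
from opentelemetry import trace

tracer = trace.get_tracer(__name__)

# Attribute keys match the exporter's lookups; values are hypothetical.
with tracer.start_as_current_span("llm_call") as span:
    span.set_attribute("org_id", "org123")
    span.set_attribute("team_id", "team42")
    span.set_attribute("user_id", "user7")
    span.set_attribute("experiment_id", "exp-001")
    # Token and cost attributes are typically attached by the
    # instrumentation layer during the LLM request itself.
```
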
### Error Tracking
@@ -228,6 +228,18 @@ The platform dashboard provides a comprehensive view combining operational health
Create your own panels with queries like:

```promql
# Cost by organization
sum by (org_id) (llm_cost_total{cost_type="total"})

# Cost by team within org
sum by (org_id, team_id) (llm_cost_total{cost_type="total"})

# Top 10 users by cost
topk(10, sum by (user_id) (llm_cost_total{cost_type="total"}))

# Specific user's cost
sum(llm_cost_total{user_id="user123", cost_type="total"})

# Token usage by experiment
sum by (experiment_id) (llm_tokens_total{token_type="total"})
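
# Org-level token throughput over the last hour (an added example,
# assuming the labels documented above)
sum by (org_id) (rate(llm_tokens_total{token_type="total"}[1h]))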

@@ -238,7 +250,7 @@
rate(llm_requests_total{team_id="YOUR_TEAM_ID"}[5m])

# Average latency
rate(llm_duration_seconds_sum[5m]) / rate(llm_duration_seconds_count[5m])
rate(llm_request_duration_seconds_sum[5m]) / rate(llm_request_duration_seconds_count[5m])

# Success rate
sum(rate(llm_requests_total{status="OK"}[5m])) / sum(rate(llm_requests_total[5m]))
@@ -258,6 +270,9 @@ sum by (team_id) (llm_errors_total)
# Count unique experiments (derived metric)
count(sum by (experiment_id) (llm_requests_total))

# Per-user cost within a specific org
sum by (user_id) (llm_cost_total{org_id="org123", cost_type="total"})

# Count unique teams (derived metric)
count(sum by (team_id) (llm_requests_total))
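
# Unique active users per org (an added example, derived the same way)
count by (org_id) (sum by (org_id, user_id) (llm_requests_total))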

@@ -306,17 +321,22 @@ tracer_provider.add_span_processor(BatchSpanProcessor(prometheus_exporter))

The implementation balances observability with Prometheus performance. Metrics are aggregated by:

- `team_id` - Organization/team level (low cardinality)
- `org_id` - Organization level (very low cardinality)
- `team_id` - Team level within org (low cardinality)
- `user_id` - User level for per-user cost tracking (medium cardinality)
- `experiment_id` - Experiment level (medium-high cardinality)
- `model` - AI model being used (low cardinality)
- Other minimal dimensions (`status`, `token_type`)

**Cardinality Considerations:**
- `user_id` is included to enable per-user cost tracking and billing
- In high-user environments (1000+ users), consider aggregating costs by team in Prometheus and using ClickHouse for detailed per-user breakdowns
- `org_id` enables multi-tenant deployments and per-organization billing
- `team_id` allows per-team cost and usage tracking within organizations
- `user_id` enables per-user cost tracking and billing within teams
- In high-user environments (1000+ users per org), consider aggregating costs by team/org in Prometheus and using ClickHouse for detailed per-user breakdowns (see the sketch below)
- Labels like `run_id`, `span_kind`, and `semantic_kind` are intentionally excluded

**Label Hierarchy:** `org_id` > `team_id` > `user_id` > `experiment_id`

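As a back-of-the-envelope illustration of the cardinality concern above (a sketch; all sizes are hypothetical):

```python
# Worst-case series count for a single counter such as llm_input_tokens_total:
# every label combination that ever appears becomes its own time series.
orgs = 10
teams_per_org = 5
users_per_team = 20
experiments = 50
models = 4

series = orgs * teams_per_org * users_per_team * experiments * models
print(series)  # 200000 series for one metric at these (hypothetical) sizes
```
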
For detailed trace analysis and span classification, use the ClickHouse trace store which is optimized for high-cardinality data.

## Troubleshooting