Skip to content

Commit

Permalink
Remove namespace from Prometheus metric names (#550)
Browse files Browse the repository at this point in the history
* Format agent metrics function calls with one parameter per line

* Format gateway metrics function calls with one parameter per line

* Remove namespace arg to Prometheus metric opts

* Remove unused namespace param from metrics tracking structs

* Do not pass namespaces to functions that no longer require them

* Remove unused constant in metrics test

* Update Grafana dashboard to use namespace as a label rather than a metric prefix

* Fix alignment & whitespace in scheduler Grafana dashboard

* Fix wrong metrics port for Triton in Compose with bridge network
  • Loading branch information
agrski authored Nov 2, 2022
1 parent 63a6818 commit a75754e
Show file tree
Hide file tree
Showing 9 changed files with 173 additions and 155 deletions.
38 changes: 19 additions & 19 deletions prometheus/dashboards/seldon.json
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "count (seldon_mesh_seldon_loaded_model_memory_bytes_gauge >0 )",
"expr": "count (seldon_loaded_model_memory_bytes_gauge{namespace='seldon-mesh'} > 0 )",
"hide": false,
"interval": "",
"legendFormat": "In-memory",
Expand All @@ -124,7 +124,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum (seldon_mesh_seldon_loaded_model_gauge)",
"expr": "sum (seldon_loaded_model_gauge{namespace='seldon-mesh'})",
"hide": false,
"interval": "",
"legendFormat": "Registered",
Expand Down Expand Up @@ -183,7 +183,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum by(server) (seldon_mesh_seldon_loaded_model_gauge)",
"expr": "sum by(server) (seldon_loaded_model_gauge{namespace='seldon-mesh'})",
"format": "table",
"instant": true,
"interval": "",
Expand Down Expand Up @@ -266,7 +266,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "count by(server) (seldon_mesh_seldon_loaded_model_memory_bytes_gauge > 0)",
"expr": "count by(server) (seldon_loaded_model_memory_bytes_gauge{namespace='seldon-mesh'} > 0)",
"format": "table",
"instant": true,
"interval": "",
Expand Down Expand Up @@ -359,7 +359,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum by(server) (seldon_mesh_seldon_loaded_model_memory_bytes_gauge) / sum by(server) (seldon_mesh_seldon_server_replica_memory_capacity_overcommit_bytes_gauge)",
"expr": "sum by(server) (seldon_loaded_model_memory_bytes_gauge{namespace='seldon-mesh'}) / sum by(server) (seldon_server_replica_memory_capacity_overcommit_bytes_gauge{namespace='seldon-mesh'})",
"format": "table",
"hide": false,
"instant": false,
Expand Down Expand Up @@ -522,7 +522,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum(rate(seldon_mesh_seldon_cache_evict_count[1m]))",
"expr": "sum(rate(seldon_cache_evict_count{namespace='seldon-mesh'}[1m]))",
"format": "time_series",
"instant": false,
"interval": "",
Expand All @@ -533,7 +533,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum(rate(seldon_mesh_seldon_cache_miss_count[1m]))",
"expr": "sum(rate(seldon_cache_miss_count{namespace='seldon-mesh'}[1m]))",
"hide": false,
"interval": "",
"legendFormat": "Miss Rate",
Expand Down Expand Up @@ -617,7 +617,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum by (server) (rate(seldon_mesh_seldon_load_model_counter[1m]))",
"expr": "sum by (server) (rate(seldon_load_model_counter{namespace='seldon-mesh'}[1m]))",
"format": "time_series",
"instant": false,
"interval": "",
Expand All @@ -628,7 +628,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum by (server) (rate(seldon_mesh_seldon_unload_model_counter[1m]))",
"expr": "sum by (server) (rate(seldon_unload_model_counter{namespace='seldon-mesh'}[1m]))",
"format": "time_series",
"hide": false,
"instant": false,
Expand Down Expand Up @@ -719,7 +719,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum(seldon_mesh_seldon_server_replica_memory_capacity_bytes_gauge{server=\"triton\"})",
"expr": "sum(seldon_server_replica_memory_capacity_bytes_gauge{server=\"triton\", namespace='seldon-mesh'})",
"hide": false,
"interval": "",
"legendFormat": "Capacity",
Expand All @@ -728,7 +728,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum(seldon_mesh_seldon_loaded_model_memory_bytes_gauge{server=\"triton\"})",
"expr": "sum(seldon_loaded_model_memory_bytes_gauge{server=\"triton\", namespace='seldon-mesh'})",
"hide": false,
"interval": "",
"legendFormat": "Used",
Expand All @@ -737,7 +737,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum(seldon_mesh_seldon_server_replica_memory_capacity_overcommit_bytes_gauge{server=\"triton\"})",
"expr": "sum(seldon_server_replica_memory_capacity_overcommit_bytes_gauge{server=\"triton\", namespace='seldon-mesh'})",
"hide": false,
"interval": "",
"legendFormat": "Capacity with Over-commit",
Expand All @@ -746,7 +746,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum(seldon_mesh_seldon_loaded_model_memory_bytes_gauge{server=\"triton\"}) + sum(seldon_mesh_seldon_evicted_model_memory_bytes_gauge{server=\"triton\"})",
"expr": "sum(seldon_loaded_model_memory_bytes_gauge{server=\"triton\", namespace='seldon-mesh'}) + sum(seldon_evicted_model_memory_bytes_gauge{server=\"triton\", namespace='seldon-mesh'})",
"hide": false,
"interval": "",
"legendFormat": "Used with Over-commit",
Expand Down Expand Up @@ -835,7 +835,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum(seldon_mesh_seldon_server_replica_memory_capacity_bytes_gauge{server=\"mlserver\"})",
"expr": "sum(seldon_server_replica_memory_capacity_bytes_gauge{server=\"mlserver\", namespace='seldon-mesh'})",
"hide": false,
"interval": "",
"legendFormat": "Capacity",
Expand All @@ -844,7 +844,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum(seldon_mesh_seldon_loaded_model_memory_bytes_gauge{server=\"mlserver\"})",
"expr": "sum(seldon_loaded_model_memory_bytes_gauge{server=\"mlserver\", namespace='seldon-mesh'})",
"hide": false,
"interval": "",
"legendFormat": "Used",
Expand All @@ -853,7 +853,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum(seldon_mesh_seldon_server_replica_memory_capacity_overcommit_bytes_gauge{server=\"mlserver\"})",
"expr": "sum(seldon_server_replica_memory_capacity_overcommit_bytes_gauge{server=\"mlserver\", namespace='seldon-mesh'})",
"hide": false,
"interval": "",
"legendFormat": "Capacity with Over-commit",
Expand All @@ -862,7 +862,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "sum(seldon_mesh_seldon_loaded_model_memory_bytes_gauge{server=\"mlserver\"}) + sum(seldon_mesh_seldon_evicted_model_memory_bytes_gauge{server=\"mlserver\"})",
"expr": "sum(seldon_loaded_model_memory_bytes_gauge{server=\"mlserver\", namespace='seldon-mesh'}) + sum(seldon_evicted_model_memory_bytes_gauge{server=\"mlserver\", namespace='seldon-mesh'})",
"hide": false,
"interval": "",
"legendFormat": "Used with Over-commit",
Expand Down Expand Up @@ -1046,7 +1046,7 @@
{
"datasource": "${DS_DS_PROMETHEUS}",
"exemplar": true,
"expr": "avg((rate(seldon_mesh_seldon_model_aggregate_infer_seconds_total{container=\"agent\"}[1m]) / rate(seldon_mesh_seldon_model_aggregate_infer_total{container=\"agent\"}[1m])) > 0 ) by (server, method_type)",
"expr": "avg((rate(seldon_model_aggregate_infer_seconds_total{container=\"agent\", namespace='seldon-mesh'}[1m]) / rate(seldon_model_aggregate_infer_total{container=\"agent\", namespace='seldon-mesh'}[1m])) > 0 ) by (server, method_type)",
"hide": false,
"interval": "",
"legendFormat": "{{server}}_{{method_type}}_avg",
Expand Down Expand Up @@ -1166,4 +1166,4 @@
"title": "Seldon Core Model Mesh Monitoring",
"uid": "y5MkDIkVz",
"version": 1
}
}
2 changes: 1 addition & 1 deletion scheduler/cmd/agent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func main() {
// Create V2 Protocol Handler
v2Client := agent.NewV2Client(cli.InferenceHost, cli.InferenceGrpcPort, logger, true)

promMetrics, err := metrics.NewPrometheusModelMetrics(cli.ServerName, cli.ReplicaIdx, cli.Namespace, logger)
promMetrics, err := metrics.NewPrometheusModelMetrics(cli.ServerName, cli.ReplicaIdx, logger)
if err != nil {
logger.WithError(err).Fatalf("Can't create prometheus metrics")
}
Expand Down
2 changes: 1 addition & 1 deletion scheduler/cmd/pipelinegateway/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ func main() {
}
defer km.Stop()

promMetrics, err := metrics.NewPrometheusPipelineMetrics(namespace, logger)
promMetrics, err := metrics.NewPrometheusPipelineMetrics(logger)
if err != nil {
logger.WithError(err).Fatalf("Can't create prometheus metrics")
}
Expand Down
Loading

0 comments on commit a75754e

Please sign in to comment.