Skip to content

Commit

Permalink
adding health check to monitoring stream pod
Browse files: browse the repository at this point in the history
  • Loading branch information
Eyal-Danieli committed Sep 5, 2023
1 parent 9589e7f commit fde770e
Showing 1 changed file with 41 additions and 27 deletions.
68 changes: 41 additions & 27 deletions mlrun/model_monitoring/model_monitoring_batch.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,6 +23,7 @@

import numpy as np
import pandas as pd
import requests
import v3io
import v3io.dataplane
import v3io_frames
Expand Down Expand Up @@ -962,40 +963,53 @@ def _update_drift_in_prometheus(
)
)

statistical_metrics = ["hellinger_mean", "tvd_mean", "kld_mean"]
metrics = []
for metric in statistical_metrics:
metrics.append(
{
mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID: endpoint_id,
mlrun.common.schemas.model_monitoring.EventFieldType.METRIC: metric,
mlrun.common.schemas.model_monitoring.EventFieldType.VALUE: drift_result[
metric
],
}
)

http_session = mlrun.utils.HTTPSessionWithRetry(
retry_on_post=True,
verbose=True,
max_retries=1,
)
try:
# Model monitoring stream http health check
http_session.request("GET", url=stream_http_path)

# Update statistical metrics
statistical_metrics = ["hellinger_mean", "tvd_mean", "kld_mean"]
metrics = []
for metric in statistical_metrics:
metrics.append(
{
mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID: endpoint_id,
mlrun.common.schemas.model_monitoring.EventFieldType.METRIC: metric,
mlrun.common.schemas.model_monitoring.EventFieldType.VALUE: drift_result[
metric
],
}
)

http_session.request(
method="POST",
url=stream_http_path + "/monitoring-batch-metrics",
data=json.dumps(metrics),
)
http_session.request(
method="POST",
url=stream_http_path + "/monitoring-batch-metrics",
data=json.dumps(metrics),
)

drift_status_dict = {
mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID: endpoint_id,
mlrun.common.schemas.model_monitoring.EventFieldType.DRIFT_STATUS: drift_status.value,
}
# Update drift status
drift_status_dict = {
mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID: endpoint_id,
mlrun.common.schemas.model_monitoring.EventFieldType.DRIFT_STATUS: drift_status.value,
}

http_session.request(
method="POST",
url=stream_http_path + "/monitoring-drift-status",
data=json.dumps(drift_status_dict),
)
http_session.request(
method="POST",
url=stream_http_path + "/monitoring-drift-status",
data=json.dumps(drift_status_dict),
)

except requests.exceptions.ConnectionError as exc:
logger.warning(
"Can't push metrics to Prometheus registry."
"Monitoring stream is not found, probably not deployed: ",
exc=exc,
)


def handler(context: mlrun.run.MLClientCtx):
Expand Down

0 comments on commit fde770e

Please sign in to comment.