diff --git a/dashboard/requirements.txt b/dashboard/requirements.txt index 01b05a6d8..ce2285a26 100644 --- a/dashboard/requirements.txt +++ b/dashboard/requirements.txt @@ -1,2 +1 @@ -httpx==0.24.0 markdown==3.4.1 diff --git a/docker-compose.yaml b/docker-compose.yaml index 3ae253749..8f1852488 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -86,6 +86,8 @@ services: DOCKER_HOST: tcp://docker-engine:2375 CONFIG_FILE: /etc/racetrack/image-builder/config.yaml LIFECYCLE_AUTH_TOKEN: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzZWVkIjoiZGVhNzYzMDAtN2JhYy00ODRjLTkyOTYtNWQxMGNkOTM3YTU4Iiwic3ViamVjdCI6ImltYWdlLWJ1aWxkZXIiLCJzdWJqZWN0X3R5cGUiOiJpbnRlcm5hbCIsInNjb3BlcyI6WyJmdWxsX2FjY2VzcyJdfQ.ND3wDeK58L5T1jIYcuArQ5O3M0Ez3_pCAEi5NXD_hLY + stdin_open: true + tty: true dashboard: container_name: dashboard @@ -110,6 +112,8 @@ services: EXTERNAL_GRAFANA_URL: 'http://127.0.0.1:3100' AUTH_REQUIRED: 'true' SITE_NAME: '' + stdin_open: true + tty: true pub: container_name: pub diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 3063114dc..3620f867e 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Changed - "Connection refused" error is now retriable in async job calls (will be retried automatically). + ([#459](https://github.com/TheRacetrack/racetrack/issues/459)) + +### Fixed +- Long deployments (over 20 minutes) no longer get stuck in the final stage. ([#448](https://github.com/TheRacetrack/racetrack/issues/448)) ## [2.29.2] - 2024-04-30 diff --git a/image_builder/image_builder/api.py b/image_builder/image_builder/api.py index bce531f36..26fd775e8 100644 --- a/image_builder/image_builder/api.py +++ b/image_builder/image_builder/api.py @@ -15,7 +15,11 @@ from racetrack_commons.api.asgi.asgi_server import serve_asgi_app from racetrack_commons.api.asgi.fastapi import create_fastapi from racetrack_commons.api.metrics import setup_metrics_endpoint +from racetrack_commons.api.server_sent_events import stream_result_with_heartbeat from racetrack_commons.plugin.engine import PluginEngine +from racetrack_client.log.logs import get_logger + +logger = get_logger(__name__) def run_api_server(): @@ -146,3 +150,27 @@ def _build(payload: BuildPayloadModel): 'logs': logs, 'error': error, } + + @api.post('/build/sse', response_model=BuildingResultModel) + def _build_server_sent_events(payload: BuildPayloadModel): + """Stream events of building a Job image from a manifest""" + manifest = load_manifest_from_dict(payload.manifest) + git_credentials = load_credentials_from_dict(payload.git_credentials.model_dump() if payload.git_credentials else None) + tag = payload.tag + secret_build_env = payload.secret_build_env or {} + build_context = payload.build_context + deployment_id = payload.deployment_id + build_flags = payload.build_flags + + def _result_runner() -> Dict: + image_names, logs, error = build_job_image( + config, manifest, git_credentials, secret_build_env, tag, + build_context, deployment_id, plugin_engine, build_flags, + ) + return { + 'image_names': image_names, + 'logs': logs, + 'error': error, + } + + return stream_result_with_heartbeat(_result_runner) diff --git a/lifecycle/lifecycle/deployer/builder.py b/lifecycle/lifecycle/deployer/builder.py index c8cc48325..7b141acbc 100644 --- a/lifecycle/lifecycle/deployer/builder.py +++ b/lifecycle/lifecycle/deployer/builder.py @@ -10,7 +10,7 @@ from lifecycle.job.deployment import create_deployment, save_deployment_build_logs, save_deployment_image_name, save_deployment_result, save_deployment_phase from racetrack_client.client_config.client_config import Credentials from racetrack_client.client.env import SecretVars -from racetrack_client.utils.request import parse_response_object, Requests, RequestError +from racetrack_client.utils.request import Requests, RequestError from racetrack_client.utils.datamodel import datamodel_to_dict from racetrack_client.utils.time import now from racetrack_client.log.context_error import wrap_context @@ -20,6 +20,7 @@ from racetrack_commons.deploy.image import get_job_image from racetrack_commons.entities.dto import DeploymentDto, DeploymentStatus from racetrack_commons.plugin.engine import PluginEngine +from racetrack_commons.api.server_sent_events import make_sse_request logger = get_logger(__name__) @@ -55,17 +56,14 @@ def _send_image_build_request( """ logger.info(f'building a job by image-builder, deployment ID: {deployment.id}') # see `image_builder.api._setup_api_endpoints` - r = Requests.post( - f'{config.image_builder_url}/api/v1/build', - json=_build_image_request_payload(manifest, git_credentials, secret_build_env, tag, build_context, deployment, build_flags), - ) + payload = _build_image_request_payload(manifest, git_credentials, secret_build_env, tag, build_context, deployment, build_flags) + result: Dict = make_sse_request('POST', f'{config.image_builder_url}/api/v1/build/sse', payload) logger.debug(f'image-builder finished building a job, deployment ID: {deployment.id}') - response = parse_response_object(r, 'Image builder API error') - build_logs: str = response['logs'] + build_logs: str = result['logs'] image_name = get_job_image(config.docker_registry, config.docker_registry_namespace, manifest.name, tag) save_deployment_build_logs(deployment.id, build_logs) save_deployment_image_name(deployment.id, image_name) - error: str = response['error'] + error: str = result['error'] if error: raise RuntimeError(error) logger.info(f'job image {image_name} has been built, deployment ID: {deployment.id}') diff --git a/lifecycle/lifecycle/deployer/deploy.py b/lifecycle/lifecycle/deployer/deploy.py index 7c289aa66..c8a208a0b 100644 --- a/lifecycle/lifecycle/deployer/deploy.py +++ b/lifecycle/lifecycle/deployer/deploy.py @@ -23,6 +23,7 @@ from lifecycle.job.audit import AuditLogger from lifecycle.job.deployment import create_deployment, save_deployment_result from lifecycle.job.models_registry import job_exists, find_deleted_job +from lifecycle.server.metrics import metric_done_job_deployments logger = get_logger(__name__) @@ -99,8 +100,12 @@ def deploy_job_in_background( Schedule deployment of a job in background :return: deployment ID """ - infra_target = determine_infrastructure_name(config, plugin_engine, manifest) - deployment = create_deployment(manifest, username, infra_target) + try: + infra_target = determine_infrastructure_name(config, plugin_engine, manifest) + deployment = create_deployment(manifest, username, infra_target) + except BaseException as e: + metric_done_job_deployments.inc() + raise e logger.info(f'starting deployment {deployment.id} in background') args = (config, manifest, git_credentials, secret_vars, deployment, build_context, force, plugin_engine, auth_subject, build_flags) @@ -138,6 +143,8 @@ def deploy_job_saving_result( job_name=deployment.job_name, job_version=deployment.job_version, ) + finally: + metric_done_job_deployments.inc() def _protect_job_overwriting(manifest: Manifest, force: bool): diff --git a/lifecycle/lifecycle/server/metrics.py b/lifecycle/lifecycle/server/metrics.py index 99279293d..5dd1a59c6 100644 --- a/lifecycle/lifecycle/server/metrics.py +++ b/lifecycle/lifecycle/server/metrics.py @@ -10,7 +10,14 @@ from lifecycle.server.db_status import database_status -metric_requested_job_deployments = Counter('requested_job_deployments', 'Number of requests to deploy job') +metric_requested_job_deployments = Counter( + 'requested_job_deployments', + 'Number of started job deployments', +) +metric_done_job_deployments = Counter( + 'done_job_deployments', + 'Number of finished job deployments (processed or failed)', +) metric_deployed_job = Counter('deployed_job', 'Number of Jobs deployed successfully') metric_metrics_scrapes = Counter('metrics_scrapes', 'Number of Prometheus metrics scrapes') diff --git a/lifecycle/tests/sse/test_server_sent_events.py b/lifecycle/tests/sse/test_server_sent_events.py new file mode 100644 index 000000000..09b1938db --- /dev/null +++ b/lifecycle/tests/sse/test_server_sent_events.py @@ -0,0 +1,73 @@ +import asyncio + +import backoff +import httpx +from fastapi import FastAPI +from fastapi.responses import StreamingResponse + +from racetrack_client.log.logs import configure_logs +from racetrack_commons.api.asgi.asgi_server import serve_asgi_in_background +from racetrack_commons.socket import free_tcp_port + + +def test_server_sent_events(): + app = FastAPI() + + def sse_generator(): + for num in range(3): + yield f'data: {{"progress": {num}}}\n\n' + + @app.get("/sse") + def sse_endpoint(): + return StreamingResponse(sse_generator(), media_type="text/event-stream") + + @app.get("/ready") + def ready_endpoint(): + return + + async def test_async(): + configure_logs() + port = free_tcp_port() + with serve_asgi_in_background(app, port): + _wait_until_server_ready(port) + _test_sse_client_get(port) + _test_sse_client_stream(port) + + asyncio.run(test_async()) + + +def _test_sse_client_get(port: int): + response = httpx.get(f'http://127.0.0.1:{port}/sse') + assert response.status_code == 200 + assert response.text == '''data: {"progress": 0} + +data: {"progress": 1} + +data: {"progress": 2} + +''' + + +def _test_sse_client_stream(port: int): + lines = [] + with httpx.Client(timeout=10) as client: + with client.stream('GET', f'http://127.0.0.1:{port}/sse') as stream_response: + for line in stream_response.iter_lines(): + lines.append(line) + + assert lines == [ + 'data: {"progress": 0}', + '', + 'data: {"progress": 1}', + '', + 'data: {"progress": 2}', + '', + ] + stream_response.raise_for_status() + assert stream_response.status_code == 200 + + +@backoff.on_exception(backoff.fibo, httpx.RequestError, max_time=5, jitter=None) +def _wait_until_server_ready(port: int): + response = httpx.get(f'http://127.0.0.1:{port}/ready') + assert response.status_code == 200 diff --git a/racetrack_commons/racetrack_commons/api/asgi/access_log.py b/racetrack_commons/racetrack_commons/api/asgi/access_log.py index e117469a4..2f75b03e2 100644 --- a/racetrack_commons/racetrack_commons/api/asgi/access_log.py +++ b/racetrack_commons/racetrack_commons/api/asgi/access_log.py @@ -1,6 +1,7 @@ import time from fastapi import FastAPI, Request, Response +from starlette.types import ASGIApp, Receive, Scope, Send from racetrack_client.log.logs import get_logger from racetrack_commons.api.asgi.asgi_server import HIDDEN_ACCESS_LOGS @@ -27,10 +28,28 @@ def enable_request_access_log(fastapi_app: FastAPI): tracing_header = get_tracing_header_name() caller_header = get_caller_header_name() - @fastapi_app.middleware('http') - async def access_log_on_receive(request: Request, call_next) -> Response: - tracing_id = request.headers.get(tracing_header) - caller_name = request.headers.get(caller_header) + fastapi_app.add_middleware(RequestAccessLogMiddleware, tracing_header=tracing_header, caller_header=caller_header) + + +class RequestAccessLogMiddleware: + def __init__( + self, + app: ASGIApp, + tracing_header: str = '', + caller_header: str = '', + ) -> None: + self.app: ASGIApp = app + self.tracing_header: str = tracing_header + self.caller_header: str = caller_header + + async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: + if scope["type"] != "http": + await self.app(scope, receive, send) + return + + request = Request(scope=scope) + tracing_id = request.headers.get(self.tracing_header) + caller_name = request.headers.get(self.caller_header) uri = request.url.replace(scheme='', netloc='') request_logger = RequestTracingLogger(logger, { 'tracing_id': tracing_id, @@ -39,7 +58,8 @@ async def access_log_on_receive(request: Request, call_next) -> Response: message = f'{request.method} {uri}' if message not in HIDDEN_REQUEST_LOGS: request_logger.debug(f'Request: {message}') - return await call_next(request) + + await self.app(scope, receive, send) def enable_response_access_log(fastapi_app: FastAPI): @@ -68,6 +88,12 @@ async def access_log(request: Request, call_next) -> Response: metric_request_duration.observe(time.time() - start_time) metric_requests_done.inc() + if await request.is_disconnected(): + method = request.method + uri = request.url.replace(scheme='', netloc='') + logger.error(f"Request cancelled by the client: {method} {uri}") + return Response(status_code=204) # No Content + method = request.method uri = request.url.replace(scheme='', netloc='') response_code = response.status_code diff --git a/racetrack_commons/racetrack_commons/api/asgi/error_handler.py b/racetrack_commons/racetrack_commons/api/asgi/error_handler.py index e0de0e749..80dc3c91e 100644 --- a/racetrack_commons/racetrack_commons/api/asgi/error_handler.py +++ b/racetrack_commons/racetrack_commons/api/asgi/error_handler.py @@ -1,8 +1,11 @@ from __future__ import annotations +import json import sys +from typing import Any from fastapi import FastAPI, Request from fastapi.responses import JSONResponse +from starlette.types import ASGIApp, Receive, Scope, Send from racetrack_client.log.errors import EntityNotFound, AlreadyExists, ValidationError from racetrack_commons.api.metrics import metric_internal_server_errors @@ -67,18 +70,43 @@ def default_error_handler(request: Request, error: Exception): content={'error': error_message, 'type': error_type}, ) - @api.middleware('http') - async def catch_all_exceptions_middleware(request: Request, call_next): + api.add_middleware(ErrorHandlerMiddleware) + + +class ErrorHandlerMiddleware: + def __init__( + self, + app: ASGIApp, + ) -> None: + self.app: ASGIApp = app + + async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: + if scope["type"] != "http": + await self.app(scope, receive, send) + return try: - return await call_next(request) + await self.app(scope, receive, send) except BaseException as error: metric_internal_server_errors.inc() + request = Request(scope=scope) log_request_exception_with_tracing(request, error) error_message, error_type = _upack_error_message(error) - return JSONResponse( - status_code=500, - content={'error': error_message, 'type': error_type}, - ) + json_content = {'error': error_message, 'type': error_type} + await send_json_content(send, 500, json_content) + + +async def send_json_content(send: Send, status_code: int, json_content: Any): + await send({ + "type": "http.response.start", + "status": status_code, + "headers": [ + [b"content-type", b"application/json"], + ], + }) + await send({ + "type": "http.response.body", + "body": json.dumps(json_content).encode(), + }) def _upack_error_message(e: BaseException) -> tuple[str, str]: diff --git a/racetrack_commons/racetrack_commons/api/server_sent_events.py b/racetrack_commons/racetrack_commons/api/server_sent_events.py new file mode 100644 index 000000000..0970994dd --- /dev/null +++ b/racetrack_commons/racetrack_commons/api/server_sent_events.py @@ -0,0 +1,88 @@ +import json +from threading import Thread +from typing import Dict, Callable +import queue + +import httpx +from fastapi.responses import StreamingResponse + +from racetrack_client.log.logs import get_logger + +logger = get_logger(__name__) + +EVENT_RESULT = 'event: result\n' +EVENT_HEARTBEAT = 'event: keepalive_heartbeat' +DATA_MESSAGE = 'data: ' + + +def stream_result_with_heartbeat(result_runner: Callable[[], Dict]): + """ + Return result dict in SSE (Server-Sent Events) response, streaming heartbeat events to keep the connection alive + """ + result_channel = queue.Queue(maxsize=0) + + def _runner(): + try: + result = result_runner() + result_channel.put(json.dumps({ + 'result': result, + })) + except BaseException as e: + result_channel.put(json.dumps({ + 'error': str(e), + })) + + Thread(target=_runner, daemon=True).start() + + def sse_generator(): + while True: + try: + event_data: str = result_channel.get(block=True, timeout=60) + yield f'{EVENT_RESULT}data: {event_data}\n\n' + result_channel.task_done() + return + except queue.Empty: + yield f'{EVENT_HEARTBEAT}\n\n' + + return StreamingResponse(sse_generator(), media_type="text/event-stream") + + +def make_sse_request( + method: str, + url: str, + payload: Dict, +): + response_buffer = '' + with httpx.Client(timeout=3600) as client: + with client.stream(method.upper(), url, json=payload) as stream_response: + for line in stream_response.iter_lines(): + if line.strip() == 'event: keepalive_heartbeat': + logger.debug(f'keepalive heartbeat for {method} {url}') + else: + response_buffer += line + '\n' + + validate_streaming_response(stream_response) + response = extract_response_dict(response_buffer) + error = response.get('error') + if error: + raise RuntimeError(f'Streaming Response error: {error}') + return response['result'] + + +def validate_streaming_response(response: httpx.Response): + if response.is_success: + return + message = f'HTTP error "{response.status_code} {response.reason_phrase}" ' \ + f'for url {response.request.method} {response.url}' + raise RuntimeError(message) + + +def extract_response_dict(response_text: str) -> Dict: + prefix = EVENT_RESULT + DATA_MESSAGE + last_occurrence = response_text.find(prefix) + if last_occurrence == -1: + raise ValueError('could not find result event in the SSE response') + + remainder = response_text[last_occurrence + len(prefix):] + json_dict = json.loads(remainder) + return json_dict diff --git a/racetrack_commons/requirements.txt b/racetrack_commons/requirements.txt index c120b2c91..d244c1faf 100644 --- a/racetrack_commons/requirements.txt +++ b/racetrack_commons/requirements.txt @@ -8,3 +8,4 @@ protobuf==4.22.0 # needed by opentelemetry exporter python-multipart==0.0.9 # uploading files watchdog==2.3.1 a2wsgi==1.10.4 +httpx==0.27.0 diff --git a/requirements-test.txt b/requirements-test.txt index 64bc39dc2..39353fa26 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -4,6 +4,6 @@ coverage==7.2.1 backoff==2.2.1 httpretty==1.1.4 Flask==2.2.5 -httpx==0.24.0 +httpx==0.27.0 pytest-django==4.5.2 anyio==3.6.2 diff --git a/utils/grafana/dashboards/lifecycle.json b/utils/grafana/dashboards/lifecycle.json index 804908cea..17183142e 100644 --- a/utils/grafana/dashboards/lifecycle.json +++ b/utils/grafana/dashboards/lifecycle.json @@ -24,7 +24,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 19, + "id": 5, "links": [], "liveNow": false, "panels": [ @@ -52,6 +52,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -156,6 +157,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -272,6 +274,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -380,6 +383,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -485,6 +489,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -607,6 +612,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "description": "", "fieldConfig": { "defaults": { "color": { @@ -626,6 +632,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -639,13 +646,14 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -669,7 +677,7 @@ "x": 6, "y": 8 }, - "id": 5, + "id": 19, "options": { "legend": { "calcs": [ @@ -677,7 +685,7 @@ ], "displayMode": "table", "placement": "right", - "showLegend": false, + "showLegend": true, "sortBy": "Last *", "sortDesc": true }, @@ -695,7 +703,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "lifecycle_event_stream_client_connected{job=\"lifecycle\"} - lifecycle_event_stream_client_disconnected{job=\"lifecycle\"}", + "expr": "requested_job_deployments_total{job=~\"lifecycle.*\"} - done_job_deployments_total{job=~\"lifecycle.*\"}", "hide": false, "interval": "", "legendFormat": "{{instance}}", @@ -703,7 +711,7 @@ "refId": "B" } ], - "title": "Clients connected to websocket Event Stream", + "title": "Active deployments (Unfinished)", "type": "timeseries" }, { @@ -711,7 +719,6 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", "fieldConfig": { "defaults": { "color": { @@ -731,6 +738,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -774,7 +782,7 @@ "x": 12, "y": 8 }, - "id": 10, + "id": 5, "options": { "legend": { "calcs": [ @@ -782,7 +790,7 @@ ], "displayMode": "table", "placement": "right", - "showLegend": true, + "showLegend": false, "sortBy": "Last *", "sortDesc": true }, @@ -800,7 +808,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "commons_internal_server_errors_total{job=~\"lifecycle.*\"}", + "expr": "lifecycle_event_stream_client_connected{job=\"lifecycle\"} - lifecycle_event_stream_client_disconnected{job=\"lifecycle\"}", "hide": false, "interval": "", "legendFormat": "{{instance}}", @@ -808,7 +816,7 @@ "refId": "B" } ], - "title": "Total Internal server errors", + "title": "Clients connected to websocket Event Stream", "type": "timeseries" }, { @@ -816,6 +824,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "description": "", "fieldConfig": { "defaults": { "color": { @@ -835,6 +844,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -848,7 +858,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "normal" + "mode": "none" }, "thresholdsStyle": { "mode": "off" @@ -878,7 +888,7 @@ "x": 18, "y": 8 }, - "id": 8, + "id": 10, "options": { "legend": { "calcs": [ @@ -891,7 +901,7 @@ "sortDesc": true }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -904,15 +914,15 @@ }, "editorMode": "code", "exemplar": true, - "expr": "lifecycle_tcp_connections_count", + "expr": "commons_internal_server_errors_total{job=~\"lifecycle.*\"}", "hide": false, "interval": "", - "legendFormat": "{{status}} {{instance}}", + "legendFormat": "{{instance}}", "range": true, "refId": "B" } ], - "title": "TCP connections", + "title": "Total Internal server errors", "type": "timeseries" }, { @@ -939,6 +949,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -1041,6 +1052,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -1145,6 +1157,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -1228,7 +1241,6 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "Useful to determine pod restarts", "fieldConfig": { "defaults": { "color": { @@ -1248,6 +1260,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -1261,7 +1274,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -1291,6 +1304,112 @@ "x": 18, "y": 16 }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": true, + "expr": "lifecycle_tcp_connections_count", + "hide": false, + "interval": "", + "legendFormat": "{{status}} {{instance}}", + "range": true, + "refId": "B" + } + ], + "title": "TCP connections", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Useful to determine pod restarts", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 24 + }, "id": 9, "options": { "legend": { @@ -1375,7 +1494,7 @@ "text": {}, "textMode": "value_and_name" }, - "pluginVersion": "9.3.6", + "pluginVersion": "10.1.4", "targets": [ { "datasource": { @@ -1396,7 +1515,7 @@ } ], "refresh": "30s", - "schemaVersion": 37, + "schemaVersion": 38, "style": "dark", "tags": [], "templating": { @@ -1410,6 +1529,6 @@ "timezone": "", "title": "Lifecycle", "uid": "VZLtJLFnz", - "version": 4, + "version": 2, "weekStart": "" } \ No newline at end of file