From e9cd0ce3d2927304c4e120a98afb0d569c3bdc16 Mon Sep 17 00:00:00 2001 From: Dominik Riva Date: Mon, 1 Sep 2025 18:24:15 +0200 Subject: [PATCH 01/10] initial code for the better-ehr-health check --- .../better-ehr-health/better-ehr-health | 202 ++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100755 check-plugins/better-ehr-health/better-ehr-health diff --git a/check-plugins/better-ehr-health/better-ehr-health b/check-plugins/better-ehr-health/better-ehr-health new file mode 100755 index 00000000..f614b626 --- /dev/null +++ b/check-plugins/better-ehr-health/better-ehr-health @@ -0,0 +1,202 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8; py-indent-offset: 4 -*- +# +# Author: Linuxfabrik GmbH, Zurich, Switzerland +# Contact: info (at) linuxfabrik (dot) ch +# https://www.linuxfabrik.ch/ +# License: The Unlicense, see LICENSE file. + +# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md + +"""See the check's README for more details. +""" + +import argparse # pylint: disable=C0413 +import json # pylint: disable=C0413 +import sys # pylint: disable=C0413 + +import lib.args # pylint: disable=C0413 +import lib.base # pylint: disable=C0413 +import lib.url # pylint: disable=C0413 +import lib.lftest # pylint: disable=C0413 +import lib.time # pylint: disable=C0413 +from lib.globals import (STATE_CRIT, STATE_OK, # pylint: disable=C0413 + STATE_UNKNOWN, STATE_WARN) + +try: + import psutil # pylint: disable=C0413 +except ImportError: + print('Python module "psutil" is not installed.') + sys.exit(STATE_UNKNOWN) + + +__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland' +__version__ = '2025090101' + +DESCRIPTION = """A monitoring plugin, querying the Better EHR Health JSON endpoint. 
+ http://server:port/health (application/json response)""" + +DEFAULT_WARN = 80 +DEFAULT_CRIT = 90 +DEFAULT_INSECURE = True +DEFAULT_NO_PROXY = False +DEFAULT_URL = 'http://localhost:80/health' +DEFAULT_TIMEOUT = 3 + +def parse_args(): + """Parse command line arguments using argparse. + """ + parser = argparse.ArgumentParser(description=DESCRIPTION) + + parser.add_argument( + '-V', '--version', + action='version', + version=f'%(prog)s: v{__version__} by {__author__}' + ) + + parser.add_argument( + '--always-ok', + help='Always returns OK.', + dest='ALWAYS_OK', + action='store_true', + default=False, + ) + + parser.add_argument( + '-c', '--critical', + help='Set the CRIT threshold as a percentage. ' + 'Default: >= %(default)s', + dest='CRIT', + type=int, + default=DEFAULT_CRIT, + ) + + parser.add_argument( + '--insecure', + help='This option explicitly allows to perform "insecure" SSL connections. ' + 'Default: %(default)s', + dest='INSECURE', + action='store_true', + default=DEFAULT_INSECURE, + ) + + parser.add_argument( + '--no-proxy', + help='Do not use a proxy. ' + 'Default: %(default)s', + dest='NO_PROXY', + action='store_true', + default=DEFAULT_NO_PROXY, + ) + + parser.add_argument( + '--url', + help='Better EHR Health Endpoint. ' + 'Default: %(default)s', + dest='URL', + default=DEFAULT_URL, + ) + + parser.add_argument( + '--test', + help='For unit tests. Needs "path-to-stdout-file,path-to-stderr-file,expected-retc".', + dest='TEST', + type=lib.args.csv, + ) + + parser.add_argument( + '--timeout', + help='Network timeout in seconds. ' + 'Default: %(default)s (seconds)', + dest='TIMEOUT', + type=int, + default=DEFAULT_TIMEOUT, + ) + + parser.add_argument( + '-w', '--warning', + help='Set the WARN threshold as a percentage. ' + 'Default: >= %(default)s', + dest='WARN', + type=int, + default=DEFAULT_WARN, + ) + + return parser.parse_args() + + +def main(): + """The main function. Hier spielt die Musik. 
+ """ + + # parse the command line, exit with UNKNOWN if it fails + try: + args = parse_args() + except SystemExit: + sys.exit(STATE_UNKNOWN) + + # fetch data + if args.TEST is None: + if not args.URL.startswith('http'): + lib.base.cu('--url parameter has to start with "http://" or https://".') + if not args.URL.endswith('/health'): + lib.base.cu('--url parameter has to end in "/health".') + url = args.URL + + header = { + 'Accept': 'application/json' + } + # and get the info from the API + success, jsonst = lib.url.fetch_json( + url, + header=header, + insecure=args.INSECURE, + no_proxy=args.NO_PROXY, + timeout=args.TIMEOUT, + extended=True, + ) + + try: + print(jsonst) + result = json.loads(jsonst) + except: + lib.base.cu('ValueError: No JSON object could be decoded') + else: + # do not call the command, put in test data + result, retc = lib.lftest.test(args.TEST) + + print_r(result) + # init some vars + msg = '' + state = STATE_OK + perfdata = '' + try: + compiled_ignore_regex = [re.compile(item) for item in args.IGNORE_REGEX] + except: + lib.base.cu('Unable to compile regex.') + + table_data = [] + + # analyze data + #HTTP response code is 200 for server status UP and 503 for statuses DEGRADED and DOWN. 
+ + + title = 'Lorem ipsum' + if any(item.search(title) for item in compiled_ignore_regex): + pass # in loops: continue + value = str(lib.time.now())[-2:] + + # build the message + state = lib.base.get_state(value, args.WARN, args.CRIT) + msg += '{}% used{}'.format(value, lib.base.state2str(state, prefix=' ')) + perfdata += lib.base.get_perfdata('usage_percent', value, '%', args.WARN, args.CRIT, 0, 100) + + # over and out + lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK) + + +if __name__ == '__main__': + try: + main() + except Exception: # pylint: disable=W0703 + lib.base.cu() From b4745990472414f18e48ec014ac8a475fcff708e Mon Sep 17 00:00:00 2001 From: Dominik Riva Date: Tue, 2 Sep 2025 19:24:56 +0200 Subject: [PATCH 02/10] add status, messge with table and perfdata --- .../better-ehr-health/better-ehr-health | 72 ++++++++++++++----- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/check-plugins/better-ehr-health/better-ehr-health b/check-plugins/better-ehr-health/better-ehr-health index f614b626..88111520 100755 --- a/check-plugins/better-ehr-health/better-ehr-health +++ b/check-plugins/better-ehr-health/better-ehr-health @@ -122,6 +122,14 @@ def parse_args(): default=DEFAULT_WARN, ) + parser.add_argument( + '-v', '--verbose', + help='Set the verbosity level.', + dest='VERBOSITY', + action='count', + default=0 + ) + return parser.parse_args() @@ -156,40 +164,72 @@ def main(): extended=True, ) + if args.VERBOSITY > 2: + print(success, jsonst, type(jsonst)) try: - print(jsonst) - result = json.loads(jsonst) + result = jsonst['response_json'] + status_code = jsonst['status_code'] except: lib.base.cu('ValueError: No JSON object could be decoded') else: # do not call the command, put in test data result, retc = lib.lftest.test(args.TEST) - print_r(result) + if args.VERBOSITY > 2: + print(status_code, result) + # init some vars msg = '' state = STATE_OK perfdata = '' - try: - compiled_ignore_regex = [re.compile(item) for item in 
args.IGNORE_REGEX] - except: - lib.base.cu('Unable to compile regex.') - table_data = [] # analyze data #HTTP response code is 200 for server status UP and 503 for statuses DEGRADED and DOWN. - + if status_code == 503: + state = STATE_WARN - title = 'Lorem ipsum' - if any(item.search(title) for item in compiled_ignore_regex): - pass # in loops: continue - value = str(lib.time.now())[-2:] + if result['status'] == 'DOWN': + state = STATE_CRIT # build the message - state = lib.base.get_state(value, args.WARN, args.CRIT) - msg += '{}% used{}'.format(value, lib.base.state2str(state, prefix=' ')) - perfdata += lib.base.get_perfdata('usage_percent', value, '%', args.WARN, args.CRIT, 0, 100) + + table_cols = ["Component", "Status", "Details"] + + for component, info in result['components'].items(): + status = info.get('status') + details = info.get('details', {}) + # Flatten details dict to a string + table_data.append({'Component': component, 'Status': status, 'Details': ''}) + for k, v in details.items(): + table_data.append({'Component': '', 'Status': '', 'Details': '{}={}'.format(k,v)}) + if isinstance(v, (int, float)): + if k == 'threshold': + continue + if k == 'total': + continue + warn_threshold = details.get('threshold', '') + crit_threshold = details.get('threshold', '') + lable = '_'.join([component, k]) + value = v + uom = '' + total = threshold = details.get('total', '') + if isinstance(v, bool): + value = int(v) + warn_threshold = '' + crit_threshold = '' + total = '' + perfdata += lib.base.get_perfdata( + lable, + value, + uom, + warn_threshold, + crit_threshold, + total) + + msg += '{} Status: {}'.format(lib.base.state2str(state, prefix=' '), result['status']) + msg += '\n{}'.format(lib.base.get_table(table_data, table_cols, table_cols)) + #perfdata += lib.base.get_perfdata('usage_percent', value, '%', args.WARN, args.CRIT, 0, 100) # over and out lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK) From 2c110941d2bbea2a2d23ebf9577db9fae091473d Mon 
Sep 17 00:00:00 2001 From: Dominik Riva Date: Thu, 4 Sep 2025 21:27:43 +0200 Subject: [PATCH 03/10] add override arguments for state and threshold of components --- .../better-ehr-health/better-ehr-health | 142 +++++++++++++----- 1 file changed, 106 insertions(+), 36 deletions(-) diff --git a/check-plugins/better-ehr-health/better-ehr-health b/check-plugins/better-ehr-health/better-ehr-health index 88111520..faff7fc3 100755 --- a/check-plugins/better-ehr-health/better-ehr-health +++ b/check-plugins/better-ehr-health/better-ehr-health @@ -36,8 +36,6 @@ __version__ = '2025090101' DESCRIPTION = """A monitoring plugin, querying the Better EHR Health JSON endpoint. http://server:port/health (application/json response)""" -DEFAULT_WARN = 80 -DEFAULT_CRIT = 90 DEFAULT_INSECURE = True DEFAULT_NO_PROXY = False DEFAULT_URL = 'http://localhost:80/health' @@ -62,15 +60,6 @@ def parse_args(): default=False, ) - parser.add_argument( - '-c', '--critical', - help='Set the CRIT threshold as a percentage. ' - 'Default: >= %(default)s', - dest='CRIT', - type=int, - default=DEFAULT_CRIT, - ) - parser.add_argument( '--insecure', help='This option explicitly allows to perform "insecure" SSL connections. ' @@ -114,12 +103,25 @@ def parse_args(): ) parser.add_argument( - '-w', '--warning', - help='Set the WARN threshold as a percentage. ' - 'Default: >= %(default)s', - dest='WARN', - type=int, - default=DEFAULT_WARN, + '--override-status', + metavar='COMPONENT:API_STATE:NAGIOS_STATE', + action='append', + help=( + 'Override mapping from API state to Nagios state. ' + 'Format: component:api_state:nagios_state ' + 'Example: diskSpace:DEGRADED:WARN' + ) + ) + + parser.add_argument( + '--override-threshold', + metavar='COMPONENT:DETAIL[:WARN[:CRIT]]', + action='append', + help=( + 'Override threshold check for a component detail. 
' + 'Nagios format: component:detail[:warn[:crit]] ' + 'Example: diskSpace:free::20000000' + ) ) parser.add_argument( @@ -143,6 +145,30 @@ def main(): except SystemExit: sys.exit(STATE_UNKNOWN) + status_overrides = [] + for item in args.override_status or []: + try: + comp, api_state, nagios_state = item.split(':', 2) + status_overrides.append({ + 'component': comp, + 'api_state': api_state.upper(), + 'nagios_state': nagios_state.upper(), + }) + except ValueError: + lib.base.cu('Invalid status override: {}'.format(item)) + + threshold_overrides = [] + for item in args.override_threshold or []: + parts = item.split(':') + if len(parts) < 2: + lib.base.cu('Invalid threshold override: {}'.format(item)) + threshold_overrides.append({ + 'component': parts[0], + 'detail': parts[1], + 'warn': parts[2] if len(parts) > 2 and parts[2] != '' else None, + 'crit': parts[3] if len(parts) > 3 and parts[3] != '' else None, + }) + # fetch data if args.TEST is None: if not args.URL.startswith('http'): @@ -182,9 +208,12 @@ def main(): msg = '' state = STATE_OK perfdata = '' + table_cols = ['Component', 'Status', 'Details'] table_data = [] + s_o_s = state + t_o_s = state - # analyze data + # analyze data and build message #HTTP response code is 200 for server status UP and 503 for statuses DEGRADED and DOWN. 
if status_code == 503: state = STATE_WARN @@ -192,47 +221,88 @@ def main(): if result['status'] == 'DOWN': state = STATE_CRIT - # build the message - - table_cols = ["Component", "Status", "Details"] - + # Go over components and check for state overrides + # build the table while we're at it for component, info in result['components'].items(): status = info.get('status') details = info.get('details', {}) - # Flatten details dict to a string + + # Check for status overrides + for s_o in status_overrides: + if s_o['component'] == component: + api_state = info.get('status', '').upper() + if api_state == s_o['api_state']: + temp_state = { + 'CRIT': STATE_CRIT, + 'CRITICAL': STATE_CRIT, + 'WARN': STATE_WARN, + 'WARNING': STATE_WARN, + 'OK': STATE_OK, + 'UNKNOWN': STATE_UNKNOWN, + }.get(s_o['nagios_state'], STATE_UNKNOWN) + s_o_s = lib.base.get_worst(s_o_s, temp_state) + status = '{} -> {}'.format(status, lib.base.state2str(temp_state)) + table_data.append({'Component': component, 'Status': status, 'Details': ''}) + for k, v in details.items(): - table_data.append({'Component': '', 'Status': '', 'Details': '{}={}'.format(k,v)}) - if isinstance(v, (int, float)): - if k == 'threshold': - continue - if k == 'total': - continue - warn_threshold = details.get('threshold', '') - crit_threshold = details.get('threshold', '') + status = '' + warn = '' + crit = '' + # Apply threshold overrides that match this detail + for t_o in threshold_overrides: + if t_o['component'] == component and t_o['detail'] == k: + warn = t_o['warn'] + crit = t_o['crit'] + temp_state = lib.base.get_state(v, warn, crit, 'range') + t_o_s = lib.base.get_worst(t_o_s, temp_state) + status = lib.base.state2str(temp_state) + + table_data.append({'Component': '', 'Status': status, 'Details': '{}={}'.format(k,v)}) + + # Generate perfdata for numeric values + if isinstance(v, (int, float)) and k not in ('threshold', 'total'): + if warn != '': + warn_threshold = warn + else: + warn_threshold = 
details.get('threshold', '') + if crit != '': + crit_threshold = crit + else: + crit_threshold = details.get('threshold', '') lable = '_'.join([component, k]) + label = f'{component}_{k}' value = v uom = '' - total = threshold = details.get('total', '') + total = details.get('total', '') if isinstance(v, bool): value = int(v) warn_threshold = '' crit_threshold = '' total = '' perfdata += lib.base.get_perfdata( - lable, + label, value, uom, warn_threshold, crit_threshold, total) - msg += '{} Status: {}'.format(lib.base.state2str(state, prefix=' '), result['status']) + # Final plugin state + final_state = lib.base.get_worst(lib.base.get_worst(state, s_o_s), t_o_s) + + # Compose message + if final_state == state: + msg += '{} API Status: {}'.format(lib.base.state2str(state, prefix=' '), result['status']) + else: + msg += '{} overridden from {} API Status: {}'.format( + lib.base.state2str(final_state, prefix=' '), + lib.base.state2str(state, prefix=' '), + result['status']) msg += '\n{}'.format(lib.base.get_table(table_data, table_cols, table_cols)) - #perfdata += lib.base.get_perfdata('usage_percent', value, '%', args.WARN, args.CRIT, 0, 100) # over and out - lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK) + lib.base.oao(msg, final_state, perfdata, always_ok=args.ALWAYS_OK) if __name__ == '__main__': From 76d7f4e212c4c9c896ddd89cddbb57543d643e50 Mon Sep 17 00:00:00 2001 From: Dominik Riva Date: Fri, 5 Sep 2025 09:22:17 +0200 Subject: [PATCH 04/10] update description and add readme --- check-plugins/better-ehr-health/README.md | 95 +++++++++++++++++++ .../better-ehr-health/better-ehr-health | 6 +- 2 files changed, 98 insertions(+), 3 deletions(-) create mode 100644 check-plugins/better-ehr-health/README.md diff --git a/check-plugins/better-ehr-health/README.md b/check-plugins/better-ehr-health/README.md new file mode 100644 index 00000000..c3a5dd75 --- /dev/null +++ b/check-plugins/better-ehr-health/README.md @@ -0,0 +1,95 @@ +# Check better-ehr-health + 
+## Overview + +Monitoring plugin for Better EHR, querying the JSON health endpoint (for example, `http://server:port/health`). +Supports overriding component states and applying Nagios-style threshold ranges to detail metrics. + +Hints: + +* Useful for monitoring Better EHR availability and component health. +* Allows fine-grained overrides to adjust alerting behaviour. + +## Fact Sheet + +| Fact | Value | +|----------------------------------|---------------------------------------------------------------------------------------------| +| Check Plugin Download | https://github.com/Linuxfabrik/monitoring-plugins/tree/main/check-plugins/better-ehr-health | +| Check Interval Recommendation | Once a minute | +| Can be called without parameters | No | + +## Help + +```text +usage: better-ehr-health [-h] [-V] [--always-ok] [--insecure] [--no-proxy] + [--url URL] [--test TEST] [--timeout TIMEOUT] + [--override-status COMPONENT:API_STATE:NAGIOS_STATE] + [--override-threshold COMPONENT:DETAIL[:WARN[:CRIT]]] + [-v] + +Monitoring plugin for Better EHR, querying the JSON health endpoint (e.g. +http://server:port/health). Supports overriding component states and applying +Nagios-style threshold ranges to detail metrics. + +options: + -h, --help show this help message and exit + -V, --version show program's version number and exit + --always-ok Always returns OK. + --insecure Allow insecure SSL connections. Default: True + --no-proxy Do not use a proxy. Default: False + --url URL Better EHR Health endpoint. Default: + http://localhost:80/health + --test TEST For unit tests. Needs "path-to-stdout-file,path-to- + stderr-file,expected-retc". + --timeout TIMEOUT Network timeout in seconds. Default: 3 + --override-status COMPONENT:API_STATE:NAGIOS_STATE + Override mapping from API state to Nagios state. + Example: diskSpace:DEGRADED:WARN + --override-threshold COMPONENT:DETAIL[:WARN[:CRIT]] + Override threshold check for a component detail. 
+ Example: diskSpace:free::20000000 + -v, --verbose Set the verbosity level. +``` + +## Usage Examples + +```bash +./better-ehr-health --url http://server:8080/health +``` + +```bash +./better-ehr-health --override-status diskSpace:DEGRADED:WARN +``` + +```bash +./better-ehr-health --override-threshold diskSpace:free::20000000 +``` + +## States + +* Returns **OK**, **WARN**, **CRIT**, or **UNKNOWN** depending on API state, component overrides, and threshold checks. +* `--always-ok` forces the check to always return OK. + +## Perfdata / Metrics + +Each numeric detail is exposed as perfdata unless explicitly excluded (`threshold`, `total`). + +| Name | Type | Description | +| ------------------ | ------ | ----------------------- | +| diskSpace\_free | Bytes | Free disk space | +| diskSpace\_exists | Number | `exists` flag, reported as 0 or 1 | +| hikariConnectionPool\_activeConnections | Number | Active DB connections | +| ... | ... | Other component details | + +## Troubleshooting + +**Error:** Connection refused +**Solution:** Verify Better EHR endpoint is reachable and correct URL is provided with `--url`. + +**Error:** SSL error +**Solution:** Use `--insecure` for testing, but configure proper certificates in production. + +## Credits, License + +* Authors: [Linuxfabrik GmbH, Zurich](https://www.linuxfabrik.ch) +* License: The Unlicense, see [LICENSE file](https://unlicense.org/). diff --git a/check-plugins/better-ehr-health/better-ehr-health b/check-plugins/better-ehr-health/better-ehr-health index faff7fc3..ac8221bb 100755 --- a/check-plugins/better-ehr-health/better-ehr-health +++ b/check-plugins/better-ehr-health/better-ehr-health @@ -31,10 +31,10 @@ except ImportError: __author__ = 'Linuxfabrik GmbH, Zurich/Switzerland' -__version__ = '2025090101' +__version__ = '2025090501' -DESCRIPTION = """A monitoring plugin, querying the Better EHR Health JSON endpoint. 
- http://server:port/health (application/json response)""" +DESCRIPTION = """Monitoring plugin for Better EHR, querying the JSON health endpoint (e.g. http://server:port/health). + Supports overriding component states and applying Nagios-style threshold ranges to detail metrics.""" DEFAULT_INSECURE = True DEFAULT_NO_PROXY = False From 9a28ba4fdf99aa9027f16277e315643e8836cf9c Mon Sep 17 00:00:00 2001 From: Dominik Riva Date: Fri, 5 Sep 2025 10:00:27 +0200 Subject: [PATCH 05/10] add unit tests --- .../better-ehr-health/better-ehr-health | 33 +++++++-- .../unit-test/fhir_degraded.json | 44 ++++++++++++ .../unit-test/fhir_down.json | 44 ++++++++++++ .../better-ehr-health/unit-test/fhir_up.json | 44 ++++++++++++ check-plugins/better-ehr-health/unit-test/run | 71 +++++++++++++++++++ .../unit-test/stdout/EXAMPLE01 | 1 + 6 files changed, 233 insertions(+), 4 deletions(-) create mode 100644 check-plugins/better-ehr-health/unit-test/fhir_degraded.json create mode 100644 check-plugins/better-ehr-health/unit-test/fhir_down.json create mode 100644 check-plugins/better-ehr-health/unit-test/fhir_up.json create mode 100755 check-plugins/better-ehr-health/unit-test/run create mode 100644 check-plugins/better-ehr-health/unit-test/stdout/EXAMPLE01 diff --git a/check-plugins/better-ehr-health/better-ehr-health b/check-plugins/better-ehr-health/better-ehr-health index ac8221bb..ffb1604c 100755 --- a/check-plugins/better-ehr-health/better-ehr-health +++ b/check-plugins/better-ehr-health/better-ehr-health @@ -88,9 +88,15 @@ def parse_args(): parser.add_argument( '--test', - help='For unit tests. Needs "path-to-stdout-file,path-to-stderr-file,expected-retc".', + help='For unit tests. 
Provide a path to a JSON file containing a captured API response.', dest='TEST', - type=lib.args.csv, + ) + + parser.add_argument( + '--record-json', + help='Write the full fetched JSON (including status_code and response_json) to the given file.', + dest='RECORD_JSON', + metavar='FILE', ) parser.add_argument( @@ -197,9 +203,28 @@ def main(): status_code = jsonst['status_code'] except: lib.base.cu('ValueError: No JSON object could be decoded') + + # record JSON ready for testing + if args.RECORD_JSON: + try: + record = { + "status_code": status_code, + "response_json": result, + } + with open(args.RECORD_JSON, 'w', encoding='utf-8') as f: + json.dump(record, f, indent=2, sort_keys=True) + except Exception as e: + lib.base.cu(f'Failed to write JSON to {args.RECORD_JSON}: {e}') + else: - # do not call the command, put in test data - result, retc = lib.lftest.test(args.TEST) + # Load test data from JSON file + try: + with open(args.TEST, 'r', encoding='utf-8') as f: + jsonst = json.load(f) + result = jsonst['response_json'] + status_code = jsonst.get('status_code', 200) + except Exception as e: + lib.base.cu(f'Failed to load test JSON file {args.TEST}: {e}') if args.VERBOSITY > 2: print(status_code, result) diff --git a/check-plugins/better-ehr-health/unit-test/fhir_degraded.json b/check-plugins/better-ehr-health/unit-test/fhir_degraded.json new file mode 100644 index 00000000..127b2876 --- /dev/null +++ b/check-plugins/better-ehr-health/unit-test/fhir_degraded.json @@ -0,0 +1,44 @@ +{ + "response_json": { + "components": { + "db": { + "details": { + "database": "Oracle", + "validationQuery": "isValid()" + }, + "status": "UP" + }, + "diskSpace": { + "details": { + "exists": true, + "free": 34581540864, + "threshold": 10485760, + "total": 61041709056 + }, + "status": "UP" + }, + "hikariConnectionPool": { + "details": { + "activeConnections": 0, + "maxPoolSize": 40 + }, + "status": "UP" + }, + "indexStatus": { + "status": "GREEN" + }, + "indexSynchronization": { + 
"details": { + "erroredEntries": 0, + "queuedEntries": 0 + }, + "status": "GREEN" + }, + "ping": { + "status": "UP" + } + }, + "status": "DEGRADED" + }, + "status_code": 503 +} diff --git a/check-plugins/better-ehr-health/unit-test/fhir_down.json b/check-plugins/better-ehr-health/unit-test/fhir_down.json new file mode 100644 index 00000000..ad409c9d --- /dev/null +++ b/check-plugins/better-ehr-health/unit-test/fhir_down.json @@ -0,0 +1,44 @@ +{ + "response_json": { + "components": { + "db": { + "details": { + "database": "Oracle", + "validationQuery": "isValid()" + }, + "status": "UP" + }, + "diskSpace": { + "details": { + "exists": true, + "free": 34581540864, + "threshold": 10485760, + "total": 61041709056 + }, + "status": "UP" + }, + "hikariConnectionPool": { + "details": { + "activeConnections": 0, + "maxPoolSize": 40 + }, + "status": "UP" + }, + "indexStatus": { + "status": "GREEN" + }, + "indexSynchronization": { + "details": { + "erroredEntries": 0, + "queuedEntries": 0 + }, + "status": "GREEN" + }, + "ping": { + "status": "UP" + } + }, + "status": "DOWN" + }, + "status_code": 503 +} diff --git a/check-plugins/better-ehr-health/unit-test/fhir_up.json b/check-plugins/better-ehr-health/unit-test/fhir_up.json new file mode 100644 index 00000000..d41cf3e4 --- /dev/null +++ b/check-plugins/better-ehr-health/unit-test/fhir_up.json @@ -0,0 +1,44 @@ +{ + "response_json": { + "components": { + "db": { + "details": { + "database": "Oracle", + "validationQuery": "isValid()" + }, + "status": "UP" + }, + "diskSpace": { + "details": { + "exists": true, + "free": 34581540864, + "threshold": 10485760, + "total": 61041709056 + }, + "status": "UP" + }, + "hikariConnectionPool": { + "details": { + "activeConnections": 0, + "maxPoolSize": 40 + }, + "status": "UP" + }, + "indexStatus": { + "status": "GREEN" + }, + "indexSynchronization": { + "details": { + "erroredEntries": 0, + "queuedEntries": 0 + }, + "status": "GREEN" + }, + "ping": { + "status": "UP" + } + }, + "status": 
"UP" + }, + "status_code": 200 +} diff --git a/check-plugins/better-ehr-health/unit-test/run b/check-plugins/better-ehr-health/unit-test/run new file mode 100755 index 00000000..6dd4af52 --- /dev/null +++ b/check-plugins/better-ehr-health/unit-test/run @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8; py-indent-offset: 4 -*- +# +# Author: Linuxfabrik GmbH, Zurich, Switzerland +# Contact: info (at) linuxfabrik (dot) ch +# https://www.linuxfabrik.ch/ +# License: The Unlicense, see LICENSE file. + +# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md + +import sys +import unittest + +sys.path.append("..") # Adds higher directory to python modules path. + +from lib.globals import STATE_OK, STATE_UNKNOWN, STATE_WARN, STATE_CRIT +import lib.base +import lib.shell + + +class TestCheck(unittest.TestCase): + + check = '../better-ehr-health' # path to your plugin + + def run_check(self, args): + """Helper to run the check with args and return stdout, stderr, retc.""" + cmd = f"{self.check} {args}" + return lib.base.coe(lib.shell.shell_exec(cmd)) + + def test_fhir_up(self): + stdout, stderr, retc = self.run_check("--test fhir_up.json") + self.assertEqual(retc, STATE_OK, msg=stdout) + self.assertIn("API Status: UP", stdout) + self.assertEqual(stderr, "") + + def test_fhir_down(self): + stdout, stderr, retc = self.run_check("--test fhir_down.json") + self.assertEqual(retc, STATE_CRIT, msg=stdout) + self.assertIn("API Status: DOWN", stdout) + self.assertEqual(stderr, "") + + def test_fhir_degraded(self): + stdout, stderr, retc = self.run_check("--test fhir_degraded.json") + # degraded → WARN via HTTP 503 + self.assertEqual(retc, STATE_WARN, msg=stdout) + self.assertIn("API Status: DEGRADED", stdout) + self.assertEqual(stderr, "") + + def test_override_status(self): + stdout, stderr, retc = self.run_check( + "--override-status indexStatus:GREEN:WARN " + "--test fhir_up.json" + ) + # GREEN overridden → WARN + self.assertEqual(retc, STATE_WARN, 
msg=stdout) + self.assertIn("overridden", stdout) + self.assertEqual(stderr, "") + + def test_override_threshold(self): + stdout, stderr, retc = self.run_check( + "--override-threshold diskSpace:free:20:2000000000000000000000 " + "--test fhir_up.json" + ) + # depending on JSON values, expect WARN or CRIT, but at least not UNKNOWN + self.assertIn(retc, [STATE_OK, STATE_WARN, STATE_CRIT], msg=stdout) + self.assertIn("diskSpace", stdout) + self.assertEqual(stderr, "") + + +if __name__ == '__main__': + unittest.main() diff --git a/check-plugins/better-ehr-health/unit-test/stdout/EXAMPLE01 b/check-plugins/better-ehr-health/unit-test/stdout/EXAMPLE01 new file mode 100644 index 00000000..0d6cf128 --- /dev/null +++ b/check-plugins/better-ehr-health/unit-test/stdout/EXAMPLE01 @@ -0,0 +1 @@ +There are critical errors. \ No newline at end of file From 98a04cb9d784f6555d9a512adf3d72fba9b8b5f4 Mon Sep 17 00:00:00 2001 From: Dominik Riva Date: Fri, 5 Sep 2025 10:24:52 +0200 Subject: [PATCH 06/10] add icon --- .../better-ehr-health/icon/better-ehr-health.svg | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 check-plugins/better-ehr-health/icon/better-ehr-health.svg diff --git a/check-plugins/better-ehr-health/icon/better-ehr-health.svg b/check-plugins/better-ehr-health/icon/better-ehr-health.svg new file mode 100644 index 00000000..81210308 --- /dev/null +++ b/check-plugins/better-ehr-health/icon/better-ehr-health.svg @@ -0,0 +1,11 @@ + + + + From 15a0dce56954c1d6a8fe407c762592b315a51b6c Mon Sep 17 00:00:00 2001 From: Dominik Riva Date: Fri, 5 Sep 2025 20:01:47 +0200 Subject: [PATCH 07/10] add isort changes --- check-plugins/better-ehr-health/better-ehr-health | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/check-plugins/better-ehr-health/better-ehr-health b/check-plugins/better-ehr-health/better-ehr-health index ffb1604c..55ccca6f 100755 --- a/check-plugins/better-ehr-health/better-ehr-health +++ 
b/check-plugins/better-ehr-health/better-ehr-health @@ -17,11 +17,11 @@ import sys # pylint: disable=C0413 import lib.args # pylint: disable=C0413 import lib.base # pylint: disable=C0413 -import lib.url # pylint: disable=C0413 import lib.lftest # pylint: disable=C0413 import lib.time # pylint: disable=C0413 +import lib.url # pylint: disable=C0413 from lib.globals import (STATE_CRIT, STATE_OK, # pylint: disable=C0413 - STATE_UNKNOWN, STATE_WARN) + STATE_UNKNOWN, STATE_WARN) try: import psutil # pylint: disable=C0413 From 7ed1ce060e10a02e2a17d5f45f89d02b7371d6fe Mon Sep 17 00:00:00 2001 From: Dominik Riva Date: Fri, 5 Sep 2025 20:24:53 +0200 Subject: [PATCH 08/10] update readme --- check-plugins/better-ehr-health/README.md | 45 +++++++++++++++++++---- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/check-plugins/better-ehr-health/README.md b/check-plugins/better-ehr-health/README.md index c3a5dd75..69205141 100644 --- a/check-plugins/better-ehr-health/README.md +++ b/check-plugins/better-ehr-health/README.md @@ -22,7 +22,8 @@ Hints: ```text usage: better-ehr-health [-h] [-V] [--always-ok] [--insecure] [--no-proxy] - [--url URL] [--test TEST] [--timeout TIMEOUT] + [--url URL] [--test TEST] [--record-json FILE] + [--timeout TIMEOUT] [--override-status COMPONENT:API_STATE:NAGIOS_STATE] [--override-threshold COMPONENT:DETAIL[:WARN[:CRIT]]] [-v] @@ -35,19 +36,24 @@ options: -h, --help show this help message and exit -V, --version show program's version number and exit --always-ok Always returns OK. - --insecure Allow insecure SSL connections. Default: True + --insecure This option explicitly allows to perform "insecure" + SSL connections. Default: True --no-proxy Do not use a proxy. Default: False - --url URL Better EHR Health endpoint. Default: + --url URL Better EHR Health Endpoint. Default: http://localhost:80/health - --test TEST For unit tests. Needs "path-to-stdout-file,path-to- - stderr-file,expected-retc". 
- --timeout TIMEOUT Network timeout in seconds. Default: 3 + --test TEST For unit tests. Provide a path to a JSON file + containing a captured API response. + --record-json FILE Write the full fetched JSON (including status_code and + response_json) to the given file. + --timeout TIMEOUT Network timeout in seconds. Default: 3 (seconds) --override-status COMPONENT:API_STATE:NAGIOS_STATE Override mapping from API state to Nagios state. - Example: diskSpace:DEGRADED:WARN + Format: component:api_state:nagios_state Example: + diskSpace:DEGRADED:WARN --override-threshold COMPONENT:DETAIL[:WARN[:CRIT]] Override threshold check for a component detail. - Example: diskSpace:free::20000000 + Nagios format: component:detail[:warn[:crit]] Example: + diskSpace:free::20000000 -v, --verbose Set the verbosity level. ``` @@ -65,6 +71,29 @@ options: ./better-ehr-health --override-threshold diskSpace:free::20000000 ``` +### Output: +```text +[WARNING] overridden from API Status: UP +Component ! Status ! Details +---------------------+--------------------+-------------------------- +db ! UP ! + ! ! database=Oracle + ! ! validationQuery=isValid() +diskSpace ! UP ! + ! ! total=61041709056 + ! [WARNING] ! free=35045371904 + ! ! threshold=10485760 + ! ! exists=True +hikariConnectionPool ! UP ! + ! ! activeConnections=0 + ! ! maxPoolSize=40 +indexStatus ! GREEN -> [WARNING] ! +indexSynchronization ! GREEN ! + ! ! queuedEntries=0 + ! ! erroredEntries=0 +ping ! UP !|'diskSpace_free'=35045371904;20;2000000000000000000000;61041709056; 'diskSpace_exists'=1;;;; 'hikariConnectionPool_activeConnections'=0;;;; 'hikariConnectionPool_maxPoolSize'=40;;;; 'indexSynchronization_queuedEntries'=0;;;; 'indexSynchronization_erroredEntries'=0;;;; +``` + ## States * Returns **OK**, **WARN**, **CRIT**, or **UNKNOWN** depending on API state, component overrides, and threshold checks. 
From cf1cdc2ded8169e5e5900ec802a939baa900d11b Mon Sep 17 00:00:00 2001 From: Dominik Riva Date: Fri, 5 Sep 2025 20:27:01 +0200 Subject: [PATCH 09/10] remove not needed example01 --- check-plugins/better-ehr-health/unit-test/stdout/EXAMPLE01 | 1 - 1 file changed, 1 deletion(-) delete mode 100644 check-plugins/better-ehr-health/unit-test/stdout/EXAMPLE01 diff --git a/check-plugins/better-ehr-health/unit-test/stdout/EXAMPLE01 b/check-plugins/better-ehr-health/unit-test/stdout/EXAMPLE01 deleted file mode 100644 index 0d6cf128..00000000 --- a/check-plugins/better-ehr-health/unit-test/stdout/EXAMPLE01 +++ /dev/null @@ -1 +0,0 @@ -There are critical errors. \ No newline at end of file From f80dc2194523e91eb5c4f71895158cc7721f3aa7 Mon Sep 17 00:00:00 2001 From: Dominik Riva Date: Mon, 8 Sep 2025 15:54:10 +0200 Subject: [PATCH 10/10] Add director basket --- .../better-ehr-health.json | 204 ++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 check-plugins/better-ehr-health/icingaweb2-module-director/better-ehr-health.json diff --git a/check-plugins/better-ehr-health/icingaweb2-module-director/better-ehr-health.json b/check-plugins/better-ehr-health/icingaweb2-module-director/better-ehr-health.json new file mode 100644 index 00000000..a8fe690c --- /dev/null +++ b/check-plugins/better-ehr-health/icingaweb2-module-director/better-ehr-health.json @@ -0,0 +1,204 @@ +{ + "Command": { + "cmd-check-better-ehr-health": { + "arguments": { + "--always-ok": { + "set_if": "$better_ehr_health_always_ok$" + }, + "--insecure": { + "set_if": "$better_ehr_health_insecure$" + }, + "--no-proxy": { + "set_if": "$better_ehr_health_no_proxy$" + }, + "--url": { + "value": "$better_ehr_health_url$" + }, + "--timeout": { + "value": "$better_ehr_health_timeout$" + }, + "--override-status": { + "value": "$better_ehr_health_override_status$", + "repeat_key": true + }, + "--override-threshold": { + "value": "$better_ehr_health_override_threshold$", + "repeat_key": true + } + }, + 
"command": "/usr/lib64/nagios/plugins/better-ehr-health", + "disabled": false, + "fields": [ + { + "datafield_id": 1, + "is_required": "n", + "var_filter": null + }, + { + "datafield_id": 2, + "is_required": "n", + "var_filter": null + }, + { + "datafield_id": 3, + "is_required": "n", + "var_filter": null + }, + { + "datafield_id": 4, + "is_required": "n", + "var_filter": null + }, + { + "datafield_id": 5, + "is_required": "n", + "var_filter": null + }, + { + "datafield_id": 6, + "is_required": "n", + "var_filter": null + }, + { + "datafield_id": 7, + "is_required": "n", + "var_filter": null + } + ], + "imports": [], + "is_string": null, + "methods_execute": "PluginCheck", + "object_name": "cmd-check-better-ehr-health", + "object_type": "object", + "timeout": "10", + "vars": {}, + "zone": null, + "uuid": "b3e6cb57-0f39-4e02-a0e6-fc61d26a69c1" + } + }, + "ServiceTemplate": { + "tpl-service-better-ehr-health": { + "action_url": null, + "apply_for": null, + "assign_filter": null, + "check_command": "cmd-check-better-ehr-health", + "check_interval": 60, + "check_period": null, + "check_timeout": null, + "command_endpoint": null, + "disabled": false, + "display_name": null, + "enable_active_checks": null, + "enable_event_handler": null, + "enable_flapping": null, + "enable_notifications": true, + "enable_passive_checks": null, + "enable_perfdata": null, + "event_command": null, + "fields": [], + "flapping_threshold_high": null, + "flapping_threshold_low": null, + "groups": [], + "host": null, + "icon_image": "better-ehr-health.svg", + "icon_image_alt": null, + "imports": [ + "tpl-service-generic" + ], + "max_check_attempts": 5, + "notes": "Monitoring plugin for Better EHR, querying the JSON health endpoint (e.g. http://server:port/health). 
Supports overriding component states and applying Nagios-style threshold ranges to detail metrics.", + "notes_url": "https://github.com/Linuxfabrik/monitoring-plugins/tree/main/check-plugins/better-ehr-health", + "object_name": "tpl-service-better-ehr-health", + "object_type": "template", + "retry_interval": 15, + "service_set": null, + "template_choice": null, + "use_agent": null, + "use_var_overrides": null, + "vars": { + "criticality": "C", + "better_ehr_health_always_ok": false, + "better_ehr_health_insecure": true, + "better_ehr_health_no_proxy": false, + "better_ehr_health_url": "http://localhost:80/health", + "better_ehr_health_timeout": 3 + }, + "volatile": null, + "zone": null, + "uuid": "b00c65c3-dc7f-47d9-af3c-250a2536993a" + } + }, + "Datafield": { + "1": { + "varname": "better_ehr_health_always_ok", + "caption": "Better EHR Health: Always OK?", + "description": "Always returns OK.", + "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeBoolean", + "format": null, + "settings": {}, + "uuid": "c0f015d4-9611-43b9-94be-a54281919225" + }, + "2": { + "varname": "better_ehr_health_insecure", + "caption": "Better EHR Health: Insecure?", + "description": "This option explicitly allows to perform \"insecure\" SSL connections.", + "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeBoolean", + "format": null, + "settings": {}, + "uuid": "7c8acd11-2987-46d9-8b84-3a113e5b09ec" + }, + "3": { + "varname": "better_ehr_health_no_proxy", + "caption": "Better EHR Health: No Proxy?", + "description": "Do not use a proxy.", + "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeBoolean", + "format": null, + "settings": {}, + "uuid": "fd1a04f8-5a63-476f-9ca7-c07dcc755087" + }, + "4": { + "varname": "better_ehr_health_url", + "caption": "Better EHR Health: URL", + "description": "Better EHR Health Endpoint.", + "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeString", + "format": null, + "settings": { + "visibility": "visible" + }, + "uuid": 
"24f6ab65-8de1-4905-b641-bd7addda40d0" + }, + "5": { + "varname": "better_ehr_health_timeout", + "caption": "Better EHR Health: Timeout", + "description": "Network timeout in seconds.", + "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeString", + "format": null, + "settings": { + "visibility": "visible" + }, + "uuid": "cee2e3b2-0964-4ece-9845-f7f3442b7bef" + }, + "6": { + "varname": "better_ehr_health_override_status", + "caption": "Better EHR Health: Override Status", + "description": "Override mapping from API state to Nagios state. Format: component:api_state:nagios_state Example: diskSpace:DEGRADED:WARN", + "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeArray", + "format": null, + "settings": { + "visibility": "visible" + }, + "uuid": "6f8326de-63b1-4fb9-ad3c-7d62782953b3" + }, + "7": { + "varname": "better_ehr_health_override_threshold", + "caption": "Better EHR Health: Override Threshold", + "description": "Override threshold check for a component detail. Nagios format: component:detail[:warn[:crit]] Example: diskSpace:free::20000000", + "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeArray", + "format": null, + "settings": { + "visibility": "visible" + }, + "uuid": "1e91953d-eca6-46df-8784-5d5cd13015ac" + } + } +}