From e360d9fd90877197c019b48ef6249d93bca767a4 Mon Sep 17 00:00:00 2001 From: Massimiliano Pippi Date: Wed, 27 Jul 2016 18:48:20 +0200 Subject: [PATCH] better health check --- checks.d/rabbitmq.py | 72 +++++++++++++++--------------- tests/checks/mock/test_rabbitmq.py | 62 +++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 36 deletions(-) create mode 100644 tests/checks/mock/test_rabbitmq.py diff --git a/checks.d/rabbitmq.py b/checks.d/rabbitmq.py index 60128804f0..1f16264a52 100644 --- a/checks.d/rabbitmq.py +++ b/checks.d/rabbitmq.py @@ -11,6 +11,7 @@ # 3p import requests +from requests.exceptions import RequestException # project from checks import AgentCheck @@ -89,6 +90,10 @@ } +class RabbitMQException(Exception): + pass + + class RabbitMQ(AgentCheck): """This check is for gathering statistics from the RabbitMQ @@ -139,29 +144,33 @@ def _get_config(self, instance): def check(self, instance): base_url, max_detailed, specified, auth = self._get_config(instance) + try: + # Generate metrics from the status API. + self.get_stats(instance, base_url, QUEUE_TYPE, max_detailed[QUEUE_TYPE], specified[QUEUE_TYPE], auth=auth) + self.get_stats(instance, base_url, NODE_TYPE, max_detailed[NODE_TYPE], specified[NODE_TYPE], auth=auth) + + # Generate a service check from the aliveness API. In the case of an invalid response + # code or unparseable JSON this check will send no data. + vhosts = instance.get('vhosts') + self._check_aliveness(base_url, vhosts, auth=auth) - # Generate metrics from the status API. - self.get_stats(instance, base_url, QUEUE_TYPE, max_detailed[ - QUEUE_TYPE], specified[QUEUE_TYPE], auth=auth) - self.get_stats(instance, base_url, NODE_TYPE, max_detailed[ - NODE_TYPE], specified[NODE_TYPE], auth=auth) + # Generate a service check for the service status. + self.service_check('rabbitmq.status', AgentCheck.OK) - # Generate a service check from the aliveness API. - vhosts = instance.get('vhosts') - self._check_aliveness(base_url, vhosts, auth=auth) + except RabbitMQException as e: + msg = "Error executing check: {}".format(e) + self.service_check('rabbitmq.status', AgentCheck.CRITICAL, message=msg) + self.log.error(msg) def _get_data(self, url, auth=None): try: r = requests.get(url, auth=auth) r.raise_for_status() - data = r.json() - except requests.exceptions.HTTPError as e: - raise Exception( - 'Cannot open RabbitMQ API url: %s %s' % (url, str(e))) + return r.json() + except RequestException as e: + raise RabbitMQException('Cannot open RabbitMQ API url: {} {}'.format(url, str(e))) except ValueError as e: - raise Exception( - 'Cannot parse JSON response from API url: %s %s' % (url, str(e))) - return data + raise RabbitMQException('Cannot parse JSON response from API url: {} {}'.format(url, str(e))) def get_stats(self, instance, base_url, object_type, max_detailed, filters, auth=None): """ @@ -171,9 +180,8 @@ def get_stats(self, instance, base_url, object_type, max_detailed, filters, auth max_detailed: the limit of objects to collect for this type filters: explicit or regexes filters of specified queues or nodes (specified in the yaml file) """ + data = self._get_data(urlparse.urljoin(base_url, object_type), auth=auth) - data = self._get_data( - urlparse.urljoin(base_url, object_type), auth=auth) # Make a copy of this list as we will remove items from it at each # iteration explicit_filters = list(filters['explicit']) @@ -304,11 +312,10 @@ def alert(self, base_url, max_detailed, size, object_type): self.event(event) def _check_aliveness(self, base_url, vhosts=None, auth=None): - """ Check the aliveness API against all or a subset of vhosts. The API - will return {"status": "ok"} and a 200 response code in the case - that the check passes. - In the case of an invalid response code or unparseable JSON the - service check will be CRITICAL. + """ + Check the aliveness API against all or a subset of vhosts. The API + will return {"status": "ok"} and a 200 response code in the case + that the check passes. """ if not vhosts: # Fetch a list of _all_ vhosts from the API. @@ -321,19 +328,12 @@ def _check_aliveness(self, base_url, vhosts=None, auth=None): # We need to urlencode the vhost because it can be '/'. path = u'aliveness-test/%s' % (urllib.quote_plus(vhost)) aliveness_url = urlparse.urljoin(base_url, path) - message = None - try: - aliveness_response = self._get_data(aliveness_url, auth=auth) - message = u"Response from aliveness API: %s" % aliveness_response - if aliveness_response.get('status') == 'ok': - status = AgentCheck.OK - else: - status = AgentCheck.CRITICAL - except Exception as e: - # Either we got a bad status code or unparseable JSON. + aliveness_response = self._get_data(aliveness_url, auth=auth) + message = u"Response from aliveness API: %s" % aliveness_response + + if aliveness_response.get('status') == 'ok': + status = AgentCheck.OK + else: status = AgentCheck.CRITICAL - self.warning('Error when checking aliveness for vhost %s: %s' - % (vhost, str(e))) - self.service_check( - 'rabbitmq.aliveness', status, tags, message=message) + self.service_check('rabbitmq.aliveness', status, tags, message=message) diff --git a/tests/checks/mock/test_rabbitmq.py b/tests/checks/mock/test_rabbitmq.py new file mode 100644 index 0000000000..f4d6470347 --- /dev/null +++ b/tests/checks/mock/test_rabbitmq.py @@ -0,0 +1,62 @@ +import sys + +import mock +import requests + +from checks import AgentCheck +from tests.checks.common import AgentCheckTest, get_checksd_path, get_os + + +class TestRabbitMQ(AgentCheckTest): + + CHECK_NAME = 'rabbitmq' + + @classmethod + def setUpClass(cls): + sys.path.append(get_checksd_path(get_os())) + + @classmethod + def tearDownClass(cls): + sys.path.pop() + + def test__get_data(self): + with mock.patch('rabbitmq.requests') as r: + from rabbitmq import RabbitMQ, RabbitMQException # pylint: disable=import-error + check = RabbitMQ('rabbitmq', {}, {"instances": [{"rabbitmq_api_url": "http://example.com"}]}) + r.get.side_effect = [requests.exceptions.HTTPError, ValueError] + self.assertRaises(RabbitMQException, check._get_data, '') + self.assertRaises(RabbitMQException, check._get_data, '') + + def test_status_check(self): + self.run_check({"instances": [{"rabbitmq_api_url": "http://example.com"}]}) + self.assertEqual(len(self.service_checks), 1) + sc = self.service_checks[0] + self.assertEqual(sc['check'], 'rabbitmq.status') + self.assertEqual(sc['status'], AgentCheck.CRITICAL) + + self.check._get_data = mock.MagicMock() + self.run_check({"instances": [{"rabbitmq_api_url": "http://example.com"}]}) + self.assertEqual(len(self.service_checks), 1) + sc = self.service_checks[0] + self.assertEqual(sc['check'], 'rabbitmq.status') + self.assertEqual(sc['status'], AgentCheck.OK) + + def test__check_aliveness(self): + self.load_check({"instances": [{"rabbitmq_api_url": "http://example.com"}]}) + self.check._get_data = mock.MagicMock() + + # only one vhost should be OK + self.check._get_data.side_effect = [{"status": "ok"}, {}] + self.check._check_aliveness('', vhosts=['foo', 'bar']) + sc = self.check.get_service_checks() + + self.assertEqual(len(sc), 2) + self.assertEqual(sc[0]['check'], 'rabbitmq.aliveness') + self.assertEqual(sc[0]['status'], AgentCheck.OK) + self.assertEqual(sc[1]['check'], 'rabbitmq.aliveness') + self.assertEqual(sc[1]['status'], AgentCheck.CRITICAL) + + # in case of connection errors, this check should stay silent + from rabbitmq import RabbitMQException # pylint: disable=import-error + self.check._get_data.side_effect = RabbitMQException + self.assertRaises(RabbitMQException, self.check._check_aliveness, '')