Skip to content

Commit

Permalink
better health check
Browse files Browse the repository at this point in the history
  • Loading branch information
Massimiliano Pippi committed Jul 28, 2016
1 parent 7680e4b commit e360d9f
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 36 deletions.
72 changes: 36 additions & 36 deletions checks.d/rabbitmq.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

# 3p
import requests
from requests.exceptions import RequestException

# project
from checks import AgentCheck
Expand Down Expand Up @@ -89,6 +90,10 @@
}


class RabbitMQException(Exception):
pass


class RabbitMQ(AgentCheck):

"""This check is for gathering statistics from the RabbitMQ
Expand Down Expand Up @@ -139,29 +144,33 @@ def _get_config(self, instance):

def check(self, instance):
base_url, max_detailed, specified, auth = self._get_config(instance)
try:
# Generate metrics from the status API.
self.get_stats(instance, base_url, QUEUE_TYPE, max_detailed[QUEUE_TYPE], specified[QUEUE_TYPE], auth=auth)
self.get_stats(instance, base_url, NODE_TYPE, max_detailed[NODE_TYPE], specified[NODE_TYPE], auth=auth)

# Generate a service check from the aliveness API. In the case of an invalid response
# code or unparseable JSON this check will send no data.
vhosts = instance.get('vhosts')
self._check_aliveness(base_url, vhosts, auth=auth)

# Generate metrics from the status API.
self.get_stats(instance, base_url, QUEUE_TYPE, max_detailed[
QUEUE_TYPE], specified[QUEUE_TYPE], auth=auth)
self.get_stats(instance, base_url, NODE_TYPE, max_detailed[
NODE_TYPE], specified[NODE_TYPE], auth=auth)
# Generate a service check for the service status.
self.service_check('rabbitmq.status', AgentCheck.OK)

# Generate a service check from the aliveness API.
vhosts = instance.get('vhosts')
self._check_aliveness(base_url, vhosts, auth=auth)
except RabbitMQException as e:
msg = "Error executing check: {}".format(e)
self.service_check('rabbitmq.status', AgentCheck.CRITICAL, message=msg)
self.log.error(msg)

def _get_data(self, url, auth=None):
try:
r = requests.get(url, auth=auth)
r.raise_for_status()
data = r.json()
except requests.exceptions.HTTPError as e:
raise Exception(
'Cannot open RabbitMQ API url: %s %s' % (url, str(e)))
return r.json()
except RequestException as e:
raise RabbitMQException('Cannot open RabbitMQ API url: {} {}'.format(url, str(e)))
except ValueError as e:
raise Exception(
'Cannot parse JSON response from API url: %s %s' % (url, str(e)))
return data
raise RabbitMQException('Cannot parse JSON response from API url: {} {}'.format(url, str(e)))

def get_stats(self, instance, base_url, object_type, max_detailed, filters, auth=None):
"""
Expand All @@ -171,9 +180,8 @@ def get_stats(self, instance, base_url, object_type, max_detailed, filters, auth
max_detailed: the limit of objects to collect for this type
filters: explicit or regexes filters of specified queues or nodes (specified in the yaml file)
"""
data = self._get_data(urlparse.urljoin(base_url, object_type), auth=auth)

data = self._get_data(
urlparse.urljoin(base_url, object_type), auth=auth)
# Make a copy of this list as we will remove items from it at each
# iteration
explicit_filters = list(filters['explicit'])
Expand Down Expand Up @@ -304,11 +312,10 @@ def alert(self, base_url, max_detailed, size, object_type):
self.event(event)

def _check_aliveness(self, base_url, vhosts=None, auth=None):
""" Check the aliveness API against all or a subset of vhosts. The API
will return {"status": "ok"} and a 200 response code in the case
that the check passes.
In the case of an invalid response code or unparseable JSON the
service check will be CRITICAL.
"""
Check the aliveness API against all or a subset of vhosts. The API
will return {"status": "ok"} and a 200 response code in the case
that the check passes.
"""
if not vhosts:
# Fetch a list of _all_ vhosts from the API.
Expand All @@ -321,19 +328,12 @@ def _check_aliveness(self, base_url, vhosts=None, auth=None):
# We need to urlencode the vhost because it can be '/'.
path = u'aliveness-test/%s' % (urllib.quote_plus(vhost))
aliveness_url = urlparse.urljoin(base_url, path)
message = None
try:
aliveness_response = self._get_data(aliveness_url, auth=auth)
message = u"Response from aliveness API: %s" % aliveness_response
if aliveness_response.get('status') == 'ok':
status = AgentCheck.OK
else:
status = AgentCheck.CRITICAL
except Exception as e:
# Either we got a bad status code or unparseable JSON.
aliveness_response = self._get_data(aliveness_url, auth=auth)
message = u"Response from aliveness API: %s" % aliveness_response

if aliveness_response.get('status') == 'ok':
status = AgentCheck.OK
else:
status = AgentCheck.CRITICAL
self.warning('Error when checking aliveness for vhost %s: %s'
% (vhost, str(e)))

self.service_check(
'rabbitmq.aliveness', status, tags, message=message)
self.service_check('rabbitmq.aliveness', status, tags, message=message)
62 changes: 62 additions & 0 deletions tests/checks/mock/test_rabbitmq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import sys

import mock
import requests

from checks import AgentCheck
from tests.checks.common import AgentCheckTest, get_checksd_path, get_os


class TestRabbitMQ(AgentCheckTest):

CHECK_NAME = 'rabbitmq'

@classmethod
def setUpClass(cls):
sys.path.append(get_checksd_path(get_os()))

@classmethod
def tearDownClass(cls):
sys.path.pop()

def test__get_data(self):
with mock.patch('rabbitmq.requests') as r:
from rabbitmq import RabbitMQ, RabbitMQException # pylint: disable=import-error
check = RabbitMQ('rabbitmq', {}, {"instances": [{"rabbitmq_api_url": "http://example.com"}]})
r.get.side_effect = [requests.exceptions.HTTPError, ValueError]
self.assertRaises(RabbitMQException, check._get_data, '')
self.assertRaises(RabbitMQException, check._get_data, '')

def test_status_check(self):
self.run_check({"instances": [{"rabbitmq_api_url": "http://example.com"}]})
self.assertEqual(len(self.service_checks), 1)
sc = self.service_checks[0]
self.assertEqual(sc['check'], 'rabbitmq.status')
self.assertEqual(sc['status'], AgentCheck.CRITICAL)

self.check._get_data = mock.MagicMock()
self.run_check({"instances": [{"rabbitmq_api_url": "http://example.com"}]})
self.assertEqual(len(self.service_checks), 1)
sc = self.service_checks[0]
self.assertEqual(sc['check'], 'rabbitmq.status')
self.assertEqual(sc['status'], AgentCheck.OK)

def test__check_aliveness(self):
self.load_check({"instances": [{"rabbitmq_api_url": "http://example.com"}]})
self.check._get_data = mock.MagicMock()

# only one vhost should be OK
self.check._get_data.side_effect = [{"status": "ok"}, {}]
self.check._check_aliveness('', vhosts=['foo', 'bar'])
sc = self.check.get_service_checks()

self.assertEqual(len(sc), 2)
self.assertEqual(sc[0]['check'], 'rabbitmq.aliveness')
self.assertEqual(sc[0]['status'], AgentCheck.OK)
self.assertEqual(sc[1]['check'], 'rabbitmq.aliveness')
self.assertEqual(sc[1]['status'], AgentCheck.CRITICAL)

# in case of connection errors, this check should stay silent
from rabbitmq import RabbitMQException # pylint: disable=import-error
self.check._get_data.side_effect = RabbitMQException
self.assertRaises(RabbitMQException, self.check._check_aliveness, '')

0 comments on commit e360d9f

Please sign in to comment.