-
Notifications
You must be signed in to change notification settings - Fork 816
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
The service check uses the `varnishadm debug.health` command to get the health of available backends. One check per backend will be submitted, tagged by the backend name. The BIG question here is how we will let users enable this because the varnishadm command requires access to the secret key which has root permissions by default (and is owned by root).
- Loading branch information
1 parent
27d986b
commit e28661b
Showing
3 changed files
with
193 additions
and
67 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,29 @@ | ||
# stdlib | ||
import xml.parsers.expat # python 2.4 compatible | ||
from collections import defaultdict | ||
import re | ||
import subprocess | ||
import xml.parsers.expat # python 2.4 compatible | ||
|
||
# project | ||
from checks import AgentCheck | ||
|
||
|
||
class BackendStatus(object):
    """Backend health state names and their mapping to AgentCheck statuses."""

    HEALTHY = 'healthy'
    SICK = 'sick'
    ALL = (HEALTHY, SICK)

    @classmethod
    def to_check_status(cls, status):
        """Translate a backend status string into an AgentCheck status.

        'sick' gives AgentCheck.CRITICAL, 'healthy' gives AgentCheck.OK and
        any other value gives AgentCheck.UNKNOWN.
        """
        if status == cls.SICK:
            return AgentCheck.CRITICAL
        if status == cls.HEALTHY:
            return AgentCheck.OK
        return AgentCheck.UNKNOWN
|
||
class Varnish(AgentCheck): | ||
SERVICE_CHECK_NAME = 'varnish.backend_healthy' | ||
|
||
# XML parsing bits, a.k.a. Kafka in Code | ||
def _reset(self): | ||
self._current_element = "" | ||
|
@@ -47,39 +64,6 @@ def _char_data(self, data): | |
self._current_str = data | ||
|
||
def check(self, instance): | ||
"""Extract stats from varnishstat -x | ||
The text option (-1) is not reliable enough when counters get large. | ||
VBE.media_video_prd_services_01(10.93.67.16,,8080).happy18446744073709551615 | ||
2 types of data, "a" for counter ("c" in newer versions of varnish), "i" for gauge ("g") | ||
https://github.com/varnish/Varnish-Cache/blob/master/include/tbl/vsc_fields.h | ||
Bitmaps are not supported. | ||
<varnishstat> | ||
<stat> | ||
<name>fetch_304</name> | ||
<value>0</value> | ||
<flag>a</flag> | ||
<description>Fetch no body (304)</description> | ||
</stat> | ||
<stat> | ||
<name>n_sess_mem</name> | ||
<value>334</value> | ||
<flag>i</flag> | ||
<description>N struct sess_mem</description> | ||
</stat> | ||
<stat> | ||
<type>LCK</type> | ||
<ident>vcl</ident> | ||
<name>creat</name> | ||
<value>1</value> | ||
<flag>a</flag> | ||
<description>Created locks</description> | ||
</stat> | ||
</varnishstat> | ||
""" | ||
# Not configured? Not a problem. | ||
if instance.get("varnishstat", None) is None: | ||
raise Exception("varnishstat is not configured") | ||
|
@@ -88,16 +72,47 @@ def check(self, instance): | |
tags = [] | ||
else: | ||
tags = list(set(tags)) | ||
varnishstat_path = instance.get("varnishstat") | ||
name = instance.get('name') | ||
|
||
# Get version and version-specific args from varnishstat -V. | ||
version, use_xml = self._get_version_info(varnishstat_path) | ||
|
||
# Parse metrics from varnishstat. | ||
arg = '-x' if use_xml else '-1' | ||
cmd = [varnishstat_path, arg] | ||
|
||
if name is not None: | ||
cmd.extend(['-n', name]) | ||
tags += [u'varnish_name:%s' % name] | ||
else: | ||
tags += [u'varnish_name:default'] | ||
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, | ||
stderr=subprocess.PIPE) | ||
output, error = proc.communicate() | ||
if error and len(error) > 0: | ||
self.log.error(error) | ||
self._parse_varnishstat_metrics(varnishstat_path, use_xml, tags) | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
conorbranagan
Author
Member
|
||
|
||
# Parse service checks from varnishadm. | ||
varnishadm_path = instance.get('varnishadm') | ||
if varnishadm_path: | ||
secretfile_path = instance.get('secretfile', '/etc/varnish/secret') | ||
varnishadm_path = 'sudo %s' % varnishadm_path | ||
cmd = [varnishadm_path, '-S', secretfile_path, 'debug.health'] | ||
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE) | ||
output, _ = proc.communicate() | ||
if output: | ||
self._parse_varnishadm(output) | ||
|
||
def _get_version_info(self, varnishstat_path): | ||
# Get the varnish version from varnishstat | ||
output, error = subprocess.Popen([instance.get("varnishstat"), "-V"], | ||
output, error = subprocess.Popen([varnishstat_path, "-V"], | ||
stdout=subprocess.PIPE, | ||
stderr=subprocess.PIPE).communicate() | ||
|
||
# Assumptions regarding varnish's version | ||
use_xml = True | ||
arg = "-x" # varnishstat argument | ||
version = 3 | ||
|
||
m1 = re.search(r"varnish-(\d+)", output, re.MULTILINE) | ||
|
@@ -118,26 +133,44 @@ def check(self, instance): | |
# Location of varnishstat | ||
if version <= 2: | ||
use_xml = False | ||
arg = "-1" | ||
|
||
cmd = [instance.get("varnishstat"), arg] | ||
if name is not None: | ||
cmd.extend(['-n', name]) | ||
tags += [u'varnish_name:%s' % name] | ||
else: | ||
tags += [u'varnish_name:default'] | ||
try: | ||
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, | ||
stderr=subprocess.PIPE) | ||
output, error = proc.communicate() | ||
except Exception: | ||
self.log.error(u"Failed to run %s" % repr(cmd)) | ||
raise | ||
if error and len(error) > 0: | ||
self.log.error(error) | ||
self._parse_varnishstat(output, use_xml, tags) | ||
return version, use_xml | ||
|
||
def _parse_varnishstat(self, output, use_xml, tags=None): | ||
"""Extract stats from varnishstat -x | ||
The text option (-1) is not reliable enough when counters get large. | ||
VBE.media_video_prd_services_01(10.93.67.16,,8080).happy18446744073709551615 | ||
2 types of data, "a" for counter ("c" in newer versions of varnish), "i" for gauge ("g") | ||
https://github.com/varnish/Varnish-Cache/blob/master/include/tbl/vsc_fields.h | ||
Bitmaps are not supported. | ||
Example XML output (with `use_xml=True`) | ||
<varnishstat> | ||
<stat> | ||
<name>fetch_304</name> | ||
<value>0</value> | ||
<flag>a</flag> | ||
<description>Fetch no body (304)</description> | ||
</stat> | ||
<stat> | ||
<name>n_sess_mem</name> | ||
<value>334</value> | ||
<flag>i</flag> | ||
<description>N struct sess_mem</description> | ||
</stat> | ||
<stat> | ||
<type>LCK</type> | ||
<ident>vcl</ident> | ||
<name>creat</name> | ||
<value>1</value> | ||
<flag>a</flag> | ||
<description>Created locks</description> | ||
</stat> | ||
</varnishstat> | ||
""" | ||
tags = tags or [] | ||
if use_xml: | ||
p = xml.parsers.expat.ParserCreate() | ||
|
@@ -165,4 +198,50 @@ def _parse_varnishstat(self, output, use_xml, tags=None): | |
self.log.debug("Varnish (rate) %s %d" % (metric_name, int(gauge_val))) | ||
self.rate(metric_name, float(gauge_val), tags=tags) | ||
|
||
|
||
def _parse_varnishadm(self, output):
    """Submit one backend-health service check per backend found in `output`.

    `output` is the text of `varnishadm debug.health`. Each
    "Backend <name> is <Status>" line opens a backend section; the following
    "Current states ..." line supplies the service check message. The
    lower-cased status word is mapped through
    BackendStatus.to_check_status and each check is tagged `backend:<name>`.

    Example output:

        Backend b0 is Sick
        Current states good: 2 threshold: 3 window: 5
        Average responsetime of good probes: 0.000000
        Oldest Newest
        ================================================================
        -------------------------------------------------------------444 Good IPv4
        -------------------------------------------------------------XXX Good Xmit
        -------------------------------------------------------------RRR Good Recv
        ----------------------------------------------------------HHH--- Happy
        Backend b1 is Sick
        Current states good: 2 threshold: 3 window: 5
        Average responsetime of good probes: 0.000000
        Oldest Newest
        ================================================================
        ----------------------------------------------------------HHH--- Happy
    """
    # Group (backend, message) pairs by the lower-cased status word.
    backends_by_status = defaultdict(list)
    backend, status = None, None
    for line in output.split("\n"):
        tokens = line.strip().split(' ')
        if tokens[0] == 'Backend':
            if len(tokens) < 2:
                # Malformed header: drop the current backend so the next
                # "Current" line is not attributed to the previous one.
                backend, status = None, None
                continue
            backend = tokens[1]
            # The status is the LAST word of "Backend <name> is <Status>";
            # tokens[1] is the backend name, not its state.
            status = tokens[-1].lower()
        elif tokens[0] == 'Current' and backend is not None:
            # Everything after "Current states" becomes the check message.
            message = ' '.join(tokens[2:]).strip()
            backends_by_status[status].append((backend, message))

    # .items() rather than .iteritems(): works on Python 2 and Python 3.
    for status, backends in backends_by_status.items():
        check_status = BackendStatus.to_check_status(status)
        for backend, message in backends:
            tags = ['backend:%s' % backend]
            self.service_check(self.SERVICE_CHECK_NAME, check_status,
                               tags=tags, message=message)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,28 @@ | ||
init_config: | ||
|
||
instances: | ||
# - varnishstat: (required) String path to varnishstat binary | ||
# name: (optional) String name of varnish instance. Passed to the -n parameter of varnishstat. Will also tag each metric with this name. | ||
# tags: (optional) Additional tags to tag each metric with | ||
# | ||
# Example: | ||
# | ||
- varnishstat: /usr/bin/varnishstat | ||
name: myvarnishinstance | ||
tags: | ||
- instance:production | ||
# The full path to the varnishstat binary | ||
# - varnishstat: /usr/bin/varnishstat | ||
|
||
# The (optional) name will be used in the varnishstat command for the | ||
# -n argument and will add a varnish_name:$instancename tag to all metrics. | ||
# name: myvarnishinstance | ||
|
||
# The (optional) list of tags will be applied to every emitted metric. | ||
# tags: | ||
# - instance:production | ||
|
||
# The (optional) path to the varnishadm binary will signal the check to | ||
# emit a service check status on backend health using `debug.health`. | ||
# The service check will be tagged by backend. | ||
# NOTE: The Agent must be able to run varnishadm with root | ||
# privileges. You can configure your sudoers file for this: | ||
# | ||
# example /etc/sudoers entry: | ||
# dd-agent ALL=(ALL) NOPASSWD:/usr/bin/varnishadm | ||
# | ||
# varnishadm: /usr/bin/varnishadm | ||
|
||
# The (optional) path to the varnish secretfile will be used in the | ||
# varnishadm command, if enabled. | ||
# secretfile: /etc/varnish/secret |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Warning: there is no "_parse_varnishstat_metrics" function. It is still called "_parse_varnishstat".