Tristan/agent checks ui #922

Merged
merged 12 commits on May 19, 2014
10 changes: 6 additions & 4 deletions checks.d/couch.py
@@ -7,14 +7,16 @@ class CouchDb(AgentCheck):
"""Extracts stats from CouchDB via its REST API
http://wiki.apache.org/couchdb/Runtime_Statistics
"""
SOURCE_TYPE_NAME = 'couchdb'

def _create_metric(self, data, tags=None):
overall_stats = data.get('stats', {})
for key, stats in overall_stats.items():
for metric, val in stats.items():
if val['current'] is not None:
metric_name = '.'.join(['couchdb', key, metric])
self.gauge(metric_name, val['current'], tags=tags)

for db_name, db_stats in data.get('databases', {}).items():
for name, val in db_stats.items():
if name in ['doc_count', 'disk_size'] and val is not None:
@@ -23,7 +25,7 @@ def _create_metric(self, data, tags=None):
metric_tags.append('db:%s' % db_name)
self.gauge(metric_name, val, tags=metric_tags, device_name=db_name)


def _get_stats(self, url):
"Hit a given URL and return the parsed json"
self.log.debug('Fetching Couchdb stats at url: %s' % url)
@@ -79,9 +81,9 @@ def parse_agent_config(agentConfig):
if not agentConfig.get('couchdb_server'):
return False


return {
'instances': [{
'server': agentConfig.get('couchdb_server'),
}]
}
}
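(For context: the pattern introduced across these files is a class-level SOURCE_TYPE_NAME on each check, with a None default added to the AgentCheck base class further down and a fallback to the check's name in the collector. A minimal sketch of a custom check following the same convention; the class name, source type string, and metric below are illustrative and not part of this PR.)

from checks import AgentCheck

class MyCustomCheck(AgentCheck):
    # Label reported in the agent_checks payload; if left as None,
    # the collector falls back to the check's name.
    SOURCE_TYPE_NAME = 'my custom source'

    def check(self, instance):
        # Illustrative metric submission only.
        self.gauge('my_custom.up', 1, tags=instance.get('tags', []))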
2 changes: 2 additions & 0 deletions checks.d/elastic.py
@@ -110,6 +110,8 @@ class ElasticSearch(AgentCheck):
"elasticsearch.cluster_status": ("gauge", "status", lambda v: {"red":0,"yellow":1,"green":2}.get(v, -1)),
}

SOURCE_TYPE_NAME = 'elasticsearch'

def __init__(self, name, init_config, agentConfig):
AgentCheck.__init__(self, name, init_config, agentConfig)

4 changes: 3 additions & 1 deletion checks.d/gearmand.py
@@ -2,6 +2,8 @@

class Gearman(AgentCheck):

SOURCE_TYPE_NAME = 'germand'
Contributor: Typo here


def get_library_versions(self):
try:
import gearman
@@ -28,7 +30,7 @@ def _get_metrics(self, client, tags):
running = 0
queued = 0
workers = 0

for stat in data:
running += stat['running']
queued += stat['queued']
3 changes: 3 additions & 0 deletions checks.d/kafka_consumer.py
@@ -21,6 +21,9 @@
import random

class KafkaCheck(AgentCheck):

SOURCE_TYPE_NAME = 'kafka'

def check(self, instance):
consumer_groups = self.read_config(instance, 'consumer_groups',
cast=self._validate_consumer_groups)
3 changes: 2 additions & 1 deletion checks.d/kyototycoon.py
@@ -14,6 +14,7 @@ class KyotoTycoonCheck(AgentCheck):
"""Report statistics about the Kyoto Tycoon DBM-style
database server (http://fallabs.com/kyototycoon/)
"""
SOURCE_TYPE_NAME = 'kyoto tycoon'

GAUGES = {
'repl_delay': 'replication.delay',
@@ -68,7 +69,7 @@ def check(self, instance):
if key in self.GAUGES:
name = self.GAUGES[key]
self.gauge('kyototycoon.%s' % name, float(value), tags=tags)

elif key in self.RATES:
name = self.RATES[key]
self.rate('kyototycoon.%s_per_s' % name, float(value), tags=tags)
25 changes: 14 additions & 11 deletions checks.d/mcache.py
@@ -9,34 +9,34 @@
# version string Version string of this server
# pointer_size 32 Default size of pointers on the host OS
# (generally 32 or 64)
# rusage_user 32u:32u Accumulated user time for this process
# rusage_user 32u:32u Accumulated user time for this process
# (seconds:microseconds)
# rusage_system 32u:32u Accumulated system time for this process
# rusage_system 32u:32u Accumulated system time for this process
# (seconds:microseconds)
# curr_items 32u Current number of items stored by the server
# total_items 32u Total number of items stored by this server
# total_items 32u Total number of items stored by this server
# ever since it started
# bytes 64u Current number of bytes used by this server
# bytes 64u Current number of bytes used by this server
# to store items
# curr_connections 32u Number of open connections
# total_connections 32u Total number of connections opened since
# total_connections 32u Total number of connections opened since
# the server started running
# connection_structures 32u Number of connection structures allocated
# connection_structures 32u Number of connection structures allocated
# by the server
# cmd_get 64u Cumulative number of retrieval requests
# cmd_set 64u Cumulative number of storage requests
# get_hits 64u Number of keys that have been requested and
# get_hits 64u Number of keys that have been requested and
# found present
# get_misses 64u Number of items that have been requested
# get_misses 64u Number of items that have been requested
# and not found
# evictions 64u Number of valid items removed from cache
# to free memory for new items
# bytes_read 64u Total number of bytes read by this server
# bytes_read 64u Total number of bytes read by this server
# from network
# bytes_written 64u Total number of bytes sent by this server to
# bytes_written 64u Total number of bytes sent by this server to
# network
# limit_maxbytes 32u Number of bytes this server is allowed to
# use for storage.
# use for storage.
# threads 32u Number of worker threads requested.
# (see doc/threads.txt)
# >>> mc.get_stats()
@@ -55,6 +55,9 @@
# https://github.com/membase/ep-engine/blob/master/docs/stats.org

class Memcache(AgentCheck):

SOURCE_TYPE_NAME = 'memcached'

DEFAULT_PORT = 11211

GAUGES = [
2 changes: 2 additions & 0 deletions checks.d/mongo.py
@@ -14,6 +14,8 @@

class MongoDb(AgentCheck):

SOURCE_TYPE_NAME = 'mongodb'

GAUGES = [
"indexCounters.btree.missRatio",
"globalLock.ratio",
4 changes: 2 additions & 2 deletions checks.d/network.py
@@ -88,7 +88,7 @@ def _submit_devicemetrics(self, iface, vals_by_metric):

# For reasons i don't understand only these metrics are skipped if a
# particular interface is in the `excluded_interfaces` config list.
# Not sure why the others aren't included. Until I understand why, I'm
# Not sure why the others aren't included. Until I understand why, I'm
# going to keep the same behaviour.
exclude_iface_metrics = [
'packets_in.count',
@@ -238,7 +238,7 @@ def _check_bsd(self, instance):
'bytes_rcvd': self._parse_value(x[-5]),
'bytes_sent': self._parse_value(x[-2]),
'packets_in.count': self._parse_value(x[-7]),
'packets_in.error': self._parse_value(x[-6]),
'packets_in.error': self._parse_value(x[-6]),
'packets_out.count': self._parse_value(x[-4]),
'packets_out.error':self._parse_value(x[-3]),
}
9 changes: 6 additions & 3 deletions checks.d/redisdb.py
@@ -8,6 +8,9 @@
class Redis(AgentCheck):
db_key_pattern = re.compile(r'^db\d+')
subkeys = ['keys', 'expires']

SOURCE_TYPE_NAME = 'redis'

GAUGE_KEYS = {
# Append-only metrics
'aof_last_rewrite_time_sec': 'redis.aof.last_rewrite_time',
@@ -106,7 +109,7 @@ def _get_conn(self, instance):
key = self._generate_instance_key(instance)
if key not in self.connections:
try:

# Only send useful parameters to the redis client constructor
list_params = ['host', 'port', 'db', 'password', 'socket_timeout',
'connection_pool', 'charset', 'errors', 'unix_socket_path']
@@ -139,12 +142,12 @@ def _check_db(self, instance, custom_tags=None):
try:
info = conn.info()
except ValueError, e:
# This is likely a know issue with redis library 2.0.0
# This is likely a know issue with redis library 2.0.0
# See https://github.com/DataDog/dd-agent/issues/374 for details
import redis
raise Exception("""Unable to run the info command. This is probably an issue with your version of the python-redis library.
Minimum required version: 2.4.11
Your current version: %s
Your current version: %s
Please upgrade to a newer version by running sudo easy_install redis""" % redis.__version__)

latency_ms = round((time.time() - start) * 1000, 2)
3 changes: 3 additions & 0 deletions checks.d/sqlserver.py
@@ -8,6 +8,9 @@
VALID_METRIC_TYPES = ('gauge', 'rate', 'histogram')

class SQLServer(AgentCheck):

SOURCE_TYPE_NAME = 'sql server'

METRICS = [
('sqlserver.buffer.cache_hit_ratio', 'gauge', 'Buffer cache hit ratio'),
('sqlserver.buffer.page_life_expectancy', 'gauge', 'Page life expectancy'),
2 changes: 1 addition & 1 deletion checks.d/wmi_check.py
@@ -29,7 +29,7 @@ def _get_wmi_conn(self, host, user, password):
def check(self, instance):
if wmi is None:
raise Exception("Missing 'wmi' module")

host = instance.get('host', None)
user = instance.get('username', None)
password = instance.get('password', None)
3 changes: 2 additions & 1 deletion checks/__init__.py
@@ -264,6 +264,8 @@ def get_metrics(self, expire=True):
class AgentCheck(object):
OK, WARNING, CRITICAL, UNKNOWN, NONE = (0, 1, 2, 3, 4)

SOURCE_TYPE_NAME = None

def __init__(self, name, init_config, agentConfig, instances=None):
"""
Initialize a new check.
@@ -275,7 +277,6 @@ def __init__(self, name, init_config, agentConfig, instances=None):
"""
from aggregator import MetricsAggregator


self.name = name
self.init_config = init_config
self.agentConfig = agentConfig
3 changes: 2 additions & 1 deletion checks/check_status.py
@@ -267,8 +267,9 @@ class CheckStatus(object):
def __init__(self, check_name, instance_statuses, metric_count=None,
event_count=None, service_check_count=None,
init_failed_error=None, init_failed_traceback=None,
library_versions=None):
library_versions=None, check_source_type_name=None):
Contributor: nitpick but if the class name is CheckStatus we don't need the check_ prefix in the attribute as it would be redundant.

self.name = check_name
self.source_type_name = check_source_type_name
self.instance_statuses = instance_statuses
self.metric_count = metric_count or 0
self.event_count = event_count or 0
23 changes: 20 additions & 3 deletions checks/collector.py
@@ -257,7 +257,6 @@ def run(self, checksd=None, start_event=True):
try:
# Run the check.
instance_statuses = check.run()

# Collect the metrics and events.
current_check_metrics = check.get_metrics()
current_check_events = check.get_events()
@@ -281,7 +280,8 @@
log.exception("Error running check %s" % check.name)

check_status = CheckStatus(check.name, instance_statuses, metric_count, event_count, service_check_count,
library_versions=check.get_library_info())
library_versions=check.get_library_info(),
check_source_type_name=check.SOURCE_TYPE_NAME or check.name)
check_statuses.append(check_status)

for check_name, info in self.init_failed_checks_d.iteritems():
@@ -297,6 +297,21 @@
payload['metrics'] = metrics
payload['events'] = events
payload['service_checks'] = service_checks

# Add agent_checks if needed
if self._should_send_metadata():
agent_checks = []
for check in check_statuses:
for instance_status in check.instance_statuses:
agent_checks.append(
(
check.name, check.source_type_name,
instance_status.instance_id,
instance_status.status, instance_status.error
)
)
payload['agent_checks'] = agent_checks

collect_duration = timer.step()

if self.os != 'windows':
@@ -379,7 +394,7 @@ def _build_payload(self, start_event=True):
}]

# Periodically send the host metadata.
if self._is_first_run() or self._should_send_metadata():
if self._should_send_metadata():
payload['systemStats'] = get_system_stats()
payload['meta'] = self._get_metadata()
self.metadata_cache = payload['meta']
@@ -427,6 +442,8 @@ def _get_metadata(self):
return metadata

def _should_send_metadata(self):
if self._is_first_run():
return True
# If the interval has passed, send the metadata again
now = time.time()
if now - self.metadata_start >= self.metadata_interval:
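(With these collector changes, whenever metadata is due to be sent — which now includes the first run — the payload gains an agent_checks list with one tuple per check instance. A rough sketch of the resulting structure; the instance ids, status strings, and error message are invented for illustration.)

# Each entry is assembled in Collector.run() as:
#   (check.name, check.SOURCE_TYPE_NAME or check.name,
#    instance_status.instance_id, instance_status.status, instance_status.error)
# so the payload might carry entries along these lines (values illustrative):
payload['agent_checks'] = [
    ('redisdb', 'redis', 0, 'OK', None),
    ('gearmand', 'germand', 0, 'ERROR', 'could not connect to gearmand'),
]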