Skip to content

Commit

Permalink
Merge pull request #922 from DataDog/tristan/agent-checks-ui
Browse files Browse the repository at this point in the history
Tristan/agent checks ui
  • Loading branch information
tmichelet committed May 19, 2014
2 parents 47931e5 + cc3c32e commit 6780b3f
Show file tree
Hide file tree
Showing 19 changed files with 65 additions and 13 deletions.
3 changes: 3 additions & 0 deletions checks.d/couch.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ class CouchDb(AgentCheck):
"""Extracts stats from CouchDB via its REST API
http://wiki.apache.org/couchdb/Runtime_Statistics
"""

SOURCE_TYPE_NAME = 'couchdb'

def _create_metric(self, data, tags=None):
overall_stats = data.get('stats', {})
for key, stats in overall_stats.items():
Expand Down
3 changes: 3 additions & 0 deletions checks.d/directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ class DirectoryCheck(AgentCheck):
"pattern" - string, the `fnmatch` pattern to use when reading the "directory"'s files. default "*"
"recursive" - boolean, when true the stats will recurse into directories. default False
"""

SOURCE_TYPE_NAME = 'system'

def check(self, instance):
if "directory" not in instance:
raise Exception('DirectoryCheck: missing "directory" in config')
Expand Down
2 changes: 2 additions & 0 deletions checks.d/elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ class ElasticSearch(AgentCheck):
"elasticsearch.cluster_status": ("gauge", "status", lambda v: {"red":0,"yellow":1,"green":2}.get(v, -1)),
}

SOURCE_TYPE_NAME = 'elasticsearch'

def __init__(self, name, init_config, agentConfig):
AgentCheck.__init__(self, name, init_config, agentConfig)

Expand Down
2 changes: 1 addition & 1 deletion checks.d/gearmand.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def _get_metrics(self, client, tags):
running = 0
queued = 0
workers = 0

for stat in data:
running += stat['running']
queued += stat['queued']
Expand Down
2 changes: 2 additions & 0 deletions checks.d/http_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

class HTTPCheck(ServicesCheck):

SOURCE_TYPE_NAME = 'system'

def _load_conf(self, instance):
# Fetches the conf
tags = instance.get('tags', [])
Expand Down
3 changes: 3 additions & 0 deletions checks.d/kafka_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
import random

class KafkaCheck(AgentCheck):

SOURCE_TYPE_NAME = 'kafka'

def check(self, instance):
consumer_groups = self.read_config(instance, 'consumer_groups',
cast=self._validate_consumer_groups)
Expand Down
3 changes: 2 additions & 1 deletion checks.d/kyototycoon.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class KyotoTycoonCheck(AgentCheck):
"""Report statistics about the Kyoto Tycoon DBM-style
database server (http://fallabs.com/kyototycoon/)
"""
SOURCE_TYPE_NAME = 'kyoto tycoon'

GAUGES = {
'repl_delay': 'replication.delay',
Expand Down Expand Up @@ -68,7 +69,7 @@ def check(self, instance):
if key in self.GAUGES:
name = self.GAUGES[key]
self.gauge('kyototycoon.%s' % name, float(value), tags=tags)

elif key in self.RATES:
name = self.RATES[key]
self.rate('kyototycoon.%s_per_s' % name, float(value), tags=tags)
Expand Down
3 changes: 3 additions & 0 deletions checks.d/mcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@
# https://github.com/membase/ep-engine/blob/master/docs/stats.org

class Memcache(AgentCheck):

SOURCE_TYPE_NAME = 'memcached'

DEFAULT_PORT = 11211

GAUGES = [
Expand Down
2 changes: 2 additions & 0 deletions checks.d/mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

class MongoDb(AgentCheck):

SOURCE_TYPE_NAME = 'mongodb'

GAUGES = [
"indexCounters.btree.missRatio",
"indexCounters.missRatio",
Expand Down
6 changes: 4 additions & 2 deletions checks.d/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

class Network(AgentCheck):

SOURCE_TYPE_NAME = 'system'

TCP_STATES = {
"ESTABLISHED": "established",
"SYN_SENT": "opening",
Expand Down Expand Up @@ -88,7 +90,7 @@ def _submit_devicemetrics(self, iface, vals_by_metric):

# For reasons I don't understand, only these metrics are skipped if a
# particular interface is in the `excluded_interfaces` config list.
# Not sure why the others aren't included. Until I understand why, I'm
# Not sure why the others aren't included. Until I understand why, I'm
# going to keep the same behaviour.
exclude_iface_metrics = [
'packets_in.count',
Expand Down Expand Up @@ -238,7 +240,7 @@ def _check_bsd(self, instance):
'bytes_rcvd': self._parse_value(x[-5]),
'bytes_sent': self._parse_value(x[-2]),
'packets_in.count': self._parse_value(x[-7]),
'packets_in.error': self._parse_value(x[-6]),
'packets_in.error': self._parse_value(x[-6]),
'packets_out.count': self._parse_value(x[-4]),
'packets_out.error':self._parse_value(x[-3]),
}
Expand Down
2 changes: 2 additions & 0 deletions checks.d/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

class ProcessCheck(AgentCheck):

SOURCE_TYPE_NAME = 'system'

PROCESS_GAUGE = (
'system.processes.threads',
'system.processes.cpu.pct',
Expand Down
9 changes: 6 additions & 3 deletions checks.d/redisdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
class Redis(AgentCheck):
db_key_pattern = re.compile(r'^db\d+')
subkeys = ['keys', 'expires']

SOURCE_TYPE_NAME = 'redis'

GAUGE_KEYS = {
# Append-only metrics
'aof_last_rewrite_time_sec': 'redis.aof.last_rewrite_time',
Expand Down Expand Up @@ -106,7 +109,7 @@ def _get_conn(self, instance):
key = self._generate_instance_key(instance)
if key not in self.connections:
try:

# Only send useful parameters to the redis client constructor
list_params = ['host', 'port', 'db', 'password', 'socket_timeout',
'connection_pool', 'charset', 'errors', 'unix_socket_path']
Expand Down Expand Up @@ -139,12 +142,12 @@ def _check_db(self, instance, custom_tags=None):
try:
info = conn.info()
except ValueError, e:
# This is likely a known issue with redis library 2.0.0
# This is likely a known issue with redis library 2.0.0
# See https://github.com/DataDog/dd-agent/issues/374 for details
import redis
raise Exception("""Unable to run the info command. This is probably an issue with your version of the python-redis library.
Minimum required version: 2.4.11
Your current version: %s
Your current version: %s
Please upgrade to a newer version by running sudo easy_install redis""" % redis.__version__)

latency_ms = round((time.time() - start) * 1000, 2)
Expand Down
3 changes: 3 additions & 0 deletions checks.d/sqlserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
VALID_METRIC_TYPES = ('gauge', 'rate', 'histogram')

class SQLServer(AgentCheck):

SOURCE_TYPE_NAME = 'sql server'

METRICS = [
('sqlserver.buffer.cache_hit_ratio', 'gauge', 'Buffer cache hit ratio'),
('sqlserver.buffer.page_life_expectancy', 'gauge', 'Page life expectancy'),
Expand Down
2 changes: 2 additions & 0 deletions checks.d/tcp_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ class BadConfException(Exception): pass

class TCPCheck(ServicesCheck):

SOURCE_TYPE_NAME = 'system'

def _load_conf(self, instance):
# Fetches the conf

Expand Down
2 changes: 1 addition & 1 deletion checks.d/wmi_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _get_wmi_conn(self, host, user, password):
def check(self, instance):
if wmi is None:
raise Exception("Missing 'wmi' module")

host = instance.get('host', None)
user = instance.get('username', None)
password = instance.get('password', None)
Expand Down
2 changes: 2 additions & 0 deletions checks.d/zk.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
class Zookeeper(AgentCheck):
version_pattern = re.compile(r'Zookeeper version: ([^.]+)\.([^.]+)\.([^-]+)', flags=re.I)

SOURCE_TYPE_NAME = 'zookeeper'

def check(self, instance):
host = instance.get('host', 'localhost')
port = int(instance.get('port', 2181))
Expand Down
3 changes: 2 additions & 1 deletion checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,8 @@ def get_metrics(self, expire=True):
class AgentCheck(object):
OK, WARNING, CRITICAL, UNKNOWN, NONE = (0, 1, 2, 3, 4)

SOURCE_TYPE_NAME = None

def __init__(self, name, init_config, agentConfig, instances=None):
"""
Initialize a new check.
Expand All @@ -275,7 +277,6 @@ def __init__(self, name, init_config, agentConfig, instances=None):
"""
from aggregator import MetricsAggregator


self.name = name
self.init_config = init_config
self.agentConfig = agentConfig
Expand Down
3 changes: 2 additions & 1 deletion checks/check_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,9 @@ class CheckStatus(object):
def __init__(self, check_name, instance_statuses, metric_count=None,
event_count=None, service_check_count=None,
init_failed_error=None, init_failed_traceback=None,
library_versions=None):
library_versions=None, source_type_name=None):
self.name = check_name
self.source_type_name = source_type_name
self.instance_statuses = instance_statuses
self.metric_count = metric_count or 0
self.event_count = event_count or 0
Expand Down
23 changes: 20 additions & 3 deletions checks/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,6 @@ def run(self, checksd=None, start_event=True):
try:
# Run the check.
instance_statuses = check.run()

# Collect the metrics and events.
current_check_metrics = check.get_metrics()
current_check_events = check.get_events()
Expand All @@ -281,7 +280,8 @@ def run(self, checksd=None, start_event=True):
log.exception("Error running check %s" % check.name)

check_status = CheckStatus(check.name, instance_statuses, metric_count, event_count, service_check_count,
library_versions=check.get_library_info())
library_versions=check.get_library_info(),
source_type_name=check.SOURCE_TYPE_NAME or check.name)
check_statuses.append(check_status)

for check_name, info in self.init_failed_checks_d.iteritems():
Expand All @@ -297,6 +297,21 @@ def run(self, checksd=None, start_event=True):
payload['metrics'] = metrics
payload['events'] = events
payload['service_checks'] = service_checks

# Add agent_checks if needed
if self._should_send_metadata():
agent_checks = []
for check in check_statuses:
for instance_status in check.instance_statuses:
agent_checks.append(
(
check.name, check.source_type_name,
instance_status.instance_id,
instance_status.status, instance_status.error
)
)
payload['agent_checks'] = agent_checks

collect_duration = timer.step()

if self.os != 'windows':
Expand Down Expand Up @@ -379,7 +394,7 @@ def _build_payload(self, start_event=True):
}]

# Periodically send the host metadata.
if self._is_first_run() or self._should_send_metadata():
if self._should_send_metadata():
payload['systemStats'] = get_system_stats()
payload['meta'] = self._get_metadata()
self.metadata_cache = payload['meta']
Expand Down Expand Up @@ -427,6 +442,8 @@ def _get_metadata(self):
return metadata

def _should_send_metadata(self):
if self._is_first_run():
return True
# If the interval has passed, send the metadata again
now = time.time()
if now - self.metadata_start >= self.metadata_interval:
Expand Down

0 comments on commit 6780b3f

Please sign in to comment.