Merge pull request #1415 from DataDog/alq666/pg-statio
[postgres] Collect StatIO per table & revamp test
LeoCavaille committed Apr 7, 2015
2 parents 881ead0 + ab60a13 commit c1570d8
Showing 8 changed files with 311 additions and 199 deletions.
97 changes: 68 additions & 29 deletions checks.d/postgres.py
@@ -177,7 +177,7 @@ class PostgreSql(AgentCheck):
('schemaname', 'schema')
],
'metrics': {
'pg_stat_user_tables': ('postgresql.total_tables', GAUGE),
'pg_stat_user_tables': ('postgresql.table.count', GAUGE),
},
'relation': False,
'query': """
@@ -218,6 +218,28 @@ class PostgreSql(AgentCheck):
"""
}

STATIO_METRICS = {
'descriptors': [
('relname', 'table'),
],
'metrics': {
'heap_blks_read' : ('postgresql.heap_blocks_read', RATE),
'heap_blks_hit' : ('postgresql.heap_blocks_hit', RATE),
'idx_blks_read' : ('postgresql.index_blocks_read', RATE),
'idx_blks_hit' : ('postgresql.index_blocks_hit', RATE),
'toast_blks_read' : ('postgresql.toast_blocks_read', RATE),
'toast_blks_hit' : ('postgresql.toast_blocks_hit', RATE),
'tidx_blks_read' : ('postgresql.toast_index_blocks_read', RATE),
'tidx_blks_hit' : ('postgresql.toast_index_blocks_hit', RATE),
},
'query': """
SELECT relname,
%s
FROM pg_statio_user_tables
WHERE relname = ANY(%s)""",
'relation': True,
}
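For context, every metric-scope dictionary like STATIO_METRICS is consumed the same way: the keys of 'metrics' become the selected columns, each descriptor becomes a tag on the resulting data points, and for relation scopes the monitored table names are bound to the ANY(%s) parameter. The following is a rough, self-contained sketch of that expansion, not the check's actual code path; the connection settings, the relations list, and the print() stand-in for metric submission are assumptions for illustration.

# Rough sketch of expanding a relation scope such as STATIO_METRICS into a query.
# Connection settings, `relations`, and print() are illustrative assumptions.
import psycopg2

columns = [
    'heap_blks_read', 'heap_blks_hit', 'idx_blks_read', 'idx_blks_hit',
    'toast_blks_read', 'toast_blks_hit', 'tidx_blks_read', 'tidx_blks_hit',
]
relations = ['persons']  # tables listed under `relations` in postgres.yaml

template = ("SELECT relname, %s FROM pg_statio_user_tables"
            " WHERE relname = ANY(%s)")
# Fill in the selected columns, keep one bind placeholder for the driver.
query = template % (", ".join(columns), "%s")

conn = psycopg2.connect(host='localhost', port=15432, user='datadog',
                        password='datadog', dbname='datadog_test')
cursor = conn.cursor()
cursor.execute(query, (relations,))  # psycopg2 adapts the Python list to a SQL array

for row in cursor.fetchall():
    tags = ['table:%s' % row[0]]  # descriptor ('relname', 'table') -> table:<name> tag
    for name, value in zip(columns, row[1:]):
        print((name, value, tags))  # the agent would submit each of these as a RATE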

def __init__(self, name, init_config, agentConfig, instances=None):
AgentCheck.__init__(self, name, init_config, agentConfig, instances)
self.dbs = {}
@@ -227,6 +249,7 @@ def __init__(self, name, init_config, agentConfig, instances=None):
self.db_instance_metrics = []
self.db_bgw_metrics = []
self.replication_metrics = {}
self.custom_metrics = {}

def _get_version(self, key, db):
if key not in self.versions:
@@ -263,20 +286,18 @@ def _get_instance_metrics(self, key, db):
metrics = self.instance_metrics.get(key)

if metrics is None:

# Hack to make sure that if we have multiple instances that connect to
# the same host, port, we don't collect metrics twice
# as it will result in https://github.com/DataDog/dd-agent/issues/1211
sub_key = key[:2]
if sub_key in self.db_instance_metrics:
self.instance_metrics[key] = {}
self.log.debug("Not collecting instance metrics for key: {0} as"\
self.instance_metrics[key] = None
self.log.debug("Not collecting instance metrics for key: {0} as"
" they are already collected by another instance".format(key))
return {}
return None

self.db_instance_metrics.append(sub_key)


if self._is_9_2_or_above(key, db):
self.instance_metrics[key] = dict(self.COMMON_METRICS, **self.NEWER_92_METRICS)
else:
@@ -293,16 +314,15 @@ def _get_bgw_metrics(self, key, db):
metrics = self.bgw_metrics.get(key)

if metrics is None:

# Hack to make sure that if we have multiple instances that connect to
# the same host, port, we don't collect metrics twice
# as it will result in https://github.com/DataDog/dd-agent/issues/1211
sub_key = key[:2]
if sub_key in self.db_bgw_metrics:
self.bgw_metrics[key] = {}
self.log.debug("Not collecting bgw metrics for key: {0} as"\
self.bgw_metrics[key] = None
self.log.debug("Not collecting bgw metrics for key: {0} as"
" they are already collected by another instance".format(key))
return {}
return None

self.db_bgw_metrics.append(sub_key)
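The sub_key check above (and the matching one in _get_instance_metrics) exists so that two instances pointing at the same host and port report server-wide metrics only once, and _collect_stats now relies on the None return to skip the scope entirely. A small standalone illustration of that contract, with the harness names made up for the example:

# Standalone illustration of the (host, port) dedup; `reported_servers` and
# `bgw_metrics_for` are stand-ins for the check's self.db_bgw_metrics bookkeeping.
reported_servers = []

def bgw_metrics_for(key):
    sub_key = key[:2]  # (host, port); dbname is deliberately ignored
    if sub_key in reported_servers:
        return None    # another instance already covers this server
    reported_servers.append(sub_key)
    return {'checkpoints_timed': ('postgresql.bgwriter.checkpoints_timed', 'monotonic')}

print(bgw_metrics_for(('localhost', 15432, 'datadog_test')))  # -> metric mapping
print(bgw_metrics_for(('localhost', 15432, 'dogs')))          # -> None, deduplicated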

@@ -334,26 +354,38 @@ def _collect_stats(self, key, db, instance_tags, relations, custom_metrics):
If custom_metrics is not an empty list, gather custom metrics defined in postgres.yaml
"""

self.DB_METRICS['metrics'] = self._get_instance_metrics(key, db)
self.BGW_METRICS['metrics'] = self._get_bgw_metrics(key, db)
metric_scope = [
self.DB_METRICS,
self.CONNECTION_METRICS,
self.BGW_METRICS,
self.LOCK_METRICS,
self.COUNT_METRICS
self.COUNT_METRICS,
]

# These are added only once per PG server, thus the test
db_instance_metrics = self._get_instance_metrics(key, db)
bgw_instance_metrics = self._get_bgw_metrics(key, db)

if db_instance_metrics is not None:
# FIXME: constants shouldn't be modified
self.DB_METRICS['metrics'] = db_instance_metrics
metric_scope.append(self.DB_METRICS)

if bgw_instance_metrics is not None:
# FIXME: constants shouldn't be modified
self.BGW_METRICS['metrics'] = bgw_instance_metrics
metric_scope.append(self.BGW_METRICS)

# Do we need relation-specific metrics?
if relations:
metric_scope += [
self.REL_METRICS,
self.IDX_METRICS,
self.SIZE_METRICS
self.SIZE_METRICS,
self.STATIO_METRICS
]

replication_metrics = self._get_replication_metrics(key, db)
if replication_metrics is not None:
# FIXME: constants shouldn't be modified
self.REPLICATION_METRICS['metrics'] = replication_metrics
metric_scope.append(self.REPLICATION_METRICS)

@@ -364,10 +396,8 @@ def _collect_stats(self, key, db, instance_tags, relations, custom_metrics):
for scope in full_metric_scope:
if scope == self.REPLICATION_METRICS or not self._is_above(key, db, [9,0,0]):
log_func = self.log.debug
warning_func = self.log.debug
else:
log_func = self.log.warning
warning_func = self.warning

# build query
cols = scope['metrics'].keys() # list of metrics to query, in some order
@@ -394,10 +424,11 @@ def _collect_stats(self, key, db, instance_tags, relations, custom_metrics):

if scope in custom_metrics and len(results) > MAX_CUSTOM_RESULTS:
self.warning(
"Query: {0} returned more than {1} results ({2})Truncating").format(
"Query: {0} returned more than {1} results ({2}). Truncating").format(
query, MAX_CUSTOM_RESULTS, len(results))
results = results[:MAX_CUSTOM_RESULTS]

# FIXME this cramps my style
if scope == self.DB_METRICS:
self.gauge("postgresql.db.count", len(results),
tags=[t for t in instance_tags if not t.startswith("db:")])
@@ -482,7 +513,12 @@ def get_connection(self, key, host, port, user, password, dbname, use_cached=Tru
self.dbs[key] = connection
return connection

def _process_customer_metrics(self,custom_metrics):
def _get_custom_metrics(self, custom_metrics, key):
# Pre-processed cached custom_metrics
if key in self.custom_metrics:
return self.custom_metrics[key]

# Otherwise pre-process custom metrics and verify definition
required_parameters = ("descriptors", "metrics", "query", "relation")

for m in custom_metrics:
@@ -493,14 +529,17 @@ def _process_customer_metrics(self,custom_metrics):

self.log.debug("Metric: {0}".format(m))

for k, v in m['metrics'].items():
if v[1].upper() not in ['RATE','GAUGE','MONOTONIC']:
raise CheckException("Collector method {0} is not known."\
"Known methods are RATE,GAUGE,MONOTONIC".format(
v[1].upper()))
for ref, (_, mtype) in m['metrics'].iteritems():
cap_mtype = mtype.upper()
if cap_mtype not in ('RATE', 'GAUGE', 'MONOTONIC'):
raise CheckException("Collector method {0} is not known."
" Known methods are RATE, GAUGE, MONOTONIC".format(cap_mtype))

m['metrics'][k][1] = getattr(PostgreSql, v[1].upper())
self.log.debug("Method: %s" % (str(v[1])))
m['metrics'][ref][1] = getattr(PostgreSql, cap_mtype)
self.log.debug("Method: %s" % (str(mtype)))

self.custom_metrics[key] = custom_metrics
return custom_metrics
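For reference, a custom metric definition that passes this validation could look like the following once the postgres.yaml entry is parsed into Python; the table, query, and metric names are made up for the example, and only the four required keys plus the RATE/GAUGE/MONOTONIC method names come from the check itself.

# Hypothetical custom_metrics entry as the check would receive it from postgres.yaml.
custom_metrics = [{
    'descriptors': [
        ('relname', 'table'),
    ],
    'metrics': {
        # result column -> [metric name, collection method]
        'n_live_tup': ['postgresql.custom.live_rows', 'GAUGE'],
        'n_dead_tup': ['postgresql.custom.dead_rows', 'GAUGE'],
    },
    'query': """
SELECT relname,
       %s
  FROM pg_stat_user_tables
 WHERE relname = ANY(%s)""",
    'relation': True,
}]

After _get_custom_metrics runs, the 'GAUGE' strings are replaced in place by the corresponding PostgreSql.GAUGE method and the processed list is cached under the instance key, so validation only happens on the first check run.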

def check(self, instance):
host = instance.get('host', '')
@@ -510,8 +549,6 @@ def check(self, instance):
tags = instance.get('tags', [])
dbname = instance.get('dbname', None)
relations = instance.get('relations', [])
custom_metrics = instance.get('custom_metrics') or []
self._process_customer_metrics(custom_metrics)

if relations and not dbname:
self.warning('"dbname" parameter must be set when using the "relations" parameter.')
@@ -521,6 +558,8 @@ def check(self, instance):

key = (host, port, dbname)

custom_metrics = self._get_custom_metrics(instance.get('custom_metrics', []), key)

# Clean up tags in case there was a None entry in the instance
# e.g. if the yaml contains tags: but no actual tags
if tags is None:
21 changes: 4 additions & 17 deletions ci/postgres.rb
@@ -46,26 +46,13 @@ def pg_rootdir
sleep_for 5
sh %(#{pg_rootdir}/bin/psql\
-p 15432 -U $USER\
-c "CREATE USER datadog WITH PASSWORD 'datadog'"\
postgres)
postgres < $TRAVIS_BUILD_DIR/ci/resources/postgres/postgres.sql)
sh %(#{pg_rootdir}/bin/psql\
-p 15432 -U $USER\
-c "GRANT SELECT ON pg_stat_database TO datadog"\
postgres)
sh %(#{pg_rootdir}/bin/psql\
-p 15432 -U $USER\
-c "CREATE DATABASE datadog_test"\
postgres)
sh %(#{pg_rootdir}/bin/psql\
-p 15432 -U $USER\
-c "GRANT ALL PRIVILEGES ON DATABASE datadog_test TO datadog"\
postgres)
-p 15432 -U datadog\
datadog_test < $TRAVIS_BUILD_DIR/ci/resources/postgres/datadog_test.sql)
sh %(#{pg_rootdir}/bin/psql\
-p 15432 -U datadog\
-c "CREATE TABLE persons (personid INT, lastname VARCHAR(255), firstname VARCHAR(255), address VARCHAR(255), city VARCHAR(255))"\
datadog_test)
# For pg_stat_user_table to return stuff
sleep_for 5
dogs < $TRAVIS_BUILD_DIR/ci/resources/postgres/dogs.sql)
end

task :script => ['ci:common:script'] do
5 changes: 5 additions & 0 deletions ci/resources/postgres/datadog_test.sql
@@ -0,0 +1,5 @@
CREATE TABLE persons (personid SERIAL, lastname VARCHAR(255), firstname VARCHAR(255), address VARCHAR(255), city VARCHAR(255));
INSERT INTO persons (lastname, firstname, address, city) VALUES ('Cavaille', 'Leo', 'Midtown', 'New York'), ('Someveryveryveryveryveryveryveryveryveryverylongname', 'something', 'Avenue des Champs Elysees', 'Beautiful city of lights');
SELECT * FROM persons;
SELECT * FROM persons;
SELECT * FROM persons;
7 changes: 7 additions & 0 deletions ci/resources/postgres/dogs.sql
@@ -0,0 +1,7 @@
CREATE TABLE breed (id SERIAL, name VARCHAR(255));
CREATE TABLE kennel (id SERIAL, address VARCHAR(255));
INSERT INTO kennel (address) VALUES ('Midtown, New York'), ('Boston');
SELECT * FROM kennel;
CREATE INDEX breed_names ON breed(name);
INSERT INTO breed (name) VALUES ('Labrador Retriver'), ('German Shepherd'), ('Yorkshire Terrier'), ('Golden Retriever'), ('Bulldog');
SELECT * FROM breed WHERE name = 'Labrador';
5 changes: 5 additions & 0 deletions ci/resources/postgres/postgres.sql
@@ -0,0 +1,5 @@
CREATE USER datadog WITH PASSWORD 'datadog';
GRANT SELECT ON pg_stat_database TO datadog;
CREATE DATABASE datadog_test;
GRANT ALL PRIVILEGES ON DATABASE datadog_test TO datadog;
CREATE DATABASE dogs;
6 changes: 4 additions & 2 deletions tests/common.py
@@ -287,7 +287,7 @@ def assertMetricTagPrefix(self, metric_name, tag_prefix, count=None, at_least=1)
.format(tag_prefix, metric_name))
if count is not None:
log.debug(" * should have exactly {0} data points".format(count))
if at_least is not None:
elif at_least is not None:
log.debug(" * should have at least {0} data points".format(at_least))

candidates = []
@@ -316,7 +316,7 @@ def assertMetricTag(self, metric_name, tag, count=None, at_least=1):
log.debug("Looking for tag {0} on metric {1}".format(tag, metric_name))
if count is not None:
log.debug(" * should have exactly {0} data points".format(count))
if at_least is not None:
elif at_least is not None:
log.debug(" * should have at least {0} data points".format(at_least))

candidates = []
@@ -350,6 +350,8 @@ def assertServiceCheck(self, service_check_name, status=None, tags=None,
log.debug(" * tagged with {0}".format(tags))
if count is not None:
log.debug(" * should have exactly {0} statuses".format(count))
elif at_least is not None:
log.debug(" * should have at least {0} statuses".format(count))
candidates = []
for sc in self.service_checks:
if sc['check'] == service_check_name:
