diff --git a/checks.d/docker.py b/checks.d/docker.py index ca66907da8..86af4f0c55 100644 --- a/checks.d/docker.py +++ b/checks.d/docker.py @@ -133,11 +133,11 @@ def check(self, instance): containers, ids_to_names = self._get_and_count_containers(instance) # Report container metrics from cgroups - self._report_containers_metrics(containers, instance) + skipped_container_ids = self._report_containers_metrics(containers, instance) # Send events from Docker API if instance.get('collect_events', True): - self._process_events(instance, ids_to_names) + self._process_events(instance, ids_to_names, skipped_container_ids) # Containers @@ -188,23 +188,39 @@ def _get_and_count_containers(self, instance): return running_containers, ids_to_names - def _is_container_included(self, instance, tags): - def _is_tag_included(tag): - for exclude_rule in instance.get("exclude") or []: - if re.match(exclude_rule, tag): - for include_rule in instance.get("include") or []: - if re.match(include_rule, tag): - return True - return False + def _prepare_filters(self, instance): + # The reasoning is to check exclude first, so we can skip if there is no exclude + if not instance.get("exclude"): + return False + + # Compile regex + instance["exclude_patterns"] = [re.compile(rule) for rule in instance.get("exclude", [])] + instance["include_patterns"] = [re.compile(rule) for rule in instance.get("include", [])] + + return True + + def _is_container_excluded(self, instance, tags): + if self._tags_match_patterns(tags, instance.get("exclude_patterns")): + if self._tags_match_patterns(tags, instance.get("include_patterns")): + return False return True - for tag in tags: - if _is_tag_included(tag): - return True + return False + + def _tags_match_patterns(self, tags, filters): + for rule in filters: + for tag in tags: + if re.match(rule, tag): + return True return False def _report_containers_metrics(self, containers, instance): + skipped_container_ids = [] collect_uncommon_metrics = 
instance.get("collect_all_metrics", False) tags = instance.get("tags", []) + + # Pre-compile regex to include/exclude containers + use_filters = self._prepare_filters(instance) + for container in containers: container_tags = list(tags) for name in container["Names"]: @@ -215,7 +231,8 @@ def _report_containers_metrics(self, containers, instance): container_tags.append(tag) # Check if the container is included/excluded via its tags - if not self._is_container_included(instance, container_tags): + if use_filters and self._is_container_excluded(instance, container_tags): + skipped_container_ids.append(container['Id']) continue for key, (dd_key, metric_type) in DOCKER_METRICS.iteritems(): @@ -228,9 +245,12 @@ def _report_containers_metrics(self, containers, instance): for key, (dd_key, metric_type, common_metric) in cgroup['metrics'].iteritems(): if key in stats and (common_metric or collect_uncommon_metrics): getattr(self, metric_type)(dd_key, int(stats[key]), tags=container_tags) + if use_filters: + self.log.debug("List of excluded containers: {0}".format(skipped_container_ids)) - def _make_tag(self, key, value, instance): + return skipped_container_ids + def _make_tag(self, key, value, instance): tag_name = key.lower() if tag_name == "command" and not instance.get("tag_by_command", False): return None @@ -246,19 +266,24 @@ def _new_tags_conversion(self, tag): # Events - def _process_events(self, instance, ids_to_names): + def _process_events(self, instance, ids_to_names, skipped_container_ids): try: api_events = self._get_events(instance) - aggregated_events = self._pre_aggregate_events(api_events) + aggregated_events = self._pre_aggregate_events(api_events, skipped_container_ids) events = self._format_events(aggregated_events, ids_to_names) self._report_events(events) except (socket.timeout, urllib2.URLError): self.warning('Timeout during socket connection. 
Events will be missing.') - def _pre_aggregate_events(self, api_events): + def _pre_aggregate_events(self, api_events, skipped_container_ids): # Aggregate events, one per image. Put newer events first. events = defaultdict(list) for event in api_events: + # Skip events related to filtered containers + if event['id'] in skipped_container_ids: + self.log.debug("Excluded event: container {0} status changed to {1}".format( + event['id'], event['status'])) + continue # Known bug: from may be missing if 'from' in event: events[event['from']].insert(0, event) diff --git a/conf.d/docker.yaml.example b/conf.d/docker.yaml.example index b761661174..f11628537a 100644 --- a/conf.d/docker.yaml.example +++ b/conf.d/docker.yaml.example @@ -27,25 +27,26 @@ instances: # Example: # tags: ["extra_tag", "env:example"] - # To include or exclude containers based on their tags, use the include and - # exclude keys in your instance. - # The reasoning is: if a tag matches an exclude rule, it won't be included - # unless it also matches an include rule. + # Exclude containers based on their tags + # An excluded container will no longer report performance metrics or events. However, + # excluded containers are still counted in the number of running and stopped containers. # + # How it works: if a tag matches an exclude rule, it won't be included + # unless it also matches an include rule. # Examples: # # exclude all, except ubuntu and debian. # include: - # - "image:ubuntu" - # - "image:debian" + # - "docker_image:ubuntu" + # - "docker_image:debian" # exclude: # - ".*" # # include all, except ubuntu and debian. # include: [] # exclude: - # - "image:ubuntu" - # - "image:debian" + # - "docker_image:ubuntu" + # - "docker_image:debian" # # include: [] # exclude: [] @@ -55,7 +56,8 @@ instances: # collect_events: true # Collect disk usage per container with docker.disk.size metric. - # Warning: Some bugs in Docker (especially Docker 1.2) can break it, use with caution.
+ # Warning: This feature is broken in some versions of Docker (such as 1.2). + # Test it before running it in production. # # collect_container_size: false