Skip to content

Commit

Permalink
Refactor threshold evaluator
Browse files Browse the repository at this point in the history
This change creates a base class for evaluators.

The alarm service loads all evaluators.
An evaluator needs to implement the abstract method of this base class.

The alarm service now checks alarm.type and uses the matching extension to
evaluate the alarm.

The previous threshold evaluator code has been moved into the threshold
extension.

Related to blueprint alarming-logical-combination

Change-Id: If6057b7db1e894333e6e9f1edb41ab75bc2c4444
  • Loading branch information
Mehdi Abaakouk committed Sep 19, 2013
1 parent 48c85f7 commit 42f02ab
Show file tree
Hide file tree
Showing 9 changed files with 270 additions and 219 deletions.
78 changes: 78 additions & 0 deletions ceilometer/alarm/evaluator/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# -*- encoding: utf-8 -*-
#
# Copyright © 2013 eNovance <licensing@enovance.com>
#
# Authors: Mehdi Abaakouk <mehdi.abaakouk@enovance.com>
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.


import abc

from oslo.config import cfg

from ceilometerclient import client as ceiloclient

from ceilometer.openstack.common import log
from ceilometer.openstack.common.gettextutils import _

LOG = log.getLogger(__name__)


class Evaluator(object):
    """Base class for alarm rule evaluator plugins.

    Keeps the notifier passed at construction time and a lazily created
    API client, and offers ``_refresh`` to push a new alarm state through
    that client. Concrete plugins must implement ``evaluate``.
    """

    __metaclass__ = abc.ABCMeta

    def __init__(self, notifier):
        """Store the notifier; the API client is built on first use.

        :param notifier: object whose ``notify(alarm, previous, reason)``
                         is invoked from ``_refresh``; a falsy value
                         disables notification.
        """
        self.notifier = notifier
        self.api_client = None

    @property
    def _client(self):
        """Construct or reuse an authenticated API client.

        Credentials are read from ``cfg.CONF.service_credentials`` the
        first time the property is accessed; the resulting client is
        cached on ``self.api_client`` and returned on later accesses.
        """
        # __init__ uses None as the "not built yet" marker, so test for
        # exactly that rather than for general falsiness.
        if self.api_client is None:
            auth_config = cfg.CONF.service_credentials
            creds = dict(
                os_auth_url=auth_config.os_auth_url,
                os_tenant_name=auth_config.os_tenant_name,
                os_password=auth_config.os_password,
                os_username=auth_config.os_username,
                cacert=auth_config.os_cacert,
                endpoint_type=auth_config.os_endpoint_type,
            )
            self.api_client = ceiloclient.get_client(2, **creds)
        return self.api_client

    def _refresh(self, alarm, state, reason):
        """Refresh alarm state.

        :param alarm: alarm object exposing ``alarm_id`` and ``state``.
        :param state: the state the alarm should now be in.
        :param reason: explanation that is logged and handed to the
                       notifier.

        Any exception raised along the way is logged and swallowed.
        """
        try:
            previous = alarm.state
            if previous != state:
                LOG.info(_('alarm %(id)s transitioning to %(state)s because '
                           '%(reason)s') % {'id': alarm.alarm_id,
                                            'state': state,
                                            'reason': reason})

                self._client.alarms.update(alarm.alarm_id, state=state)
            # NOTE(review): block nesting was reconstructed from a
            # flattened listing -- confirm that the local state update
            # and the notification are meant to run on every call and
            # not only on a transition.
            alarm.state = state
            if self.notifier:
                self.notifier.notify(alarm, previous, reason)
        except Exception:
            # retry will occur naturally on the next evaluation
            # cycle (unless alarm state reverts in the meantime)
            LOG.exception(_('alarm state update failed'))

    @abc.abstractmethod
    def evaluate(self, alarm):
        """Evaluate a single alarm; every plugin must implement this.

        :param alarm: the alarm to evaluate.
        """
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Copyright © 2013 Red Hat, Inc
#
# Author: Eoghan Glynn <eglynn@redhat.com>
# Author: Mehdi Abaakouk <mehdi.abaakouk@enovance.com>
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
Expand All @@ -19,11 +20,9 @@
import datetime
import operator

from oslo.config import cfg

from ceilometer.alarm import evaluator
from ceilometer.openstack.common import log
from ceilometer.openstack.common import timeutils
from ceilometerclient import client as ceiloclient
from ceilometer.openstack.common.gettextutils import _

LOG = log.getLogger(__name__)
Expand All @@ -42,10 +41,7 @@
ALARM = 'alarm'


class Evaluator(object):
"""This class implements the basic alarm threshold evaluation
logic.
"""
class ThresholdEvaluator(evaluator.Evaluator):

# the sliding evaluation window is extended to allow
# for reporting/ingestion lag
Expand All @@ -55,31 +51,6 @@ class Evaluator(object):
# avoid unknown state
quorum = 1

def __init__(self, notifier=None):
self.alarms = []
self.notifier = notifier
self.api_client = None

def assign_alarms(self, alarms):
"""Assign alarms to be evaluated."""
self.alarms = alarms

@property
def _client(self):
"""Construct or reuse an authenticated API client."""
if not self.api_client:
auth_config = cfg.CONF.service_credentials
creds = dict(
os_auth_url=auth_config.os_auth_url,
os_tenant_name=auth_config.os_tenant_name,
os_password=auth_config.os_password,
os_username=auth_config.os_username,
cacert=auth_config.os_cacert,
endpoint_type=auth_config.os_endpoint_type,
)
self.api_client = ceiloclient.get_client(2, **creds)
return self.api_client

@classmethod
def _bound_duration(cls, alarm, constraints):
"""Bound the duration of the statistics query."""
Expand Down Expand Up @@ -118,25 +89,6 @@ def _statistics(self, alarm, query):
LOG.exception(_('alarm stats retrieval failed'))
return []

def _refresh(self, alarm, state, reason):
"""Refresh alarm state."""
try:
previous = alarm.state
if previous != state:
LOG.info(_('alarm %(id)s transitioning to %(state)s because '
'%(reason)s') % {'id': alarm.alarm_id,
'state': state,
'reason': reason})

self._client.alarms.update(alarm.alarm_id, **dict(state=state))
alarm.state = state
if self.notifier:
self.notifier.notify(alarm, previous, reason)
except Exception:
# retry will occur naturally on the next evaluation
# cycle (unless alarm state reverts in the meantime)
LOG.exception(_('alarm state update failed'))

def _sufficient(self, alarm, statistics):
"""Ensure there is sufficient data for evaluation,
transitioning to unknown otherwise.
Expand Down Expand Up @@ -194,40 +146,27 @@ def _transition(self, alarm, statistics, compared):
reason = self._reason(alarm, statistics, distilled, state)
self._refresh(alarm, state, reason)

def evaluate(self):
"""Evaluate the alarms assigned to this evaluator."""

LOG.info(_('initiating evaluation cycle on %d alarms') %
len(self.alarms))

for alarm in self.alarms:

if not alarm.enabled:
LOG.debug(_('skipping alarm %s') % alarm.alarm_id)
continue
LOG.debug(_('evaluating alarm %s') % alarm.alarm_id)

query = self._bound_duration(
alarm,
alarm.rule['query']
)

statistics = self._sanitize(
alarm,
self._statistics(alarm, query)
)

if self._sufficient(alarm, statistics):

def _compare(stat):
op = COMPARATORS[alarm.rule['comparison_operator']]
value = getattr(stat, alarm.rule['statistic'])
limit = alarm.rule['threshold']
LOG.debug(_('comparing value %(value)s against threshold'
' %(limit)s') %
{'value': value, 'limit': limit})
return op(value, limit)

self._transition(alarm,
statistics,
list(map(_compare, statistics)))
def evaluate(self, alarm):
    """Evaluate one alarm against its threshold rule.

    The rule's query is passed through ``_bound_duration``, statistics
    are fetched with ``_statistics`` and passed through ``_sanitize``.
    When ``_sufficient`` accepts them, each statistic is compared with
    the rule's threshold and the outcomes are handed to ``_transition``.

    :param alarm: alarm whose ``rule`` mapping provides ``query``,
                  ``comparison_operator``, ``statistic`` and
                  ``threshold``.
    """
    query = self._bound_duration(
        alarm,
        alarm.rule['query']
    )

    statistics = self._sanitize(
        alarm,
        self._statistics(alarm, query)
    )

    if self._sufficient(alarm, statistics):
        def _compare(stat):
            op = COMPARATORS[alarm.rule['comparison_operator']]
            value = getattr(stat, alarm.rule['statistic'])
            limit = alarm.rule['threshold']
            LOG.debug(_('comparing value %(value)s against threshold'
                        ' %(limit)s') %
                      {'value': value, 'limit': limit})
            return op(value, limit)

        # Build a real list: a bare map() is a one-shot iterator on
        # Python 3, so the results could only be walked once by
        # _transition.
        self._transition(alarm,
                         statistics,
                         [_compare(stat) for stat in statistics])
90 changes: 53 additions & 37 deletions ceilometer/alarm/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,13 @@


OPTS = [
cfg.IntOpt('threshold_evaluation_interval',
cfg.IntOpt('evaluation_interval',
default=60,
help='Period of threshold evaluation cycle, should'
help='Period of evaluation cycle, should'
' be >= than configured pipeline interval for'
' collection of underlying metrics.'),
' collection of underlying metrics.',
deprecated_opts=[cfg.DeprecatedOpt(
'threshold_evaluation_interval', group='alarm')])
]

cfg.CONF.register_opts(OPTS, group='alarm')
Expand All @@ -49,54 +51,68 @@

class SingletonAlarmService(os_service.Service):

ALARM_NAMESPACE = 'ceilometer.alarm'
EXTENSIONS_NAMESPACE = "ceilometer.alarm.evaluator"

def __init__(self):
super(SingletonAlarmService, self).__init__()
self.extension_manager = extension.ExtensionManager(
namespace=self.ALARM_NAMESPACE,
self.api_client = None
self.evaluators = extension.ExtensionManager(
self.EXTENSIONS_NAMESPACE,
invoke_on_load=True,
invoke_args=(rpc_alarm.RPCAlarmNotifier(),)
)
invoke_args=(rpc_alarm.RPCAlarmNotifier(),))
self.supported_evaluators = [ext.name for ext in
self.evaluators.extensions]

def start(self):
super(SingletonAlarmService, self).start()
for ext in self.extension_manager.extensions:
if ext.name == 'threshold_eval':
self.threshold_eval = ext.obj
interval = cfg.CONF.alarm.threshold_evaluation_interval
args = [ext.obj, self._client()]
self.tg.add_timer(
interval,
self._evaluate_all_alarms,
0,
*args)
break
interval = cfg.CONF.alarm.evaluation_interval
self.tg.add_timer(
interval,
self._evaluate_all_alarms,
0)
# Add a dummy thread to have wait() working
self.tg.add_timer(604800, lambda: None)

@staticmethod
def _client():
auth_config = cfg.CONF.service_credentials
creds = dict(
os_auth_url=auth_config.os_auth_url,
os_tenant_name=auth_config.os_tenant_name,
os_password=auth_config.os_password,
os_username=auth_config.os_username,
cacert=auth_config.os_cacert,
endpoint_type=auth_config.os_endpoint_type,
)
return ceiloclient.get_client(2, **creds)

@staticmethod
def _evaluate_all_alarms(threshold_eval, api_client):
@property
def _client(self):
    """Return the cached API client, building it on first access.

    The client is created from ``cfg.CONF.service_credentials`` and kept
    on ``self.api_client`` for subsequent accesses.
    """
    if self.api_client:
        return self.api_client

    conf = cfg.CONF.service_credentials
    self.api_client = ceiloclient.get_client(
        2,
        os_auth_url=conf.os_auth_url,
        os_tenant_name=conf.os_tenant_name,
        os_password=conf.os_password,
        os_username=conf.os_username,
        cacert=conf.os_cacert,
        endpoint_type=conf.os_endpoint_type,
    )
    return self.api_client

def _evaluate_all_alarms(self):
try:
alarms = api_client.alarms.list()
threshold_eval.assign_alarms(alarms)
threshold_eval.evaluate()
alarms = self._client.alarms.list()
LOG.info(_('initiating evaluation cycle on %d alarms') %
len(alarms))
for alarm in alarms:
self._evaluate_alarm(alarm)
except Exception:
LOG.exception(_('threshold evaluation cycle failed'))

def _evaluate_alarm(self, alarm):
    """Hand one alarm to the evaluator extension registered for its type.

    Disabled alarms, and alarms whose type is not listed in
    ``self.supported_evaluators``, are skipped with a debug message.
    """
    if not alarm.enabled:
        LOG.debug(_('skipping alarm %s: alarm disabled') % alarm.alarm_id)
    elif alarm.type not in self.supported_evaluators:
        LOG.debug(_('skipping alarm %s: type unsupported') % alarm.alarm_id)
    else:
        LOG.debug(_('evaluating alarm %s') % alarm.alarm_id)
        extension_entry = self.evaluators[alarm.type]
        extension_entry.obj.evaluate(alarm)


def singleton_alarm():
prepare_service()
Expand Down
8 changes: 4 additions & 4 deletions etc/ceilometer/ceilometer.conf.sample
Original file line number Diff line number Diff line change
Expand Up @@ -610,10 +610,10 @@
# Options defined in ceilometer.alarm.service
#

# Period of threshold evaluation cycle, should be >= than
# configured pipeline interval for collection of underlying
# metrics. (integer value)
#threshold_evaluation_interval=60
# Period of evaluation cycle, should be >= than configured
# pipeline interval for collection of underlying metrics.
# (integer value)
#evaluation_interval=60


#
Expand Down
5 changes: 3 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ ceilometer.publisher =
udp = ceilometer.publisher.udp:UDPPublisher
file = ceilometer.publisher.file:FilePublisher

ceilometer.alarm =
threshold_eval = ceilometer.alarm.threshold_evaluation:Evaluator
ceilometer.alarm.evaluator =
threshold = ceilometer.alarm.evaluator.threshold:ThresholdEvaluator

ceilometer.alarm.notifier =
log = ceilometer.alarm.notifier.log:LogAlarmNotifier
Expand All @@ -129,6 +129,7 @@ ceilometer.dispatcher =
database = ceilometer.collector.dispatcher.database:DatabaseDispatcher
file = ceilometer.collector.dispatcher.file:FileDispatcher


[build_sphinx]
all_files = 1
build-dir = doc/build
Expand Down
Empty file.

0 comments on commit 42f02ab

Please sign in to comment.