Skip to content

Commit

Permalink
Merge pull request #906 from Alignak-monitoring/nsca-umlaut
Browse files Browse the repository at this point in the history
Fix #905: correctly manage the output encoding for external commands
  • Loading branch information
ddurieux committed Aug 29, 2017
2 parents ff1f3b5 + 632ddba commit 7fd92a6
Show file tree
Hide file tree
Showing 2 changed files with 276 additions and 63 deletions.
156 changes: 93 additions & 63 deletions alignak/external_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -2973,49 +2973,61 @@ def process_host_check_result(self, host, status_code, plugin_output):
:return: None
TODO: say that check is PASSIVE
"""
# raise a PASSIVE check only if needed
now = time.time()
cls = host.__class__

# If globally disabled OR host disabled, do not launch..
if not cls.accept_passive_checks or not host.passive_checks_enabled:
return

try:
plugin_output = plugin_output.decode('utf8', 'ignore')
logger.debug('%s > Passive host check plugin output: %s',
host.get_full_name(), plugin_output)
except UnicodeError:
pass

# Maybe the check is just too old, if so, bail out!
if self.current_timestamp < host.last_chk:
logger.warning('%s > Passive host check is too old. Ignoring...',
host.get_full_name())
return

chk = host.launch_check(now, self.hosts, self.services, self.timeperiods,
self.daemon.macromodulations, self.daemon.checkmodulations,
self.daemon.checks, force=True)
# Should not be possible to not find the check, but if so, don't crash
if not chk:
logger.error('%s > Passive host check failed. None check launched !?',
host.get_full_name())
return
# Now we 'transform the check into a result'
# So exit_status, output and status is eaten by the host
chk.exit_status = status_code
chk.get_outputs(plugin_output, host.max_plugins_output_length)
chk.status = 'waitconsume'
chk.check_time = self.current_timestamp # we are using the external command timestamps
# Set the corresponding host's check type to passive
chk.set_type_passive()
# self.daemon.nb_check_received += 1
self.send_an_element(chk)
# Ok now this result will be read by the scheduler the next loop

# raise a passive check log only if needed
if self.conf.log_passive_checks:
log_level = 'info'
if status_code == 1: # DOWN
log_level = 'error'
if status_code == 2: # UNREACHABLE
log_level = 'warning'
brok = make_monitoring_log(
log_level, 'PASSIVE HOST CHECK: %s;%d;%s'
log_level, 'PASSIVE HOST CHECK: %s;%d;%s;%s;%s'
% (host.get_name().decode('utf8', 'ignore'),
status_code, plugin_output.decode('utf8', 'ignore'))
status_code, chk.output, chk.long_output, chk.perf_data)
)
# Send a brok to our arbiter else to our scheduler
self.send_an_element(brok)

now = time.time()
cls = host.__class__
# If globally disable OR locally, do not launch
if cls.accept_passive_checks and host.passive_checks_enabled:
# Maybe the check is just too old, if so, bail out!
if self.current_timestamp < host.last_chk:
return

chk = host.launch_check(now, self.hosts, self.services, self.timeperiods,
self.daemon.macromodulations, self.daemon.checkmodulations,
self.daemon.checks, force=True)
# Should not be possible to not find the check, but if so, don't crash
if not chk:
logger.error('%s > Passive host check failed. None check launched !?',
host.get_full_name())
return
# Now we 'transform the check into a result'
# So exit_status, output and status is eaten by the host
chk.exit_status = status_code
chk.get_outputs(plugin_output, host.max_plugins_output_length)
chk.status = 'waitconsume'
chk.check_time = self.current_timestamp # we are using the external command timestamps
# Set the corresponding host's check type to passive
chk.set_type_passive()
# self.daemon.nb_check_received += 1
self.send_an_element(chk)
# Ok now this result will be read by scheduler the next loop

def process_host_output(self, host, plugin_output):
"""Process host output
Format of the line that triggers function call::
Expand Down Expand Up @@ -3044,51 +3056,69 @@ def process_service_check_result(self, service, return_code, plugin_output):
:type plugin_output: str
:return: None
"""
# raise a PASSIVE check only if needed
now = time.time()
cls = service.__class__

# If globally disabled OR service disabled, do not launch..
if not cls.accept_passive_checks or not service.passive_checks_enabled:
return

try:
plugin_output = plugin_output.decode('utf8', 'ignore')
logger.debug('%s > Passive service check plugin output: %s',
service.get_full_name(), plugin_output)
except UnicodeError:
pass

# Maybe the check is just too old, if so, bail out!
if self.current_timestamp < service.last_chk:
logger.warning('%s > Passive service check is too old. Ignoring...',
service.get_full_name())
return

# Create a check object from the external command
chk = service.launch_check(now, self.hosts, self.services, self.timeperiods,
self.daemon.macromodulations, self.daemon.checkmodulations,
self.daemon.checks, force=True)
# Should not be possible to not find the check, but if so, don't crash
if not chk:
logger.error('%s > Passive service check failed. None check launched !?',
service.get_full_name())
return

# Now we 'transform the check into a result'
# So exit_status, output and status is eaten by the service
chk.exit_status = return_code
chk.get_outputs(plugin_output, service.max_plugins_output_length)

logger.debug('%s > Passive service check output: %s',
service.get_full_name(), chk.output)

chk.status = 'waitconsume'
chk.check_time = self.current_timestamp # we are using the external command timestamps
# Set the corresponding service's check type to passive
chk.set_type_passive()
# self.daemon.nb_check_received += 1
self.send_an_element(chk)
# Ok now this result will be read by the scheduler the next loop

# raise a passive check log only if needed
if self.conf.log_passive_checks:
log_level = 'info'
if return_code == 1: # WARNING
log_level = 'warning'
if return_code == 2: # CRITICAL
log_level = 'error'
brok = make_monitoring_log(
log_level, 'PASSIVE SERVICE CHECK: %s;%s;%d;%s' % (
log_level, 'PASSIVE SERVICE CHECK: %s;%s;%d;%s;%s;%s' % (
self.hosts[service.host].get_name().decode('utf8', 'ignore'),
service.get_name().decode('utf8', 'ignore'),
return_code, plugin_output.decode('utf8', 'ignore')
return_code, chk.output, chk.long_output, chk.perf_data
)
)
# Send a brok to our arbiter else to our scheduler
# Notify the brok
self.send_an_element(brok)

now = time.time()
cls = service.__class__
# If globally disable OR locally, do not launch
if cls.accept_passive_checks and service.passive_checks_enabled:
# Maybe the check is just too old, if so, bail out!
if self.current_timestamp < service.last_chk:
return

chk = service.launch_check(now, self.hosts, self.services, self.timeperiods,
self.daemon.macromodulations, self.daemon.checkmodulations,
self.daemon.checks, force=True)
# Should not be possible to not find the check, but if so, don't crash
if not chk:
logger.error('%s > Passive service check failed. None check launched !?',
service.get_full_name())
return
# Now we 'transform the check into a result'
# So exit_status, output and status is eaten by the service
chk.exit_status = return_code
chk.get_outputs(plugin_output, service.max_plugins_output_length)
chk.status = 'waitconsume'
chk.check_time = self.current_timestamp # we are using the external command timestamps
# Set the corresponding service's check type to passive
chk.set_type_passive()
# self.daemon.nb_check_received += 1
self.send_an_element(chk)
# Ok now this result will be reap by scheduler the next loop

def process_service_output(self, service, plugin_output):
"""Process service output
Format of the line that triggers function call::
Expand Down
183 changes: 183 additions & 0 deletions test/test_monitoring_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,189 @@ def test_external_commands(self):
for log_level, log_message in expected_logs:
assert (log_level, log_message) in monitoring_logs

def test_passive_checks_host(self):
    """ Test logs for external commands - passive host checks

    The same PROCESS_HOST_CHECK_RESULT external command (status code 2) is
    submitted three times for test_host_0. After each submission the host
    state, state type and output are checked, then the monitoring logs
    collected by the broker are compared with the expected ones.

    :return: None
    """
    self.print_header()
    self.setup_with_file('cfg/cfg_monitoring_logs.cfg')
    assert self.conf_is_correct

    self._sched = self.schedulers['scheduler-master'].sched

    # -----------------------------
    # Host part
    # -----------------------------
    # Get and configure host
    host = self._sched.hosts.find_by_name("test_host_0")
    # Make sure the host exists *before* touching its attributes, else a
    # missing host would raise an AttributeError instead of failing the assert
    assert host is not None
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router which we depend of
    host.event_handler_enabled = False

    now = int(time.time())

    # Receive the very same passive host check three times: the host is
    # expected to be DOWN/SOFT for the first two and DOWN/HARD for the third
    excmd = '[%d] PROCESS_HOST_CHECK_RESULT;test_host_0;2;Host is dead' % now
    for expected_state_type in ('SOFT', 'SOFT', 'HARD'):
        self._sched.run_external_command(excmd)
        self.external_command_loop()
        assert 'DOWN' == host.state
        assert expected_state_type == host.state_type
        assert 'Host is dead' == host.output

    # Extract monitoring logs
    monitoring_logs = []
    for brok in self._sched.brokers['broker-master']['broks'].itervalues():
        if brok.type == 'monitoring_log':
            data = unserialize(brok.data)
            monitoring_logs.append((data['level'], data['message']))
            print("Log (unicode: %s): %s" % (isinstance(data['message'], unicode), data['message']))

    # Passive host check log contains:
    # - host name,
    # - host status,
    # - output,
    # - long output and
    # - performance data
    # All are separated with a semi-colon
    expected_logs = [
        (u'info',
         u'EXTERNAL COMMAND: [%s] PROCESS_HOST_CHECK_RESULT;test_host_0;2;Host is dead' % now),
        (u'info',
         u'EXTERNAL COMMAND: [%s] PROCESS_HOST_CHECK_RESULT;test_host_0;2;Host is dead' % now),
        (u'info',
         u'EXTERNAL COMMAND: [%s] PROCESS_HOST_CHECK_RESULT;test_host_0;2;Host is dead' % now),
        (u'warning',
         u'PASSIVE HOST CHECK: test_host_0;2;Host is dead;;'),
        (u'warning',
         u'PASSIVE HOST CHECK: test_host_0;2;Host is dead;;'),
        (u'warning',
         u'PASSIVE HOST CHECK: test_host_0;2;Host is dead;;'),
        (u'error',
         u'HOST ALERT: test_host_0;DOWN;SOFT;1;Host is dead'),
        (u'error',
         u'HOST ALERT: test_host_0;DOWN;SOFT;2;Host is dead'),
        (u'error',
         u'HOST ALERT: test_host_0;DOWN;HARD;3;Host is dead'),
        (u'error',
         u'HOST NOTIFICATION: test_contact;test_host_0;DOWN;notify-host;Host is dead')
    ]
    # Every expected log must have been raised (the order is not checked)
    for log_level, log_message in expected_logs:
        print("Msg: %s" % log_message)
        assert (log_level, log_message) in monitoring_logs

def test_passive_checks_service(self):
    """ Test logs for external commands - passive service checks

    A passive host check (UP) is submitted for test_host_0, then two passive
    service checks (OK) for test_ok_0: one with performance data only and
    one with non-ASCII characters in the output, performance data and a long
    output. The resulting service attributes and the monitoring logs
    collected by the broker are then checked.

    :return: None
    """
    self.print_header()
    self.setup_with_file('cfg/cfg_monitoring_logs.cfg')
    assert self.conf_is_correct

    self._sched = self.schedulers['scheduler-master'].sched

    now = int(time.time())

    # -----------------------------
    # Service part
    # -----------------------------
    # Get host
    host = self._sched.hosts.find_by_name('test_host_0')
    # Make sure the host exists *before* touching its attributes
    assert host is not None
    host.checks_in_progress = []
    host.event_handler_enabled = False
    host.active_checks_enabled = True
    host.passive_checks_enabled = True

    # Get service
    svc = self._sched.services.find_srv_by_name_and_hostname("test_host_0", "test_ok_0")
    # Make sure the service exists *before* touching its attributes
    assert svc is not None
    svc.checks_in_progress = []
    svc.event_handler_enabled = False
    svc.active_checks_enabled = True
    svc.passive_checks_enabled = True

    # Passive checks for host and service
    # ---------------------------------------------
    # Receive passive host check Up
    excmd = '[%d] PROCESS_HOST_CHECK_RESULT;test_host_0;0;Host is UP' % time.time()
    # self._sched is the master scheduler 'sched' object (set above)
    self._sched.run_external_command(excmd)
    self.external_command_loop()
    assert 'UP' == host.state
    assert 'Host is UP' == host.output

    # Service is going ok ...
    excmd = '[%d] PROCESS_SERVICE_CHECK_RESULT;test_host_0;test_ok_0;0;' \
            'Service is OK|rtt=9999;5;10;0;10000' % now
    self._sched.run_external_command(excmd)
    self.external_command_loop()
    assert 'OK' == svc.state
    assert 'Service is OK' == svc.output
    assert 'rtt=9999;5;10;0;10000' == svc.perf_data

    # Service is going ok ... with long output
    excmd = '[%d] PROCESS_SERVICE_CHECK_RESULT;test_host_0;test_ok_0;0;' \
            'Service is OK and have some special characters: àéèüäï' \
            '|rtt=9999;5;10;0;10000' \
            '\r\nLong output... also some specials: àéèüäï' % now
    self._sched.run_external_command(excmd)
    self.external_command_loop()
    assert 'OK' == svc.state
    # The output and the long output are expected as unicode strings
    assert u'Service is OK and have some special characters: àéèüäï' == svc.output
    assert 'rtt=9999;5;10;0;10000' == svc.perf_data
    assert u'Long output... also some specials: àéèüäï' == svc.long_output

    # Extract monitoring logs
    monitoring_logs = []
    for brok in self._sched.brokers['broker-master']['broks'].itervalues():
        if brok.type == 'monitoring_log':
            data = unserialize(brok.data)
            monitoring_logs.append((data['level'], data['message']))
            print("Log (unicode: %s): %s" % (isinstance(data['message'], unicode), data['message']))

    # Passive service check log contains:
    # - host name,
    # - service name,
    # - service status,
    # - output,
    # - long output and
    # - performance data
    # All are separated with a semi-colon
    expected_logs = [
        (u'info',
         u'EXTERNAL COMMAND: [%s] PROCESS_SERVICE_CHECK_RESULT;test_host_0;test_ok_0;0;'
         u'Service is OK|rtt=9999;5;10;0;10000' % now),
        (u'info',
         u'PASSIVE SERVICE CHECK: test_host_0;test_ok_0;0;Service is OK;;rtt=9999;5;10;0;10000'),

        (u'info',
         u'EXTERNAL COMMAND: [%s] PROCESS_SERVICE_CHECK_RESULT;test_host_0;test_ok_0;0;'
         u'Service is OK and have some special characters: àéèüäï'
         u'|rtt=9999;5;10;0;10000'
         u'\r\nLong output... also some specials: àéèüäï' % now),
        (u'info',
         u'PASSIVE SERVICE CHECK: test_host_0;test_ok_0;0;'
         u'Service is OK and have some special characters: àéèüäï;'
         u'Long output... also some specials: àéèüäï;'
         u'rtt=9999;5;10;0;10000')
    ]
    # Every expected log must have been raised (the order is not checked)
    for log_level, log_message in expected_logs:
        print("Msg: %s" % log_message)
        assert (log_level, log_message) in monitoring_logs

def test_special_external_commands(self):
""" Test logs for special external commands
:return:
Expand Down

0 comments on commit 7fd92a6

Please sign in to comment.