Skip to content

Commit

Permalink
For #962 - allow to get metrics about CPU and memory of the Alignak d…
Browse files Browse the repository at this point in the history
…aemons:

- add an information log
- send metrics to StatsD if Alignak is configured for it

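Define the environment variable ALIGNAK_DAEMONS_MONITORING to enable this self-monitoring; its value is the number of daemon loop iterations between two reports (log and StatsD sending). An empty or non-integer value falls back to a period of 10 iterations.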
  • Loading branch information
mohierf committed Nov 26, 2017
1 parent 5d1578f commit deea52d
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 6 deletions.
50 changes: 50 additions & 0 deletions alignak/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,10 @@ def get_all_groups():
from alignak.misc.common import setproctitle
from alignak.version import VERSION

if 'ALIGNAK_DAEMONS_MONITORING' in os.environ:
import psutil


# Friendly names for the system signals
SIGNALS_TO_NAMES_DICT = dict((k, v) for v, k in reversed(sorted(signal.__dict__.items()))
if v.startswith('SIG') and not v.startswith('SIG_'))
Expand Down Expand Up @@ -255,6 +259,20 @@ def __init__(self, name, config_file, is_daemon, do_replace,
self.interrupted = False
self.pidfile = "%s.pid" % self.name

# Self daemon monitoring (cpu, memory)
self.daemon_monitoring = False
self.daemon_monitoring_period = 10
if 'ALIGNAK_DAEMONS_MONITORING' in os.environ:
self.daemon_monitoring = True
if os.environ['ALIGNAK_DAEMONS_MONITORING']:
try:
self.daemon_monitoring_period = int(os.environ['ALIGNAK_DAEMONS_MONITORING'])
except ValueError:
pass
if self.daemon_monitoring:
logger.info("Daemon '%s' self monitoring is enabled, reporting every %d loop count.",
self.name, self.daemon_monitoring_period)

if port:
self.port = int(port)
print("Daemon '%s' is started with an overidden port number: %d"
Expand Down Expand Up @@ -418,6 +436,38 @@ def do_mainloop(self):
logger.debug('Ask for configuration reloading')
return

if self.daemon_monitoring and (self.loop_count % self.daemon_monitoring_period == 1):
perfdatas = []
my_process = psutil.Process()
with my_process.oneshot():
perfdatas.append("num_threads=%d" % my_process.num_threads())
statsmgr.counter("num_threads", my_process.num_threads())
# perfdatas.append("num_ctx_switches=%d" % my_process.num_ctx_switches())
perfdatas.append("num_fds=%d" % my_process.num_fds())
# perfdatas.append("num_handles=%d" % my_process.num_handles())
perfdatas.append("create_time=%d" % my_process.create_time())
perfdatas.append("cpu_num=%d" % my_process.cpu_num())
statsmgr.counter("cpu_num", my_process.cpu_num())
perfdatas.append("cpu_usable=%d" % len(my_process.cpu_affinity()))
statsmgr.counter("cpu_usable", len(my_process.cpu_affinity()))
perfdatas.append("cpu_percent=%.2f%%" % my_process.cpu_percent())
statsmgr.counter("cpu_percent", my_process.cpu_percent())

cpu_times_percent = my_process.cpu_times()
for key in cpu_times_percent._fields:
perfdatas.append("cpu_%s_time=%.2fs" % (key,
getattr(cpu_times_percent, key)))
statsmgr.counter("cpu_%s_time" % key, getattr(cpu_times_percent, key))

memory = my_process.memory_full_info()
for key in memory._fields:
perfdatas.append("mem_%s=%db" % (key, getattr(memory, key)))
statsmgr.counter("mem_%s" % key, getattr(memory, key))

logger.debug("Daemon %s (%s), pid=%s, ppid=%s, status=%s, cpu/memory|%s",
self.name, my_process.name(), my_process.pid, my_process.ppid(),
my_process.status(), " ".join(perfdatas))

if self.log_loop:
logger.debug("[%s] +++ %d", self.name, self.loop_count)
# Maybe we ask us to die, if so, do it :)
Expand Down
2 changes: 1 addition & 1 deletion alignak/objects/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2697,7 +2697,7 @@ def cut_into_parts(self): # pylint: disable=R0912,R0914
cur_conf.contacts = self.contacts
cur_conf.triggers = self.triggers
cur_conf.escalations = self.escalations
# Create hostgroups with just the name and same id, but no members
# Create servicegroups with just the name and same id, but no members
new_servicegroups = []
for servicegroup in self.servicegroups:
new_servicegroups.append(servicegroup.copy_shell())
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@ numpy>=1.9.0; python_version >= '2.7'
pyopenssl>=0.15
docopt

# Use psutil for daemons CPU and memory monitoring (env ALIGNAK_DAEMONS_MONITORING)
# Use psutil for scheduler TEST_LOG_MONITORING
psutil
3 changes: 3 additions & 0 deletions test_run/test_launch_daemons.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,9 @@ def _run_daemons_and_test_api(self, ssl=False):
"""
req = requests.Session()

# Set an environment variable to activate the daemons self-monitoring (CPU and memory), reported every 2 loop iterations
os.environ['ALIGNAK_DAEMONS_MONITORING'] = '2'

cfg_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),
'run/test_launch_daemons')

Expand Down
10 changes: 5 additions & 5 deletions test_run/test_launch_daemons_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ def test_daemons_modules_logs_restart_module(self):
"[alignak.modulesmanager] Module properties: {'daemons': ['broker'], 'phases': ['running'], 'type': 'logs', 'external': True}",
"[alignak.modulesmanager] Imported 'alignak_module_logs' for logs",
"[alignak.modulesmanager] Loaded Python module 'alignak_module_logs' (logs)",
"[alignak.module] Give an instance of alignak_module_logs for alias: logs",
# "[alignak.module] Give an instance of alignak_module_logs for alias: logs",
"[alignak.module.logs] logger default configuration:",
"[alignak.module.logs] - rotating logs in /tmp/monitoring-logs.log",
"[alignak.module.logs] - log level: 20",
Expand Down Expand Up @@ -471,7 +471,7 @@ def test_daemons_modules_ws(self):
"[alignak.modulesmanager] Module properties: {'daemons': ['receiver'], 'phases': ['running'], 'type': 'web-services', 'external': True}",
"[alignak.modulesmanager] Imported 'alignak_module_ws' for web-services",
"[alignak.modulesmanager] Loaded Python module 'alignak_module_ws' (web-services)",
"[alignak.module] Give an instance of alignak_module_ws for alias: web-services",
# "[alignak.module] Give an instance of alignak_module_ws for alias: web-services",
"[alignak.module.web-services] Alignak host creation allowed: False",
"[alignak.module.web-services] Alignak service creation allowed: False",
"[alignak.module.web-services] Alignak external commands, set timestamp: True",
Expand Down Expand Up @@ -569,7 +569,7 @@ def test_daemons_modules_backend(self):
"[alignak.modulesmanager] Module properties: {'daemons': ['arbiter'], 'phases': ['configuration'], 'type': 'backend_arbiter', 'external': False}",
"[alignak.modulesmanager] Imported 'alignak_module_backend.arbiter' for backend_arbiter",
"[alignak.modulesmanager] Loaded Python module 'alignak_module_backend.arbiter' (backend_arbiter)",
"[alignak.module] Give an instance of alignak_module_backend.arbiter for alias: backend_arbiter",
# "[alignak.module] Give an instance of alignak_module_backend.arbiter for alias: backend_arbiter",
"[alignak.module.backend_arbiter] Number of processes used by backend client: 1",
"[alignak.module.backend_arbiter] Alignak backend is not available for login. No backend connection, attempt: 1",
"[alignak.module.backend_arbiter] Alignak backend is not available for login. No backend connection, attempt: 2",
Expand All @@ -589,7 +589,7 @@ def test_daemons_modules_backend(self):
"[alignak.modulesmanager] Module properties: {'daemons': ['broker'], 'type': 'backend_broker', 'external': True}",
"[alignak.modulesmanager] Imported 'alignak_module_backend.broker' for backend_broker",
"[alignak.modulesmanager] Loaded Python module 'alignak_module_backend.broker' (backend_broker)",
"[alignak.module] Give an instance of alignak_module_backend.broker for alias: backend_broker",
# "[alignak.module] Give an instance of alignak_module_backend.broker for alias: backend_broker",
"[alignak.module.backend_broker] Number of processes used by backend client: 1",
"[alignak.module.backend_broker] Alignak backend is not available for login. No backend connection, attempt: 1",
"[alignak.module.backend_broker] Alignak backend connection is not available. Checking if livestate update is allowed is not possible.",
Expand All @@ -601,7 +601,7 @@ def test_daemons_modules_backend(self):
"[alignak.modulesmanager] Module properties: {'daemons': ['scheduler'], 'phases': ['running'], 'type': 'backend_scheduler', 'external': False}",
"[alignak.modulesmanager] Imported 'alignak_module_backend.scheduler' for backend_scheduler",
"[alignak.modulesmanager] Loaded Python module 'alignak_module_backend.scheduler' (backend_scheduler)",
"[alignak.module] Give an instance of alignak_module_backend.scheduler for alias: backend_scheduler",
# "[alignak.module] Give an instance of alignak_module_backend.scheduler for alias: backend_scheduler",
"[alignak.module.backend_scheduler] Number of processes used by backend client: 1",
"[alignak.module.backend_scheduler] Alignak backend is not available for login. No backend connection, attempt: 1",
"[alignak.modulesmanager] Trying to initialize module: backend_scheduler",
Expand Down
1 change: 1 addition & 0 deletions test_run/test_launch_daemons_realms_and_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def test_correct_checks_launch_and_result(self):
# Set an environment variable to activate the logging of checks execution
# With this the pollers/schedulers will raise WARNING logs about the checks execution
os.environ['TEST_LOG_ACTIONS'] = 'INFO'
os.environ['ALIGNAK_DAEMONS_MONITORING'] = '2'

# Run daemons for 2 minutes
self.run_and_check_alignak_daemons(120)
Expand Down

0 comments on commit deea52d

Please sign in to comment.