Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add service check for gunicorn #1163

Merged
merged 4 commits into from
Dec 10, 2014
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions checks.d/gunicorn.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class GUnicornCheck(AgentCheck):
# Worker state tags.
IDLE_TAGS = ["state:idle"]
WORKING_TAGS = ["state:working"]
SVC_NAME = "gunicorn.is_running"

def get_library_versions(self):
    """Return a dict mapping library name to its version string.

    Only psutil is reported, using its ``__version__`` attribute.
    """
    versions = {}
    versions["psutil"] = psutil.__version__
    return versions
Expand All @@ -45,6 +46,17 @@ def check(self, instance):
worker_procs = master_proc.get_children()
working, idle = self._count_workers(worker_procs)

# if no workers are running, alert critical

msg = "%s working and %s idle workers for %s" % (working, idle, proc_name)

if working == 0 and idle == 0:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not clean this up a bit?

status = CRIT if working == 0 and idle == 0 else OK
 self.service_check(self.SVC_NAME, status, tags=['app:'+ proc_name], message=msg)

It makes the intent clearer: always submit a value, just with a different status, and with less duplicated code.

self.service_check(self.SVC_NAME, AgentCheck.CRITICAL, tags=['app:'+ proc_name],
message=msg)
else:
self.service_check(self.SVC_NAME, AgentCheck.OK, tags=['app:'+ proc_name],
message=msg)

# Submit the data.
self.log.debug("instance %s procs - working:%s idle:%s" % (proc_name, working, idle))
self.gauge("gunicorn.workers", working, self.WORKING_TAGS)
Expand All @@ -66,17 +78,16 @@ def _count_workers(self, worker_procs):
except psutil.NoSuchProcess:
self.warning('Process %s disappeared while scanning' % proc.name)
continue

# Let them do a little bit more work.
time.sleep(self.CPU_SLEEP_SECS)

# Processes which have used more CPU are considered active (this is a very
# Processes which have used more CPU are considered active (this is a very
# naive check, but gunicorn exposes no stats API)
for proc in worker_procs:
if proc.pid not in cpu_time_by_pid:
# The process is not running anymore, we didn't collect initial cpu times
continue

try:
cpu_time = sum(proc.get_cpu_times())
except Exception:
Expand All @@ -95,6 +106,9 @@ def _get_master_proc_by_name(self, name):
master_name = GUnicornCheck._get_master_proc_name(name)
master_procs = [p for p in psutil.process_iter() if p.cmdline() and p.cmdline()[0] == master_name]
if len(master_procs) == 0:
# process not found, it's dead.
self.service_check(self.SVC_NAME, AgentCheck.CRITICAL, tags=['app:'+ name],
message="No gunicorn process with name %s found" % name)
raise GUnicornCheckError("Found no master process with name: %s" % master_name)
elif len(master_procs) > 1:
raise GUnicornCheckError("Found more than one master process with name: %s" % master_name)
Expand All @@ -113,4 +127,3 @@ def _get_master_proc_name(name):

class GUnicornCheckError(Exception):
    """Error raised by the gunicorn check.

    For example, it is raised when no master process, or more than one,
    matches the expected master process name.
    """