-
Notifications
You must be signed in to change notification settings - Fork 815
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Marathon and Mesos svc #1205
Marathon and Mesos svc #1205
Changes from 4 commits
6c68400
a69cd62
f98583c
78b6b5b
9f0daf6
268e77d
198eb01
f38f7a7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,9 @@ | |
import requests | ||
|
||
class Marathon(AgentCheck): | ||
|
||
check_name = "marathon.can_connect" | ||
|
||
def check(self, instance): | ||
if 'url' not in instance: | ||
raise Exception('Marathon instance missing "url" value.') | ||
|
@@ -28,35 +31,61 @@ def check(self, instance): | |
for app in response['apps']: | ||
tags = ['app_id:' + app['id'], 'version:' + app['version']] + instance_tags | ||
for attr in ['taskRateLimit','instances','cpus','mem','tasksStaged','tasksRunning']: | ||
self.gauge('marathon.' + attr, app[attr], tags=tags) | ||
if hasattr(app, attr): | ||
self.gauge('marathon.' + attr, app[attr], tags=tags) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we log something in debug if the attribute is not present? (See comments in that PR: #1228) |
||
else: | ||
self.warning('Marathon application (id: %s) has no attribute %s' % (app['id'], attr)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That should be self.log.debug, as it can be expected in some cases depending on the version of Marathon. |
||
versions_reply = self.get_v2_app_versions(url, app['id'], timeout) | ||
if versions_reply is not None: | ||
self.gauge('marathon.versions', len(versions_reply['versions']), tags=tags) | ||
|
||
def get_v2_apps(self, url, timeout): | ||
# Use a hash of the URL as an aggregation key | ||
aggregation_key = md5(url).hexdigest() | ||
tags = ['url:%s' % url] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We may also want the [inline code lost in extraction; presumably the instance's custom tags]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we? We don't for other service checks. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. +1 on what Remi said. We don't use custom tags in service checks for now. They should all share the same keys in the list of key:val tags. |
||
msg = None | ||
status = None | ||
try: | ||
r = requests.get(url + "/v2/apps", timeout=timeout) | ||
if r.status_code != 200: | ||
self.status_code_event(url, r, aggregation_key) | ||
status = AgentCheck.CRITICAL | ||
msg = "Got %s when hitting %s" % (r.status_code, url) | ||
except requests.exceptions.Timeout as e: | ||
# If there's a timeout | ||
self.timeout_event(url, timeout, aggregation_key) | ||
raise Exception("Timeout when hitting %s" % url) | ||
msg = "%s seconds timeout when hitting %s" % (timeout, url) | ||
status = AgentCheck.CRITICAL | ||
except Exception as e: | ||
msg = e.message | ||
status = AgentCheck.CRITICAL | ||
finally: | ||
if status is AgentCheck.CRITICAL: | ||
self.service_check(self.check_name, status, tags=tags, message=msg) | ||
raise Exception(msg) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No need to do both. |
||
|
||
if r.status_code != 200: | ||
self.status_code_event(url, r, aggregation_key) | ||
raise Exception("Got %s when hitting %s" % (r.status_code, url)) | ||
req_json = r.json() | ||
app_count = len(req_json['apps']) | ||
|
||
# Condition for request v1.x backward compatibility | ||
if hasattr(r.json, '__call__'): | ||
return r.json() | ||
if app_count is 0: | ||
status = AgentCheck.WARN | ||
msg = "No marathon applications detected at %s" % url | ||
else: | ||
return r.json | ||
instance_count = 0 | ||
for app in req_json['apps']: | ||
instance_count += app['instances'] | ||
status = AgentCheck.CRITICAL if instance_count is 0 else AgentCheck.OK | ||
msg = "%s Marathon app(s) detected with %s instances running at %s" % (app_count, instance_count, url) | ||
|
||
|
||
self.service_check(self.check_name, status, tags=tags, message=msg) | ||
|
||
return req_json | ||
|
||
def get_v2_app_versions(self, url, app_id, timeout): | ||
# Use a hash of the URL as an aggregation key | ||
aggregation_key = md5(url).hexdigest() | ||
|
||
try: | ||
r = requests.get(url + "/v2/apps/" + app_id + "/versions", timeout=timeout) | ||
except requests.exceptions.Timeout as e: | ||
|
@@ -68,7 +97,6 @@ def get_v2_app_versions(self, url, app_id, timeout): | |
if r.status_code != 200: | ||
self.status_code_event(url, r, aggregation_key) | ||
self.warning("Got %s when hitting %s" % (r.status_code, url)) | ||
return None | ||
|
||
return r.json() | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,8 @@ | |
import requests | ||
|
||
class Mesos(AgentCheck): | ||
check_name = "mesos.can_connect" | ||
|
||
def check(self, instance): | ||
if 'url' not in instance: | ||
raise Exception('Mesos instance missing "url" value.') | ||
|
@@ -71,25 +73,33 @@ def get_master_state(self, url, timeout): | |
def get_json(self, url, timeout): | ||
# Use a hash of the URL as an aggregation key | ||
aggregation_key = md5(url).hexdigest() | ||
tags = ["url:%s" % url] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here with the concat of [inline code lost in extraction; presumably the instance's custom tags]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same comment. We don't send custom tags in service checks for other checks. |
||
msg = None | ||
status = None | ||
try: | ||
r = requests.get(url, timeout=timeout) | ||
if r.status_code != 200: | ||
self.status_code_event(url, r, aggregation_key) | ||
status = AgentCheck.CRITICAL | ||
msg = "Got %s when hitting %s" % (r.status_code, url) | ||
else: | ||
status = AgentCheck.OK | ||
msg = "Mesos master instance detected at %s " % url | ||
except requests.exceptions.Timeout as e: | ||
# If there's a timeout | ||
self.timeout_event(url, timeout, aggregation_key) | ||
self.warning("Timeout when hitting %s" % url) | ||
return None | ||
|
||
if r.status_code != 200: | ||
self.status_code_event(url, r, aggregation_key) | ||
self.warning("Got %s when hitting %s" % (r.status_code, url)) | ||
return None | ||
|
||
# Condition for request v1.x backward compatibility | ||
if hasattr(r.json, '__call__'): | ||
return r.json() | ||
else: | ||
return r.json | ||
|
||
msg = "%s seconds timeout when hitting %s" % (timeout, url) | ||
status = AgentCheck.CRITICAL | ||
except Exception as e: | ||
msg = e.message | ||
status = AgentCheck.CRITICAL | ||
finally: | ||
self.service_check(self.check_name, status, tags=tags, message=msg) | ||
if status is AgentCheck.CRITICAL: | ||
self.warning(msg) | ||
return None | ||
|
||
return r.json() | ||
|
||
def timeout_event(self, url, timeout, aggregation_key): | ||
self.event({ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should probably be upper case (https://www.python.org/dev/peps/pep-0008/#constants)
I'll fix it.