diff --git a/SingularityService/src/main/java/com/hubspot/singularity/config/UIConfiguration.java b/SingularityService/src/main/java/com/hubspot/singularity/config/UIConfiguration.java index 78f88dd96e..1ad1f483ac 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/config/UIConfiguration.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/config/UIConfiguration.java @@ -149,4 +149,5 @@ public String getTaskS3LogOmitPrefix() { public void setTaskS3LogOmitPrefix(String taskS3LogOmitPrefix) { this.taskS3LogOmitPrefix = taskS3LogOmitPrefix; } + } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/views/IndexView.java b/SingularityService/src/main/java/com/hubspot/singularity/views/IndexView.java index 5b4823e79e..b1abae7ba7 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/views/IndexView.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/views/IndexView.java @@ -37,6 +37,8 @@ public class IndexView extends View { private final String taskS3LogOmitPrefix; + private final Integer warnIfScheduledJobIsRunningPastNextRunPct; + public IndexView(String singularityUriBase, String appRoot, SingularityConfiguration configuration) { super("index.mustache"); @@ -72,6 +74,8 @@ public IndexView(String singularityUriBase, String appRoot, SingularityConfigura this.commonHostnameSuffixToOmit = configuration.getCommonHostnameSuffixToOmit().or(""); this.taskS3LogOmitPrefix = configuration.getUiConfiguration().getTaskS3LogOmitPrefix(); + + this.warnIfScheduledJobIsRunningPastNextRunPct = configuration.getWarnIfScheduledJobIsRunningPastNextRunPct(); } public String getAppRoot() { @@ -150,6 +154,10 @@ public String getTaskS3LogOmitPrefix() { return taskS3LogOmitPrefix; } + public Integer getWarnIfScheduledJobIsRunningPastNextRunPct() { + return warnIfScheduledJobIsRunningPastNextRunPct; + } + @Override public String toString() { return "IndexView[" + @@ -171,6 +179,7 @@ public String toString() { ", runningTaskLogPath='" + runningTaskLogPath + '\'' + ", finishedTaskLogPath='" + finishedTaskLogPath + '\'' + ", commonHostnameSuffixToOmit='" + commonHostnameSuffixToOmit + '\'' + + ", warnIfScheduledJobIsRunningPastNextRunPct='" + warnIfScheduledJobIsRunningPastNextRunPct + '\'' + ']'; } } diff --git a/SingularityUI/app/assets/_index.mustache b/SingularityUI/app/assets/_index.mustache index f9ec667e42..a563c6aee4 100644 --- a/SingularityUI/app/assets/_index.mustache +++ b/SingularityUI/app/assets/_index.mustache @@ -36,6 +36,7 @@ commonHostnameSuffixToOmit: "{{{ commonHostnameSuffixToOmit }}}", taskS3LogOmitPrefix: "{{{ taskS3LogOmitPrefix }}}", slaveHttpPort: {{{slaveHttpPort}}}, + warnIfScheduledJobIsRunningPastNextRunPct: {{{warnIfScheduledJobIsRunningPastNextRunPct}}}, {{#slaveHttpsPort}} slaveHttpsPort: {{{slaveHttpsPort}}} {{/slaveHttpsPort}} diff --git a/SingularityUI/app/collections/Alerts.coffee b/SingularityUI/app/collections/Alerts.coffee new file mode 100644 index 0000000000..338d2f8be2 --- /dev/null +++ b/SingularityUI/app/collections/Alerts.coffee @@ -0,0 +1,8 @@ +Collection = require './collection' + +class Alerts extends Collection + + initialize: (models) => + + +module.exports = Alerts diff --git a/SingularityUI/app/controllers/TaskDetail.coffee b/SingularityUI/app/controllers/TaskDetail.coffee index d5998bb733..c7f55a3de1 100644 --- a/SingularityUI/app/controllers/TaskDetail.coffee +++ b/SingularityUI/app/controllers/TaskDetail.coffee @@ -7,6 +7,8 @@ TaskS3Logs = require '../collections/TaskS3Logs' TaskFiles = require '../collections/TaskFiles' TaskCleanups = require '../collections/TaskCleanups' Deploys = require '../collections/Deploys' +DeployDetails = require '../models/DeployDetails' +Alerts = require '../collections/Alerts' FileBrowserSubview = require '../views/fileBrowserSubview' ExpandableTableSubview = require '../views/expandableTableSubview' @@ -28,6 +30,7 @@ class TaskDetailController extends Controller info: require '../templates/taskDetail/taskInfo' environment: require '../templates/taskDetail/taskEnvironment' resourceUsage: require '../templates/taskDetail/taskResourceUsage' + alerts: require '../templates/alerts' initialize: ({@taskId, @filePath}) -> @title @taskId @@ -50,6 +53,8 @@ class TaskDetailController extends Controller @collections.pendingDeploys = new Deploys state: 'pending' + @collections.alerts = new Alerts + # # Subviews # @@ -97,6 +102,10 @@ class TaskDetailController extends Controller model: @models.resourceUsage template: @templates.resourceUsage + @subviews.alerts = new SimpleSubview + collection: @collections.alerts + template: @templates.alerts + # # Getting stuff in gear # @@ -125,6 +134,47 @@ class TaskDetailController extends Controller app.caughtError() delete @models.resourceUsage + getAlerts: => + alerts = [] + task = @models.task + requestId = @models.task.attributes.task.taskRequest.request.id + deployId = @models.task.attributes.task.taskRequest.deploy.id + + # Is this a scheduled task that has been running much longer than previous ones? + if task.attributes.task.taskRequest.request.requestType == 'SCHEDULED' and task.get('isStillRunning') + deployInfo = new DeployDetails + deployId: deployId + requestId: requestId + deployPromise = deployInfo.fetch() + deployPromise.done => + avg = deployInfo.get('deployStatistics')?.averageRuntimeMillis + current = new Date().getTime() - task.get('task').taskId.startedAt + threshold = window.config.warnIfScheduledJobIsRunningPastNextRunPct / 100 + # Alert if current uptime is longer than the average * the configurable percentage + if current > (avg * threshold) + alerts.push + title: 'Warning:', + message: "This scheduled task has been running longer than #{threshold} times the average for the request and may be stuck.", + level: 'warning' + # Was this task killed by a decommissioning slave? + if !task.get('isStillRunning') + updates = task.get('taskUpdates') + decomMessage = updates.filter (u) => + return u.statusMessage?.indexOf('DECOMISSIONING') != -1 and u.taskState == 'TASK_CLEANING' + killedMessage = updates.filter (u) => + return u.taskState == 'TASK_KILLED' + if decomMessage.length > 0 and killedMessage.length > 0 + alerts.push + title: 'Alert:', + message: 'This task was killed due to a slave decommissioning.', + level: 'danger' + + if deployPromise + deployPromise.done => + @collections.alerts.reset(alerts) + else + @collections.alerts.reset(alerts) + refresh: -> @resourcesFetched = false @@ -135,12 +185,13 @@ class TaskDetailController extends Controller @models.task.fetch() .done => @fetchResourceUsage() if @models.task.get('isStillRunning') + .success => + @getAlerts() .error => # If this 404s the task doesn't exist app.caughtError() app.router.notFound() - if @collections.s3Logs?.currentPage is 1 @collections.s3Logs.fetch().error => # It probably means S3 logs haven't been configured diff --git a/SingularityUI/app/templates/alerts.hbs b/SingularityUI/app/templates/alerts.hbs new file mode 100644 index 0000000000..f2f0b0289d --- /dev/null +++ b/SingularityUI/app/templates/alerts.hbs @@ -0,0 +1,5 @@ +{{#each data}} + +{{/each}} diff --git a/SingularityUI/app/templates/taskDetail/taskBase.hbs b/SingularityUI/app/templates/taskDetail/taskBase.hbs index ce354831ae..a52e436c73 100644 --- a/SingularityUI/app/templates/taskDetail/taskBase.hbs +++ b/SingularityUI/app/templates/taskDetail/taskBase.hbs @@ -4,39 +4,43 @@
+
+ +
+
- +
- +
- +
- +
- +
- +
- +
- +
- +
diff --git a/SingularityUI/app/views/task.coffee b/SingularityUI/app/views/task.coffee index cd5a5e8e70..313168282c 100644 --- a/SingularityUI/app/views/task.coffee +++ b/SingularityUI/app/views/task.coffee @@ -18,6 +18,7 @@ class TaskView extends View # Plop subview contents in there. It'll take care of everything itself @$('#overview').html @subviews.overview.$el + @$('#alerts').html @subviews.alerts.$el @$('#healthcheck-notification').html @subviews.healthcheckNotification.$el @$('#history').html @subviews.history.$el @$('#file-browser').html @subviews.fileBrowser.$el diff --git a/SingularityUI/config.coffee b/SingularityUI/config.coffee index 986da4ef06..537869624d 100644 --- a/SingularityUI/config.coffee +++ b/SingularityUI/config.coffee @@ -28,7 +28,7 @@ exports.config = # When running SingularityUI via brunch server we need to make an index.html for it # based on the template that's shared with SingularityService - # + # # After we compile the static files, compile index.html using some required configs onCompile: => destination = path.resolve @config.paths.public, 'index.html' @@ -54,6 +54,7 @@ exports.config = finishedTaskLogPath: process.env.SINGULARITY_FINISHED_TASK_LOG_PATH ? "stdout" commonHostnameSuffixToOmit: process.env.SINGULARITY_COMMON_HOSTNAME_SUFFIX_TO_OMIT ? "" taskS3LogOmitPrefix: process.env.SINGULARITY_TASK_S3_LOG_OMIT_PREFIX ? '' + warnIfScheduledJobIsRunningPastNextRunPct: process.env.SINGULARITY_WARN_IF_SCHEDULED_JOB_IS_RUNNING_PAST_NEXT_RUN_PCT ? 200 compiledTemplate = handlebars.compile(indexTemplate)(templateData) fs.writeFileSync destination, compiledTemplate