diff --git a/SingularityService/src/main/java/com/hubspot/singularity/config/UIConfiguration.java b/SingularityService/src/main/java/com/hubspot/singularity/config/UIConfiguration.java
index 78f88dd96e..1ad1f483ac 100644
--- a/SingularityService/src/main/java/com/hubspot/singularity/config/UIConfiguration.java
+++ b/SingularityService/src/main/java/com/hubspot/singularity/config/UIConfiguration.java
@@ -149,4 +149,5 @@ public String getTaskS3LogOmitPrefix() {
public void setTaskS3LogOmitPrefix(String taskS3LogOmitPrefix) {
this.taskS3LogOmitPrefix = taskS3LogOmitPrefix;
}
+
}
diff --git a/SingularityService/src/main/java/com/hubspot/singularity/views/IndexView.java b/SingularityService/src/main/java/com/hubspot/singularity/views/IndexView.java
index 5b4823e79e..b1abae7ba7 100644
--- a/SingularityService/src/main/java/com/hubspot/singularity/views/IndexView.java
+++ b/SingularityService/src/main/java/com/hubspot/singularity/views/IndexView.java
@@ -37,6 +37,8 @@ public class IndexView extends View {
private final String taskS3LogOmitPrefix;
+ private final Integer warnIfScheduledJobIsRunningPastNextRunPct;
+
public IndexView(String singularityUriBase, String appRoot, SingularityConfiguration configuration) {
super("index.mustache");
@@ -72,6 +74,8 @@ public IndexView(String singularityUriBase, String appRoot, SingularityConfigura
this.commonHostnameSuffixToOmit = configuration.getCommonHostnameSuffixToOmit().or("");
this.taskS3LogOmitPrefix = configuration.getUiConfiguration().getTaskS3LogOmitPrefix();
+
+ this.warnIfScheduledJobIsRunningPastNextRunPct = configuration.getWarnIfScheduledJobIsRunningPastNextRunPct();
}
public String getAppRoot() {
@@ -150,6 +154,10 @@ public String getTaskS3LogOmitPrefix() {
return taskS3LogOmitPrefix;
}
+  /**
+   * Exposes the scheduled-job runtime warning threshold to the index template.
+   *
+   * <p>The value is copied from the service configuration when the view is
+   * constructed and is rendered into the page's client-side config, where the
+   * UI divides it by 100 to obtain a multiplier of a deploy's average runtime.
+   *
+   * @return the threshold, expressed as a percentage
+   */
+  public Integer getWarnIfScheduledJobIsRunningPastNextRunPct() {
+    return this.warnIfScheduledJobIsRunningPastNextRunPct;
+  }
+
@Override
public String toString() {
return "IndexView[" +
@@ -171,6 +179,7 @@ public String toString() {
", runningTaskLogPath='" + runningTaskLogPath + '\'' +
", finishedTaskLogPath='" + finishedTaskLogPath + '\'' +
", commonHostnameSuffixToOmit='" + commonHostnameSuffixToOmit + '\'' +
+ ", warnIfScheduledJobIsRunningPastNextRunPct='" + warnIfScheduledJobIsRunningPastNextRunPct + '\'' +
']';
}
}
diff --git a/SingularityUI/app/assets/_index.mustache b/SingularityUI/app/assets/_index.mustache
index f9ec667e42..a563c6aee4 100644
--- a/SingularityUI/app/assets/_index.mustache
+++ b/SingularityUI/app/assets/_index.mustache
@@ -36,6 +36,7 @@
commonHostnameSuffixToOmit: "{{{ commonHostnameSuffixToOmit }}}",
taskS3LogOmitPrefix: "{{{ taskS3LogOmitPrefix }}}",
slaveHttpPort: {{{slaveHttpPort}}},
+ warnIfScheduledJobIsRunningPastNextRunPct: {{{warnIfScheduledJobIsRunningPastNextRunPct}}},
{{#slaveHttpsPort}}
slaveHttpsPort: {{{slaveHttpsPort}}}
{{/slaveHttpsPort}}
diff --git a/SingularityUI/app/collections/Alerts.coffee b/SingularityUI/app/collections/Alerts.coffee
new file mode 100644
index 0000000000..338d2f8be2
--- /dev/null
+++ b/SingularityUI/app/collections/Alerts.coffee
@@ -0,0 +1,8 @@
+Collection = require './collection'
+
+# Collection holding the alert entries shown for a task. Nothing here fetches
+# from the API: the task detail controller fills it via `reset(alerts)` and a
+# SimpleSubview renders it with the `alerts` template.
+class Alerts extends Collection
+
+ # No-op initializer: accepts the initial models but performs no setup.
+ # NOTE(review): this overrides any `initialize` defined on the base
+ # Collection -- confirm that skipping the parent's setup is intended.
+ initialize: (models) =>
+
+
+module.exports = Alerts
diff --git a/SingularityUI/app/controllers/TaskDetail.coffee b/SingularityUI/app/controllers/TaskDetail.coffee
index d5998bb733..c7f55a3de1 100644
--- a/SingularityUI/app/controllers/TaskDetail.coffee
+++ b/SingularityUI/app/controllers/TaskDetail.coffee
@@ -7,6 +7,8 @@ TaskS3Logs = require '../collections/TaskS3Logs'
TaskFiles = require '../collections/TaskFiles'
TaskCleanups = require '../collections/TaskCleanups'
Deploys = require '../collections/Deploys'
+DeployDetails = require '../models/DeployDetails'
+Alerts = require '../collections/Alerts'
FileBrowserSubview = require '../views/fileBrowserSubview'
ExpandableTableSubview = require '../views/expandableTableSubview'
@@ -28,6 +30,7 @@ class TaskDetailController extends Controller
info: require '../templates/taskDetail/taskInfo'
environment: require '../templates/taskDetail/taskEnvironment'
resourceUsage: require '../templates/taskDetail/taskResourceUsage'
+ alerts: require '../templates/alerts'
initialize: ({@taskId, @filePath}) ->
@title @taskId
@@ -50,6 +53,8 @@ class TaskDetailController extends Controller
@collections.pendingDeploys = new Deploys state: 'pending'
+ @collections.alerts = new Alerts
+
#
# Subviews
#
@@ -97,6 +102,10 @@ class TaskDetailController extends Controller
model: @models.resourceUsage
template: @templates.resourceUsage
+ @subviews.alerts = new SimpleSubview
+ collection: @collections.alerts
+ template: @templates.alerts
+
#
# Getting stuff in gear
#
@@ -125,6 +134,47 @@ class TaskDetailController extends Controller
app.caughtError()
delete @models.resourceUsage
+    # Computes the alerts that apply to the current task and publishes them by
+    # resetting @collections.alerts.
+    #
+    # Two conditions are checked:
+    #   * a still-running SCHEDULED task whose uptime exceeds the deploy's
+    #     average runtime multiplied by
+    #     window.config.warnIfScheduledJobIsRunningPastNextRunPct / 100
+    #     (needs an asynchronous DeployDetails fetch);
+    #   * a finished task whose update history contains both a TASK_CLEANING
+    #     update mentioning decommissioning and a TASK_KILLED update.
+    #
+    # The collection is reset once the deploy fetch settles (successfully or
+    # not), or immediately when no fetch was needed.
+    getAlerts: =>
+        alerts = []
+        task = @models.task
+        taskRequest = task.attributes.task.taskRequest
+        deployPromise = null
+
+        # Is this a scheduled task that has been running much longer than previous ones?
+        if taskRequest.request.requestType is 'SCHEDULED' and task.get('isStillRunning')
+            deployInfo = new DeployDetails
+                deployId: taskRequest.deploy.id
+                requestId: taskRequest.request.id
+            deployPromise = deployInfo.fetch()
+            deployPromise.done =>
+                avg = deployInfo.get('deployStatistics')?.averageRuntimeMillis
+                # No recorded average means there is nothing to compare against.
+                # Without this guard a null average coerces to 0 and the warning
+                # would fire for every running scheduled task.
+                return unless avg?
+                current = new Date().getTime() - task.get('task').taskId.startedAt
+                threshold = window.config.warnIfScheduledJobIsRunningPastNextRunPct / 100
+                # Alert if current uptime is longer than the average * the configurable percentage
+                if current > (avg * threshold)
+                    alerts.push
+                        title: 'Warning:'
+                        message: "This scheduled task has been running longer than #{threshold} times the average for the request and may be stuck."
+                        level: 'warning'
+
+        # Was this task killed by a decommissioning slave?
+        if not task.get('isStillRunning')
+            updates = task.get('taskUpdates') or []
+            # The status message must exist AND contain the marker; an update
+            # with no message must not count as a decommission notice.
+            decomUpdates = updates.filter (u) ->
+                u.taskState is 'TASK_CLEANING' and u.statusMessage? and u.statusMessage.indexOf('DECOMISSIONING') isnt -1
+            killedUpdates = updates.filter (u) ->
+                u.taskState is 'TASK_KILLED'
+            if decomUpdates.length > 0 and killedUpdates.length > 0
+                alerts.push
+                    title: 'Alert:'
+                    message: 'This task was killed due to a slave decommissioning.'
+                    level: 'danger'
+
+        publish = => @collections.alerts.reset(alerts)
+        if deployPromise
+            # `always` rather than `done`: a failed deploy fetch must still
+            # refresh the collection so stale alerts are not left on screen.
+            # It is registered after the `done` handler above, so on success
+            # the warning has already been pushed when this runs.
+            deployPromise.always publish
+        else
+            publish()
+
+
refresh: ->
@resourcesFetched = false
@@ -135,12 +185,13 @@ class TaskDetailController extends Controller
@models.task.fetch()
.done =>
@fetchResourceUsage() if @models.task.get('isStillRunning')
+ .success =>
+ @getAlerts()
.error =>
# If this 404s the task doesn't exist
app.caughtError()
app.router.notFound()
-
if @collections.s3Logs?.currentPage is 1
@collections.s3Logs.fetch().error =>
# It probably means S3 logs haven't been configured
diff --git a/SingularityUI/app/templates/alerts.hbs b/SingularityUI/app/templates/alerts.hbs
new file mode 100644
index 0000000000..f2f0b0289d
--- /dev/null
+++ b/SingularityUI/app/templates/alerts.hbs
@@ -0,0 +1,5 @@
+{{#each data}}
+