Skip to content

Commit

Permalink
when cancelling a job after multiple internal errors, enclose the error message in the report for the frontend
Browse files Browse the repository at this point in the history
  • Loading branch information
janbuchar committed Oct 5, 2017
1 parent c2c6fdf commit 2b21d0f
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 8 deletions.
13 changes: 7 additions & 6 deletions src/handlers/broker_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ void broker_handler::process_worker_done(
if (!failed_request->data.is_complete()) {
status_notifier.rejected_job(
failed_request->data.get_job_id(), "Job failed with '" + message.at(3) + "' and cannot be reassigned");
} else if (check_failure_count(failed_request, status_notifier, respond)) {
} else if (check_failure_count(failed_request, status_notifier, respond, message.at(3))) {
reassign_request(failed_request, respond);
} else {
auto new_request = queue_->assign_request(worker);
Expand Down Expand Up @@ -344,6 +344,7 @@ void broker_handler::process_timer(const message_container &message, handler_int
}

reactor_status_notifier status_notifier(respond, broker_connect::KEY_STATUS_NOTIFIER);
const static std::string failure_msg = "Worker timed out and its job cannot be reassigned";

for (auto worker : to_remove) {
logger_->info("Worker {} expired", worker->get_description());
Expand All @@ -355,13 +356,12 @@ void broker_handler::process_timer(const message_container &message, handler_int

for (auto request : *requests) {
if (!request->data.is_complete()) {
status_notifier.rejected_job(
request->data.get_job_id(), "Worker timed out and its job cannot be reassigned");
status_notifier.rejected_job(request->data.get_job_id(), failure_msg);
notify_monitor(request, "FAILED", respond);
continue;
}

if (!check_failure_count(request, status_notifier, respond)) {
if (!check_failure_count(request, status_notifier, respond, failure_msg)) {
continue;
}

Expand Down Expand Up @@ -413,11 +413,12 @@ void broker_handler::send_request(worker_registry::worker_ptr worker, request_pt
}

bool broker_handler::check_failure_count(worker::request_ptr request, status_notifier_interface &status_notifier,
response_cb respond)
response_cb respond, const std::string &failure_msg)
{
if (request->failure_count >= config_->get_max_request_failures()) {
status_notifier.job_failed(request->data.get_job_id(),
"Job was reassigned too many (" + std::to_string(request->failure_count - 1) + ") times");
"Job was reassigned too many (" + std::to_string(request->failure_count - 1) + ") times. Last"
" failure message was: " + failure_msg);
notify_monitor(request, "FAILED", respond);
return false;
}
Expand Down
4 changes: 3 additions & 1 deletion src/handlers/broker_handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,12 @@ class broker_handler : public handler_interface
* Check if a request can be reassigned one more time and notify the frontend if not.
* @param request the request to be checked
* @param status_notifier used to notify the frontend when the request doesn't pass the check
* @param respond a callback to notify the monitor in case of failure
* @param failure_msg a message describing the failure of the last request
* @return true if the request can be reassigned, false otherwise
*/
bool check_failure_count(worker::request_ptr request, status_notifier_interface &status_notifier,
response_cb respond);
response_cb respond, const std::string &failure_msg);

/**
* Notify the monitor about an error that might not have been reported by a worker
Expand Down
3 changes: 2 additions & 1 deletion tests/broker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,8 @@ TEST(broker, worker_expiration_cancel_job)
"status",
"FAILED",
"message",
"Job was reassigned too many (1) times"}),
"Job was reassigned too many (1) times. Last failure message was: Worker timed out "
"and its job cannot be reassigned"}),
message_container(broker_connect::KEY_MONITOR, broker_connect::MONITOR_IDENTITY, {
"job_id",
"FAILED"
Expand Down

0 comments on commit 2b21d0f

Please sign in to comment.