Skip to content

Commit

Permalink
Merge pull request #752 from davidvossel/pcmk-remote-failcount
Browse files (browse the repository at this point in the history)
High: pengine: clear failures after reconnect_interval expires
  • Loading branch information
davidvossel committed Jul 9, 2015
2 parents d81f5f1 + a109cee commit ef86b49
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 3 deletions.
8 changes: 6 additions & 2 deletions lib/pengine/unpack.c
Expand Up @@ -2834,8 +2834,9 @@ static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNod

node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
if (remote_node && remote_node->details->remote_was_fenced == 0) {

crm_info("Waiting to clear monitor failure for remote node %s until fencing has occured", rsc->id);
if (strstr(ID(xml_op), "last_failure")) {
crm_info("Waiting to clear monitor failure for remote node %s until fencing has occured", rsc->id);
}
/* disabling failure timeout for this operation because we believe
* fencing of the remote node should occur first. */
failure_timeout = 0;
Expand Down Expand Up @@ -2866,6 +2867,9 @@ static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNod
} else {
expired = FALSE;
}
} else if (rsc->remote_reconnect_interval && strstr(ID(xml_op), "last_failure")) {
/* always clear last failure when reconnect interval is set */
clear_failcount = 1;
}
}

Expand Down
30 changes: 29 additions & 1 deletion pengine/allocate.c
Expand Up @@ -1681,10 +1681,38 @@ apply_remote_node_ordering(pe_working_set_t *data_set)
resource_t *remote_rsc = NULL;
resource_t *container = NULL;

if (action->rsc == NULL) {
continue;
}

/* Special case. */
if (action->rsc &&
action->rsc->is_remote_node &&
safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT)) {

/* if we are clearing the failcount of an actual remote node connect
* resource, then make sure this happens before allowing the connection
* to start if we are planning on starting the connection during this
* transition */
custom_action_order(action->rsc,
NULL,
action,
action->rsc,
generate_op_key(action->rsc->id, RSC_START, 0),
NULL,
pe_order_optional,
data_set);

continue;
}

/* Detect whether the action occurs on a remote node. If so, create
 * ordering constraints that guarantee the action occurs while the
 * remote node is active (after start, before stop, and so on). */
if (action->node == NULL ||
is_remote_node(action->node) == FALSE ||
action->node->details->remote_rsc == NULL ||
action->rsc == NULL ||
is_set(action->flags, pe_action_pseudo)) {
continue;
}
Expand Down

0 comments on commit ef86b49

Please sign in to comment.