From a109cee3b48f4b6ebccf9f51989973e206af41a9 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Thu, 2 Jul 2015 16:32:37 -0400 Subject: [PATCH] High: pengine: clear failures after reconnect_interval expires --- lib/pengine/unpack.c | 8 ++++++-- pengine/allocate.c | 30 +++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 73c44a82dfa..106c6741887 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -2834,8 +2834,9 @@ static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNod node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); if (remote_node && remote_node->details->remote_was_fenced == 0) { - - crm_info("Waiting to clear monitor failure for remote node %s until fencing has occured", rsc->id); + if (strstr(ID(xml_op), "last_failure")) { + crm_info("Waiting to clear monitor failure for remote node %s until fencing has occured", rsc->id); + } /* disabling failure timeout for this operation because we believe * fencing of the remote node should occur first. */ failure_timeout = 0; @@ -2866,6 +2867,9 @@ static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNod } else { expired = FALSE; } + } else if (rsc->remote_reconnect_interval && strstr(ID(xml_op), "last_failure")) { + /* always clear last failure when reconnect interval is set */ + clear_failcount = 1; } } diff --git a/pengine/allocate.c b/pengine/allocate.c index 4b6fca1c991..68cafd47cdd 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -1681,10 +1681,38 @@ apply_remote_node_ordering(pe_working_set_t *data_set) resource_t *remote_rsc = NULL; resource_t *container = NULL; + if (action->rsc == NULL) { + continue; + } + + /* Special case. */ + if (action->rsc && + action->rsc->is_remote_node && + safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT)) { + + /* if we are clearing the failcount of an actual remote node connect + * resource, then make sure this happens before allowing the connection + * to start if we are planning on starting the connection during this + * transition */ + custom_action_order(action->rsc, + NULL, + action, + action->rsc, + generate_op_key(action->rsc->id, RSC_START, 0), + NULL, + pe_order_optional, + data_set); + + continue; + } + + /* detect if the action occurs on a remote node. if so create + * ordering constraints that guarantee the action occurs while + * the remote node is active (after start, before stop...) things + * like that */ if (action->node == NULL || is_remote_node(action->node) == FALSE || action->node->details->remote_rsc == NULL || - action->rsc == NULL || is_set(action->flags, pe_action_pseudo)) { continue; }