Skip to content

Commit

Permalink
Merge pull request #1263 from kgaillot/fixes
Browse files Browse the repository at this point in the history
Better stonith failure handling in crmd, plus a few unrelated fixes
  • Loading branch information
kgaillot committed Apr 18, 2017
2 parents 2124e04 + 0c15dfd commit 0ea7a77
Show file tree
Hide file tree
Showing 21 changed files with 528 additions and 100 deletions.
5 changes: 4 additions & 1 deletion crmd/crmd_utils.h
Expand Up @@ -19,6 +19,7 @@
# define CRMD_UTILS__H

# include <crm/crm.h>
# include <crm/transition.h>
# include <crm/common/xml.h>
# include <crm/cib/internal.h> /* For CIB_OP_MODIFY */
# include "crmd_alerts.h"
Expand Down Expand Up @@ -100,8 +101,10 @@ int crmd_join_phase_count(enum crm_join_phase phase);
void crmd_join_phase_log(int level);

const char *get_timer_desc(fsa_timer_t * timer);
gboolean too_many_st_failures(void);
void st_fail_count_reset(const char * target);
void st_fail_count_increment(const char *target);
void abort_for_stonith_failure(enum transition_action abort_action,
const char *target, xmlNode *reason);
void crmd_peer_down(crm_node_t *peer, bool full);

/* Convenience macro for registering a CIB callback
Expand Down
24 changes: 24 additions & 0 deletions crmd/join_client.c
Expand Up @@ -30,6 +30,26 @@ void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, v

extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);

/*!
 * \internal
 * \brief Remember if DC is shutting down as we join
 *
 * If we're joining while the current DC is shutting down, update its expected
 * state, so we don't fence it if we become the new DC. (We weren't a peer
 * when it broadcast its shutdown request.)
 *
 * \param[in] msg  A join message from the DC
 */
static void
update_dc_expected(xmlNode *msg)
{
    const char *dc_leaving = NULL;

    if (fsa_our_dc == NULL) {
        return; /* no known DC whose expected state we could record */
    }

    dc_leaving = crm_element_value(msg, F_CRM_DC_LEAVING);
    if (crm_is_true(dc_leaving)) {
        crm_update_peer_expected(__FUNCTION__, crm_get_peer(0, fsa_our_dc),
                                 CRMD_JOINSTATE_DOWN);
    }
}

/* A_CL_JOIN_QUERY */
/* is there a DC out there? */
void
Expand Down Expand Up @@ -128,6 +148,8 @@ do_cl_join_offer_respond(long long action,
return;
}

update_dc_expected(input->msg);

CRM_LOG_ASSERT(input != NULL);
query_call_id =
fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local | cib_no_children);
Expand Down Expand Up @@ -250,6 +272,8 @@ do_cl_join_finalize_respond(long long action,
return;
}

update_dc_expected(input->msg);

/* send our status section to the DC */
tmp1 = do_lrm_query(TRUE, fsa_our_uname);
if (tmp1 != NULL) {
Expand Down
38 changes: 30 additions & 8 deletions crmd/join_dc.c
Expand Up @@ -106,6 +106,30 @@ initialize_join(gboolean before)
}
}

/*!
* \internal
* \brief Create a join message from the DC
*
* \param[in] join_op Join operation name
* \param[in] host_to Recipient of message
*/
static xmlNode *
create_dc_message(const char *join_op, const char *host_to)
{
xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
CRM_SYSTEM_DC, NULL);

/* Identify which election this is a part of */
crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id);

/* Add a field specifying whether the DC is shutting down. This keeps the
* joining node from fencing the old DC if it becomes the new DC.
*/
crm_xml_add_boolean(msg, F_CRM_DC_LEAVING,
is_set(fsa_input_register, R_SHUTDOWN));
return msg;
}

static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
Expand Down Expand Up @@ -147,10 +171,8 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data)

crm_update_peer_join(__FUNCTION__, (crm_node_t*)member, crm_join_none);

offer = create_request(CRM_OP_JOIN_OFFER, NULL, member->uname,
CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);

crm_xml_add_int(offer, F_CRM_JOIN_ID, current_join_id);
/* send the welcome */
crm_info("join-%d: Sending offer to %s", current_join_id, member->uname);

Expand Down Expand Up @@ -242,8 +264,10 @@ do_dc_join_offer_one(long long action,
/* always offer to the DC (ourselves)
* this ensures the correct value for max_generation_from
*/
member = crm_get_peer(0, fsa_our_uname);
join_make_offer(NULL, member, NULL);
if (strcmp(join_to, fsa_our_uname) != 0) {
member = crm_get_peer(0, fsa_our_uname);
join_make_offer(NULL, member, NULL);
}

/* this was a genuine join request, cancel any existing
* transition and invoke the PE
Expand Down Expand Up @@ -586,9 +610,7 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data)
}

/* send the ack/nack to the node */
acknak = create_request(CRM_OP_JOIN_ACKNAK, NULL, join_to,
CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
crm_xml_add_int(acknak, F_CRM_JOIN_ID, current_join_id);
acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);

crm_debug("join-%d: ACK'ing join request from %s",
current_join_id, join_to);
Expand Down
6 changes: 6 additions & 0 deletions crmd/messages.c
Expand Up @@ -870,6 +870,12 @@ handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause)

} else {
reap_crm_member(id, name);

/* If we're forgetting this node, also forget any failures to fence
* it, so we don't carry that over to any node added later with the
* same name.
*/
st_fail_count_reset(name);
}

} else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
Expand Down
12 changes: 4 additions & 8 deletions crmd/te_actions.c
Expand Up @@ -726,15 +726,11 @@ notify_crmd(crm_graph_t * graph)
case tg_restart:
type = "restart";
if (fsa_state == S_TRANSITION_ENGINE) {
if (too_many_st_failures() == FALSE) {
if (transition_timer->period_ms > 0) {
crm_timer_stop(transition_timer);
crm_timer_start(transition_timer);
} else {
event = I_PE_CALC;
}
if (transition_timer->period_ms > 0) {
crm_timer_stop(transition_timer);
crm_timer_start(transition_timer);
} else {
event = I_TE_SUCCESS;
event = I_PE_CALC;
}

} else if (fsa_state == S_POLICY_ENGINE) {
Expand Down
96 changes: 79 additions & 17 deletions crmd/te_callbacks.c
Expand Up @@ -635,8 +635,8 @@ struct st_fail_rec {
int count;
};

gboolean
too_many_st_failures(void)
static gboolean
too_many_st_failures(const char *target)
{
GHashTableIter iter;
const char *key = NULL;
Expand All @@ -646,32 +646,63 @@ too_many_st_failures(void)
return FALSE;
}

g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
if (value->count > stonith_max_attempts ) {
crm_warn("Too many failures to fence %s (%d), giving up", key, value->count);
return TRUE;
if (target == NULL) {
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
if (value->count >= stonith_max_attempts) {
target = (const char*)key;
goto too_many;
}
}
} else {
value = g_hash_table_lookup(stonith_failures, target);
if ((value != NULL) && (value->count >= stonith_max_attempts)) {
goto too_many;
}
}
return FALSE;

too_many:
crm_warn("Too many failures (%d) to fence %s, giving up",
value->count, target);
return TRUE;
}

/*!
* \internal
* \brief Reset a stonith fail count
*
* \param[in] target Name of node to reset, or NULL for all
*/
void
st_fail_count_reset(const char *target)
{
struct st_fail_rec *rec = NULL;

if (stonith_failures) {
rec = g_hash_table_lookup(stonith_failures, target);
if (stonith_failures == NULL) {
return;
}

if (rec) {
rec->count = 0;
if (target) {
struct st_fail_rec *rec = NULL;

rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count = 0;
}
} else {
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *rec = NULL;

g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &rec)) {
rec->count = 0;
}
}
}

static void
st_fail_count_increment(const char *target, int rc)
void
st_fail_count_increment(const char *target)
{
struct st_fail_rec *rec = NULL;

Expand All @@ -694,6 +725,27 @@ st_fail_count_increment(const char *target, int rc)
}
}

/*!
 * \internal
 * \brief Abort transition due to stonith failure
 *
 * \param[in] abort_action  Whether to restart or stop transition
 * \param[in] target        Don't restart if this node (NULL for any node) has
 *                          too many fencing failures
 * \param[in] reason        Log this stonith action XML as abort reason (or NULL)
 */
void
abort_for_stonith_failure(enum transition_action abort_action,
                          const char *target, xmlNode *reason)
{
    enum transition_action action = abort_action;

    /* Once fencing has failed too often for the target, stop scheduling new
     * transitions for that reason instead of retrying indefinitely.
     */
    if ((action != tg_stop) && too_many_st_failures(target)) {
        action = tg_stop;
    }
    abort_transition(INFINITY, action, "Stonith failed", reason);
}

void
tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
Expand Down Expand Up @@ -755,12 +807,22 @@ tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)

} else {
const char *target = crm_element_value_const(action->xml, XML_LRM_ATTR_TARGET);
enum transition_action abort_action = tg_restart;

action->failed = TRUE;
crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
call_id, target, pcmk_strerror(rc));
abort_transition(INFINITY, tg_restart, "Stonith failed", NULL);
st_fail_count_increment(target, rc);

/* If no fence devices were available, there's no use in immediately
* checking again, so don't start a new transition in that case.
*/
if (rc == -ENODEV) {
crm_warn("No devices found in cluster to fence %s, giving up",
target);
abort_action = tg_stop;
}

abort_for_stonith_failure(abort_action, target, NULL);
}

update_graph(transition_graph, action);
Expand Down
11 changes: 7 additions & 4 deletions crmd/te_utils.c
Expand Up @@ -162,7 +162,7 @@ fail_incompletable_stonith(crm_graph_t * graph)

if (last_action != NULL) {
crm_warn("STONITHd failure resulted in un-runnable actions");
abort_transition(INFINITY, tg_restart, "Stonith failure", last_action);
abort_for_stonith_failure(tg_restart, NULL, last_action);
return TRUE;
}

Expand Down Expand Up @@ -259,9 +259,12 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
return;
}

if (st_event->result == pcmk_ok &&
safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
st_fail_count_reset(st_event->target);
if (safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
if (st_event->result == pcmk_ok) {
st_fail_count_reset(st_event->target);
} else {
st_fail_count_increment(st_event->target);
}
}

crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
Expand Down
4 changes: 2 additions & 2 deletions doc/Pacemaker_Explained/en-US/Ch-Options.txt
Expand Up @@ -230,8 +230,8 @@ How long to wait for STONITH actions (reboot, on, off) to complete
| stonith-max-attempts | 10 |
indexterm:[stonith-max-attempts,Cluster Option]
indexterm:[Cluster,Option,stonith-max-attempts]
How many times stonith can fail before it will no longer be attempted on a target.
Positive non-zero values are allowed. '(since 1.1.17)'
How many times fencing can fail for a target before the cluster will no longer
immediately re-attempt it. '(since 1.1.17)'

| concurrent-fencing | FALSE |
indexterm:[concurrent-fencing,Cluster Option]
Expand Down

0 comments on commit 0ea7a77

Please sign in to comment.