Permalink
Browse files

Merge pull request #1263 from kgaillot/fixes

Better stonith failure handling in crmd, plus a few unrelated fixes
2 parents 2124e04 + 0c15dfd commit 0ea7a77e12094f7410870a96be5c635f8eb272cf @kgaillot kgaillot committed on GitHub Apr 18, 2017
View
@@ -19,6 +19,7 @@
# define CRMD_UTILS__H
# include <crm/crm.h>
+# include <crm/transition.h>
# include <crm/common/xml.h>
# include <crm/cib/internal.h> /* For CIB_OP_MODIFY */
# include "crmd_alerts.h"
@@ -100,8 +101,10 @@ int crmd_join_phase_count(enum crm_join_phase phase);
void crmd_join_phase_log(int level);
const char *get_timer_desc(fsa_timer_t * timer);
-gboolean too_many_st_failures(void);
void st_fail_count_reset(const char * target);
+void st_fail_count_increment(const char *target);
+void abort_for_stonith_failure(enum transition_action abort_action,
+ const char *target, xmlNode *reason);
void crmd_peer_down(crm_node_t *peer, bool full);
/* Convenience macro for registering a CIB callback
View
@@ -30,6 +30,26 @@ void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, v
extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);
+/*!
+ * \internal
+ * \brief Remember if DC is shutting down as we join
+ *
+ * If we're joining while the current DC is shutting down, update its expected
+ * state, so we don't fence it if we become the new DC. (We weren't a peer
+ * when it broadcast its shutdown request.)
+ *
+ * \param[in] msg A join message from the DC
+ */
+static void
+update_dc_expected(xmlNode *msg)
+{
+ /* F_CRM_DC_LEAVING is set on join messages by the DC's create_dc_message()
+  * when the DC has R_SHUTDOWN set, so a true value here means the DC is
+  * leaving the cluster gracefully.
+  */
+ if (fsa_our_dc && crm_is_true(crm_element_value(msg, F_CRM_DC_LEAVING))) {
+ crm_node_t *dc_node = crm_get_peer(0, fsa_our_dc);
+
+ crm_update_peer_expected(__FUNCTION__, dc_node, CRMD_JOINSTATE_DOWN);
+ }
+}
+
/* A_CL_JOIN_QUERY */
/* is there a DC out there? */
void
@@ -128,6 +148,8 @@ do_cl_join_offer_respond(long long action,
return;
}
+ update_dc_expected(input->msg);
+
CRM_LOG_ASSERT(input != NULL);
query_call_id =
fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local | cib_no_children);
@@ -250,6 +272,8 @@ do_cl_join_finalize_respond(long long action,
return;
}
+ update_dc_expected(input->msg);
+
/* send our status section to the DC */
tmp1 = do_lrm_query(TRUE, fsa_our_uname);
if (tmp1 != NULL) {
View
@@ -106,6 +106,30 @@ initialize_join(gboolean before)
}
}
+/*!
+ * \internal
+ * \brief Create a join message from the DC
+ *
+ * \param[in] join_op Join operation name
+ * \param[in] host_to Recipient of message
+ *
+ * \return Newly created join message XML, tagged with the current join ID
+ *         and whether the DC is shutting down
+ */
+static xmlNode *
+create_dc_message(const char *join_op, const char *host_to)
+{
+ xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
+ CRM_SYSTEM_DC, NULL);
+
+ /* Identify which election this is a part of */
+ crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id);
+
+ /* Add a field specifying whether the DC is shutting down. This keeps the
+ * joining node from fencing the old DC if it becomes the new DC.
+ */
+ crm_xml_add_boolean(msg, F_CRM_DC_LEAVING,
+ is_set(fsa_input_register, R_SHUTDOWN));
+ return msg;
+}
+
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
@@ -147,10 +171,8 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data)
crm_update_peer_join(__FUNCTION__, (crm_node_t*)member, crm_join_none);
- offer = create_request(CRM_OP_JOIN_OFFER, NULL, member->uname,
- CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
+ offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
- crm_xml_add_int(offer, F_CRM_JOIN_ID, current_join_id);
/* send the welcome */
crm_info("join-%d: Sending offer to %s", current_join_id, member->uname);
@@ -242,8 +264,10 @@ do_dc_join_offer_one(long long action,
/* always offer to the DC (ourselves)
* this ensures the correct value for max_generation_from
*/
- member = crm_get_peer(0, fsa_our_uname);
- join_make_offer(NULL, member, NULL);
+ if (strcmp(join_to, fsa_our_uname) != 0) {
+ member = crm_get_peer(0, fsa_our_uname);
+ join_make_offer(NULL, member, NULL);
+ }
/* this was a genuine join request, cancel any existing
* transition and invoke the PE
@@ -586,9 +610,7 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data)
}
/* send the ack/nack to the node */
- acknak = create_request(CRM_OP_JOIN_ACKNAK, NULL, join_to,
- CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
- crm_xml_add_int(acknak, F_CRM_JOIN_ID, current_join_id);
+ acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
crm_debug("join-%d: ACK'ing join request from %s",
current_join_id, join_to);
View
@@ -870,6 +870,12 @@ handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause)
} else {
reap_crm_member(id, name);
+
+ /* If we're forgetting this node, also forget any failures to fence
+ * it, so we don't carry that over to any node added later with the
+ * same name.
+ */
+ st_fail_count_reset(name);
}
} else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
View
@@ -726,15 +726,11 @@ notify_crmd(crm_graph_t * graph)
case tg_restart:
type = "restart";
if (fsa_state == S_TRANSITION_ENGINE) {
- if (too_many_st_failures() == FALSE) {
- if (transition_timer->period_ms > 0) {
- crm_timer_stop(transition_timer);
- crm_timer_start(transition_timer);
- } else {
- event = I_PE_CALC;
- }
+ if (transition_timer->period_ms > 0) {
+ crm_timer_stop(transition_timer);
+ crm_timer_start(transition_timer);
} else {
- event = I_TE_SUCCESS;
+ event = I_PE_CALC;
}
} else if (fsa_state == S_POLICY_ENGINE) {
View
@@ -635,8 +635,8 @@ struct st_fail_rec {
int count;
};
-gboolean
-too_many_st_failures(void)
+static gboolean
+too_many_st_failures(const char *target)
{
GHashTableIter iter;
const char *key = NULL;
@@ -646,32 +646,63 @@ too_many_st_failures(void)
return FALSE;
}
- g_hash_table_iter_init(&iter, stonith_failures);
- while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
- if (value->count > stonith_max_attempts ) {
- crm_warn("Too many failures to fence %s (%d), giving up", key, value->count);
- return TRUE;
+ if (target == NULL) {
+ g_hash_table_iter_init(&iter, stonith_failures);
+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
+ if (value->count >= stonith_max_attempts) {
+ target = (const char*)key;
+ goto too_many;
+ }
+ }
+ } else {
+ value = g_hash_table_lookup(stonith_failures, target);
+ if ((value != NULL) && (value->count >= stonith_max_attempts)) {
+ goto too_many;
}
}
return FALSE;
+
+too_many:
+ crm_warn("Too many failures (%d) to fence %s, giving up",
+ value->count, target);
+ return TRUE;
}
+/*!
+ * \internal
+ * \brief Reset a stonith fail count
+ *
+ * \param[in] target Name of node to reset, or NULL for all
+ */
void
st_fail_count_reset(const char *target)
{
- struct st_fail_rec *rec = NULL;
-
- if (stonith_failures) {
- rec = g_hash_table_lookup(stonith_failures, target);
+ /* Nothing to reset if no fail counts have been recorded yet */
+ if (stonith_failures == NULL) {
+ return;
}
- if (rec) {
- rec->count = 0;
+ if (target) {
+ struct st_fail_rec *rec = NULL;
+
+ rec = g_hash_table_lookup(stonith_failures, target);
+ if (rec) {
+ rec->count = 0;
+ }
+ } else {
+ /* No target given: clear the fail count for every tracked node */
+ GHashTableIter iter;
+ const char *key = NULL;
+ struct st_fail_rec *rec = NULL;
+
+ g_hash_table_iter_init(&iter, stonith_failures);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
+ (gpointer *) &rec)) {
+ rec->count = 0;
+ }
}
}
-static void
-st_fail_count_increment(const char *target, int rc)
+void
+st_fail_count_increment(const char *target)
{
struct st_fail_rec *rec = NULL;
@@ -694,6 +725,27 @@ st_fail_count_increment(const char *target, int rc)
}
}
+/*!
+ * \internal
+ * \brief Abort transition due to stonith failure
+ *
+ * \param[in] abort_action Whether to restart or stop transition
+ * \param[in] target If this node (or any node, when NULL) has accumulated
+ *                   too many fencing failures, downgrade a restart to a stop
+ * \param[in] reason Log this stonith action XML as abort reason (or NULL)
+ */
+void
+abort_for_stonith_failure(enum transition_action abort_action,
+ const char *target, xmlNode *reason)
+{
+ /* If stonith repeatedly fails, we eventually give up on starting a new
+ * transition for that reason.
+ */
+ if ((abort_action != tg_stop) && too_many_st_failures(target)) {
+ abort_action = tg_stop;
+ }
+ abort_transition(INFINITY, abort_action, "Stonith failed", reason);
+}
+
void
tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
@@ -755,12 +807,22 @@ tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
} else {
const char *target = crm_element_value_const(action->xml, XML_LRM_ATTR_TARGET);
+ enum transition_action abort_action = tg_restart;
action->failed = TRUE;
crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
call_id, target, pcmk_strerror(rc));
- abort_transition(INFINITY, tg_restart, "Stonith failed", NULL);
- st_fail_count_increment(target, rc);
+
+ /* If no fence devices were available, there's no use in immediately
+ * checking again, so don't start a new transition in that case.
+ */
+ if (rc == -ENODEV) {
+ crm_warn("No devices found in cluster to fence %s, giving up",
+ target);
+ abort_action = tg_stop;
+ }
+
+ abort_for_stonith_failure(abort_action, target, NULL);
}
update_graph(transition_graph, action);
View
@@ -162,7 +162,7 @@ fail_incompletable_stonith(crm_graph_t * graph)
if (last_action != NULL) {
crm_warn("STONITHd failure resulted in un-runnable actions");
- abort_transition(INFINITY, tg_restart, "Stonith failure", last_action);
+ abort_for_stonith_failure(tg_restart, NULL, last_action);
return TRUE;
}
@@ -259,9 +259,12 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
return;
}
- if (st_event->result == pcmk_ok &&
- safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
- st_fail_count_reset(st_event->target);
+ if (safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
+ if (st_event->result == pcmk_ok) {
+ st_fail_count_reset(st_event->target);
+ } else {
+ st_fail_count_increment(st_event->target);
+ }
}
crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
@@ -230,8 +230,8 @@ How long to wait for STONITH actions (reboot, on, off) to complete
| stonith-max-attempts | 10 |
indexterm:[stonith-max-attempts,Cluster Option]
indexterm:[Cluster,Option,stonith-max-attempts]
-How many times stonith can fail before it will no longer be attempted on a target.
-Positive non-zero values are allowed. '(since 1.1.17)'
+How many times fencing can fail for a target before the cluster will no longer
+immediately re-attempt it. '(since 1.1.17)'
| concurrent-fencing | FALSE |
indexterm:[concurrent-fencing,Cluster Option]
Oops, something went wrong.

0 comments on commit 0ea7a77

Please sign in to comment.