From 8d2f237dc5f900381adf62a6e949ec71d1ee54e5 Mon Sep 17 00:00:00 2001 From: Andrew Beekhof Date: Wed, 7 Mar 2012 11:22:00 +1100 Subject: [PATCH] High: PE: Bug cl#5028 - Unmanaged services should block shutdown unless in maintenance mode 'Unmanaged' in this context means failed with on-fail=block. Not doing this could lead to services being active in more than one location because the node would be considered safely stopped. Resources that are configured to be unmanaged will not block shutdown /unless/ there is another resource that cannot shut down until the unmanaged one has - since that is impossible. --- include/crm/pengine/status.h | 3 ++- lib/pengine/native.c | 1 + lib/pengine/unpack.c | 1 + pengine/graph.c | 19 ++++++++++++------- pengine/group.c | 2 +- pengine/native.c | 14 ++++++++++++-- 6 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h index 71ee6e41051..debc770a642 100644 --- a/include/crm/pengine/status.h +++ b/include/crm/pengine/status.h @@ -148,6 +148,7 @@ struct node_s { # define pe_rsc_orphan 0x00000001ULL # define pe_rsc_managed 0x00000002ULL +# define pe_rsc_block 0x00000004ULL /* Further operations are prohibited due to failure policy */ # define pe_rsc_notify 0x00000010ULL # define pe_rsc_unique 0x00000020ULL @@ -319,7 +320,7 @@ enum pe_ordering { pe_order_runnable_left = 0x100, /* 'then' requires 'first' to be runnable */ - pe_order_restart = 0x1000, /* stop-start constraint */ + pe_order_restart = 0x1000, /* 'then' is runnable if 'first' is optional or runnable */ pe_order_stonith_stop = 0x2000, /* only applies if the action is non-pseudo */ pe_order_serialize_only = 0x4000, /* serialize */ diff --git a/lib/pengine/native.c b/lib/pengine/native.c index 611e6b875a0..8c3bb53e501 100644 --- a/lib/pengine/native.c +++ b/lib/pengine/native.c @@ -77,6 +77,7 @@ native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) break; case recovery_block: 
clear_bit(rsc->flags, pe_rsc_managed); + set_bit(rsc->flags, pe_rsc_block); break; } crm_debug("%s is active on %d nodes including %s: %s", diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 8b2fad05196..d3de08c8796 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1286,6 +1286,7 @@ process_rsc_state(resource_t * rsc, node_t * node, * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); + set_bit(rsc->flags, pe_rsc_block); break; case action_fail_migrate: diff --git a/pengine/graph.c b/pengine/graph.c index e81dad686c2..441b7e284b7 100644 --- a/pengine/graph.c +++ b/pengine/graph.c @@ -437,22 +437,27 @@ shutdown_constraints(node_t * node, action_t * shutdown_op, pe_working_set_t * d if (action->rsc == NULL || action->node == NULL) { continue; - } else if(is_not_set(action->rsc->flags, pe_rsc_managed)) { - /* Ignore unmanaged resources - * However if someone depends on those unmanaged resources, - * we will still end up blocking - */ - continue; } else if(action->node->details != node->details) { continue; + } else if(is_set(data_set->flags, pe_flag_maintenance_mode)) { + crm_trace("Skipping %s: maintainence mode", action->uuid); + continue; } else if(safe_str_neq(action->task, RSC_STOP)) { continue; + } else if(is_not_set(action->rsc->flags, pe_rsc_managed) + && is_not_set(action->rsc->flags, pe_rsc_block)) { + /* + * If another action depends on this one, we may still end up blocking + */ + crm_trace("Skipping %s: unmanaged", action->uuid); + continue; } crm_trace("Ordering %s before shutdown on %s", action->uuid, node->details->uname); + clear_bit_inplace(action->flags, pe_action_optional); custom_action_order(action->rsc, NULL, action, NULL, crm_strdup(CRM_OP_SHUTDOWN), shutdown_op, - pe_order_optional, data_set); + pe_order_optional|pe_order_runnable_left, data_set); } return TRUE; diff --git a/pengine/group.c b/pengine/group.c index 7b37b9f1089..1238b5d8124 100644 --- a/pengine/group.c +++ b/pengine/group.c 
@@ -247,7 +247,7 @@ group_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) child_rsc->restart_type = pe_restart_restart; order_start_start(last_rsc, child_rsc, start); - order_stop_stop(child_rsc, last_rsc, pe_order_optional); + order_stop_stop(child_rsc, last_rsc, pe_order_optional|pe_order_restart); if (top->variant == pe_master) { new_rsc_order(last_rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, start, data_set); diff --git a/pengine/native.c b/pengine/native.c index d17f65b91ec..3ea615a8112 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1453,6 +1453,7 @@ rsc_ticket_constraint(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working } if (g_list_length(rsc_lh->running_on) > 0) { clear_bit(rsc_lh->flags, pe_rsc_managed); + set_bit(rsc_lh->flags, pe_rsc_block); } break; } @@ -1582,7 +1583,7 @@ native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_ if (is_set(type, pe_order_restart)) { const char *reason = NULL; - CRM_ASSERT(then->rsc == first->rsc); + CRM_ASSERT(first->rsc->variant == pe_native); CRM_ASSERT(then->rsc->variant == pe_native); if ((filter & pe_action_runnable) && (then->flags & pe_action_runnable) == 0) { @@ -1598,6 +1599,12 @@ native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_ crm_trace("Handling %s: %s -> %s", reason, first->uuid, then->uuid); clear_bit_inplace(first->flags, pe_action_optional); } + + if (reason && is_not_set(first->flags, pe_action_optional) + && is_not_set(first->flags, pe_action_runnable)) { + crm_trace("Handling %s: %s -> %s", reason, first->uuid, then->uuid); + clear_bit_inplace(then->flags, pe_action_runnable); + } } if (then_flags != then->flags) { @@ -1981,7 +1988,10 @@ StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * d for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; - stop_action(rsc, current, optional); + action_t *stop = stop_action(rsc, current, 
optional); + if(is_not_set(rsc->flags, pe_rsc_managed)) { + update_action_flags(stop, pe_action_runnable|pe_action_clear); + } if (is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, optional, data_set);