High: PE: Bug cl#5028 - Unmanaged services should block shutdown unless in maintenance mode

'Unmanaged' in this context means failed with on-fail=block.

Not doing this could lead to services being active in more than one location
as the node is considered safely stopped.

Resources that are configured to be unmanaged will not block shutdown
/unless/ there is another resource that cannot shut down until the
unmanaged one has - since that is impossible.
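
As a rough illustration of that rule, here is a minimal standalone C sketch (not Pacemaker's API; the real check is the shutdown_constraints() change in pengine/graph.c below, and the helper name stop_must_precede_shutdown is invented for this example): a resource's stop is ordered before node shutdown unless the cluster is in maintenance mode or the resource is unmanaged without also being blocked by its failure policy.

/* Standalone sketch of the new rule (illustration only; the flag values match
 * include/crm/pengine/status.h, everything else is simplified). */
#include <stdbool.h>
#include <stdint.h>

#define pe_rsc_managed 0x00000002ULL
#define pe_rsc_block   0x00000004ULL   /* failed with on-fail=block */

static bool stop_must_precede_shutdown(uint64_t rsc_flags, bool maintenance_mode)
{
    if (maintenance_mode) {
        return false;   /* maintenance mode: never block shutdown */
    }
    if (!(rsc_flags & pe_rsc_managed) && !(rsc_flags & pe_rsc_block)) {
        return false;   /* plainly unmanaged: skip (a dependent stop may still block) */
    }
    return true;        /* managed, or unmanaged because its failure policy blocked it */
}

int main(void)
{
    uint64_t blocked   = pe_rsc_block;  /* managed cleared, block set: gates shutdown */
    uint64_t unmanaged = 0ULL;          /* simply unmanaged: does not gate shutdown   */

    return (stop_must_precede_shutdown(blocked, false)
            && !stop_must_precede_shutdown(unmanaged, false)) ? 0 : 1;
}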
beekhof committed Mar 7, 2012
1 parent e898842 commit 8d2f237
Showing 6 changed files with 29 additions and 11 deletions.
3 changes: 2 additions & 1 deletion include/crm/pengine/status.h
@@ -148,6 +148,7 @@ struct node_s {

# define pe_rsc_orphan 0x00000001ULL
# define pe_rsc_managed 0x00000002ULL
# define pe_rsc_block 0x00000004ULL /* Further operations are prohibited due to failure policy */

# define pe_rsc_notify 0x00000010ULL
# define pe_rsc_unique 0x00000020ULL
@@ -319,7 +320,7 @@ enum pe_ordering {

pe_order_runnable_left = 0x100, /* 'then' requires 'first' to be runnable */

pe_order_restart = 0x1000, /* stop-start constraint */
pe_order_restart = 0x1000, /* 'then' is runnable if 'first' is optional or runnable */
pe_order_stonith_stop = 0x2000, /* only applies if the action is non-pseudo */
pe_order_serialize_only = 0x4000, /* serialize */

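For readers following the set_bit()/clear_bit() calls in the hunks below, a simplified standalone sketch of how such flag helpers typically behave (these are not Pacemaker's actual macros, and mark_blocked() is an invented name); the point is simply that the recovery_block / on-fail=block paths now clear pe_rsc_managed and set the new pe_rsc_block bit on the resource's flags.

/* Simplified stand-ins for Pacemaker's flag helpers (illustration only,
 * not the project's actual macros). */
#include <stdint.h>

#define set_bit(word, bit)    ((word) |= (bit))
#define clear_bit(word, bit)  ((word) &= ~(bit))
#define is_set(word, bit)     (((word) & (bit)) == (bit))
#define is_not_set(word, bit) (((word) & (bit)) != (bit))

#define pe_rsc_managed 0x00000002ULL
#define pe_rsc_block   0x00000004ULL

/* What the recovery_block / on-fail=block paths below do to a resource:
 * clear the managed bit and set the new block bit. */
static void mark_blocked(uint64_t *flags)
{
    clear_bit(*flags, pe_rsc_managed);
    set_bit(*flags, pe_rsc_block);
}

int main(void)
{
    uint64_t flags = pe_rsc_managed;
    mark_blocked(&flags);
    return (is_set(flags, pe_rsc_block) && is_not_set(flags, pe_rsc_managed)) ? 0 : 1;
}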
1 change: 1 addition & 0 deletions lib/pengine/native.c
@@ -77,6 +77,7 @@ native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set)
break;
case recovery_block:
clear_bit(rsc->flags, pe_rsc_managed);
set_bit(rsc->flags, pe_rsc_block);
break;
}
crm_debug("%s is active on %d nodes including %s: %s",

gao-yan (Member) commented on Sep 4, 2015:

With on-fail="block" or multiple-active="block", the resource will be blocked when it runs into such a failure. But the resources that depend on it will be stopped. Should we perhaps block them as well?

beekhof (Author, Member) commented on Sep 7, 2015:

My inclination is to say no - that we should stop them before they fail.

'X with Y' and 'X after Y' both require Y to be active; on-fail=block is saying "leave it failed", which logically implies that the constraints cannot be satisfied and X should be stopped.

Have you got a particular use case in mind?

gao-yan (Member) commented on Sep 7, 2015:

A customer has a resource configured with multiple-active="block" in a resource group. They don't mind if it is found active on multiple nodes, and want the resource group to keep running there.

beekhof (Author, Member) commented on Sep 8, 2015:

Move it one level up so that it applies to everything in the resource group?
Curious as to what kind of resources meet these criteria... perhaps we should move this to an email thread.

gao-yan (Member) commented on Sep 8, 2015:

I've been thinking the resources that depend on it don't actually have to be in the same group. I'm asking the customer if an implementation of multiple-active="ignore" would meet their need.

gao-yan (Member) commented on Sep 22, 2015:

The customer is using sfex to prevent double disk access, and they are not using fencing. During a short split-brain, the sfex resource will try to start on both nodes and attempt to lock the disk; the start of sfex on the second node will run into a timeout. Of course, if the cluster nodes rejoin each other at this time, "multiple-active" will be detected. The customer wants to keep the sfex that has acquired the lock, and the resources that depend on it (in the same group), running there to prevent service downtime.

So:

  • multiple-active="block" for the group would work for their case.
  • Or a new option for multiple-active such as "freeze"/"block-all" to block this too-active resource and also all resources that depend on it.

Which way would you prefer? Or any other thoughts on this ;-) ?

gao-yan (Member) commented on Nov 10, 2015:

The former way seems straightforward. Implemented:
#832

1 change: 1 addition & 0 deletions lib/pengine/unpack.c
@@ -1286,6 +1286,7 @@ process_rsc_state(resource_t * rsc, node_t * node,
* actions being sent for the resource
*/
clear_bit(rsc->flags, pe_rsc_managed);
set_bit(rsc->flags, pe_rsc_block);
break;

case action_fail_migrate:
19 changes: 12 additions & 7 deletions pengine/graph.c
@@ -437,22 +437,27 @@ shutdown_constraints(node_t * node, action_t * shutdown_op, pe_working_set_t * d

if (action->rsc == NULL || action->node == NULL) {
continue;
} else if(is_not_set(action->rsc->flags, pe_rsc_managed)) {
/* Ignore unmanaged resources
* However if someone depends on those unmanaged resources,
* we will still end up blocking
*/
continue;
} else if(action->node->details != node->details) {
continue;
} else if(is_set(data_set->flags, pe_flag_maintenance_mode)) {
crm_trace("Skipping %s: maintainence mode", action->uuid);
continue;
} else if(safe_str_neq(action->task, RSC_STOP)) {
continue;
} else if(is_not_set(action->rsc->flags, pe_rsc_managed)
&& is_not_set(action->rsc->flags, pe_rsc_block)) {
/*
* If another action depends on this one, we may still end up blocking
*/
crm_trace("Skipping %s: unmanaged", action->uuid);
continue;
}

crm_trace("Ordering %s before shutdown on %s", action->uuid, node->details->uname);
clear_bit_inplace(action->flags, pe_action_optional);
custom_action_order(action->rsc, NULL, action,
NULL, crm_strdup(CRM_OP_SHUTDOWN), shutdown_op,
pe_order_optional, data_set);
pe_order_optional|pe_order_runnable_left, data_set);
}

return TRUE;
2 changes: 1 addition & 1 deletion pengine/group.c
@@ -247,7 +247,7 @@ group_internal_constraints(resource_t * rsc, pe_working_set_t * data_set)
child_rsc->restart_type = pe_restart_restart;

order_start_start(last_rsc, child_rsc, start);
order_stop_stop(child_rsc, last_rsc, pe_order_optional);
order_stop_stop(child_rsc, last_rsc, pe_order_optional|pe_order_restart);

if (top->variant == pe_master) {
new_rsc_order(last_rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, start, data_set);
14 changes: 12 additions & 2 deletions pengine/native.c
@@ -1453,6 +1453,7 @@ rsc_ticket_constraint(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working
}
if (g_list_length(rsc_lh->running_on) > 0) {
clear_bit(rsc_lh->flags, pe_rsc_managed);
set_bit(rsc_lh->flags, pe_rsc_block);
}
break;
}
@@ -1582,7 +1583,7 @@ native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_
if (is_set(type, pe_order_restart)) {
const char *reason = NULL;

CRM_ASSERT(then->rsc == first->rsc);
CRM_ASSERT(first->rsc->variant == pe_native);
CRM_ASSERT(then->rsc->variant == pe_native);

if ((filter & pe_action_runnable) && (then->flags & pe_action_runnable) == 0) {
@@ -1598,6 +1599,12 @@ native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_
crm_trace("Handling %s: %s -> %s", reason, first->uuid, then->uuid);
clear_bit_inplace(first->flags, pe_action_optional);
}

if (reason && is_not_set(first->flags, pe_action_optional)
&& is_not_set(first->flags, pe_action_runnable)) {
crm_trace("Handling %s: %s -> %s", reason, first->uuid, then->uuid);
clear_bit_inplace(then->flags, pe_action_runnable);
}
}

if (then_flags != then->flags) {
@@ -1981,7 +1988,10 @@ StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * d
for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
node_t *current = (node_t *) gIter->data;

stop_action(rsc, current, optional);
action_t *stop = stop_action(rsc, current, optional);
if(is_not_set(rsc->flags, pe_rsc_managed)) {
update_action_flags(stop, pe_action_runnable|pe_action_clear);
}

if (is_set(data_set->flags, pe_flag_remove_after_stop)) {
DeleteRsc(rsc, current, optional, data_set);
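Taken together with the graph.c and group.c changes above, the intent of the pengine/native.c hunks can be sketched as follows (simplified standalone C, not the real pe_action_t or native_update_actions() code; struct sketch_action and apply_runnable_left() are invented for illustration): StopRsc() leaves the stop of an unmanaged resource non-runnable, and under an ordering that requires 'first' to be runnable, a mandatory stop that cannot run makes the action ordered after it - ultimately the node shutdown - non-runnable as well.

/* Illustration only: how a non-runnable, mandatory 'first' action makes the
 * 'then' action non-runnable under a "'then' requires 'first' to be runnable"
 * style ordering. */
#include <stdbool.h>
#include <stdio.h>

struct sketch_action {
    const char *name;
    bool optional;
    bool runnable;
};

/* 'then' requires 'first' to be runnable */
static void apply_runnable_left(const struct sketch_action *first,
                                struct sketch_action *then)
{
    if (!first->optional && !first->runnable) {
        then->runnable = false;
    }
}

int main(void)
{
    /* Stop of a resource blocked by on-fail=block: mandatory but cannot run. */
    struct sketch_action blocked_stop = { "rsc1_stop", false, false };
    struct sketch_action shutdown     = { "do_shutdown", false, true };

    apply_runnable_left(&blocked_stop, &shutdown);
    printf("%s runnable: %s\n", shutdown.name, shutdown.runnable ? "yes" : "no");
    return 0;   /* prints "do_shutdown runnable: no" - the node will not shut down */
}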
