diff --git a/attrd/commands.c b/attrd/commands.c index c6586c735d9..9e656d862cc 100644 --- a/attrd/commands.c +++ b/attrd/commands.c @@ -765,10 +765,15 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da { if ((kind == crm_status_nstate) || (kind == crm_status_rstate)) { if (safe_str_eq(peer->state, CRM_NODE_MEMBER)) { - if ((election_state(writer) == election_won)) { + /* If we're the writer, send new peers a list of all attributes + * (unless it's a remote node, which doesn't run its own attrd) + */ + if ((election_state(writer) == election_won) + && !is_set(peer->flags, crm_remote_node)) { attrd_peer_sync(peer, NULL); } } else { + /* Remove all attribute values associated with lost nodes */ attrd_peer_remove(peer->id, peer->uname, FALSE, __FUNCTION__); if (peer_writer && safe_str_eq(peer->uname, peer_writer)) { free(peer_writer); diff --git a/crmd/callbacks.c b/crmd/callbacks.c index 38fb30b9553..b780c43c59e 100644 --- a/crmd/callbacks.c +++ b/crmd/callbacks.c @@ -109,7 +109,14 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d bool appeared = FALSE; const char *status = NULL; - set_bit(fsa_input_register, R_PEER_DATA); + /* Crmd waits to receive some information from the membership layer before + * declaring itself operational. If this is being called for a cluster node, + * indicate that we have it. + */ + if (!is_set(node->flags, crm_remote_node)) { + set_bit(fsa_input_register, R_PEER_DATA); + } + if (node->uname == NULL) { return; } @@ -132,20 +139,9 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d return; } else if(safe_str_eq(CRM_NODE_MEMBER, node->state)) { - GListPtr gIter = stonith_cleanup_list; - appeared = TRUE; - - while (gIter != NULL) { - GListPtr tmp = gIter; - char *target = tmp->data; - - gIter = gIter->next; - if(safe_str_eq(node->uname, target)) { - crm_trace("Removing %s from the cleanup list", target); - stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp); - free(target); - } + if (!is_set(node->flags, crm_remote_node)) { + remove_stonith_cleanup(node->uname); } } diff --git a/crmd/cib.c b/crmd/cib.c index 41e9efbfcfd..4502ebb162c 100644 --- a/crmd/cib.c +++ b/crmd/cib.c @@ -136,8 +136,7 @@ do_cib_replaced(const char *event, xmlNode * msg) } /* start the join process again so we get everyone's LRM status */ - populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer | node_update_join - | node_update_expected, __FUNCTION__); + populate_cib_nodes(node_update_quick|node_update_all, __FUNCTION__); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h index 78432dff52c..826ab1cf32b 100644 --- a/crmd/crmd_lrm.h +++ b/crmd/crmd_lrm.h @@ -73,8 +73,6 @@ struct pending_deletion_op_s { ha_msg_input_t *input; }; -xmlNode *do_lrm_query_internal(lrm_state_t * lrm_state, gboolean is_replace); - /*! 
* \brief Is this the local ipc connection to the lrmd */ diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h index 7e8c3e6b13d..9ef7a71e5b0 100644 --- a/crmd/crmd_utils.h +++ b/crmd/crmd_utils.h @@ -67,6 +67,7 @@ enum node_update_flags { node_update_peer = 0x0020, node_update_join = 0x0040, node_update_expected = 0x0100, + node_update_all = node_update_cluster|node_update_peer|node_update_join|node_update_expected, }; gboolean crm_timer_stop(fsa_timer_t * timer); @@ -74,10 +75,6 @@ gboolean crm_timer_start(fsa_timer_t * timer); gboolean crm_timer_popped(gpointer data); gboolean is_timer_started(fsa_timer_t * timer); -xmlNode *create_node_state(const char *uname, const char *in_cluster, - const char *is_peer, const char *join_state, - const char *exp_state, gboolean clear_shutdown, const char *src); - int crmd_exit(int rc); int crmd_fast_exit(int rc); gboolean stop_subsystem(struct crm_subsystem_s *centry, gboolean force_quit); diff --git a/crmd/election.c b/crmd/election.c index adab4e3a7c1..dee475069b8 100644 --- a/crmd/election.c +++ b/crmd/election.c @@ -172,24 +172,12 @@ do_dc_takeover(long long action, { int rc = pcmk_ok; xmlNode *cib = NULL; - GListPtr gIter = NULL; const char *cluster_type = name_for_cluster_type(get_cluster_type()); const char *watchdog = NULL; crm_info("Taking over DC status for this partition"); set_bit(fsa_input_register, R_THE_DC); - - for (gIter = stonith_cleanup_list; gIter != NULL; gIter = gIter->next) { - char *target = gIter->data; - crm_node_t *target_node = crm_get_peer(0, target); - const char *uuid = crm_peer_uuid(target_node); - - crm_notice("Marking %s, target of a previous stonith action, as clean", target); - send_stonith_update(NULL, target, uuid); - free(target); - } - g_list_free(stonith_cleanup_list); - stonith_cleanup_list = NULL; + execute_stonith_cleanup(); #if SUPPORT_COROSYNC if (is_classic_ais_cluster()) { diff --git a/crmd/fsa.c b/crmd/fsa.c index a3b3a90e8cd..cf54d193e86 100644 --- a/crmd/fsa.c +++ b/crmd/fsa.c @@ -540,8 +540,7 @@ do_state_transition(long long actions, } if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) { - populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer | - node_update_join | node_update_expected, __FUNCTION__); + populate_cib_nodes(node_update_quick|node_update_all, __FUNCTION__); } switch (next_state) { @@ -554,18 +553,8 @@ do_state_transition(long long actions, break; case S_NOT_DC: election_trigger->counter = 0; - if (stonith_cleanup_list) { - GListPtr gIter = NULL; + purge_stonith_cleanup(); - for (gIter = stonith_cleanup_list; gIter != NULL; gIter = gIter->next) { - char *target = gIter->data; - - crm_info("Purging %s from stonith cleanup list", target); - free(target); - } - g_list_free(stonith_cleanup_list); - stonith_cleanup_list = NULL; - } if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("(Re)Issuing shutdown request now" " that we have a new DC"); set_bit(tmp, A_SHUTDOWN_REQ); diff --git a/crmd/join_dc.c b/crmd/join_dc.c index 5280b6e6e10..89172319cc8 100644 --- a/crmd/join_dc.c +++ b/crmd/join_dc.c @@ -49,6 +49,11 @@ crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase return; } + /* Remote nodes do not participate in joins */ + if (is_set(node->flags, crm_remote_node)) { + return; + } + last = node->join; if(phase == last) { diff --git a/crmd/lrm.c b/crmd/lrm.c index ac246eae32d..8dabdb240af 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -831,27 +831,21 @@ build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) return FALSE; 
} -xmlNode * -do_lrm_query_internal(lrm_state_t * lrm_state, gboolean is_replace) +static xmlNode * +do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; const char *uuid = NULL; - if (safe_str_eq(lrm_state->node_name, fsa_our_uname)) { + if (lrm_state_is_local(lrm_state)) { crm_node_t *peer = crm_get_peer(0, lrm_state->node_name); - xml_state = do_update_node_cib(peer, node_update_cluster|node_update_peer, NULL, __FUNCTION__); - /* The next two lines shouldn't be necessary for newer DCs */ - crm_xml_add(xml_state, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER); - crm_xml_add(xml_state, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER); + xml_state = do_update_node_cib(peer, update_flags, NULL, __FUNCTION__); uuid = fsa_our_uuid; } else { - xml_state = create_xml_node(NULL, XML_CIB_TAG_STATE); - crm_xml_add(xml_state, XML_NODE_IS_REMOTE, "true"); - crm_xml_add(xml_state, XML_ATTR_ID, lrm_state->node_name); - crm_xml_add(xml_state, XML_ATTR_UNAME, lrm_state->node_name); + xml_state = simple_remote_node_status(lrm_state->node_name, NULL, __FUNCTION__); uuid = lrm_state->node_name; } @@ -871,12 +865,23 @@ xmlNode * do_lrm_query(gboolean is_replace, const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); + xmlNode *xml_state; if (!lrm_state) { crm_err("Could not query lrm state for lrmd node %s", node_name); return NULL; } - return do_lrm_query_internal(lrm_state, is_replace); + xml_state = do_lrm_query_internal(lrm_state, + node_update_cluster|node_update_peer); + + /* In case this function is called to generate a join confirmation to + * send to the DC, force the current and expected join state to member. + * This isn't necessary for newer DCs but is backward compatible. 
+ */ + crm_xml_add(xml_state, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER); + crm_xml_add(xml_state, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER); + + return xml_state; } static void @@ -1541,7 +1546,7 @@ do_lrm_invoke(long long action, if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { int rc = pcmk_ok; - xmlNode *fragment = do_lrm_query_internal(lrm_state, TRUE); + xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); crm_info("Forced a local LRM refresh: call=%d", rc); @@ -1562,7 +1567,7 @@ do_lrm_invoke(long long action, free_xml(fragment); } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { - xmlNode *data = do_lrm_query_internal(lrm_state, FALSE); + xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); xmlNode *reply = create_reply(input->msg, data); if (relay_message(reply, TRUE) == FALSE) { diff --git a/crmd/te_actions.c b/crmd/te_actions.c index 90ff3398ead..d2267b8833a 100644 --- a/crmd/te_actions.c +++ b/crmd/te_actions.c @@ -82,11 +82,7 @@ send_stonith_update(crm_action_t * action, const char *target, const char *uuid) } crmd_peer_down(peer, TRUE); - node_state = - do_update_node_cib(peer, - node_update_cluster | node_update_peer | node_update_join | - node_update_expected, NULL, __FUNCTION__); - + node_state = do_update_node_cib(peer, node_update_all, NULL, __FUNCTION__); /* we have to mark whether or not remote nodes have already been fenced */ if (peer->flags & crm_remote_node) { diff --git a/crmd/te_utils.c b/crmd/te_utils.c index 22551ba0d3b..36eecdb3063 100644 --- a/crmd/te_utils.c +++ b/crmd/te_utils.c @@ -31,7 +31,96 @@ #include crm_trigger_t *stonith_reconnect = NULL; -GListPtr stonith_cleanup_list = NULL; + +/* + * stonith cleanup list + * + * If the DC is shot, proper notifications might not go out. + * The stonith cleanup list allows the cluster to (re-)send + * notifications once a new DC is elected. + */ + +static GListPtr stonith_cleanup_list = NULL; + +/*! + * \internal + * \brief Add a node to the stonith cleanup list + * + * \param[in] target Name of node to add + */ +void +add_stonith_cleanup(const char *target) { + stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target)); +} + +/*! + * \internal + * \brief Remove a node from the stonith cleanup list + * + * \param[in] Name of node to remove + */ +void +remove_stonith_cleanup(const char *target) +{ + GListPtr iter = stonith_cleanup_list; + + while (iter != NULL) { + GListPtr tmp = iter; + char *iter_name = tmp->data; + + iter = iter->next; + if (safe_str_eq(target, iter_name)) { + crm_trace("Removing %s from the cleanup list", iter_name); + stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp); + free(iter_name); + } + } +} + +/*! + * \internal + * \brief Purge all entries from the stonith cleanup list + */ +void +purge_stonith_cleanup() +{ + if (stonith_cleanup_list) { + GListPtr iter = NULL; + + for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { + char *target = iter->data; + + crm_info("Purging %s from stonith cleanup list", target); + free(target); + } + g_list_free(stonith_cleanup_list); + stonith_cleanup_list = NULL; + } +} + +/*! 
+ * \internal + * \brief Send stonith updates for all entries in cleanup list, then purge it + */ +void +execute_stonith_cleanup() +{ + GListPtr iter; + + for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { + char *target = iter->data; + crm_node_t *target_node = crm_get_peer(0, target); + const char *uuid = crm_peer_uuid(target_node); + + crm_notice("Marking %s, target of a previous stonith action, as clean", target); + send_stonith_update(NULL, target, uuid); + free(target); + } + g_list_free(stonith_cleanup_list); + stonith_cleanup_list = NULL; +} + +/* end stonith cleanup list functions */ static gboolean fail_incompletable_stonith(crm_graph_t * graph) @@ -251,7 +340,9 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) } /* Assume it was our leader if we dont currently have one */ - } else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) { + } else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target)) + && !is_set(peer->flags, crm_remote_node)) { + crm_notice("Target %s our leader %s (recorded: %s)", fsa_our_dc ? "was" : "may have been", st_event->target, fsa_our_dc ? fsa_our_dc : ""); @@ -263,8 +354,7 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) if (we_are_executioner) { send_stonith_update(NULL, st_event->target, uuid); } - stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target)); - + add_stonith_cleanup(st_event->target); } crmd_peer_down(peer, TRUE); diff --git a/crmd/tengine.h b/crmd/tengine.h index 1765dd439e0..94a869314a2 100644 --- a/crmd/tengine.h +++ b/crmd/tengine.h @@ -23,10 +23,15 @@ # include # include extern stonith_t *stonith_api; -extern GListPtr stonith_cleanup_list; extern void send_stonith_update(crm_action_t * stonith_action, const char *target, const char *uuid); +/* stonith cleanup list */ +void add_stonith_cleanup(const char *target); +void remove_stonith_cleanup(const char *target); +void purge_stonith_cleanup(void); +void execute_stonith_cleanup(void); + /* tengine */ extern crm_action_t *match_down_event(int rc, const char *target, const char *filter, bool quiet); extern crm_action_t *get_cancel_action(const char *id, const char *node); diff --git a/cts/CTStests.py b/cts/CTStests.py index ddd8c4a4eae..814a42186a7 100644 --- a/cts/CTStests.py +++ b/cts/CTStests.py @@ -1693,6 +1693,19 @@ def __init__(self, cm): self.stopall = SimulStopLite(cm) self.is_unsafe = 0 # Handled by canrunnow() + def _is_managed(self, node): + is_managed = self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -Q -G -d true", 1) + is_managed = is_managed[:-1] # Strip off the newline + return is_managed == "true" + + def _set_unmanaged(self, node): + self.debug("Disable resource management") + self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -v false") + + def _set_managed(self, node): + self.debug("Re-enable resource management") + self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -D") + def setup(self, node): attempt = 0 if not self.startall(None): @@ -1717,17 +1730,11 @@ def teardown(self, node): start = StartTest(self.CM) start(node) - is_managed = self.rsh(node, "crm_attribute -Q -G -t crm_config -n is-managed-default -d true", 1) - is_managed = is_managed[:-1] # Strip off the newline - if is_managed != "true": - self.logger.log("Attempting to re-enable resource management on %s (%s)" % (node, is_managed)) - managed = self.create_watch(["is-managed-default"], 60) - managed.setwatch() - - self.rsh(node, "crm_attribute -V -D -n 
is-managed-default") - - if not managed.lookforall(): - self.logger.log("Patterns not found: " + repr(managed.unmatched)) + if not self._is_managed(node): + self.logger.log("Attempting to re-enable resource management on %s" % node) + self._set_managed(node) + self.CM.cluster_stable() + if not self._is_managed(node): self.logger.log("Could not re-enable resource management") return 0 @@ -1744,11 +1751,12 @@ def __call__(self, node): self.incr("calls") pats = [] - managed = self.create_watch(["is-managed-default"], 60) + # Conveniently, pengine will display this message when disabling management, + # even if fencing is not enabled, so we can rely on it. + managed = self.create_watch(["Delaying fencing operations"], 60) managed.setwatch() - self.debug("Disable resource management") - self.rsh(node, "crm_attribute -V -n is-managed-default -v false") + self._set_unmanaged(node) if not managed.lookforall(): self.logger.log("Patterns not found: " + repr(managed.unmatched)) @@ -1767,37 +1775,28 @@ def __call__(self, node): self.debug("Shutting down the cluster") ret = self.stopall(None) if not ret: - self.debug("Re-enable resource management") - self.rsh(node, "crm_attribute -V -D -n is-managed-default") + self._set_managed(node) return self.failure("Couldn't shut down the cluster") self.debug("Bringing the cluster back up") ret = self.startall(None) time.sleep(5) # allow ping to update the CIB if not ret: - self.debug("Re-enable resource management") - self.rsh(node, "crm_attribute -V -D -n is-managed-default") + self._set_managed(node) return self.failure("Couldn't restart the cluster") if self.local_badnews("ResourceActivity:", watch): - self.debug("Re-enable resource management") - self.rsh(node, "crm_attribute -V -D -n is-managed-default") + self._set_managed(node) return self.failure("Resources stopped or started during cluster restart") watch = self.create_watch(pats, 60, "StartupActivity") watch.setwatch() - managed = self.create_watch(["is-managed-default"], 60) - managed.setwatch() - - self.debug("Re-enable resource management") - self.rsh(node, "crm_attribute -V -D -n is-managed-default") - - if not managed.lookforall(): - self.logger.log("Patterns not found: " + repr(managed.unmatched)) - return self.failure("Resource management not enabled") - + # Re-enable resource management (and verify it happened). 
+ self._set_managed(node) self.CM.cluster_stable() + if not self._is_managed(node): + return self.failure("Could not re-enable resource management") # Ignore actions for STONITH resources ignore = [] @@ -2638,30 +2637,31 @@ def errorstoignore(self): AllTestClasses.append(RemoteLXC) -################################################################### class RemoteDriver(CTSTest): -################################################################### + def __init__(self, cm): CTSTest.__init__(self,cm) - self.name = "RemoteDriver" + self.name = self.__class__.__name__ self.is_docker_unsafe = 1 self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.stop = StopTest(cm) + self.remote_rsc = "remote-rsc" + self.cib_cmd = """cibadmin -C -o %s -X '%s' """ + self.reset() + + def reset(self): self.pcmk_started = 0 - self.failed = 0 + self.failed = False self.fail_string = "" self.remote_node_added = 0 self.remote_rsc_added = 0 - self.remote_rsc = "remote-rsc" - self.remote_use_reconnect_interval = self.Env.RandomGen.choice(["true","false"]) - self.cib_cmd = """cibadmin -C -o %s -X '%s' """ + self.remote_use_reconnect_interval = self.Env.RandomGen.choice([True,False]) def fail(self, msg): """ Mark test as failed. """ - # TODO: It's a boolean. Use True/False. - self.failed = 1 + self.failed = True # Always log the failure. self.logger.log(msg) @@ -2700,11 +2700,11 @@ def add_primitive_rsc(self, node): """ % (self.remote_rsc) self.add_rsc(node, rsc_xml) - if self.failed == 0: + if not self.failed: self.remote_rsc_added = 1 def add_connection_rsc(self, node): - if self.remote_use_reconnect_interval == "true": + if self.remote_use_reconnect_interval: # use reconnect interval and make sure to set cluster-recheck-interval as well. rsc_xml = """ @@ -2734,7 +2734,7 @@ def add_connection_rsc(self, node): """ % (self.remote_node, node) self.add_rsc(node, rsc_xml) - if self.failed == 0: + if not self.failed: self.remote_node_added = 1 def stop_pcmk_remote(self, node): @@ -2788,7 +2788,7 @@ def start_metal(self, node): self.fail("Unmatched patterns: %s" % watch.unmatched) def migrate_connection(self, node): - if self.failed == 1: + if self.failed: return pats = [ ] @@ -2812,7 +2812,7 @@ def migrate_connection(self, node): return def fail_rsc(self, node): - if self.failed == 1: + if self.failed: return watchpats = [ ] @@ -2834,7 +2834,7 @@ def fail_rsc(self, node): self.fail("Unmatched patterns during rsc fail: %s" % watch.unmatched) def fail_connection(self, node): - if self.failed == 1: + if self.failed: return watchpats = [ ] @@ -2881,7 +2881,7 @@ def fail_connection(self, node): return def add_dummy_rsc(self, node): - if self.failed == 1: + if self.failed: return # verify we can put a resource on the remote node @@ -2907,7 +2907,7 @@ def add_dummy_rsc(self, node): self.fail("Unmatched patterns: %s" % watch.unmatched) def test_attributes(self, node): - if self.failed == 1: + if self.failed: return # This verifies permanent attributes can be set on a remote-node. It also @@ -2943,7 +2943,7 @@ def cleanup_metal(self, node): self.set_timer("remoteMetalCleanup") - if self.remote_use_reconnect_interval == "true": + if self.remote_use_reconnect_interval: self.debug("Cleaning up re-check interval") self.rsh(self.get_othernode(node), self.templates["ClearCheckInterval"]) @@ -3013,9 +3013,9 @@ def is_applicable(self): return False return True - def __call__(self, node): - '''Perform the 'RemoteBaremetal' test. 
''' + def start_new_test(self, node): self.incr("calls") + self.reset() ret = self.startall(None) if not ret: @@ -3024,15 +3024,9 @@ def __call__(self, node): self.setup_env(node) self.start_metal(node) self.add_dummy_rsc(node) - self.test_attributes(node) - self.cleanup_metal(node) - self.debug("Waiting for the cluster to recover") - self.CM.cluster_stable() - if self.failed == 1: - return self.failure(self.fail_string) - - return self.success() + def __call__(self, node): + return self.failure("This base class is not meant to be called directly.") def errorstoignore(self): '''Return list of errors which should be ignored''' @@ -3041,83 +3035,45 @@ def errorstoignore(self): """Failed to send remote""", ] -# Remote driver is called by other tests. +# RemoteDriver is just a base class for other tests, so it is not added to AllTestClasses -################################################################### -class RemoteBasic(CTSTest): -################################################################### - def __init__(self, cm): - CTSTest.__init__(self,cm) - self.name = "RemoteBasic" - self.start = StartTest(cm) - self.startall = SimulStartLite(cm) - self.driver = RemoteDriver(cm) - self.is_docker_unsafe = 1 + +class RemoteBasic(RemoteDriver): def __call__(self, node): '''Perform the 'RemoteBaremetal' test. ''' - self.incr("calls") - ret = self.startall(None) - if not ret: - return self.failure("Setup failed, start all nodes failed.") - - self.driver.setup_env(node) - self.driver.start_metal(node) - self.driver.add_dummy_rsc(node) - self.driver.test_attributes(node) - self.driver.cleanup_metal(node) + self.start_new_test(node) + self.test_attributes(node) + self.cleanup_metal(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() - if self.driver.failed == 1: - return self.failure(self.driver.fail_string) + if self.failed: + return self.failure(self.fail_string) return self.success() - def is_applicable(self): - return self.driver.is_applicable() - - def errorstoignore(self): - return self.driver.errorstoignore() - AllTestClasses.append(RemoteBasic) -################################################################### -class RemoteStonithd(CTSTest): -################################################################### - def __init__(self, cm): - CTSTest.__init__(self,cm) - self.name = "RemoteStonithd" - self.start = StartTest(cm) - self.startall = SimulStartLite(cm) - self.driver = RemoteDriver(cm) - self.is_docker_unsafe = 1 +class RemoteStonithd(RemoteDriver): def __call__(self, node): '''Perform the 'RemoteStonithd' test. 
''' - self.incr("calls") - ret = self.startall(None) - if not ret: - return self.failure("Setup failed, start all nodes failed.") - - self.driver.setup_env(node) - self.driver.start_metal(node) - self.driver.add_dummy_rsc(node) - - self.driver.fail_connection(node) - self.driver.cleanup_metal(node) + self.start_new_test(node) + self.fail_connection(node) + self.cleanup_metal(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() - if self.driver.failed == 1: - return self.failure(self.driver.fail_string) + if self.failed: + return self.failure(self.fail_string) return self.success() def is_applicable(self): - if not self.driver.is_applicable(): + if not RemoteDriver.is_applicable(self): return False if "DoFencing" in self.Env.keys(): @@ -3134,101 +3090,59 @@ def errorstoignore(self): r"error.*: Resource .*ocf::.* is active on 2 nodes attempting recovery", ] - ignore_pats.extend(self.driver.errorstoignore()) + ignore_pats.extend(RemoteDriver.errorstoignore(self)) return ignore_pats AllTestClasses.append(RemoteStonithd) -################################################################### -class RemoteMigrate(CTSTest): -################################################################### - def __init__(self, cm): - CTSTest.__init__(self,cm) - self.name = "RemoteMigrate" - self.start = StartTest(cm) - self.startall = SimulStartLite(cm) - self.driver = RemoteDriver(cm) - self.is_docker_unsafe = 1 + +class RemoteMigrate(RemoteDriver): def __call__(self, node): '''Perform the 'RemoteMigrate' test. ''' - self.incr("calls") - ret = self.startall(None) - if not ret: - return self.failure("Setup failed, start all nodes failed.") - - self.driver.setup_env(node) - self.driver.start_metal(node) - self.driver.add_dummy_rsc(node) - self.driver.migrate_connection(node) - self.driver.cleanup_metal(node) + self.start_new_test(node) + self.migrate_connection(node) + self.cleanup_metal(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() - if self.driver.failed == 1: - return self.failure(self.driver.fail_string) + if self.failed: + return self.failure(self.fail_string) return self.success() - def is_applicable(self): - return self.driver.is_applicable() - - def errorstoignore(self): - return self.driver.errorstoignore() - AllTestClasses.append(RemoteMigrate) -################################################################### -class RemoteRscFailure(CTSTest): -################################################################### - def __init__(self, cm): - - # fail a rsc on a remote node, verify recovery. - CTSTest.__init__(self,cm) - self.name = "RemoteRscFailure" - self.start = StartTest(cm) - self.startall = SimulStartLite(cm) - self.driver = RemoteDriver(cm) - self.is_docker_unsafe = 1 +class RemoteRscFailure(RemoteDriver): def __call__(self, node): '''Perform the 'RemoteRscFailure' test. ''' - self.incr("calls") - - ret = self.startall(None) - if not ret: - return self.failure("Setup failed, start all nodes failed.") - self.driver.setup_env(node) - self.driver.start_metal(node) - self.driver.add_dummy_rsc(node) + self.start_new_test(node) # This is an important step. We are migrating the connection # before failing the resource. This verifies that the migration # has properly maintained control over the remote-node. 
- self.driver.migrate_connection(node) + self.migrate_connection(node) - self.driver.fail_rsc(node) - self.driver.cleanup_metal(node) + self.fail_rsc(node) + self.cleanup_metal(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() - if self.driver.failed == 1: - return self.failure(self.driver.fail_string) + if self.failed: + return self.failure(self.fail_string) return self.success() - def is_applicable(self): - return self.driver.is_applicable() - def errorstoignore(self): ignore_pats = [ r"pengine.*: Recover remote-rsc\s*\(.*\)", ] - ignore_pats.extend(self.driver.errorstoignore()) + ignore_pats.extend(RemoteDriver.errorstoignore(self)) return ignore_pats AllTestClasses.append(RemoteRscFailure) diff --git a/doc/Pacemaker_Remote/en-US/Ch-KVM-Tutorial.txt b/doc/Pacemaker_Remote/en-US/Ch-KVM-Tutorial.txt index 328a52eb0f6..72a90765922 100644 --- a/doc/Pacemaker_Remote/en-US/Ch-KVM-Tutorial.txt +++ b/doc/Pacemaker_Remote/en-US/Ch-KVM-Tutorial.txt @@ -10,20 +10,45 @@ as possible. == Configure the Physical Host == -=== SElinux and Firewall === +[NOTE] +====== +For this example, we will use a single physical host named *example-host*. +A production cluster would likely have multiple physical hosts, in which case +you would run the commands here on each one, unless noted otherwise. +====== -In order to simplify this tutorial, we will disable SELinux and the local -firewall on the host. This may create significant security issues and should -not be performed on machines that will be exposed to the outside world, but may -be appropriate during development and testing on a protected host. +=== Configure Firewall on Host === + +On the physical host, allow cluster-related services through the local firewall: ---- -# setenforce 0 -# sed -i.bak "s/SELINUX=enforcing/SELINUX=permissive/g" /etc/selinux/config -# systemctl disable firewalld.service -# systemctl stop firewalld.service -# iptables --flush +# firewall-cmd --permanent --add-service=high-availability +success +# firewall-cmd --reload +success ---- +[NOTE] +====== +If you are using iptables directly, or some other firewall solution besides +firewalld, simply open the following ports, which can be used by various +clustering components: TCP ports 2224, 3121, and 21064, and UDP port 5405. + +If you run into any problems during testing, you might want to disable +the firewall and SELinux entirely until you have everything working. +This may create significant security issues and should not be performed on +machines that will be exposed to the outside world, but may be appropriate +during development and testing on a protected host. + +To disable security measures: +---- +[root@pcmk-1 ~]# setenforce 0 +[root@pcmk-1 ~]# sed -i.bak "s/SELINUX=enforcing/SELINUX=permissive/g" /etc/selinux/config +[root@pcmk-1 ~]# systemctl disable firewalld.service +[root@pcmk-1 ~]# systemctl stop firewalld.service +[root@pcmk-1 ~]# iptables --flush +---- +====== + === Install Cluster Software === ---- @@ -33,15 +58,41 @@ be appropriate during development and testing on a protected host. === Configure Corosync === Corosync handles pacemaker's cluster membership and messaging. The corosync -config file is located in /etc/corosync/corosync.conf. That config file must be -initialized with information about the cluster nodes before pacemaker can +config file is located in +/etc/corosync/corosync.conf+. That config file must +be initialized with information about the cluster nodes before pacemaker can start. 
-To initialize the corosync config file, execute the following pcs command on both nodes filling in the information in <> with your nodes' information. +To initialize the corosync config file, execute the following `pcs` command, +replacing the cluster name and hostname as desired: ---- -# pcs cluster setup --force --local --name mycluster +# pcs cluster setup --force --local --name mycluster example-host ---- +[NOTE] +====== +If you have multiple physical hosts, you would execute the setup command on +only one host, but list all of them at the end of the command. +====== + +=== Configure Pacemaker for Remote Node Communication === + +Create a place to hold an authentication key for use with pacemaker_remote: +---- +# mkdir -p --mode=0750 /etc/pacemaker +# chgrp haclient /etc/pacemaker +---- + +Generate a key: +---- +# dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1 +---- + +[NOTE] +====== +If you have multiple physical hosts, you would generate the key on only one +host, and copy it to the same location on all hosts. +====== + === Verify Cluster Software === Start the cluster @@ -50,46 +101,109 @@ Start the cluster ---- Verify corosync membership ----- +.... # pcs status corosync Membership information +---------------------- Nodeid Votes Name -1795270848 1 example-host (local) ----- + 1 1 example-host (local) +.... Verify pacemaker status. At first, the output will look like this: ---- # pcs status +Cluster name: mycluster +WARNING: no stonith devices and stonith-enabled is not false +Last updated: Fri Oct 9 15:18:32 2015 Last change: Fri Oct 9 12:42:21 2015 by root via cibadmin on example-host +Stack: corosync +Current DC: NONE +1 node and 0 resources configured + +Node example-host: UNCLEAN (offline) + +Full list of resources: + + +PCSD Status: + example-host: Online + +Daemon Status: + corosync: active/disabled + pacemaker: active/disabled + pcsd: active/enabled +---- + +After a short amount of time, you should see your host as a single node in the +cluster: +---- +# pcs status +Cluster name: mycluster +WARNING: no stonith devices and stonith-enabled is not false +Last updated: Fri Oct 9 15:20:05 2015 Last change: Fri Oct 9 12:42:21 2015 by root via cibadmin on example-host +Stack: corosync +Current DC: example-host (version 1.1.13-a14efad) - partition WITHOUT quorum +1 node and 0 resources configured - Last updated: Thu Mar 14 12:26:00 2013 - Last change: Thu Mar 14 12:25:55 2013 via crmd on example-host - Stack: corosync - Current DC: - Version: 1.1.10 - 1 Nodes configured, unknown expected votes - 0 Resources configured. +Online: [ example-host ] + +Full list of resources: + + +PCSD Status: + example-host: Online + +Daemon Status: + corosync: active/disabled + pacemaker: active/disabled + pcsd: active/enabled ---- -After about a minute you should see your host as a single node in the cluster. +=== Disable STONITH and Quorum === +Now, enable the cluster to work without quorum or stonith. This is required +for the sake of getting this tutorial to work with a single cluster node. + +---- +# pcs property set stonith-enabled=false +# pcs property set no-quorum-policy=ignore +---- + +[WARNING] +========= +The use of `stonith-enabled=false` is completely inappropriate for a production +cluster. It tells the cluster to simply pretend that failed nodes are safely +powered off. Some vendors will refuse to support clusters that have STONITH +disabled. 
We disable STONITH here only to focus the discussion on +pacemaker_remote, and to be able to use a single physical host in the example. +========= + +Now, the status output should look similar to this: ---- # pcs status +Cluster name: mycluster +Last updated: Fri Oct 9 15:22:49 2015 Last change: Fri Oct 9 15:22:46 2015 by root via cibadmin on example-host +Stack: corosync +Current DC: example-host (version 1.1.13-a14efad) - partition with quorum +1 node and 0 resources configured + +Online: [ example-host ] + +Full list of resources: + - Last updated: Thu Mar 14 12:28:23 2013 - Last change: Thu Mar 14 12:25:55 2013 via crmd on example-host - Stack: corosync - Current DC: example-host (1795270848) - partition WITHOUT quorum - Version: 1.1.8-9b13ea1 - 1 Nodes configured, unknown expected votes - 0 Resources configured. +PCSD Status: + example-host: Online - Online: [ example-host ] +Daemon Status: + corosync: active/disabled + pacemaker: active/disabled + pcsd: active/enabled ---- Go ahead and stop the cluster for now after verifying everything is in order. ---- -# pcs cluster stop +# pcs cluster stop --force ---- === Install Virtualization Software === @@ -99,7 +213,7 @@ Go ahead and stop the cluster for now after verifying everything is in order. # systemctl enable libvirtd.service ---- -reboot the host +Reboot the host. [NOTE] ====== @@ -113,93 +227,41 @@ only to support usual commands (start, stop, etc.); Pacemaker implements the === Create Guest === -I am not going to outline the installation steps required to create a KVM +We will not outline here the installation steps required to create a KVM guest. There are plenty of tutorials available elsewhere that do that. +Just be sure to configure the guest with a hostname and a static IP address +(as an example here, we will use guest1 and 192.168.122.10). -=== Configure Guest Network === +=== Configure Firewall on Guest === -Run the commands below to set up a static ip address (192.168.122.10) and hostname (guest1). +On each guest, allow cluster-related services through the local firewall, +following the same procedure as in <<_configure_firewall_on_host>>. ----- -export remote_hostname=guest1 -export remote_ip=192.168.122.10 -export remote_gateway=192.168.122.1 - -yum remove -y NetworkManager - -rm -f /etc/hostname -cat << END >> /etc/hostname -$remote_hostname -END - -hostname $remote_hostname +=== Verify Connectivity === -cat << END >> /etc/sysconfig/network -HOSTNAME=$remote_hostname -GATEWAY=$remote_gateway -END - -sed -i.bak "s/.*BOOTPROTO=.*/BOOTPROTO=none/g" /etc/sysconfig/network-scripts/ifcfg-eth0 - -cat << END >> /etc/sysconfig/network-scripts/ifcfg-eth0 -IPADDR0=$remote_ip -PREFIX0=24 -GATEWAY0=$remote_gateway -DNS1=$remote_gateway -END +At this point, you should be able to ping and ssh into guests from hosts, and +vice versa. -systemctl restart network -systemctl enable network.service -systemctl enable sshd -systemctl start sshd +=== Configure pacemaker_remote === -echo "checking connectivity" -ping www.google.com +Install pacemaker_remote, and enable it to run at start-up. Here, we also +install the pacemaker package; it is not required, but it contains the dummy +resource agent that we will use later for testing. ---- - -To simplify the tutorial we'll go ahead and disable selinux on the guest. We'll also need to poke a hole through the firewall on port 3121 (the default port for pacemaker_remote) so the host can contact the guest. 
- +# yum install -y pacemaker pacemaker-remote resource-agents +# systemctl enable pacemaker_remote.service ---- -# setenforce 0 -# sed -i.bak "s/SELINUX=enforcing/SELINUX=permissive/g" /etc/selinux/config - -# firewall-cmd --add-port 3121/tcp --permanent ----- - -If you still encounter connection issues, just disable firewalld on the guest -like we did on the host, to guarantee you'll be able to contact the guest from -the host. - -At this point you should be able to ssh into the guest from the host. - -=== Configure pacemaker_remote === - -On the 'host' machine, run these commands to generate an authkey and copy it to -the /etc/pacemaker folder on both the host and guest. +Copy the authentication key from a host: ---- # mkdir -p --mode=0750 /etc/pacemaker # chgrp haclient /etc/pacemaker -# dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1 -# scp -r /etc/pacemaker root@192.168.122.10:/etc/ +# scp root@example-host:/etc/pacemaker/authkey /etc/pacemaker ---- -Now on the 'guest', install the pacemaker-remote package, and enable the daemon -to run at startup. In the commands below, you will notice the pacemaker -package is also installed. It is not required; the only reason it is being -installed for this tutorial is because it contains the Dummy resource agent -that we will use later for testing. - +Start pacemaker_remote, and verify the start was successful: ---- -# yum install -y pacemaker pacemaker-remote resource-agents -# systemctl enable pacemaker_remote.service ----- - -Now start pacemaker_remote on the guest and verify the start was successful. - ----- -# systemctl start pacemaker_remote.service - +# systemctl start pacemaker_remote # systemctl status pacemaker_remote pacemaker_remote.service - Pacemaker Remote Service @@ -221,7 +283,9 @@ on port 3121. Here's a trick you can use. Connect using ssh from the host. The connection will get destroyed, but how it is destroyed tells you whether it worked or not. -First add guest1 to the host machine's /etc/hosts file if you haven't already. This is required unless you have dns setup in a way where guest1's address can be discovered. +First add guest1 to the host machine's +/etc/hosts+ file if you haven't +already. This is required unless you have DNS setup in a way where guest1's +address can be discovered. ---- # cat << END >> /etc/hosts @@ -236,11 +300,15 @@ output before disconnecting, the connection works. ssh_exchange_identification: read: Connection reset by peer ---- -If you see this, the connection is not working. +If you see one of these, the connection is not working. ---- # ssh -p 3121 guest1 ssh: connect to host guest1 port 3121: No route to host ---- +---- +# ssh -p 3121 guest1 +ssh: connect to host guest1 port 3121: Connection refused +---- Once you can successfully connect to the guest from the host, shutdown the guest. Pacemaker will be managing the virtual machine from this point forward. @@ -256,33 +324,13 @@ On the host, start pacemaker. ---- Wait for the host to become the DC. The output of `pcs status` should look -similar to this after about a minute. - ----- -Last updated: Thu Mar 14 16:41:22 2013 -Last change: Thu Mar 14 16:41:08 2013 via crmd on example-host -Stack: corosync -Current DC: example-host (1795270848) - partition WITHOUT quorum -Version: 1.1.10 -1 Nodes configured, unknown expected votes -0 Resources configured. - - -Online: [ example-host ] ----- - -Now enable the cluster to work without quorum or stonith. 
This is required -just for the sake of getting this tutorial to work with a single cluster node. - ----- -# pcs property set stonith-enabled=false -# pcs property set no-quorum-policy=ignore ----- +as it did in <<_disable_stonith_and_quorum>>. === Integrate as Guest Node === -If you didn't already do this earlier in the verify host to guest connection section, add the KVM guest's ip to the host's /etc/hosts file so we can connect by hostname. The command below will do that if you used the same ip address I used earlier. - +If you didn't already do this earlier in the verify host to guest connection +section, add the KVM guest's IP address to the host's +/etc/hosts+ file so we +can connect by hostname. For this example: ---- # cat << END >> /etc/hosts 192.168.122.10 guest1 @@ -304,34 +352,58 @@ you just created from the output of this list. In my case I named it guest1. Dump the xml to a file somewhere on the host using the following command. ---- -# virsh dumpxml guest1 > /root/guest1.xml +# virsh dumpxml guest1 > /etc/pacemaker/guest1.xml ---- Now just register the resource with pacemaker and you're set! ---- -# pcs resource create vm-guest1 VirtualDomain hypervisor="qemu:///system" config="/root/guest1.xml" meta remote-node=guest1 +# pcs resource create vm-guest1 VirtualDomain hypervisor="qemu:///system" \ + config="/etc/pacemaker/guest1.xml" meta remote-node=guest1 ---- +[NOTE] +====== +This example puts the guest XML under /etc/pacemaker because the +permissions and SELinux labeling should not need any changes. +If you run into trouble with this or any step, try disabling SELinux +with `setenforce 0`. If it works after that, see SELinux documentation +for how to troubleshoot, if you wish to reenable SELinux. +====== + +[NOTE] +====== +Pacemaker will automatically monitor pacemaker_remote connections for failure, +so it is not necessary to create a recurring monitor on the VirtualDomain +resource. +====== + Once the *vm-guest1* resource is started you will see *guest1* appear in the `pcs status` output as a node. The final `pcs status` output should look something like this. ---- -Last updated: Fri Mar 15 09:30:30 2013 -Last change: Thu Mar 14 17:21:35 2013 via cibadmin on example-host +# pcs status +Cluster name: mycluster +Last updated: Fri Oct 9 18:00:45 2015 Last change: Fri Oct 9 17:53:44 2015 by root via crm_resource on example-host Stack: corosync -Current DC: example-host (1795270848) - partition WITHOUT quorum -Version: 1.1.10 -2 Nodes configured, unknown expected votes -2 Resources configured. - +Current DC: example-host (version 1.1.13-a14efad) - partition with quorum +2 nodes and 2 resources configured -Online: [ example-host guest1 ] +Online: [ example-host ] +GuestOnline: [ guest1@example-host ] Full list of resources: vm-guest1 (ocf::heartbeat:VirtualDomain): Started example-host + +PCSD Status: + example-host: Online + +Daemon Status: + corosync: active/disabled + pacemaker: active/disabled + pcsd: active/enabled ---- === Starting Resources on KVM Guest === @@ -371,7 +443,7 @@ purposes, I am picking FAKE3 from the output above. We can force FAKE3 to run on *guest1* in the exact same way we would any other node. ---- -# pcs constraint FAKE3 prefers guest1 +# pcs constraint location FAKE3 prefers guest1 ---- Now, looking at the bottom of the `pcs status` output you'll see FAKE3 is on @@ -403,62 +475,96 @@ ssh into the guest and run this command. # kill -9 `pidof pacemaker_remoted` ---- -After a few seconds or so, you'll see this in your `pcs status` output. 
The -*guest1* node will be show as offline as it is being recovered. - +Within a few seconds, your `pcs status` output will show a monitor failure, +and the *guest1* node will not be shown while it is being recovered. ---- -Last updated: Fri Mar 15 11:00:31 2013 -Last change: Fri Mar 15 09:54:16 2013 via cibadmin on example-host +# pcs status +Cluster name: mycluster +Last updated: Fri Oct 9 18:08:35 2015 Last change: Fri Oct 9 18:07:00 2015 by root via cibadmin on example-host Stack: corosync -Current DC: example-host (1795270848) - partition WITHOUT quorum -Version: 1.1.10 -2 Nodes configured, unknown expected votes -7 Resources configured. - +Current DC: example-host (version 1.1.13-a14efad) - partition with quorum +2 nodes and 7 resources configured Online: [ example-host ] -OFFLINE: [ guest1 ] Full list of resources: - vm-guest1 (ocf::heartbeat:VirtualDomain): Started example-host - FAKE1 (ocf::pacemaker:Dummy): Stopped - FAKE2 (ocf::pacemaker:Dummy): Stopped - FAKE3 (ocf::pacemaker:Dummy): Stopped + vm-guest1 (ocf::heartbeat:VirtualDomain): Started example-host + FAKE1 (ocf::pacemaker:Dummy): Stopped + FAKE2 (ocf::pacemaker:Dummy): Stopped + FAKE3 (ocf::pacemaker:Dummy): Stopped FAKE4 (ocf::pacemaker:Dummy): Started example-host FAKE5 (ocf::pacemaker:Dummy): Started example-host -Failed actions: - guest1_monitor_30000 (node=example-host, call=3, rc=7, status=complete): not running +Failed Actions: +* guest1_monitor_30000 on example-host 'unknown error' (1): call=8, status=Error, exitreason='none', + last-rc-change='Fri Oct 9 18:08:29 2015', queued=0ms, exec=0ms + + +PCSD Status: + example-host: Online + +Daemon Status: + corosync: active/disabled + pacemaker: active/disabled + pcsd: active/enabled ---- +[NOTE] +====== +A guest node involves two resources: the one you explicitly configured creates the guest, +and Pacemaker creates an implicit resource for the pacemaker_remote connection, which +will be named the same as the value of the *remote-node* attribute of the explicit resource. +When we killed pacemaker_remote, it is the implicit resource that failed, which is why +the failed action starts with *guest1* and not *vm-guest1*. +====== + Once recovery of the guest is complete, you'll see it automatically get re-integrated into the cluster. The final `pcs status` output should look something like this. ---- -Last updated: Fri Mar 15 11:03:17 2013 -Last change: Fri Mar 15 09:54:16 2013 via cibadmin on example-host +Cluster name: mycluster +Last updated: Fri Oct 9 18:18:30 2015 Last change: Fri Oct 9 18:07:00 2015 by root via cibadmin on example-host Stack: corosync -Current DC: example-host (1795270848) - partition WITHOUT quorum -Version: 1.1.10 -2 Nodes configured, unknown expected votes -7 Resources configured. 
+Current DC: example-host (version 1.1.13-a14efad) - partition with quorum +2 nodes and 7 resources configured - -Online: [ example-host guest1 ] +Online: [ example-host ] +GuestOnline: [ guest1@example-host ] Full list of resources: vm-guest1 (ocf::heartbeat:VirtualDomain): Started example-host - FAKE1 (ocf::pacemaker:Dummy): Started guest1 - FAKE2 (ocf::pacemaker:Dummy): Started guest1 - FAKE3 (ocf::pacemaker:Dummy): Started guest1 + FAKE1 (ocf::pacemaker:Dummy): Started guest1 + FAKE2 (ocf::pacemaker:Dummy): Started guest1 + FAKE3 (ocf::pacemaker:Dummy): Started guest1 FAKE4 (ocf::pacemaker:Dummy): Started example-host FAKE5 (ocf::pacemaker:Dummy): Started example-host -Failed actions: - guest1_monitor_30000 (node=example-host, call=3, rc=7, status=complete): not running +Failed Actions: +* guest1_monitor_30000 on example-host 'unknown error' (1): call=8, status=Error, exitreason='none', + last-rc-change='Fri Oct 9 18:08:29 2015', queued=0ms, exec=0ms + + +PCSD Status: + example-host: Online + +Daemon Status: + corosync: active/disabled + pacemaker: active/disabled + pcsd: active/enabled +---- + +Normally, once you've investigated and addressed a failed action, you can clear the +failure. However Pacemaker does not yet support cleanup for the implicitly +created connection resource while the explicit resource is active. If you want +to clear the failed action from the status output, stop the guest resource before +clearing it. For example: +---- +# pcs resource disable vm-guest1 --wait +# pcs resource cleanup guest1 +# pcs resource enable vm-guest1 ---- === Accessing Cluster Tools from Guest Node === diff --git a/fencing/main.c b/fencing/main.c index ba9857f158b..8fdec647f10 100644 --- a/fencing/main.c +++ b/fencing/main.c @@ -1233,7 +1233,7 @@ struct qb_ipcs_service_handlers ipc_callbacks = { static void st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { - if (type != crm_status_processes) { + if ((type != crm_status_processes) && !is_set(node->flags, crm_remote_node)) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() diff --git a/include/crm/cluster.h b/include/crm/cluster.h index 20ed829c7ef..c60dbd1bb16 100644 --- a/include/crm/cluster.h +++ b/include/crm/cluster.h @@ -59,9 +59,9 @@ enum crm_node_flags { /* node is not a cluster node and should not be considered for cluster membership */ crm_remote_node = 0x0001, - /* This node is a remote node living within a container resource */ + + /* deprecated (not used by cluster) */ crm_remote_container = 0x0002, - /* This node is a bare metal remote-node */ crm_remote_baremetal = 0x0004, }; /* *INDENT-ON* */ diff --git a/include/crm/common/xml.h b/include/crm/common/xml.h index 1ea46da0fc5..a9fef3e4b2f 100644 --- a/include/crm/common/xml.h +++ b/include/crm/common/xml.h @@ -252,6 +252,8 @@ xmlNode *first_named_child(xmlNode * parent, const char *name); xmlNode *sorted_xml(xmlNode * input, xmlNode * parent, gboolean recursive); xmlXPathObjectPtr xpath_search(xmlNode * xml_top, const char *path); +void crm_foreach_xpath_result(xmlNode *xml, const char *xpath, + void (*helper)(xmlNode*, void*), void *user_data); gboolean cli_config_update(xmlNode ** xml, int *best_version, gboolean to_logs); xmlNode *expand_idref(xmlNode * input, xmlNode * top); diff --git a/lib/cib/cib_attrs.c b/lib/cib/cib_attrs.c index 341ea9019d3..e1646327ffd 100644 --- a/lib/cib/cib_attrs.c +++ b/lib/cib/cib_attrs.c @@ -418,9 +418,15 @@ 
found_remote_node_xpath(cib_t *the_cib, const char *xpath) static int get_remote_node_uuid(cib_t * the_cib, const char *uname, char **uuid) { -#define CONTAINER_REMOTE_NODE_XPATH "//" XML_CIB_TAG_NVPAIR "[@name='remote-node'][@value='%s']" +#define CONTAINER_REMOTE_NODE_XPATH "//" XML_CIB_TAG_NVPAIR \ + "[@name='" XML_RSC_ATTR_REMOTE_NODE "'][@value='%s']" + #define BAREMETAL_REMOTE_NODE_XPATH "//" XML_CIB_TAG_RESOURCE "[@type='remote'][@provider='pacemaker'][@id='%s']" -#define ORPHAN_REMOTE_NODE_XPATH "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE "[@id='%s'][@remote_node='true']" + +#define ORPHAN_REMOTE_NODE_XPATH \ + "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \ + "[@" XML_ATTR_UUID "='%s'][@" XML_NODE_IS_REMOTE "='true']" + int len = 128 + strlen(uname); int rc = pcmk_ok; char *xpath_string = calloc(1, len); diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index 3081e54365c..aef7f99b6d5 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -71,7 +71,7 @@ crm_remote_peer_cache_remove(const char *node_name) } static void -remote_cache_refresh_helper(xmlNode *cib, const char *xpath, const char *field, int flags) +remote_cache_refresh_helper(xmlNode *cib, const char *xpath, const char *field) { const char *remote = NULL; crm_node_t *node = NULL; @@ -92,7 +92,7 @@ remote_cache_refresh_helper(xmlNode *cib, const char *xpath, const char *field, if (remote) { crm_trace("added %s to remote cache", remote); node = calloc(1, sizeof(crm_node_t)); - node->flags = flags; + node->flags = crm_remote_node; CRM_ASSERT(node); node->uname = strdup(remote); node->uuid = strdup(remote); @@ -103,24 +103,40 @@ remote_cache_refresh_helper(xmlNode *cib, const char *xpath, const char *field, freeXpathObject(xpathObj); } +/* search string to find CIB resources entries for guest nodes */ +#define XPATH_GUEST_NODE_CONFIG \ + "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \ + "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \ + "[@name='" XML_RSC_ATTR_REMOTE_NODE "']" + +/* search string to find CIB resources entries for remote nodes */ +#define XPATH_REMOTE_NODE_CONFIG \ + "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \ + "[@type='remote'][@provider='pacemaker']" + +/* search string to find CIB node status entries for pacemaker_remote nodes */ +#define XPATH_REMOTE_NODE_STATUS \ + "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \ + "[@" XML_NODE_IS_REMOTE "='true']" + +/*! 
+ * \brief Repopulate the remote peer cache based on CIB XML + * + * \param[in] xmlNode CIB XML to parse + */ void crm_remote_peer_cache_refresh(xmlNode *cib) { - const char *xpath = NULL; - g_hash_table_remove_all(crm_remote_peer_cache); /* remote nodes associated with a cluster resource */ - xpath = "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR "[@name='remote-node']"; - remote_cache_refresh_helper(cib, xpath, "value", crm_remote_node | crm_remote_container); + remote_cache_refresh_helper(cib, XPATH_GUEST_NODE_CONFIG, "value"); /* baremetal nodes defined by connection resources*/ - xpath = "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE "[@type='remote'][@provider='pacemaker']"; - remote_cache_refresh_helper(cib, xpath, "id", crm_remote_node | crm_remote_baremetal); + remote_cache_refresh_helper(cib, XPATH_REMOTE_NODE_CONFIG, "id"); /* baremetal nodes we have seen in the config that may or may not have connection * resources associated with them anymore */ - xpath = "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE "[@remote_node='true']"; - remote_cache_refresh_helper(cib, xpath, "id", crm_remote_node | crm_remote_baremetal); + remote_cache_refresh_helper(cib, XPATH_REMOTE_NODE_STATUS, "id"); } gboolean @@ -680,7 +696,7 @@ crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t see * * \return NULL if any node was reaped from peer caches, value of node otherwise * - * \note If this function returns TRUE, the supplied node object was likely + * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function should not be * called within a cache iteration if reaping is possible, otherwise * reaping could invalidate the iterator. @@ -694,6 +710,11 @@ crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL", source, peer2text(flag), status); return NULL); + /* Pacemaker doesn't spawn processes on remote nodes */ + if (is_set(node->flags, crm_remote_node)) { + return node; + } + last = node->processes; if (status == NULL) { node->processes = flag; @@ -763,6 +784,11 @@ crm_update_peer_expected(const char *source, crm_node_t * node, const char *expe CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected); return); + /* Remote nodes don't participate in joins */ + if (is_set(node->flags, crm_remote_node)) { + return; + } + last = node->expected; if (expected != NULL && safe_str_neq(node->expected, expected)) { node->expected = strdup(expected); @@ -787,13 +813,13 @@ crm_update_peer_expected(const char *source, crm_node_t * node, const char *expe * \param[in] node Node object to update * \param[in] state Node's new state * \param[in] membership Node's new membership ID + * \param[in] iter If not NULL, pointer to node's peer cache iterator * * \return NULL if any node was reaped, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely - * freed and should not be used again. This function should not be - * called within a cache iteration if reaping is possible, - * otherwise reaping could invalidate the iterator. + * freed and should not be used again. This function may be called from + * within a peer cache iteration if the iterator is supplied. 
*/ static crm_node_t * crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, int membership, GHashTableIter *iter) @@ -821,14 +847,15 @@ crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *st } free(last); - if (!is_member && crm_autoreap) { + if (crm_autoreap && !is_member && !is_set(node->flags, crm_remote_node)) { + /* We only autoreap from the peer cache, not the remote peer cache, + * because the latter should be managed only by + * crm_remote_peer_cache_refresh(). + */ if(iter) { crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname); g_hash_table_iter_remove(iter); - } else if (status_type == crm_status_rstate) { - crm_remote_peer_cache_remove(node->uname); - } else { reap_crm_member(node->id, node->uname); } @@ -842,6 +869,21 @@ crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *st return node; } +/*! + * \brief Update a node's state and membership information + * + * \param[in] source Caller's function name (for log messages) + * \param[in] node Node object to update + * \param[in] state Node's new state + * \param[in] membership Node's new membership ID + * + * \return NULL if any node was reaped, value of node otherwise + * + * \note If this function returns NULL, the supplied node object was likely + * freed and should not be used again. This function should not be + * called within a cache iteration if reaping is possible, + * otherwise reaping could invalidate the iterator. + */ crm_node_t * crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership) { diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am index 56afb376f8e..b0caa5984f4 100644 --- a/lib/common/Makefile.am +++ b/lib/common/Makefile.am @@ -33,7 +33,8 @@ lib_LTLIBRARIES = libcrmcommon.la CFLAGS = $(CFLAGS_COPY:-Wcast-qual=) -fPIC libcrmcommon_la_SOURCES = compat.c digest.c ipc.c io.c procfs.c utils.c xml.c \ - iso8601.c remote.c mainloop.c logging.c watchdog.c + iso8601.c remote.c mainloop.c logging.c watchdog.c \ + xpath.c if BUILD_CIBSECRETS libcrmcommon_la_SOURCES += cib_secrets.c endif diff --git a/lib/common/xml.c b/lib/common/xml.c index 850c8641801..d2f996d5900 100644 --- a/lib/common/xml.c +++ b/lib/common/xml.c @@ -5692,149 +5692,6 @@ update_validation(xmlNode ** xml_blob, int *best, int max, gboolean transform, g return rc; } -/* - * From xpath2.c - * - * All the elements returned by an XPath query are pointers to - * elements from the tree *except* namespace nodes where the XPath - * semantic is different from the implementation in libxml2 tree. - * As a result when a returned node set is freed when - * xmlXPathFreeObject() is called, that routine must check the - * element type. But node from the returned set may have been removed - * by xmlNodeSetContent() resulting in access to freed data. - * - * This can be exercised by running - * valgrind xpath2 test3.xml '//discarded' discarded - * - * There is 2 ways around it: - * - make a copy of the pointers to the nodes from the result set - * then call xmlXPathFreeObject() and then modify the nodes - * or - * - remove the references from the node set, if they are not - namespace nodes, before calling xmlXPathFreeObject(). 
- */ -void -freeXpathObject(xmlXPathObjectPtr xpathObj) -{ - int lpc, max = numXpathResults(xpathObj); - - if(xpathObj == NULL) { - return; - } - - for(lpc = 0; lpc < max; lpc++) { - if (xpathObj->nodesetval->nodeTab[lpc] && xpathObj->nodesetval->nodeTab[lpc]->type != XML_NAMESPACE_DECL) { - xpathObj->nodesetval->nodeTab[lpc] = NULL; - } - } - - /* _Now_ its safe to free it */ - xmlXPathFreeObject(xpathObj); -} - -xmlNode * -getXpathResult(xmlXPathObjectPtr xpathObj, int index) -{ - xmlNode *match = NULL; - int max = numXpathResults(xpathObj); - - CRM_CHECK(index >= 0, return NULL); - CRM_CHECK(xpathObj != NULL, return NULL); - - if (index >= max) { - crm_err("Requested index %d of only %d items", index, max); - return NULL; - - } else if(xpathObj->nodesetval->nodeTab[index] == NULL) { - /* Previously requested */ - return NULL; - } - - match = xpathObj->nodesetval->nodeTab[index]; - CRM_CHECK(match != NULL, return NULL); - - if (xpathObj->nodesetval->nodeTab[index]->type != XML_NAMESPACE_DECL) { - /* See the comment for freeXpathObject() */ - xpathObj->nodesetval->nodeTab[index] = NULL; - } - - if (match->type == XML_DOCUMENT_NODE) { - /* Will happen if section = '/' */ - match = match->children; - - } else if (match->type != XML_ELEMENT_NODE - && match->parent && match->parent->type == XML_ELEMENT_NODE) { - /* reurning the parent instead */ - match = match->parent; - - } else if (match->type != XML_ELEMENT_NODE) { - /* We only support searching nodes */ - crm_err("We only support %d not %d", XML_ELEMENT_NODE, match->type); - match = NULL; - } - return match; -} - -void -dedupXpathResults(xmlXPathObjectPtr xpathObj) -{ - int lpc, max = numXpathResults(xpathObj); - - if (xpathObj == NULL) { - return; - } - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *xml = NULL; - gboolean dedup = FALSE; - - if (xpathObj->nodesetval->nodeTab[lpc] == NULL) { - continue; - } - - xml = xpathObj->nodesetval->nodeTab[lpc]->parent; - - for (; xml; xml = xml->parent) { - int lpc2 = 0; - - for (lpc2 = 0; lpc2 < max; lpc2++) { - if (xpathObj->nodesetval->nodeTab[lpc2] == xml) { - xpathObj->nodesetval->nodeTab[lpc] = NULL; - dedup = TRUE; - break; - } - } - - if (dedup) { - break; - } - } - } -} - -/* the caller needs to check if the result contains a xmlDocPtr or xmlNodePtr */ -xmlXPathObjectPtr -xpath_search(xmlNode * xml_top, const char *path) -{ - xmlDocPtr doc = NULL; - xmlXPathObjectPtr xpathObj = NULL; - xmlXPathContextPtr xpathCtx = NULL; - const xmlChar *xpathExpr = (const xmlChar *)path; - - CRM_CHECK(path != NULL, return NULL); - CRM_CHECK(xml_top != NULL, return NULL); - CRM_CHECK(strlen(path) > 0, return NULL); - - doc = getDocPtr(xml_top); - - xpathCtx = xmlXPathNewContext(doc); - CRM_ASSERT(xpathCtx != NULL); - - xpathObj = xmlXPathEvalExpression(xpathExpr, xpathCtx); - xmlXPathFreeContext(xpathCtx); - return xpathObj; -} - gboolean cli_config_update(xmlNode ** xml, int *best_version, gboolean to_logs) { @@ -5939,81 +5796,6 @@ expand_idref(xmlNode * input, xmlNode * top) return result; } -xmlNode * -get_xpath_object_relative(const char *xpath, xmlNode * xml_obj, int error_level) -{ - int len = 0; - xmlNode *result = NULL; - char *xpath_full = NULL; - char *xpath_prefix = NULL; - - if (xml_obj == NULL || xpath == NULL) { - return NULL; - } - - xpath_prefix = (char *)xmlGetNodePath(xml_obj); - len += strlen(xpath_prefix); - len += strlen(xpath); - - xpath_full = strdup(xpath_prefix); - xpath_full = realloc_safe(xpath_full, len + 1); - strncat(xpath_full, xpath, len); - - result = 
get_xpath_object(xpath_full, xml_obj, error_level); - - free(xpath_prefix); - free(xpath_full); - return result; -} - -xmlNode * -get_xpath_object(const char *xpath, xmlNode * xml_obj, int error_level) -{ - int max; - xmlNode *result = NULL; - xmlXPathObjectPtr xpathObj = NULL; - char *nodePath = NULL; - char *matchNodePath = NULL; - - if (xpath == NULL) { - return xml_obj; /* or return NULL? */ - } - - xpathObj = xpath_search(xml_obj, xpath); - nodePath = (char *)xmlGetNodePath(xml_obj); - max = numXpathResults(xpathObj); - - if (max < 1) { - do_crm_log(error_level, "No match for %s in %s", xpath, crm_str(nodePath)); - crm_log_xml_explicit(xml_obj, "Unexpected Input"); - - } else if (max > 1) { - int lpc = 0; - - do_crm_log(error_level, "Too many matches for %s in %s", xpath, crm_str(nodePath)); - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *match = getXpathResult(xpathObj, lpc); - - CRM_LOG_ASSERT(match != NULL); - if(match != NULL) { - matchNodePath = (char *)xmlGetNodePath(match); - do_crm_log(error_level, "%s[%d] = %s", xpath, lpc, crm_str(matchNodePath)); - free(matchNodePath); - } - } - crm_log_xml_explicit(xml_obj, "Bad Input"); - - } else { - result = getXpathResult(xpathObj, 0); - } - - freeXpathObject(xpathObj); - free(nodePath); - - return result; -} - const char * crm_element_value(xmlNode * data, const char *name) { diff --git a/lib/common/xpath.c b/lib/common/xpath.c new file mode 100644 index 00000000000..9a058192b2a --- /dev/null +++ b/lib/common/xpath.c @@ -0,0 +1,270 @@ +/* + * Copyright (C) 2004 Andrew Beekhof + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include + +/* + * From xpath2.c + * + * All the elements returned by an XPath query are pointers to + * elements from the tree *except* namespace nodes where the XPath + * semantic is different from the implementation in libxml2 tree. + * As a result when a returned node set is freed when + * xmlXPathFreeObject() is called, that routine must check the + * element type. But node from the returned set may have been removed + * by xmlNodeSetContent() resulting in access to freed data. + * + * This can be exercised by running + * valgrind xpath2 test3.xml '//discarded' discarded + * + * There is 2 ways around it: + * - make a copy of the pointers to the nodes from the result set + * then call xmlXPathFreeObject() and then modify the nodes + * or + * - remove the references from the node set, if they are not + namespace nodes, before calling xmlXPathFreeObject(). 
+ */ +void +freeXpathObject(xmlXPathObjectPtr xpathObj) +{ + int lpc, max = numXpathResults(xpathObj); + + if (xpathObj == NULL) { + return; + } + + for (lpc = 0; lpc < max; lpc++) { + if (xpathObj->nodesetval->nodeTab[lpc] && xpathObj->nodesetval->nodeTab[lpc]->type != XML_NAMESPACE_DECL) { + xpathObj->nodesetval->nodeTab[lpc] = NULL; + } + } + + /* _Now_ it's safe to free it */ + xmlXPathFreeObject(xpathObj); +} + +xmlNode * +getXpathResult(xmlXPathObjectPtr xpathObj, int index) +{ + xmlNode *match = NULL; + int max = numXpathResults(xpathObj); + + CRM_CHECK(index >= 0, return NULL); + CRM_CHECK(xpathObj != NULL, return NULL); + + if (index >= max) { + crm_err("Requested index %d of only %d items", index, max); + return NULL; + + } else if(xpathObj->nodesetval->nodeTab[index] == NULL) { + /* Previously requested */ + return NULL; + } + + match = xpathObj->nodesetval->nodeTab[index]; + CRM_CHECK(match != NULL, return NULL); + + if (xpathObj->nodesetval->nodeTab[index]->type != XML_NAMESPACE_DECL) { + /* See the comment for freeXpathObject() */ + xpathObj->nodesetval->nodeTab[index] = NULL; + } + + if (match->type == XML_DOCUMENT_NODE) { + /* Will happen if section = '/' */ + match = match->children; + + } else if (match->type != XML_ELEMENT_NODE + && match->parent && match->parent->type == XML_ELEMENT_NODE) { + /* Return the parent instead */ + match = match->parent; + + } else if (match->type != XML_ELEMENT_NODE) { + /* We only support searching nodes */ + crm_err("We only support %d not %d", XML_ELEMENT_NODE, match->type); + match = NULL; + } + return match; +} + +void +dedupXpathResults(xmlXPathObjectPtr xpathObj) +{ + int lpc, max = numXpathResults(xpathObj); + + if (xpathObj == NULL) { + return; + } + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *xml = NULL; + gboolean dedup = FALSE; + + if (xpathObj->nodesetval->nodeTab[lpc] == NULL) { + continue; + } + + xml = xpathObj->nodesetval->nodeTab[lpc]->parent; + + for (; xml; xml = xml->parent) { + int lpc2 = 0; + + for (lpc2 = 0; lpc2 < max; lpc2++) { + if (xpathObj->nodesetval->nodeTab[lpc2] == xml) { + xpathObj->nodesetval->nodeTab[lpc] = NULL; + dedup = TRUE; + break; + } + } + + if (dedup) { + break; + } + } + } +} + +/* the caller needs to check if the result contains a xmlDocPtr or xmlNodePtr */ +xmlXPathObjectPtr +xpath_search(xmlNode * xml_top, const char *path) +{ + xmlDocPtr doc = NULL; + xmlXPathObjectPtr xpathObj = NULL; + xmlXPathContextPtr xpathCtx = NULL; + const xmlChar *xpathExpr = (const xmlChar *)path; + + CRM_CHECK(path != NULL, return NULL); + CRM_CHECK(xml_top != NULL, return NULL); + CRM_CHECK(strlen(path) > 0, return NULL); + + doc = getDocPtr(xml_top); + + xpathCtx = xmlXPathNewContext(doc); + CRM_ASSERT(xpathCtx != NULL); + + xpathObj = xmlXPathEvalExpression(xpathExpr, xpathCtx); + xmlXPathFreeContext(xpathCtx); + return xpathObj; +} + +/*! + * \brief Run a supplied function for each result of an xpath search + * + * \param[in] xml XML to search + * \param[in] xpath XPath search string + * \param[in] helper Function to call for each result + * \param[in/out] user_data Data to pass to supplied function + * + * \note The helper function will be passed the XML node of the result, + * and the supplied user_data. This function does not otherwise + * use user_data. 
+ */ +void +crm_foreach_xpath_result(xmlNode *xml, const char *xpath, + void (*helper)(xmlNode*, void*), void *user_data) +{ + xmlXPathObjectPtr xpathObj = xpath_search(xml, xpath); + int nresults = numXpathResults(xpathObj); + int i; + + for (i = 0; i < nresults; i++) { + xmlNode *result = getXpathResult(xpathObj, i); + + CRM_LOG_ASSERT(result != NULL); + if (result) { + (*helper)(result, user_data); + } + } + freeXpathObject(xpathObj); +} + +xmlNode * +get_xpath_object_relative(const char *xpath, xmlNode * xml_obj, int error_level) +{ + int len = 0; + xmlNode *result = NULL; + char *xpath_full = NULL; + char *xpath_prefix = NULL; + + if (xml_obj == NULL || xpath == NULL) { + return NULL; + } + + xpath_prefix = (char *)xmlGetNodePath(xml_obj); + len += strlen(xpath_prefix); + len += strlen(xpath); + + xpath_full = strdup(xpath_prefix); + xpath_full = realloc_safe(xpath_full, len + 1); + strncat(xpath_full, xpath, len); + + result = get_xpath_object(xpath_full, xml_obj, error_level); + + free(xpath_prefix); + free(xpath_full); + return result; +} + +xmlNode * +get_xpath_object(const char *xpath, xmlNode * xml_obj, int error_level) +{ + int max; + xmlNode *result = NULL; + xmlXPathObjectPtr xpathObj = NULL; + char *nodePath = NULL; + char *matchNodePath = NULL; + + if (xpath == NULL) { + return xml_obj; /* or return NULL? */ + } + + xpathObj = xpath_search(xml_obj, xpath); + nodePath = (char *)xmlGetNodePath(xml_obj); + max = numXpathResults(xpathObj); + + if (max < 1) { + do_crm_log(error_level, "No match for %s in %s", xpath, crm_str(nodePath)); + crm_log_xml_explicit(xml_obj, "Unexpected Input"); + + } else if (max > 1) { + int lpc = 0; + + do_crm_log(error_level, "Too many matches for %s in %s", xpath, crm_str(nodePath)); + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpathObj, lpc); + + CRM_LOG_ASSERT(match != NULL); + if(match != NULL) { + matchNodePath = (char *)xmlGetNodePath(match); + do_crm_log(error_level, "%s[%d] = %s", xpath, lpc, crm_str(matchNodePath)); + free(matchNodePath); + } + } + crm_log_xml_explicit(xml_obj, "Bad Input"); + + } else { + result = getXpathResult(xpathObj, 0); + } + + freeXpathObject(xpathObj); + free(nodePath); + + return result; +} diff --git a/lrmd/regression.py.in b/lrmd/regression.py.in index 750b174ede0..21efb587f9b 100755 --- a/lrmd/regression.py.in +++ b/lrmd/regression.py.in @@ -779,7 +779,7 @@ if __name__ == "__main__": timeout = "-t 20000" iterations = 25 - test = self.new_test("ocf_stress", "Verify systemd dbus connection works under load") + test = self.new_test("ocf_stress", "Verify OCF agent handling works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C ocf -P heartbeat -T Dummy -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i))
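Reviewer note (not part of the patch): the doxygen blocks above stress that crm_update_peer_state() returns NULL when the updated node is autoreaped. A minimal sketch of how a caller is expected to treat that return value; handle_peer_lost() is hypothetical, while crm_update_peer_state(), crm_node_t, CRM_NODE_LOST, and crm_info() are the existing libcrmcluster/libcrmcommon pieces used elsewhere in this patch.

/* Illustrative only -- not part of this patch.
 * Assumes the usual cluster headers (e.g. crm/cluster.h) are available.
 */
static void
handle_peer_lost(crm_node_t *node)
{
    node = crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0);
    if (node == NULL) {
        /* The peer cache entry was reaped and freed; the pointer is stale. */
        return;
    }
    crm_info("Peer %s is no longer a cluster member", node->uname);
}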
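Reviewer note (not part of the patch): the xpath2.c comment carried into lib/common/xpath.c explains why getXpathResult() clears each slot of the node table before freeXpathObject() releases the result set. Below is a sketch of the manual search/iterate/free pattern that the new crm_foreach_xpath_result() wraps; count_elements() is hypothetical, but it uses only functions defined in this file.

/* Illustrative only -- count the elements matching an XPath expression. */
static int
count_elements(xmlNode *xml, const char *xpath)
{
    xmlXPathObjectPtr xpathObj = xpath_search(xml, xpath);
    int max = numXpathResults(xpathObj);
    int found = 0;
    int lpc;

    for (lpc = 0; lpc < max; lpc++) {
        /* getXpathResult() NULLs the slot it returns, so the later
         * freeXpathObject() will not touch a node the caller may have
         * modified or freed in the meantime.
         */
        xmlNode *match = getXpathResult(xpathObj, lpc);

        if (match != NULL) {
            found++;
        }
    }
    freeXpathObject(xpathObj);
    return found;
}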
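Reviewer note (not part of the patch): a sketch of how a caller might use the new crm_foreach_xpath_result() helper. collect_node_id(), remote_node_ids(), the XPath expression, and the header paths are illustrative assumptions; crm_foreach_xpath_result() and crm_element_value() are the functions shown above, and GLib supplies the list handling.

/* Illustrative only -- gather the "id" attribute of every match. */
#include <glib.h>           /* GList, g_list_prepend() */
#include <string.h>         /* strdup() */
#include <crm/common/xml.h> /* assumed location of crm_element_value() and the xpath helpers */

static void
collect_node_id(xmlNode *match, void *user_data)
{
    GList **ids = user_data;
    const char *id = crm_element_value(match, "id");

    if (id != NULL) {
        *ids = g_list_prepend(*ids, strdup(id));
    }
}

static GList *
remote_node_ids(xmlNode *cib)
{
    GList *ids = NULL;

    /* The expression stands in for the XPATH_REMOTE_NODE_CONFIG macro
     * referenced by crm_remote_peer_cache_refresh() above.
     */
    crm_foreach_xpath_result(cib,
                             "//primitive[@type='remote'][@provider='pacemaker']",
                             collect_node_id, &ids);
    return ids;
}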