
@inouekazu (Contributor)

With cibadmin --replace --scope=configuration, the <node> entries disappear from the CIB. Reproduction on a two-node cluster follows.

# crm_mon -1
Cluster Summary:
  * Stack: corosync
  * Current DC: r81-1 (version 2.0.3-432290022) - partition with quorum
  * Last updated: Tue Apr 28 12:13:47 2020
  * Last change:  Tue Apr 28 12:13:32 2020 by hacluster via crmd on r81-1
  * 2 nodes configured
  * 0 resource instances configured

Node List:
  * Online: [ r81-1 r81-2 ]

Active Resources:
  * No active resources

# pcs cluster cib
<cib crm_feature_set="3.3.0" validate-with="pacemaker-3.3" epoch="5" num_updates="4" admin_epoch="0" cib-last-written="Tue Apr 28 12:13:32 2020" update-origin="r81-1" update-client="crmd" update-user="hacluster" have-quorum="1" dc-uuid="1">
  <configuration>
    <crm_config>
      <cluster_property_set id="cib-bootstrap-options">
        <nvpair id="cib-bootstrap-options-have-watchdog" name="have-watchdog" value="false"/>
        <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="2.0.3-432290022"/>
        <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
        <nvpair id="cib-bootstrap-options-cluster-name" name="cluster-name" value="my_cluster"/>
      </cluster_property_set>
    </crm_config>
    <nodes>
      <node id="1" uname="r81-1"/>
      <node id="2" uname="r81-2"/>
    </nodes>
    <resources/>
    <constraints/>
  </configuration>
  <status>
    <node_state id="1" uname="r81-1" in_ccm="true" crmd="online" crm-debug-origin="do_state_transition" join="member" expected="member">
      <lrm id="1">
        <lrm_resources/>
      </lrm>
    </node_state>
    <node_state id="2" uname="r81-2" in_ccm="true" crmd="online" crm-debug-origin="do_state_transition" join="member" expected="member">
      <lrm id="2">
        <lrm_resources/>
      </lrm>
    </node_state>
  </status>
</cib>

# cibadmin --empty > xxx.xml
# pcs -f xxx.xml resource create dummy ocf:pacemaker:Dummy

# pcs cluster cib-push xxx.xml --config --debug
Running: /usr/sbin/cibadmin --replace --xml-file xxx.xml --scope=configuration
Return Value: 0
--Debug Output Start--
--Debug Output End--

CIB updated

# crm_mon -1
Cluster Summary:
  * Stack: unknown
  * Current DC: NONE
  * Last updated: Tue Apr 28 12:14:57 2020
  * Last change:  Tue Apr 28 12:14:47 2020 by root via cibadmin on r81-1
  * 0 nodes configured
  * 1 resource instance configured


Active Resources:
  * No active resources

# pcs cluster cib
<cib crm_feature_set="3.3.0" validate-with="pacemaker-3.3" epoch="6" num_updates="0" admin_epoch="0" cib-last-written="Tue Apr 28 12:14:47 2020" update-origin="r81-1" update-client="cibadmin" update-user="root" have-quorum="1" dc-uuid="1">
  <configuration>
    <crm_config/>
    <nodes/>
    <resources>
      <primitive class="ocf" id="dummy" provider="pacemaker" type="Dummy">
        <operations>
          <op id="dummy-migrate_from-interval-0s" interval="0s" name="migrate_from" timeout="20s"/>
          <op id="dummy-migrate_to-interval-0s" interval="0s" name="migrate_to" timeout="20s"/>
          <op id="dummy-monitor-interval-10s" interval="10s" name="monitor" timeout="20s"/>
          <op id="dummy-reload-interval-0s" interval="0s" name="reload" timeout="20s"/>
          <op id="dummy-start-interval-0s" interval="0s" name="start" timeout="20s"/>
          <op id="dummy-stop-interval-0s" interval="0s" name="stop" timeout="20s"/>
        </operations>
      </primitive>
    </resources>
    <constraints/>
  </configuration>
  <status>
    <node_state id="1" uname="r81-1" in_ccm="true" crmd="online" crm-debug-origin="do_state_transition" join="member" expected="member">
      <lrm id="1">
        <lrm_resources/>
      </lrm>
    </node_state>
    <node_state id="2" uname="r81-2" in_ccm="true" crmd="online" crm-debug-origin="do_state_transition" join="member" expected="member">
      <lrm id="2">
        <lrm_resources/>
      </lrm>
    </node_state>
  </status>
</cib>
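
For reference, the emptied section can also be queried directly (assuming --query accepts --scope the same way --replace does above; the expected output matches the <nodes/> element in the dump):

# cibadmin --query --scope=nodes
<nodes/>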

# cat /var/log/pacemaker/pacemaker.log
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_process_request@based_callbacks.c:991)   info: Forwarding cib_replace operation for section configuration to all (origin=local/cibadmin/2)
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_process_replace@cib_ops.c:172)   trace: Processing "cib_replace" event for section=configuration
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:863)    info: Diff: --- 0.5.4 2
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:865)    info: Diff: +++ 0.6.0 f6b4b5bb81b02cc0fc27077b008ec0ae
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:945)    info: -- /cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options']
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:945)    info: -- /cib/configuration/nodes/node[@id='1']
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:945)    info: -- /cib/configuration/nodes/node[@id='2']
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:931)    info: +  /cib:  @epoch=6, @num_updates=0
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:887)    info: ++ /cib/configuration/resources:  <primitive class="ocf" id="dummy" provider="pacemaker" type="Dummy"/>
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:894)    info: ++                                  <operations>
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:894)    info: ++                                    <op id="dummy-migrate_from-interval-0s" interval="0s" name="migrate_from" timeout="20s"/>
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:894)    info: ++                                    <op id="dummy-migrate_to-interval-0s" interval="0s" name="migrate_to" timeout="20s"/>
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:894)    info: ++                                    <op id="dummy-monitor-interval-10s" interval="10s" name="monitor" timeout="20s"/>
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:894)    info: ++                                    <op id="dummy-reload-interval-0s" interval="0s" name="reload" timeout="20s"/>
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:894)    info: ++                                    <op id="dummy-start-interval-0s" interval="0s" name="start" timeout="20s"/>
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:894)    info: ++                                    <op id="dummy-stop-interval-0s" interval="0s" name="stop" timeout="20s"/>
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:894)    info: ++                                  </operations>
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_perform_op@xml.c:894)    info: ++                                </primitive>

Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_process_request@based_callbacks.c:1053)  info: Completed cib_replace operation for section configuration: OK (rc=0, origin=r81-1/cibadmin/2, version=0.6.0)
### cib_replace_notify() is not executed in pacemaker-based when the section is "configuration".
### As a result, do_cib_replaced()/populate_cib_nodes() in pacemaker-controld is never called, and the <nodes> section is not repopulated. (See the model sketch after the log.)

Apr 28 12:14:47 r81-1 pacemaker-controld  [14765] (te_update_diff@controld_te_callbacks.c:583)  debug: Processing (cib_replace) diff: 0.5.4 -> 0.6.0 (S_IDLE)
Apr 28 12:14:47 r81-1 pacemaker-controld  [14765] (abort_transition_graph@controld_te_utils.c:237)   info: Transition 0 aborted by deletion of cluster_property_set[@id='cib-bootstrap-options']: Configuration change | cib=0.6.0 source=te_update_diff_v2:490 path=/cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options'] complete=true
Apr 28 12:14:47 r81-1 pacemaker-fenced    [14761] (update_cib_stonith_devices_v2@pacemaker-fenced.c:765)     info: Updating device list from the cib: create resources
Apr 28 12:14:47 r81-1 pacemaker-fenced    [14761] (cib_devices_update@pacemaker-fenced.c:694)   info: Updating devices to version 0.6.0
Apr 28 12:14:47 r81-1 pacemaker-fenced    [14761] (unpack_nodes@unpack.c:559)   info: Creating a fake local node
Apr 28 12:14:47 r81-1 pacemaker-controld  [14765] (s_crmd_fsa@controld_fsa.c:204)       debug: Processing I_PE_CALC: [ state=S_IDLE cause=C_FSA_INTERNAL origin=abort_transition_graph ]
Apr 28 12:14:47 r81-1 pacemaker-controld  [14765] (do_state_transition@controld_fsa.c:540)      notice: State transition S_IDLE -> S_POLICY_ENGINE | input=I_PE_CALC cause=C_FSA_INTERNAL origin=abort_transition_graph
Apr 28 12:14:47 r81-1 pacemaker-controld  [14765] (check_join_counts@controld_fsa.c:483)        debug: All 2 active cluster nodes are fully joined
Apr 28 12:14:47 r81-1 pacemaker-controld  [14765] (do_pe_invoke@controld_schedulerd.c:342)      debug: Query 35: Requesting the current CIB: S_POLICY_ENGINE
Apr 28 12:14:47 r81-1 pacemaker-controld  [14765] (do_pe_invoke_callback@controld_schedulerd.c:430)  debug: Re-asking for the CIB: 1 other peer updates still pending
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_file_backup@cib_file.c:286)      info: Archived previous version as /var/lib/pacemaker/cib/cib-4.raw
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_file_write_with_digest@cib_file.c:416)   info: Wrote version 0.6.0 of the CIB to disk (digest: 512743f2ced7c8e93bafbd2b04d29aad)
Apr 28 12:14:47 r81-1 pacemaker-based     [14760] (cib_file_write_with_digest@cib_file.c:445)   info: Reading cluster configuration file /var/lib/pacemaker/cib/cib.KcrZ87 (digest: /var/lib/pacemaker/cib/cib.5mMZdM)
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (do_pe_invoke_callback@controld_schedulerd.c:432)  debug: do_pe_invoke_callback added action A_PE_INVOKE to the FSA
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (config_query_callback@controld_control.c:696)     debug: Call 36 : Parsing CIB options
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (config_query_callback@controld_control.c:727)     debug: Shutdown escalation occurs if DC has not responded to request in 1200000ms
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (config_query_callback@controld_control.c:734)     debug: Re-run scheduler after 900000ms of inactivity
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (do_pe_invoke@controld_schedulerd.c:342)      debug: Query 37: Requesting the current CIB: S_POLICY_ENGINE
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (do_pe_invoke_callback@controld_schedulerd.c:465)  debug: Invoking the scheduler: query=37, ref=pe_calc-dc-1588076088-11, seq=1080, quorate=1
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_config@unpack.c:217)  debug: STONITH timeout: 60000
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_config@unpack.c:221)  debug: STONITH of failed nodes is enabled
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_config@unpack.c:234)  debug: Concurrent fencing is disabled
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_config@unpack.c:245)  debug: Stop all active resources: false
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_config@unpack.c:249)  debug: Cluster is symmetric - resources can run anywhere by default
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_config@unpack.c:287)  debug: On loss of quorum: Stop ALL resources
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_config@unpack.c:334)  debug: Node scores: 'red' = -INFINITY, 'yellow' = 0, 'green' = 0
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_resources@unpack.c:790)       error: Resource start-up disabled since no STONITH resources have been defined
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_resources@unpack.c:791)       error: Either configure some or disable STONITH with the stonith-enabled option
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_resources@unpack.c:792)       error: NOTE: Clusters with shared data need STONITH to ensure data integrity
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_status@unpack.c:1126)         warning: Ignoring recorded node status for 'r81-1' because no longer in configuration
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_status@unpack.c:1126)         warning: Ignoring recorded node status for 'r81-2' because no longer in configuration
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_node_loop@unpack.c:1017)      info: Node 1 is unknown
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_node_loop@unpack.c:1017)      info: Node 2 is unknown
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_node_loop@unpack.c:1017)      info: Node 1 is unknown
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (unpack_node_loop@unpack.c:1017)      info: Node 2 is unknown
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (log_list_item@output_log.c:182)      info: dummy  (ocf::pacemaker:Dummy):  Stopped
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (pcmk__native_allocate@pcmk_sched_native.c:597)    info: Unmanaged resource dummy allocated to no node: inactive
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (native_assign_node@pcmk_sched_utils.c:305)   debug: Could not allocate a node for dummy
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (RecurringOp@pcmk_sched_native.c:834)         debug: n/a      dummy_monitor_10000 (cancelled : start un-runnable)
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (stage6@pcmk_sched_allocate.c:1626)   notice: Delaying fencing operations until there are resources to manage
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (LogActions@pcmk_sched_native.c:2506)         info: Leave   dummy  (Stopped unmanaged)
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (pcmk__log_transition_summary@pcmk_sched_allocate.c:2894)  notice: Calculated transition 1, saving inputs in /var/lib/pacemaker/pengine/pe-input-487.bz2
Apr 28 12:14:48 r81-1 pacemaker-schedulerd[14764] (pcmk__log_transition_summary@pcmk_sched_allocate.c:2898)  notice: Configuration errors found during scheduler processing,  please run "crm_verify -L" to identify issues
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (s_crmd_fsa@controld_fsa.c:204)       debug: Processing I_PE_SUCCESS: [ state=S_POLICY_ENGINE cause=C_IPC_MESSAGE origin=handle_response ]
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (do_state_transition@controld_fsa.c:540)      info: State transition S_POLICY_ENGINE -> S_TRANSITION_ENGINE | input=I_PE_SUCCESS cause=C_IPC_MESSAGE origin=handle_response
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (unpack_graph@pcmk_trans_unpack.c:229)        debug: Unpacked transition 1: 0 actions in 0 synapses
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (do_te_invoke@controld_transition.c:190)      info: Processing graph 1 (ref=pe_calc-dc-1588076088-11) derived from /var/lib/pacemaker/pengine/pe-input-487.bz2
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (print_graph@pcmk_trans_utils.c:242)  debug: Empty transition graph
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (run_graph@pcmk_trans_graph.c:332)    notice: Transition 1 (Complete=0, Pending=0, Fired=0, Skipped=0, Incomplete=0, Source=/var/lib/pacemaker/pengine/pe-input-487.bz2): Complete
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (te_graph_trigger@controld_te_utils.c:85)     debug: Transition 1 is now complete
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (notify_crmd@controld_te_actions.c:584)       debug: Processing transition completion in state S_TRANSITION_ENGINE
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (notify_crmd@controld_te_actions.c:631)       debug: Transition 1 status: done - <null>
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (s_crmd_fsa@controld_fsa.c:204)       debug: Processing I_TE_SUCCESS: [ state=S_TRANSITION_ENGINE cause=C_FSA_INTERNAL origin=notify_crmd ]
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (do_log@controld_fsa.c:134)   info: Input I_TE_SUCCESS received in state S_TRANSITION_ENGINE from notify_crmd
Apr 28 12:14:48 r81-1 pacemaker-controld  [14765] (do_state_transition@controld_fsa.c:540)      notice: State transition S_TRANSITION_ENGINE -> S_IDLE | input=I_TE_SUCCESS cause=C_FSA_INTERNAL origin=notify_crmd
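
To make the suspected flow concrete, here is a minimal standalone model (my sketch, not the actual Pacemaker source; the function names mirror the log and the annotation above, but the guard condition is a hypothesis):

/* Model of the suspected notification flow: pacemaker-based only sends the
 * replace notification for a whole-CIB replace, so a scoped
 * --scope=configuration replace never reaches populate_cib_nodes(). */
#include <stdio.h>
#include <string.h>

/* Stand-in for controld's populate_cib_nodes(): re-adds <node> entries
 * from the membership layer after a CIB replacement. */
static void populate_cib_nodes(void)
{
    printf("controld: repopulating <nodes> from membership\n");
}

/* Stand-in for controld's do_cib_replaced(), run when pacemaker-based
 * sends a cib_replace notification. */
static void do_cib_replaced(void)
{
    populate_cib_nodes();
}

/* Stand-in for based's replace handling; the guard is the hypothesis:
 * the notification is sent only for a whole-CIB replace. */
static void process_replace(const char *section)
{
    printf("based: cib_replace for section=%s\n", section);
    if (strcmp(section, "cib") == 0) {
        do_cib_replaced();  /* models cib_replace_notify() reaching controld */
    }
    /* For section="configuration", no notification is sent, so the
     * emptied <nodes> section is never refilled -- matching the logs. */
}

int main(void)
{
    process_replace("cib");            /* nodes repopulated */
    process_replace("configuration");  /* nodes stay empty  */
    return 0;
}

If that reading is right, pushing the whole CIB instead of only the configuration scope should still trigger do_cib_replaced()/populate_cib_nodes() and repopulate <nodes>, e.g. (untested here):

# pcs cluster cib-push xxx.xml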

@kgaillot

Hi Kazunori,

Thanks for the investigation and fix!
