From 6337d704096b48970f7080c1eafc52e053de0e56 Mon Sep 17 00:00:00 2001 From: Devananda van der Veen Date: Mon, 13 May 2013 19:13:36 -0700 Subject: [PATCH] Handle IPMI transient failures better. Sometimes, IPMI "power status" doesn't return either "on" or "off". When this happens, the IPMI driver shouldn't assume what the state is, and the baremetal driver shouldn't assume that NOSTATE means SHUTDOWN. This patch also improves the doc strings for the IPMI driver. Fix bug 1178378. Change-Id: I54a5e6309f68ab8e5601e65039366d7d87c03478 --- nova/tests/baremetal/test_driver.py | 34 ++++++++++++++++++++ nova/tests/baremetal/test_ipmi.py | 15 +++++++-- nova/virt/baremetal/driver.py | 18 +++++++---- nova/virt/baremetal/ipmi.py | 50 ++++++++++++++++++++++------- 4 files changed, 98 insertions(+), 19 deletions(-) diff --git a/nova/tests/baremetal/test_driver.py b/nova/tests/baremetal/test_driver.py index f017253704c..3b0295a92f2 100644 --- a/nova/tests/baremetal/test_driver.py +++ b/nova/tests/baremetal/test_driver.py @@ -22,6 +22,7 @@ from oslo.config import cfg +from nova.compute import power_state from nova import exception from nova import test from nova.tests.baremetal.db import base as bm_db_base @@ -327,3 +328,36 @@ def test_list_instances(self): self.driver.destroy(**node2['destroy_params']) self.assertEqual([], self.driver.list_instances()) + + def test_get_info_no_such_node(self): + node = self._create_node() + self.assertRaises(exception.InstanceNotFound, + self.driver.get_info, + node['instance']) + + def test_get_info_ok(self): + node = self._create_node() + db.bm_node_associate_and_update(self.context, node['node']['uuid'], + {'instance_uuid': node['instance']['uuid'], + 'instance_name': node['instance']['hostname'], + 'task_state': baremetal_states.ACTIVE}) + res = self.driver.get_info(node['instance']) + self.assertEqual(res['state'], power_state.RUNNING) + + def test_get_info_with_defunct_pm(self): + # test fix for bug 1178378 + node = self._create_node() + db.bm_node_associate_and_update(self.context, node['node']['uuid'], + {'instance_uuid': node['instance']['uuid'], + 'instance_name': node['instance']['hostname'], + 'task_state': baremetal_states.ACTIVE}) + + # fake the power manager and don't get a power state + self.mox.StubOutWithMock(fake.FakePowerManager, 'is_power_on') + fake.FakePowerManager.is_power_on().AndReturn(None) + self.mox.ReplayAll() + + res = self.driver.get_info(node['instance']) + # prior to the fix, returned power_state was SHUTDOWN + self.assertEqual(res['state'], power_state.NOSTATE) + self.mox.VerifyAll() diff --git a/nova/tests/baremetal/test_ipmi.py b/nova/tests/baremetal/test_ipmi.py index faf800a4667..01bb58d8b6c 100644 --- a/nova/tests/baremetal/test_ipmi.py +++ b/nova/tests/baremetal/test_ipmi.py @@ -85,13 +85,24 @@ def test_exec_ipmitool(self): self.ipmi._exec_ipmitool('A B C') self.mox.VerifyAll() - def test_is_power(self): + def test_is_power_on_ok(self): self.mox.StubOutWithMock(self.ipmi, '_exec_ipmitool') self.ipmi._exec_ipmitool("power status").AndReturn( ["Chassis Power is on\n"]) self.mox.ReplayAll() - self.ipmi._is_power("on") + res = self.ipmi.is_power_on() + self.assertEqual(res, True) + self.mox.VerifyAll() + + def test_is_power_no_answer(self): + self.mox.StubOutWithMock(self.ipmi, '_exec_ipmitool') + self.ipmi._exec_ipmitool("power status").AndReturn( + ["Fake reply\n"]) + self.mox.ReplayAll() + + res = self.ipmi.is_power_on() + self.assertEqual(res, None) self.mox.VerifyAll() def test_power_already_on(self): diff --git a/nova/virt/baremetal/driver.py b/nova/virt/baremetal/driver.py index 736b511b934..376921360b6 100755 --- a/nova/virt/baremetal/driver.py +++ b/nova/virt/baremetal/driver.py @@ -354,15 +354,21 @@ def detach_volume(self, connection_info, instance_name, mountpoint): instance_name, mountpoint) def get_info(self, instance): - # NOTE(deva): compute/manager.py expects to get NotFound exception - # so we convert from InstanceNotFound inst_uuid = instance.get('uuid') node = _get_baremetal_node_by_instance_uuid(inst_uuid) pm = get_power_manager(node=node, instance=instance) - ps = power_state.SHUTDOWN - if pm.is_power_on(): - ps = power_state.RUNNING - return {'state': ps, + + # NOTE(deva): Power manager may not be able to determine power state + # in which case it may return "None" here. + ps = pm.is_power_on() + if ps: + pstate = power_state.RUNNING + elif ps is False: + pstate = power_state.SHUTDOWN + else: + pstate = power_state.NOSTATE + + return {'state': pstate, 'max_mem': node['memory_mb'], 'mem': node['memory_mb'], 'num_cpu': node['cpus'], diff --git a/nova/virt/baremetal/ipmi.py b/nova/virt/baremetal/ipmi.py index d4377e6face..5d4f2b0edf0 100644 --- a/nova/virt/baremetal/ipmi.py +++ b/nova/virt/baremetal/ipmi.py @@ -138,17 +138,13 @@ def _exec_ipmitool(self, command): finally: bm_utils.unlink_without_raise(pwfile) - def _is_power(self, state): - out_err = self._exec_ipmitool("power status") - return out_err[0] == ("Chassis Power is %s\n" % state) - def _power_on(self): """Turn the power to this node ON.""" def _wait_for_power_on(): """Called at an interval until the node's power is on.""" - if self._is_power("on"): + if self.is_power_on(): self.state = baremetal_states.ACTIVE raise loopingcall.LoopingCallDone() if self.retries > CONF.baremetal.ipmi_power_retry: @@ -170,7 +166,7 @@ def _power_off(self): def _wait_for_power_off(): """Called at an interval until the node's power is off.""" - if self._is_power("off"): + if self.is_power_on() is False: self.state = baremetal_states.DELETED raise loopingcall.LoopingCallDone() if self.retries > CONF.baremetal.ipmi_power_retry: @@ -193,8 +189,15 @@ def _set_pxe_for_next_boot(self): LOG.exception(_("IPMI set next bootdev failed")) def activate_node(self): - """Turns the power to node ON.""" - if self._is_power("on") and self.state == baremetal_states.ACTIVE: + """Turns the power to node ON. + + Sets node next-boot to PXE and turns the power on, + waiting up to ipmi_power_retry/2 seconds for confirmation + that the power is on. + + :returns: One of baremetal_states.py, representing the new state. + """ + if self.is_power_on() and self.state == baremetal_states.ACTIVE: LOG.warning(_("Activate node called, but node %s " "is already active") % self.address) self._set_pxe_for_next_boot() @@ -202,19 +205,44 @@ def activate_node(self): return self.state def reboot_node(self): - """Cycles the power to a node.""" + """Cycles the power to a node. + + Turns the power off, sets next-boot to PXE, and turns the power on. + Each action waits up to ipmi_power_retry/2 seconds for confirmation + that the power state has changed. + + :returns: One of baremetal_states.py, representing the new state. + """ self._power_off() self._set_pxe_for_next_boot() self._power_on() return self.state def deactivate_node(self): - """Turns the power to node OFF, regardless of current state.""" + """Turns the power to node OFF. + + Turns the power off, and waits up to ipmi_power_retry/2 seconds + for confirmation that the power is off. + + :returns: One of baremetal_states.py, representing the new state. + """ self._power_off() return self.state def is_power_on(self): - return self._is_power("on") + """Check if the power is currently on. + + :returns: True if on; False if off; None if unable to determine. + """ + # NOTE(deva): string matching based on + # http://ipmitool.cvs.sourceforge.net/ + # viewvc/ipmitool/ipmitool/lib/ipmi_chassis.c + res = self._exec_ipmitool("power status")[0] + if res == ("Chassis Power is on\n"): + return True + elif res == ("Chassis Power is off\n"): + return False + return None def start_console(self): if not self.port: