Skip to content

Commit

Permalink
Handle IPMI transient failures better.
Browse files Browse the repository at this point in the history
Sometimes, IPMI "power status" doesn't return either "on" or "off".
When this happens, the IPMI driver shouldn't assume what the state is,
and the baremetal driver shouldn't assume that NOSTATE means SHUTDOWN.

This patch also improves the doc strings for the IPMI driver.

Fix bug 1178378.

Change-Id: I54a5e6309f68ab8e5601e65039366d7d87c03478
  • Loading branch information
AevaOnline committed May 14, 2013
1 parent 75af47a commit 6337d70
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 19 deletions.
34 changes: 34 additions & 0 deletions nova/tests/baremetal/test_driver.py
Expand Up @@ -22,6 +22,7 @@

from oslo.config import cfg

from nova.compute import power_state
from nova import exception
from nova import test
from nova.tests.baremetal.db import base as bm_db_base
Expand Down Expand Up @@ -327,3 +328,36 @@ def test_list_instances(self):

self.driver.destroy(**node2['destroy_params'])
self.assertEqual([], self.driver.list_instances())

def test_get_info_no_such_node(self):
node = self._create_node()
self.assertRaises(exception.InstanceNotFound,
self.driver.get_info,
node['instance'])

def test_get_info_ok(self):
node = self._create_node()
db.bm_node_associate_and_update(self.context, node['node']['uuid'],
{'instance_uuid': node['instance']['uuid'],
'instance_name': node['instance']['hostname'],
'task_state': baremetal_states.ACTIVE})
res = self.driver.get_info(node['instance'])
self.assertEqual(res['state'], power_state.RUNNING)

def test_get_info_with_defunct_pm(self):
# test fix for bug 1178378
node = self._create_node()
db.bm_node_associate_and_update(self.context, node['node']['uuid'],
{'instance_uuid': node['instance']['uuid'],
'instance_name': node['instance']['hostname'],
'task_state': baremetal_states.ACTIVE})

# fake the power manager and don't get a power state
self.mox.StubOutWithMock(fake.FakePowerManager, 'is_power_on')
fake.FakePowerManager.is_power_on().AndReturn(None)
self.mox.ReplayAll()

res = self.driver.get_info(node['instance'])
# prior to the fix, returned power_state was SHUTDOWN
self.assertEqual(res['state'], power_state.NOSTATE)
self.mox.VerifyAll()
15 changes: 13 additions & 2 deletions nova/tests/baremetal/test_ipmi.py
Expand Up @@ -85,13 +85,24 @@ def test_exec_ipmitool(self):
self.ipmi._exec_ipmitool('A B C')
self.mox.VerifyAll()

def test_is_power(self):
def test_is_power_on_ok(self):
self.mox.StubOutWithMock(self.ipmi, '_exec_ipmitool')
self.ipmi._exec_ipmitool("power status").AndReturn(
["Chassis Power is on\n"])
self.mox.ReplayAll()

self.ipmi._is_power("on")
res = self.ipmi.is_power_on()
self.assertEqual(res, True)
self.mox.VerifyAll()

def test_is_power_no_answer(self):
self.mox.StubOutWithMock(self.ipmi, '_exec_ipmitool')
self.ipmi._exec_ipmitool("power status").AndReturn(
["Fake reply\n"])
self.mox.ReplayAll()

res = self.ipmi.is_power_on()
self.assertEqual(res, None)
self.mox.VerifyAll()

def test_power_already_on(self):
Expand Down
18 changes: 12 additions & 6 deletions nova/virt/baremetal/driver.py
Expand Up @@ -354,15 +354,21 @@ def detach_volume(self, connection_info, instance_name, mountpoint):
instance_name, mountpoint)

def get_info(self, instance):
# NOTE(deva): compute/manager.py expects to get NotFound exception
# so we convert from InstanceNotFound
inst_uuid = instance.get('uuid')
node = _get_baremetal_node_by_instance_uuid(inst_uuid)
pm = get_power_manager(node=node, instance=instance)
ps = power_state.SHUTDOWN
if pm.is_power_on():
ps = power_state.RUNNING
return {'state': ps,

# NOTE(deva): Power manager may not be able to determine power state
# in which case it may return "None" here.
ps = pm.is_power_on()
if ps:
pstate = power_state.RUNNING
elif ps is False:
pstate = power_state.SHUTDOWN
else:
pstate = power_state.NOSTATE

return {'state': pstate,
'max_mem': node['memory_mb'],
'mem': node['memory_mb'],
'num_cpu': node['cpus'],
Expand Down
50 changes: 39 additions & 11 deletions nova/virt/baremetal/ipmi.py
Expand Up @@ -138,17 +138,13 @@ def _exec_ipmitool(self, command):
finally:
bm_utils.unlink_without_raise(pwfile)

def _is_power(self, state):
out_err = self._exec_ipmitool("power status")
return out_err[0] == ("Chassis Power is %s\n" % state)

def _power_on(self):
"""Turn the power to this node ON."""

def _wait_for_power_on():
"""Called at an interval until the node's power is on."""

if self._is_power("on"):
if self.is_power_on():
self.state = baremetal_states.ACTIVE
raise loopingcall.LoopingCallDone()
if self.retries > CONF.baremetal.ipmi_power_retry:
Expand All @@ -170,7 +166,7 @@ def _power_off(self):
def _wait_for_power_off():
"""Called at an interval until the node's power is off."""

if self._is_power("off"):
if self.is_power_on() is False:
self.state = baremetal_states.DELETED
raise loopingcall.LoopingCallDone()
if self.retries > CONF.baremetal.ipmi_power_retry:
Expand All @@ -193,28 +189,60 @@ def _set_pxe_for_next_boot(self):
LOG.exception(_("IPMI set next bootdev failed"))

def activate_node(self):
"""Turns the power to node ON."""
if self._is_power("on") and self.state == baremetal_states.ACTIVE:
"""Turns the power to node ON.
Sets node next-boot to PXE and turns the power on,
waiting up to ipmi_power_retry/2 seconds for confirmation
that the power is on.
:returns: One of baremetal_states.py, representing the new state.
"""
if self.is_power_on() and self.state == baremetal_states.ACTIVE:
LOG.warning(_("Activate node called, but node %s "
"is already active") % self.address)
self._set_pxe_for_next_boot()
self._power_on()
return self.state

def reboot_node(self):
"""Cycles the power to a node."""
"""Cycles the power to a node.
Turns the power off, sets next-boot to PXE, and turns the power on.
Each action waits up to ipmi_power_retry/2 seconds for confirmation
that the power state has changed.
:returns: One of baremetal_states.py, representing the new state.
"""
self._power_off()
self._set_pxe_for_next_boot()
self._power_on()
return self.state

def deactivate_node(self):
"""Turns the power to node OFF, regardless of current state."""
"""Turns the power to node OFF.
Turns the power off, and waits up to ipmi_power_retry/2 seconds
for confirmation that the power is off.
:returns: One of baremetal_states.py, representing the new state.
"""
self._power_off()
return self.state

def is_power_on(self):
return self._is_power("on")
"""Check if the power is currently on.
:returns: True if on; False if off; None if unable to determine.
"""
# NOTE(deva): string matching based on
# http://ipmitool.cvs.sourceforge.net/
# viewvc/ipmitool/ipmitool/lib/ipmi_chassis.c
res = self._exec_ipmitool("power status")[0]
if res == ("Chassis Power is on\n"):
return True
elif res == ("Chassis Power is off\n"):
return False
return None

def start_console(self):
if not self.port:
Expand Down

0 comments on commit 6337d70

Please sign in to comment.