Skip to content

Commit

Permalink
Restore libvirt block storage connections on reboot.
Browse files Browse the repository at this point in the history
Fixes bug 1036902 -- Backported version for stable/essex.

There are a number of cases where block storage connections are not
properly restored, impacting libvirt in particular. The most common
case is a VM with block storage attached via iSCSI whose physical
host is rebooted. When the host comes back up and starts
nova-compute, the iSCSI connections are NOT recreated for the
instances slated to be resumed (assuming
resume_guests_state_on_host_boot is set).

The patch changes the compute manager to pass block_device_info via
driver.reboot() and driver.resume_state_on_host_boot(). The fix
itself is only present in the libvirt driver. However, all the other
drivers were updated to accept the additional, optional function
arg.

With the changes in place, iSCSI connections for libvirt are
re-established after a hypervisor reboot with
resume_guests_state_on_host_boot=True and on every hard_reboot.
The latter is intended so that users have a last-ditch option for
recovering their VMs without administrative involvement.

Change-Id: I8ab3a138b559ee0aa1535a928282e9c372ec5651
  • Loading branch information
rmk40 committed Aug 15, 2012
1 parent 63dd6be commit 09217ab
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 13 deletions.
1 change: 1 addition & 0 deletions Authors
Expand Up @@ -162,6 +162,7 @@ Paul Voccio <paul@openstack.org>
Peng Yong <ppyy@pubyun.com>
Phil Day <philip.day@hp.com>
Philip Knouff <philip.knouff@mailtrust.com>
Rafi Khardalian <rafi@metacloud.com>
Ralf Haferkamp <rhafer@suse.de>
Renuka Apte <renuka.apte@citrix.com>
Ricardo Carrillo Cruz <emaildericky@gmail.com>
Expand Down
14 changes: 12 additions & 2 deletions nova/compute/manager.py
Expand Up @@ -258,9 +258,15 @@ def init_host(self):
FLAGS.start_guests_on_host_boot):
LOG.info(_('Rebooting instance after nova-compute restart.'),
locals(), instance=instance)

block_device_info = \
self._get_instance_volume_block_device_info(context,
instance['id'])

try:
self.driver.resume_state_on_host_boot(context, instance,
self._legacy_nw_info(net_info))
self._legacy_nw_info(net_info),
block_device_info)
except NotImplementedError:
LOG.warning(_('Hypervisor driver does not support '
'resume guests'), instance=instance)
Expand Down Expand Up @@ -913,8 +919,12 @@ def reboot_instance(self, context, instance_uuid, reboot_type="SOFT"):
context=context)

network_info = self._get_instance_nw_info(context, instance)

block_device_info = self._get_instance_volume_block_device_info(
context, instance['id'])

self.driver.reboot(instance, self._legacy_nw_info(network_info),
reboot_type)
reboot_type, block_device_info)

current_power_state = self._get_power_state(context, instance)
self._instance_update(context,
Expand Down
6 changes: 4 additions & 2 deletions nova/virt/driver.py
Expand Up @@ -209,7 +209,8 @@ def destroy(self, instance, network_info, block_device_info=None):
# TODO(Vek): Need to pass context in for access to auth_token
raise NotImplementedError()

def reboot(self, instance, network_info, reboot_type):
def reboot(self, instance, network_info, reboot_type,
block_device_info=None):
"""Reboot the specified instance.
:param instance: Instance object as returned by DB layer.
Expand Down Expand Up @@ -335,7 +336,8 @@ def resume(self, instance):
# TODO(Vek): Need to pass context in for access to auth_token
raise NotImplementedError()

def resume_state_on_host_boot(self, context, instance, network_info):
def resume_state_on_host_boot(self, context, instance, network_info,
block_device_info=None):
"""resume guest state when a host is booted"""
raise NotImplementedError()

Expand Down
6 changes: 4 additions & 2 deletions nova/virt/fake.py
Expand Up @@ -112,7 +112,8 @@ def snapshot(self, context, instance, name):
if not instance['name'] in self.instances:
raise exception.InstanceNotRunning()

def reboot(self, instance, network_info, reboot_type):
def reboot(self, instance, network_info, reboot_type,
block_device_info=None):
pass

@staticmethod
Expand All @@ -131,7 +132,8 @@ def inject_file(self, instance, b64_path, b64_contents):
def agent_update(self, instance, url, md5hash):
pass

def resume_state_on_host_boot(self, context, instance, network_info):
def resume_state_on_host_boot(self, context, instance, network_info,
block_device_info=None):
pass

def rescue(self, context, instance, network_info, image_meta):
Expand Down
26 changes: 21 additions & 5 deletions nova/virt/libvirt/connection.py
Expand Up @@ -714,7 +714,8 @@ def snapshot(self, context, instance, image_href):
image_file)

@exception.wrap_exception()
def reboot(self, instance, network_info, reboot_type='SOFT'):
def reboot(self, instance, network_info, reboot_type='SOFT',
block_device_info=None):
"""Reboot a virtual machine, given an instance reference."""
if reboot_type == 'SOFT':
# NOTE(vish): This will attempt to do a graceful shutdown/restart.
Expand All @@ -725,7 +726,8 @@ def reboot(self, instance, network_info, reboot_type='SOFT'):
else:
LOG.info(_("Failed to soft reboot instance."),
instance=instance)
return self._hard_reboot(instance, network_info)
return self._hard_reboot(instance, network_info,
block_device_info=block_device_info)

def _soft_reboot(self, instance):
"""Attempt to shutdown and restart the instance gracefully.
Expand Down Expand Up @@ -760,7 +762,8 @@ def _soft_reboot(self, instance):
greenthread.sleep(1)
return False

def _hard_reboot(self, instance, network_info, xml=None):
def _hard_reboot(self, instance, network_info, xml=None,
block_device_info=None):
"""Reboot a virtual machine, given an instance reference.
This method actually destroys and re-creates the domain to ensure the
Expand All @@ -769,6 +772,17 @@ def _hard_reboot(self, instance, network_info, xml=None):
If xml is set, it uses the passed in xml in place of the xml from the
existing domain.
"""

block_device_mapping = driver.block_device_info_get_mapping(
block_device_info)

for vol in block_device_mapping:
connection_info = vol['connection_info']
mount_device = vol['mount_device'].rpartition("/")[2]
self.volume_driver_method('connect_volume',
connection_info,
mount_device)

virt_dom = self._conn.lookupByName(instance['name'])
# NOTE(itoumsn): Use XML derived from the running instance
# instead of using to_xml(instance, network_info). This is almost
Expand Down Expand Up @@ -825,11 +839,13 @@ def resume(self, instance):
dom.create()

@exception.wrap_exception()
def resume_state_on_host_boot(self, context, instance, network_info):
def resume_state_on_host_boot(self, context, instance, network_info,
block_device_info=None):
"""resume guest state when a host is booted"""
# NOTE(dprince): use hard reboot to ensure network and firewall
# rules are configured
self._hard_reboot(instance, network_info)
self._hard_reboot(instance, network_info,
block_device_info=block_device_info)

@exception.wrap_exception()
def rescue(self, context, instance, network_info, image_meta):
Expand Down
3 changes: 2 additions & 1 deletion nova/virt/vmwareapi_conn.py
Expand Up @@ -138,7 +138,8 @@ def snapshot(self, context, instance, name):
"""Create snapshot from a running VM instance."""
self._vmops.snapshot(context, instance, name)

def reboot(self, instance, network_info, reboot_type):
def reboot(self, instance, network_info, reboot_type,
block_device_info=None):
"""Reboot VM instance."""
self._vmops.reboot(instance, network_info)

Expand Down
3 changes: 2 additions & 1 deletion nova/virt/xenapi_conn.py
Expand Up @@ -203,7 +203,8 @@ def snapshot(self, context, instance, image_id):
""" Create snapshot from a running VM instance """
self._vmops.snapshot(context, instance, image_id)

def reboot(self, instance, network_info, reboot_type):
def reboot(self, instance, network_info, reboot_type,
block_device_info=None):
"""Reboot VM instance"""
self._vmops.reboot(instance, reboot_type)

Expand Down

0 comments on commit 09217ab

Please sign in to comment.