Skip to content

Commit

Permalink
Restore libvirt block storage connections on reboot.
Browse files Browse the repository at this point in the history
Fixes bug 1036902.

There are a number of cases where block storage connections are not
properly restored, impacting libvirt in particular. The most common
case is a VM with block storage attached via iSCSI whose physical
host system is rebooted. When the system comes back up and
starts nova-compute, the iSCSI connections are NOT recreated for the
instances slated to be resumed (assuming
resume_guests_state_on_host_boot is set).

The patch changes the compute manager to pass block_device_info via
driver.reboot() and driver.resume_state_on_host_boot(). The fix is
actually only present in the libvirt driver. However, all the other
drivers were updated to accept the additional, optional function
arg.

With the changes in place, iSCSI connections for libvirt are
re-established after a hypervisor reboot with
resume_guests_state_on_host_boot=True and on every hard_reboot.
The latter is intended so that users have a last ditch option for
recovering their VMs without administrative involvement.

Change-Id: Idf5d53f21991a359bec6ce26ae9fe3bd61800ce3
  • Loading branch information
rmk40 committed Aug 14, 2012
1 parent a10be15 commit 9fffd28
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 19 deletions.
19 changes: 15 additions & 4 deletions nova/compute/manager.py
Expand Up @@ -329,10 +329,17 @@ def init_host(self):
LOG.info(
_('Rebooting instance after nova-compute restart.'),
locals(), instance=instance)

block_device_info = \
self._get_instance_volume_block_device_info(
context, instance['uuid'])

try:
self.driver.resume_state_on_host_boot(context,
instance,
self._legacy_nw_info(net_info))
self.driver.resume_state_on_host_boot(
context,
instance,
self._legacy_nw_info(net_info),
block_device_info)
except NotImplementedError:
LOG.warning(_('Hypervisor driver does not support '
'resume guests'), instance=instance)
Expand Down Expand Up @@ -1118,9 +1125,13 @@ def reboot_instance(self, context, instance=None, instance_uuid=None,
context=context, instance=instance)

network_info = self._get_instance_nw_info(context, instance)

block_device_info = self._get_instance_volume_block_device_info(
context, instance['uuid'])

try:
self.driver.reboot(instance, self._legacy_nw_info(network_info),
reboot_type)
reboot_type, block_device_info)
except Exception, exc:
LOG.error(_('Cannot reboot instance: %(exc)s'), locals(),
context=context, instance=instance)
Expand Down
6 changes: 4 additions & 2 deletions nova/virt/driver.py
Expand Up @@ -192,7 +192,8 @@ def destroy(self, instance, network_info, block_device_info=None):
# TODO(Vek): Need to pass context in for access to auth_token
raise NotImplementedError()

def reboot(self, instance, network_info, reboot_type):
def reboot(self, instance, network_info, reboot_type,
block_device_info=None):
"""Reboot the specified instance.
:param instance: Instance object as returned by DB layer.
Expand Down Expand Up @@ -301,7 +302,8 @@ def resume(self, instance):
# TODO(Vek): Need to pass context in for access to auth_token
raise NotImplementedError()

def resume_state_on_host_boot(self, context, instance, network_info):
def resume_state_on_host_boot(self, context, instance, network_info,
block_device_info=None):
"""resume guest state when a host is booted"""
raise NotImplementedError()

Expand Down
6 changes: 4 additions & 2 deletions nova/virt/fake.py
Expand Up @@ -92,7 +92,8 @@ def snapshot(self, context, instance, name):
if not instance['name'] in self.instances:
raise exception.InstanceNotRunning()

def reboot(self, instance, network_info, reboot_type):
def reboot(self, instance, network_info, reboot_type,
block_device_info=None):
pass

@staticmethod
Expand All @@ -105,7 +106,8 @@ def set_admin_password(self, instance, new_pass):
def inject_file(self, instance, b64_path, b64_contents):
pass

def resume_state_on_host_boot(self, context, instance, network_info):
def resume_state_on_host_boot(self, context, instance, network_info,
block_device_info=None):
pass

def rescue(self, context, instance, network_info, image_meta,
Expand Down
3 changes: 2 additions & 1 deletion nova/virt/hyperv/driver.py
Expand Up @@ -95,7 +95,8 @@ def spawn(self, context, instance, image_meta, network_info,
self._vmops.spawn(context, instance, image_meta, network_info,
block_device_info)

def reboot(self, instance, network_info, reboot_type):
def reboot(self, instance, network_info, reboot_type,
block_device_info=None):
self._vmops.reboot(instance, network_info, reboot_type)

def destroy(self, instance, network_info=None, cleanup=True):
Expand Down
40 changes: 33 additions & 7 deletions nova/virt/libvirt/driver.py
Expand Up @@ -810,7 +810,8 @@ def snapshot(self, context, instance, image_href):
image_file)

@exception.wrap_exception()
def reboot(self, instance, network_info, reboot_type='SOFT'):
def reboot(self, instance, network_info, reboot_type='SOFT',
block_device_info=None):
"""Reboot a virtual machine, given an instance reference."""
if reboot_type == 'SOFT':
# NOTE(vish): This will attempt to do a graceful shutdown/restart.
Expand All @@ -821,7 +822,7 @@ def reboot(self, instance, network_info, reboot_type='SOFT'):
else:
LOG.warn(_("Failed to soft reboot instance."),
instance=instance)
return self._hard_reboot(instance)
return self._hard_reboot(instance, block_device_info=block_device_info)

def _soft_reboot(self, instance):
"""Attempt to shutdown and restart the instance gracefully.
Expand Down Expand Up @@ -858,7 +859,7 @@ def _soft_reboot(self, instance):
greenthread.sleep(1)
return False

def _hard_reboot(self, instance, xml=None):
def _hard_reboot(self, instance, xml=None, block_device_info=None):
"""Reboot a virtual machine, given an instance reference.
Performs a Libvirt reset (if supported) on the domain.
Expand All @@ -871,6 +872,16 @@ def _hard_reboot(self, instance, xml=None):
existing domain.
"""

block_device_mapping = driver.block_device_info_get_mapping(
block_device_info)

for vol in block_device_mapping:
connection_info = vol['connection_info']
mount_device = vol['mount_device'].rpartition("/")[2]
self.volume_driver_method('connect_volume',
connection_info,
mount_device)

virt_dom = self._lookup_by_name(instance['name'])
# NOTE(itoumsn): Use XML derived from the running instance.
if not xml:
Expand Down Expand Up @@ -934,11 +945,13 @@ def resume(self, instance):
self._create_domain(domain=dom)

@exception.wrap_exception()
def resume_state_on_host_boot(self, context, instance, network_info):
def resume_state_on_host_boot(self, context, instance, network_info,
block_device_info=None):
"""resume guest state when a host is booted"""
virt_dom = self._lookup_by_name(instance['name'])
xml = virt_dom.XMLDesc(0)
self._create_domain_and_network(xml, instance, network_info)
self._create_domain_and_network(xml, instance, network_info,
block_device_info)

@exception.wrap_exception()
def rescue(self, context, instance, network_info, image_meta,
Expand Down Expand Up @@ -1017,7 +1030,8 @@ def spawn(self, context, instance, image_meta, injected_files,
block_device_info=block_device_info,
files=injected_files,
admin_pass=admin_password)
self._create_domain_and_network(xml, instance, network_info)
self._create_domain_and_network(xml, instance, network_info,
block_device_info)
LOG.debug(_("Instance is running"), instance=instance)

def _wait_for_boot():
Expand Down Expand Up @@ -1830,8 +1844,20 @@ def _create_domain(self, xml=None, domain=None, launch_flags=0):
domain.createWithFlags(launch_flags)
return domain

def _create_domain_and_network(self, xml, instance, network_info):
def _create_domain_and_network(self, xml, instance, network_info,
block_device_info=None):

"""Do required network setup and create domain."""
block_device_mapping = driver.block_device_info_get_mapping(
block_device_info)

for vol in block_device_mapping:
connection_info = vol['connection_info']
mount_device = vol['mount_device'].rpartition("/")[2]
self.volume_driver_method('connect_volume',
connection_info,
mount_device)

self.plug_vifs(instance, network_info)
self.firewall_driver.setup_basic_filtering(instance, network_info)
self.firewall_driver.prepare_instance_filter(instance, network_info)
Expand Down
3 changes: 2 additions & 1 deletion nova/virt/powervm/driver.py
Expand Up @@ -138,7 +138,8 @@ def destroy(self, instance, network_info, block_device_info=None):
"""Destroy (shutdown and delete) the specified instance."""
self._powervm.destroy(instance['name'])

def reboot(self, instance, network_info, reboot_type):
def reboot(self, instance, network_info, reboot_type,
block_device_info=None):
"""Reboot the specified instance.
:param instance: Instance object as returned by DB layer.
Expand Down
3 changes: 2 additions & 1 deletion nova/virt/vmwareapi/driver.py
Expand Up @@ -135,7 +135,8 @@ def snapshot(self, context, instance, name):
"""Create snapshot from a running VM instance."""
self._vmops.snapshot(context, instance, name)

def reboot(self, instance, network_info, reboot_type):
def reboot(self, instance, network_info, reboot_type,
block_device_info=None):
"""Reboot VM instance."""
self._vmops.reboot(instance, network_info)

Expand Down
3 changes: 2 additions & 1 deletion nova/virt/xenapi/driver.py
Expand Up @@ -197,7 +197,8 @@ def snapshot(self, context, instance, image_id):
""" Create snapshot from a running VM instance """
self._vmops.snapshot(context, instance, image_id)

def reboot(self, instance, network_info, reboot_type):
def reboot(self, instance, network_info, reboot_type,
block_device_info=None):
"""Reboot VM instance"""
self._vmops.reboot(instance, reboot_type)

Expand Down

0 comments on commit 9fffd28

Please sign in to comment.