Skip to content

Commit

Permalink
Fix race condition in XenAPI when using <object>.get_all
Browse files Browse the repository at this point in the history
Fixes bug 887708

There are a handful of places where <object>.get_all is followed by a
<object>.get_record calls that are potentially racey. This patch fixes
all of these cases to use common code that is tolerant of HANDLE_INVALID
errors that would be indicative of a race between get_all and delete

Change-Id: Ib94adb6d21b6b55e7b26fc1da52ed46d9dba8275
  • Loading branch information
Johannes Erdfelt committed Dec 7, 2011
1 parent c3b7cce commit fb27cc6
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 88 deletions.
6 changes: 4 additions & 2 deletions nova/tests/test_xenapi.py
Expand Up @@ -1110,13 +1110,15 @@ class HostStateTestCase(test.TestCase):
"""Tests HostState, which holds metrics from XenServer that get
reported back to the Schedulers."""

def _fake_safe_find_sr(self, session):
@classmethod
def _fake_safe_find_sr(cls, session):
"""None SR ref since we're ignoring it in FakeSR."""
return None

def test_host_state(self):
self.stubs = stubout.StubOutForTesting()
self.stubs.Set(vm_utils, 'safe_find_sr', self._fake_safe_find_sr)
self.stubs.Set(vm_utils.VMHelper, 'safe_find_sr',
self._fake_safe_find_sr)
host_state = xenapi_conn.HostState(FakeSession())
stats = host_state._stats
self.assertEquals(stats['disk_total'], 10000)
Expand Down
25 changes: 25 additions & 0 deletions nova/virt/xenapi/__init__.py
Expand Up @@ -28,3 +28,28 @@ class HelperBase(object):

def __init__(self):
return

@classmethod
def get_rec(cls, session, record_type, ref):
try:
return session.call_xenapi('%s.get_record' % record_type, ref)
except cls.XenAPI.Failure, e:
if e.details[0] != 'HANDLE_INVALID':
raise

return None

@classmethod
def get_all_refs_and_recs(cls, session, record_type):
"""Retrieve all refs and recs for a Xen record type.
Handles race-conditions where the record may be deleted between
the `get_all` call and the `get_record` call.
"""

for ref in session.call_xenapi('%s.get_all' % record_type):
rec = cls.get_rec(session, record_type, ref)
# Check to make sure the record still exists. It may have
# been deleted between the get_all call and get_record call
if rec:
yield ref, rec
159 changes: 77 additions & 82 deletions nova/virt/xenapi/vm_utils.py
Expand Up @@ -353,7 +353,7 @@ def get_sr_path(cls, session):
This is used when we're dealing with VHDs directly, either by taking
snapshots or by restoring an image in the DISK_VHD format.
"""
sr_ref = safe_find_sr(session)
sr_ref = cls.safe_find_sr(session)
sr_rec = session.call_xenapi("SR.get_record", sr_ref)
sr_uuid = sr_rec["uuid"]
return os.path.join(FLAGS.xenapi_sr_base_path, sr_uuid)
Expand Down Expand Up @@ -390,7 +390,7 @@ def upload_image(cls, context, session, instance, vdi_uuids, image_id):
def resize_disk(cls, session, vdi_ref, instance_type):
# Copy VDI over to something we can resize
# NOTE(jerdfelt): Would be nice to just set vdi_ref to read/write
sr_ref = safe_find_sr(session)
sr_ref = cls.safe_find_sr(session)
copy_ref = session.call_xenapi('VDI.copy', vdi_ref, sr_ref)
copy_uuid = session.call_xenapi('VDI.get_uuid', copy_ref)

Expand Down Expand Up @@ -455,7 +455,7 @@ def generate_swap(cls, session, instance, vm_ref, userdevice, swap_mb):
4. Create VBD between instance VM and swap VDI
"""
# 1. Create VDI
sr_ref = safe_find_sr(session)
sr_ref = cls.safe_find_sr(session)
name_label = instance.name + "-swap"
ONE_MEG = 1024 * 1024
virtual_size = swap_mb * ONE_MEG
Expand Down Expand Up @@ -503,7 +503,7 @@ def fetch_blank_disk(cls, session, instance_type_id):
vdi_size = one_gig * req_size

LOG.debug("ISO vm create: Looking for the SR")
sr_ref = safe_find_sr(session)
sr_ref = cls.safe_find_sr(session)

vdi_ref = cls.create_vdi(session, sr_ref, 'blank HD', vdi_size, False)
return vdi_ref
Expand Down Expand Up @@ -533,7 +533,7 @@ def _fetch_image_glance_vhd(cls, context, session, instance, image,
instance_id = instance.id
LOG.debug(_("Asking xapi to fetch vhd image %(image)s")
% locals())
sr_ref = safe_find_sr(session)
sr_ref = cls.safe_find_sr(session)

# NOTE(sirp): The Glance plugin runs under Python 2.4
# which does not have the `uuid` module. To work around this,
Expand Down Expand Up @@ -634,10 +634,10 @@ def _fetch_image_glance_disk(cls, context, session, instance, image,
LOG.debug(_("Image Type: %s"), ImageType.to_string(image_type))

if image_type == ImageType.DISK_ISO:
sr_ref = safe_find_iso_sr(session)
sr_ref = cls.safe_find_iso_sr(session)
LOG.debug(_("ISO: Found sr possibly containing the ISO image"))
else:
sr_ref = safe_find_sr(session)
sr_ref = cls.safe_find_sr(session)

glance_client, image_id = glance.get_glance_client(context, image)
glance_client.set_auth_token(getattr(context, 'auth_token', None))
Expand Down Expand Up @@ -787,12 +787,8 @@ def set_vm_name_label(cls, session, vm_ref, name_label):

@classmethod
def list_vms(cls, session):
vm_refs = session.call_xenapi("VM.get_all")
for vm_ref in vm_refs:
vm_rec = session.call_xenapi("VM.get_record", vm_ref)
if vm_rec["is_a_template"]:
continue
elif vm_rec["is_control_domain"]:
for vm_ref, vm_rec in cls.get_all_refs_and_recs(session, 'VM'):
if vm_rec["is_a_template"] or vm_rec["is_control_domain"]:
continue
else:
yield vm_ref, vm_rec
Expand Down Expand Up @@ -925,9 +921,76 @@ def scan_sr(cls, session, instance_id=None, sr_ref=None):
@classmethod
def scan_default_sr(cls, session):
"""Looks for the system default SR and triggers a re-scan"""
sr_ref = find_sr(session)
sr_ref = cls.find_sr(session)
session.call_xenapi('SR.scan', sr_ref)

@classmethod
def safe_find_sr(cls, session):
"""Same as find_sr except raises a NotFound exception if SR cannot be
determined
"""
sr_ref = cls.find_sr(session)
if sr_ref is None:
raise exception.StorageRepositoryNotFound()
return sr_ref

@classmethod
def find_sr(cls, session):
"""Return the storage repository to hold VM images"""
host = session.get_xenapi_host()

for sr_ref, sr_rec in cls.get_all_refs_and_recs(session, 'SR'):
if not ('i18n-key' in sr_rec['other_config'] and
sr_rec['other_config']['i18n-key'] == 'local-storage'):
continue
for pbd_ref in sr_rec['PBDs']:
pbd_rec = cls.get_rec(session, 'PBD', pbd_ref)
if pbd_rec and pbd_rec['host'] == host:
return sr_ref
return None

@classmethod
def safe_find_iso_sr(cls, session):
"""Same as find_iso_sr except raises a NotFound exception if SR
cannot be determined
"""
sr_ref = cls.find_iso_sr(session)
if sr_ref is None:
raise exception.NotFound(_('Cannot find SR of content-type ISO'))
return sr_ref

@classmethod
def find_iso_sr(cls, session):
"""Return the storage repository to hold ISO images"""
host = session.get_xenapi_host()
for sr_ref, sr_rec in cls.get_all_refs_and_recs(session, 'SR'):
LOG.debug(_("ISO: looking at SR %(sr_rec)s") % locals())
if not sr_rec['content_type'] == 'iso':
LOG.debug(_("ISO: not iso content"))
continue
if not 'i18n-key' in sr_rec['other_config']:
LOG.debug(_("ISO: iso content_type, no 'i18n-key' key"))
continue
if not sr_rec['other_config']['i18n-key'] == 'local-storage-iso':
LOG.debug(_("ISO: iso content_type, i18n-key value not "
"'local-storage-iso'"))
continue

LOG.debug(_("ISO: SR MATCHing our criteria"))
for pbd_ref in sr_rec['PBDs']:
LOG.debug(_("ISO: ISO, looking to see if it is host local"))
pbd_rec = cls.get_rec(session, 'PBD', pbd_ref)
if not pbd_rec:
LOG.debug(_("ISO: PBD %(pbd_ref)s disappeared") % locals())
continue
pbd_rec_host = pbd_rec['host']
LOG.debug(_("ISO: PBD matching, want %(pbd_rec)s, " +
"have %(host)s") % locals())
if pbd_rec_host == host:
LOG.debug(_("ISO: SR with local PBD"))
return sr_ref
return None


def get_rrd(host, vm_uuid):
"""Return the VM RRD XML as a string"""
Expand Down Expand Up @@ -1107,74 +1170,6 @@ def _poll_vhds():
return parent_uuid


def safe_find_sr(session):
"""Same as find_sr except raises a NotFound exception if SR cannot be
determined
"""
sr_ref = find_sr(session)
if sr_ref is None:
raise exception.StorageRepositoryNotFound()
return sr_ref


def find_sr(session):
"""Return the storage repository to hold VM images"""
host = session.get_xenapi_host()
sr_refs = session.call_xenapi("SR.get_all")
for sr_ref in sr_refs:
sr_rec = session.call_xenapi("SR.get_record", sr_ref)
if not ('i18n-key' in sr_rec['other_config'] and
sr_rec['other_config']['i18n-key'] == 'local-storage'):
continue
for pbd_ref in sr_rec['PBDs']:
pbd_rec = session.call_xenapi("PBD.get_record", pbd_ref)
if pbd_rec['host'] == host:
return sr_ref
return None


def safe_find_iso_sr(session):
"""Same as find_iso_sr except raises a NotFound exception if SR cannot be
determined
"""
sr_ref = find_iso_sr(session)
if sr_ref is None:
raise exception.NotFound(_('Cannot find SR of content-type ISO'))
return sr_ref


def find_iso_sr(session):
"""Return the storage repository to hold ISO images"""
host = session.get_xenapi_host()
sr_refs = session.call_xenapi("SR.get_all")
for sr_ref in sr_refs:
sr_rec = session.call_xenapi("SR.get_record", sr_ref)

LOG.debug(_("ISO: looking at SR %(sr_rec)s") % locals())
if not sr_rec['content_type'] == 'iso':
LOG.debug(_("ISO: not iso content"))
continue
if not 'i18n-key' in sr_rec['other_config']:
LOG.debug(_("ISO: iso content_type, no 'i18n-key' key"))
continue
if not sr_rec['other_config']['i18n-key'] == 'local-storage-iso':
LOG.debug(_("ISO: iso content_type, i18n-key value not "
"'local-storage-iso'"))
continue

LOG.debug(_("ISO: SR MATCHing our criteria"))
for pbd_ref in sr_rec['PBDs']:
LOG.debug(_("ISO: ISO, looking to see if it is host local"))
pbd_rec = session.call_xenapi("PBD.get_record", pbd_ref)
pbd_rec_host = pbd_rec['host']
LOG.debug(_("ISO: PBD matching, want %(pbd_rec)s, have %(host)s") %
locals())
if pbd_rec_host == host:
LOG.debug(_("ISO: SR with local PBD"))
return sr_ref
return None


def remap_vbd_dev(dev):
"""Return the appropriate location for a plugged-in VBD device
Expand Down
4 changes: 1 addition & 3 deletions nova/virt/xenapi/volume_utils.py
Expand Up @@ -141,9 +141,7 @@ def find_sr_by_uuid(cls, session, sr_uuid):
"""
Return the storage repository given a uuid.
"""
sr_refs = session.call_xenapi("SR.get_all")
for sr_ref in sr_refs:
sr_rec = session.call_xenapi("SR.get_record", sr_ref)
for sr_ref, sr_rec in cls.get_all_refs_and_recs(session, 'SR'):
if sr_rec['uuid'] == sr_uuid:
return sr_ref
return None
Expand Down
2 changes: 1 addition & 1 deletion nova/virt/xenapi_conn.py
Expand Up @@ -622,7 +622,7 @@ def update_status(self):
return
# Get the SR usage
try:
sr_ref = vm_utils.safe_find_sr(self._session)
sr_ref = vm_utils.VMHelper.safe_find_sr(self._session)
except exception.NotFound as e:
# No SR configured
LOG.error(_("Unable to get SR for this host: %s") % e)
Expand Down

0 comments on commit fb27cc6

Please sign in to comment.