Navigation Menu

Skip to content

Commit

Permalink
Retry download_vhd with different glance host each time
Browse files Browse the repository at this point in the history
Fixes bug 944096

Change-Id: I33aa3774ba7f266e85f09c6c569fdd0f895478b4
  • Loading branch information
Johannes Erdfelt committed Mar 2, 2012
1 parent 8a53083 commit c4a2e17
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 54 deletions.
4 changes: 4 additions & 0 deletions nova/exception.py
Expand Up @@ -1015,3 +1015,7 @@ class InstanceNotFound(NotFound):

class InvalidInstanceIDMalformed(Invalid):
message = _("Invalid id: %(val)s (expecting \"i-...\").")


class CouldNotFetchImage(NovaException):
    """Raised when an image could not be fetched.

    The `image` keyword argument is interpolated into the message.
    """
    message = _("Could not fetch image %(image)s")
69 changes: 47 additions & 22 deletions nova/virt/xenapi/vm_utils.py
Expand Up @@ -691,6 +691,48 @@ def fetch_image(cls, context, session, instance, image, user_id,
return cls._fetch_image_glance_disk(context,
session, instance, image, image_type)

@classmethod
def _retry_glance_download_vhd(cls, context, session, instance, image):
    """Run the 'download_vhd' glance plugin, retrying on retryable errors.

    A glance API server is picked anew for every attempt, so a retry may
    be directed at a different host than the one that just failed. The
    delay between attempts starts at 0.5s and doubles up to 15s.

    :param context: request context; its `auth_token` attribute (None if
                    absent) is passed through to the plugin
    :param session: XenAPI session used to invoke the plugin and wait for
                    the resulting task
    :param instance: instance mapping; `instance['uuid']` is handed to
                     `session.wait_for_task`
    :param image: id of the image to download
    :returns: the plugin's JSON-encoded result, decoded
    :raises exception.CouldNotFetchImage: when every attempt failed with
                                          a 'RetryableError'
    :raises XenAPI.Failure: re-raised immediately for any other failure
    """
    # NOTE(sirp): The Glance plugin runs under Python 2.4
    # which does not have the `uuid` module. To work around this,
    # we generate the uuids here (under Python 2.6+) and
    # pass them as arguments
    uuid_stack = [str(uuid.uuid4()) for i in xrange(3)]

    max_attempts = FLAGS.glance_num_retries + 1
    sleep_time = 0.5
    for attempt_num in xrange(1, max_attempts + 1):
        glance_host, glance_port = glance.pick_glance_api_server()
        params = {'image_id': image,
                  'glance_host': glance_host,
                  'glance_port': glance_port,
                  'uuid_stack': uuid_stack,
                  'sr_path': cls.get_sr_path(session),
                  # Retries are driven by this loop, not by the plugin.
                  'num_retries': 0,
                  'auth_token': getattr(context, 'auth_token', None)}
        kwargs = {'params': pickle.dumps(params)}

        LOG.info(_('download_vhd %(image)s '
                   'attempt %(attempt_num)d/%(max_attempts)d '
                   'from %(glance_host)s:%(glance_port)s') %
                 {'image': image,
                  'attempt_num': attempt_num,
                  'max_attempts': max_attempts,
                  'glance_host': glance_host,
                  'glance_port': glance_port})

        task = session.async_call_plugin('glance', 'download_vhd', kwargs)
        try:
            result = session.wait_for_task(task, instance['uuid'])
            return json.loads(result)
        except cls.XenAPI.Failure as exc:
            # The code below treats details[2] as the error name. Check
            # the length first so that a short details list re-raises
            # the original failure instead of a ValueError from
            # unpacking.
            details = exc.details
            if len(details) < 3 or details[2] != 'RetryableError':
                raise
            LOG.error(_('download_vhd failed: %r') % (details[3:],))

        # Only back off when another attempt will follow; sleeping after
        # the last one would just delay the error below.
        if attempt_num < max_attempts:
            time.sleep(sleep_time)
            sleep_time = min(2 * sleep_time, 15)

    raise exception.CouldNotFetchImage(image=image)

@classmethod
def _fetch_image_glance_vhd(cls, context, session, instance, image,
image_type):
Expand All @@ -703,29 +745,12 @@ def _fetch_image_glance_vhd(cls, context, session, instance, image,
% locals())
sr_ref = cls.safe_find_sr(session)

# NOTE(sirp): The Glance plugin runs under Python 2.4
# which does not have the `uuid` module. To work around this,
# we generate the uuids here (under Python 2.6+) and
# pass them as arguments
uuid_stack = [str(uuid.uuid4()) for i in xrange(3)]
vdis = cls._retry_glance_download_vhd(context, session, instance,
image)

glance_host, glance_port = glance.pick_glance_api_server()
params = {'image_id': image,
'glance_host': glance_host,
'glance_port': glance_port,
'uuid_stack': uuid_stack,
'sr_path': cls.get_sr_path(session),
'num_retries': FLAGS.glance_num_retries,
'auth_token': getattr(context, 'auth_token', None)}

kwargs = {'params': pickle.dumps(params)}
task = session.async_call_plugin('glance', 'download_vhd', kwargs)
result = session.wait_for_task(task, instance['uuid'])
# 'download_vhd' will return a json encoded string containing
# a list of dictionaries describing VDIs. The dictionary will
# contain 'vdi_type' and 'vdi_uuid' keys. 'vdi_type' can be
# 'os' or 'swap' right now.
vdis = json.loads(result)
# 'download_vhd' will return a list of dictionaries describing VDIs.
# The dictionary will contain 'vdi_type' and 'vdi_uuid' keys.
# 'vdi_type' can be 'os' or 'swap' right now.
for vdi in vdis:
LOG.debug(_("xapi 'download_vhd' returned VDI of "
"type '%(vdi_type)s' with UUID '%(vdi_uuid)s'" % vdi))
Expand Down
49 changes: 17 additions & 32 deletions plugins/xenserver/xenapi/etc/xapi.d/plugins/glance
Expand Up @@ -48,7 +48,7 @@ CHUNK_SIZE = 8192
KERNEL_DIR = '/boot/guest'


class RetryException(Exception):
class RetryableError(Exception):
pass


Expand Down Expand Up @@ -86,18 +86,17 @@ def _copy_kernel_vdi(dest, copy_args):
return filename


def _download_tarball(request, staging_path):
"""Make one attempt to download and extract the image tarball"""
def _download_tarball_and_verify(request, staging_path):
try:
response = urllib2.urlopen(request)
except urllib2.HTTPError, error:
raise RetryException(error)
raise RetryableError(error)
except urllib2.URLError, error:
raise RetryException(error)
raise RetryableError(error)
except httplib.HTTPException, error:
# httplib.HTTPException and derivatives (BadStatusLine in particular)
# don't have a useful __repr__ or __str__
raise RetryException('%s: %s' % (error.__class__.__name__, error))
raise RetryableError('%s: %s' % (error.__class__.__name__, error))

tar_cmd = "tar -zx --directory=%(staging_path)s" % locals()
tar_proc = _make_subprocess(tar_cmd, stderr=True, stdin=True)
Expand All @@ -124,24 +123,21 @@ def _download_tarball(request, staging_path):
try:
_finish_subprocess(tar_proc, tar_cmd)
except Exception, error:
raise RetryException(error)
raise RetryableError(error)

checksum = checksum.hexdigest()
if etag is None:
msg = "No ETag found for comparison to checksum %(checksum)s"
logging.info(msg % locals())
elif checksum != etag:
msg = 'ETag %(etag)s does not match computed md5sum %(checksum)s'
raise RetryException(msg % locals())
raise RetryableError(msg % locals())
else:
msg = "Verified image checksum %(checksum)s"
logging.info(msg % locals())

return


def _download_tarball_with_retry(sr_path, image_id, glance_host,
glance_port, auth_token, num_retries):
def _download_tarball(sr_path, image_id, glance_host, glance_port, auth_token):
"""Download the tarball image from Glance and extract it into the staging
area. If the download fails, the staging area is cleaned up and the
exception is re-raised; retrying is left to the caller.
"""
Expand All @@ -155,24 +151,15 @@ def _download_tarball_with_retry(sr_path, image_id, glance_host,
logging.info("Downloading %s" % url)

request = urllib2.Request(url, headers=headers)
sleep_time = 0.5

for try_num in xrange(1, num_retries + 2):
try:
staging_path = _make_staging_area(sr_path)
_download_tarball(request, staging_path)
return staging_path
except RetryException, error:
msg = "Downloading %(url)s attempt %(try_num)d error: %(error)s"
logging.error(msg % locals())

staging_path = _make_staging_area(sr_path)
try:
_download_tarball_and_verify(request, staging_path)
except Exception:
logging.exception('Failed to retrieve %(url)s' % locals())
_cleanup_staging_area(staging_path)
time.sleep(sleep_time)
sleep_time = min(2 * sleep_time, 15)
raise

msg = "Unable to retrieve %(url)s after %(try_num)d attempt(s)." % locals()
logging.error(msg)
raise Exception(msg)
return staging_path


def _import_vhds(sr_path, staging_path, uuid_stack):
Expand Down Expand Up @@ -492,13 +479,11 @@ def download_vhd(session, args):
uuid_stack = params["uuid_stack"]
sr_path = params["sr_path"]
auth_token = params["auth_token"]
num_retries = params["num_retries"]

staging_path = None
try:
staging_path = _download_tarball_with_retry(sr_path, image_id,
glance_host, glance_port,
auth_token, num_retries)
staging_path = _download_tarball(sr_path, image_id, glance_host,
glance_port, auth_token)
# Right now, it's easier to return a single string via XenAPI,
# so we'll json encode the list of VHDs.
return json.dumps(_import_vhds(sr_path, staging_path, uuid_stack))
Expand Down

0 comments on commit c4a2e17

Please sign in to comment.