diff --git a/nova/exception.py b/nova/exception.py
index 43d16642f72..c781170b7fe 100644
--- a/nova/exception.py
+++ b/nova/exception.py
@@ -1015,3 +1015,7 @@ class InstanceNotFound(NotFound):
 
 class InvalidInstanceIDMalformed(Invalid):
     message = _("Invalid id: %(val)s (expecting \"i-...\").")
+
+
+class CouldNotFetchImage(NovaException):
+    message = _("Could not fetch image %(image)s")
diff --git a/nova/virt/xenapi/vm_utils.py b/nova/virt/xenapi/vm_utils.py
index 83103d6f282..0321d0d59c1 100644
--- a/nova/virt/xenapi/vm_utils.py
+++ b/nova/virt/xenapi/vm_utils.py
@@ -691,6 +691,61 @@ def fetch_image(cls, context, session, instance, image, user_id,
             return cls._fetch_image_glance_disk(context, session,
                                                 instance, image, image_type)
 
+    @classmethod
+    def _retry_glance_download_vhd(cls, context, session, instance, image):
+        """Call the 'download_vhd' plugin, retrying on retryable errors.
+
+        A Glance API server is picked anew for every attempt and the delay
+        between attempts doubles, starting at 0.5s and capped at 15s.
+
+        :returns: the list decoded from the plugin's JSON result
+        :raises: CouldNotFetchImage once FLAGS.glance_num_retries + 1
+                 attempts have failed with 'RetryableError'; any other
+                 XenAPI.Failure is re-raised immediately
+        """
+        # NOTE(sirp): The Glance plugin runs under Python 2.4
+        # which does not have the `uuid` module. To work around this,
+        # we generate the uuids here (under Python 2.6+) and
+        # pass them as arguments
+        uuid_stack = [str(uuid.uuid4()) for i in xrange(3)]
+
+        max_attempts = FLAGS.glance_num_retries + 1
+        sleep_time = 0.5
+        for attempt_num in xrange(1, max_attempts + 1):
+            glance_host, glance_port = glance.pick_glance_api_server()
+            params = {'image_id': image,
+                      'glance_host': glance_host,
+                      'glance_port': glance_port,
+                      'uuid_stack': uuid_stack,
+                      'sr_path': cls.get_sr_path(session),
+                      'num_retries': 0,
+                      'auth_token': getattr(context, 'auth_token', None)}
+            kwargs = {'params': pickle.dumps(params)}
+
+            LOG.info(_('download_vhd %(image)s '
+                       'attempt %(attempt_num)d/%(max_attempts)d '
+                       'from %(glance_host)s:%(glance_port)s') % locals())
+
+            task = session.async_call_plugin('glance', 'download_vhd', kwargs)
+            try:
+                result = session.wait_for_task(task, instance['uuid'])
+                return json.loads(result)
+            except cls.XenAPI.Failure as exc:
+                _type, method, error = exc.details[:3]
+                if error == 'RetryableError':
+                    LOG.error(_('download_vhd failed: %r') %
+                              (exc.details[3:],))
+                else:
+                    raise
+
+            # Only back off when another attempt will follow; sleeping
+            # after the final failure just delays the exception.
+            if attempt_num < max_attempts:
+                time.sleep(sleep_time)
+                sleep_time = min(2 * sleep_time, 15)
+
+        raise exception.CouldNotFetchImage(image=image)
+
     @classmethod
     def _fetch_image_glance_vhd(cls, context, session, instance, image,
                                 image_type):
@@ -703,29 +758,12 @@ def _fetch_image_glance_vhd(cls, context, session, instance, image,
                     % locals())
         sr_ref = cls.safe_find_sr(session)
 
-        # NOTE(sirp): The Glance plugin runs under Python 2.4
-        # which does not have the `uuid` module. To work around this,
-        # we generate the uuids here (under Python 2.6+) and
-        # pass them as arguments
-        uuid_stack = [str(uuid.uuid4()) for i in xrange(3)]
+        vdis = cls._retry_glance_download_vhd(context, session, instance,
+                                              image)
 
-        glance_host, glance_port = glance.pick_glance_api_server()
-        params = {'image_id': image,
-                  'glance_host': glance_host,
-                  'glance_port': glance_port,
-                  'uuid_stack': uuid_stack,
-                  'sr_path': cls.get_sr_path(session),
-                  'num_retries': FLAGS.glance_num_retries,
-                  'auth_token': getattr(context, 'auth_token', None)}
-
-        kwargs = {'params': pickle.dumps(params)}
-        task = session.async_call_plugin('glance', 'download_vhd', kwargs)
-        result = session.wait_for_task(task, instance['uuid'])
-        # 'download_vhd' will return a json encoded string containing
-        # a list of dictionaries describing VDIs. The dictionary will
-        # contain 'vdi_type' and 'vdi_uuid' keys. 'vdi_type' can be
-        # 'os' or 'swap' right now.
-        vdis = json.loads(result)
+        # 'download_vhd' will return a list of dictionaries describing VDIs.
+        # The dictionary will contain 'vdi_type' and 'vdi_uuid' keys.
+        # 'vdi_type' can be 'os' or 'swap' right now.
         for vdi in vdis:
             LOG.debug(_("xapi 'download_vhd' returned VDI of "
                     "type '%(vdi_type)s' with UUID '%(vdi_uuid)s'" % vdi))
diff --git a/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance b/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance
index a0f263403e5..20fce3a2c2f 100755
--- a/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance
+++ b/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance
@@ -48,7 +48,7 @@ CHUNK_SIZE = 8192
 KERNEL_DIR = '/boot/guest'
 
 
-class RetryException(Exception):
+class RetryableError(Exception):
     pass
 
 
@@ -86,18 +86,21 @@ def _copy_kernel_vdi(dest, copy_args):
     return filename
 
 
-def _download_tarball(request, staging_path):
-    """Make one attempt to download and extract the image tarball"""
+def _download_tarball_and_verify(request, staging_path):
+    """Make one attempt to download, extract and checksum the tarball.
+
+    Raises RetryableError for failures the caller may retry.
+    """
     try:
         response = urllib2.urlopen(request)
     except urllib2.HTTPError, error:
-        raise RetryException(error)
+        raise RetryableError(error)
     except urllib2.URLError, error:
-        raise RetryException(error)
+        raise RetryableError(error)
     except httplib.HTTPException, error:
         # httplib.HTTPException and derivatives (BadStatusLine in particular)
         # don't have a useful __repr__ or __str__
-        raise RetryException('%s: %s' % (error.__class__.__name__, error))
+        raise RetryableError('%s: %s' % (error.__class__.__name__, error))
 
     tar_cmd = "tar -zx --directory=%(staging_path)s" % locals()
     tar_proc = _make_subprocess(tar_cmd, stderr=True, stdin=True)
@@ -124,7 +127,7 @@ def _download_tarball(request, staging_path):
     try:
         _finish_subprocess(tar_proc, tar_cmd)
     except Exception, error:
-        raise RetryException(error)
+        raise RetryableError(error)
 
     checksum = checksum.hexdigest()
     if etag is None:
@@ -132,16 +135,13 @@ def _download_tarball(request, staging_path):
         logging.info(msg % locals())
     elif checksum != etag:
         msg = 'ETag %(etag)s does not match computed md5sum %(checksum)s'
-        raise RetryException(msg % locals())
+        raise RetryableError(msg % locals())
     else:
         msg = "Verified image checksum %(checksum)s"
         logging.info(msg % locals())
-        return
 
 
-
-def _download_tarball_with_retry(sr_path, image_id, glance_host,
-                                 glance_port, auth_token, num_retries):
+def _download_tarball(sr_path, image_id, glance_host, glance_port, auth_token):
     """Download the tarball image from Glance and extract it into the staging
-    area. Retry if there is any failure.
+    area. Makes one attempt; cleans up the staging area and re-raises on error.
     """
@@ -155,24 +155,15 @@ def _download_tarball_with_retry(sr_path, image_id, glance_host,
     logging.info("Downloading %s" % url)
     request = urllib2.Request(url, headers=headers)
 
-    sleep_time = 0.5
-
-    for try_num in xrange(1, num_retries + 2):
-        try:
-            staging_path = _make_staging_area(sr_path)
-            _download_tarball(request, staging_path)
-            return staging_path
-        except RetryException, error:
-            msg = "Downloading %(url)s attempt %(try_num)d error: %(error)s"
-            logging.error(msg % locals())
-
+    staging_path = _make_staging_area(sr_path)
+    try:
+        _download_tarball_and_verify(request, staging_path)
+    except Exception:
+        logging.exception('Failed to retrieve %(url)s' % locals())
         _cleanup_staging_area(staging_path)
-        time.sleep(sleep_time)
-        sleep_time = min(2 * sleep_time, 15)
+        raise
 
-    msg = "Unable to retrieve %(url)s after %(try_num)d attempt(s)." % locals()
-    logging.error(msg)
-    raise Exception(msg)
+    return staging_path
 
 
 def _import_vhds(sr_path, staging_path, uuid_stack):
@@ -492,13 +483,11 @@ def download_vhd(session, args):
     uuid_stack = params["uuid_stack"]
     sr_path = params["sr_path"]
     auth_token = params["auth_token"]
-    num_retries = params["num_retries"]
 
     staging_path = None
     try:
-        staging_path = _download_tarball_with_retry(sr_path, image_id,
-                                                    glance_host, glance_port,
-                                                    auth_token, num_retries)
+        staging_path = _download_tarball(sr_path, image_id, glance_host,
+                                         glance_port, auth_token)
         # Right now, it's easier to return a single string via XenAPI,
         # so we'll json encode the list of VHDs.
         return json.dumps(_import_vhds(sr_path, staging_path, uuid_stack))