From c9cade2d73194a56f4f0de03f11c4d2d6e481535 Mon Sep 17 00:00:00 2001 From: Andy McCrae Date: Wed, 24 Oct 2012 13:21:33 +0000 Subject: [PATCH] Consistent Rollback for instance creation failures Fixes Bug 1065092 Adds a single QUOTAS.rollback for all exceptions rather than performing the rollback individually. Ensures that no stale "reservations" are left in place after a failed instance creation. Change-Id: I354726b651ae8feb6153d5865e0b14b54c73314b --- nova/compute/api.py | 202 ++++++++++++++++++++++---------------------- 1 file changed, 103 insertions(+), 99 deletions(-) diff --git a/nova/compute/api.py b/nova/compute/api.py index c1b5ac3795e..585a19b6e1b 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -391,111 +391,114 @@ def _create_instance(self, context, instance_type, raise exception.InstanceTypeNotFound( instance_type_id=instance_type['id']) - # Check quotas + # Reserve quotas num_instances, quota_reservations = self._check_num_instances_quota( context, instance_type, min_count, max_count) - self._check_metadata_properties_quota(context, metadata) - self._check_injected_file_quota(context, injected_files) - self._check_requested_networks(context, requested_networks) - - (image_service, image_id) = glance.get_remote_image_service(context, - image_href) - image = image_service.show(context, image_id) - - if instance_type['memory_mb'] < int(image.get('min_ram') or 0): - QUOTAS.rollback(context, quota_reservations) - raise exception.InstanceTypeMemoryTooSmall() - if instance_type['root_gb'] < int(image.get('min_disk') or 0): - QUOTAS.rollback(context, quota_reservations) - raise exception.InstanceTypeDiskTooSmall() - - # Handle config_drive - config_drive_id = None - if config_drive and config_drive is not True: - # config_drive is volume id - config_drive_id = config_drive - config_drive = None - - # Ensure config_drive image exists - image_service.show(context, config_drive_id) - - kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk( -
context, kernel_id, ramdisk_id, image, image_service) - - if key_data is None and key_name: - key_pair = self.db.key_pair_get(context, context.user_id, key_name) - key_data = key_pair['public_key'] - if reservation_id is None: - reservation_id = utils.generate_uid('r') - - # grab the architecture from glance - architecture = image['properties'].get('architecture', 'Unknown') - - root_device_name = block_device.properties_root_device_name( - image['properties']) - - availability_zone, forced_host = self._handle_availability_zone( - availability_zone) - - base_options = { - 'reservation_id': reservation_id, - 'image_ref': image_href, - 'kernel_id': kernel_id or '', - 'ramdisk_id': ramdisk_id or '', - 'power_state': power_state.NOSTATE, - 'vm_state': vm_states.BUILDING, - 'config_drive_id': config_drive_id or '', - 'config_drive': config_drive or '', - 'user_id': context.user_id, - 'project_id': context.project_id, - 'launch_time': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()), - 'instance_type_id': instance_type['id'], - 'memory_mb': instance_type['memory_mb'], - 'vcpus': instance_type['vcpus'], - 'root_gb': instance_type['root_gb'], - 'ephemeral_gb': instance_type['ephemeral_gb'], - 'display_name': display_name, - 'display_description': display_description or '', - 'user_data': user_data, - 'key_name': key_name, - 'key_data': key_data, - 'locked': False, - 'metadata': metadata, - 'access_ip_v4': access_ip_v4, - 'access_ip_v6': access_ip_v6, - 'availability_zone': availability_zone, - 'root_device_name': root_device_name, - 'architecture': architecture, - 'progress': 0} - - if user_data: - l = len(user_data) - if l > MAX_USERDATA_SIZE: - # NOTE(mikal): user_data is stored in a text column, and the - # database might silently truncate if its over length. 
- raise exception.InstanceUserDataTooLarge( - length=l, maxsize=MAX_USERDATA_SIZE) + # Try to create the instance + try: + instances = [] + instance_uuids = [] + + self._check_metadata_properties_quota(context, metadata) + self._check_injected_file_quota(context, injected_files) + self._check_requested_networks(context, requested_networks) + + (image_service, image_id) = glance.get_remote_image_service( + context, image_href) + image = image_service.show(context, image_id) + + if instance_type['memory_mb'] < int(image.get('min_ram') or 0): + raise exception.InstanceTypeMemoryTooSmall() + if instance_type['root_gb'] < int(image.get('min_disk') or 0): + raise exception.InstanceTypeDiskTooSmall() + + # Handle config_drive + config_drive_id = None + if config_drive and config_drive is not True: + # config_drive is volume id + config_drive_id = config_drive + config_drive = None + + # Ensure config_drive image exists + image_service.show(context, config_drive_id) + + kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk( + context, kernel_id, ramdisk_id, image, image_service) + + if key_data is None and key_name: + key_pair = self.db.key_pair_get(context, context.user_id, + key_name) + key_data = key_pair['public_key'] + + if reservation_id is None: + reservation_id = utils.generate_uid('r') + + # grab the architecture from glance + architecture = image['properties'].get('architecture', 'Unknown') + + root_device_name = block_device.properties_root_device_name( + image['properties']) + + availability_zone, forced_host = self._handle_availability_zone( + availability_zone) + + base_options = { + 'reservation_id': reservation_id, + 'image_ref': image_href, + 'kernel_id': kernel_id or '', + 'ramdisk_id': ramdisk_id or '', + 'power_state': power_state.NOSTATE, + 'vm_state': vm_states.BUILDING, + 'config_drive_id': config_drive_id or '', + 'config_drive': config_drive or '', + 'user_id': context.user_id, + 'project_id': context.project_id, + 'launch_time': 
time.strftime('%Y-%m-%dT%H:%M:%SZ', + time.gmtime()), + 'instance_type_id': instance_type['id'], + 'memory_mb': instance_type['memory_mb'], + 'vcpus': instance_type['vcpus'], + 'root_gb': instance_type['root_gb'], + 'ephemeral_gb': instance_type['ephemeral_gb'], + 'display_name': display_name, + 'display_description': display_description or '', + 'user_data': user_data, + 'key_name': key_name, + 'key_data': key_data, + 'locked': False, + 'metadata': metadata, + 'access_ip_v4': access_ip_v4, + 'access_ip_v6': access_ip_v6, + 'availability_zone': availability_zone, + 'root_device_name': root_device_name, + 'architecture': architecture, + 'progress': 0} + + if user_data: + l = len(user_data) + if l > MAX_USERDATA_SIZE: + # NOTE(mikal): user_data is stored in a text column, and + # the database might silently truncate if its over length. + raise exception.InstanceUserDataTooLarge( + length=l, maxsize=MAX_USERDATA_SIZE) - try: - base64.decodestring(user_data) - except base64.binascii.Error: - raise exception.InstanceUserDataMalformed() + try: + base64.decodestring(user_data) + except base64.binascii.Error: + raise exception.InstanceUserDataMalformed() - options_from_image = self._inherit_properties_from_image( - image, auto_disk_config) + options_from_image = self._inherit_properties_from_image( + image, auto_disk_config) - base_options.update(options_from_image) + base_options.update(options_from_image) - LOG.debug(_("Going to run %s instances...") % num_instances) + LOG.debug(_("Going to run %s instances...") % num_instances) - filter_properties = dict(scheduler_hints=scheduler_hints) - if context.is_admin and forced_host: - filter_properties['force_hosts'] = [forced_host] + filter_properties = dict(scheduler_hints=scheduler_hints) + if context.is_admin and forced_host: + filter_properties['force_hosts'] = [forced_host] - instances = [] - instance_uuids = [] - try: for i in xrange(num_instances): options = base_options.copy() instance = 
self.create_db_entry_for_new_instance( @@ -503,13 +506,14 @@ def _create_instance(self, context, instance_type, security_group, block_device_mapping) instances.append(instance) instance_uuids.append(instance['uuid']) + + # In the case of any exceptions, attempt DB cleanup and rollback the + # quota reservations. except Exception: - # Clean up as best we can. with excutils.save_and_reraise_exception(): try: for instance_uuid in instance_uuids: - self.db.instance_destroy(context, - instance_uuid) + self.db.instance_destroy(context, instance_uuid) finally: QUOTAS.rollback(context, quota_reservations)