Skip to content

Commit

Permalink
Always create the run_instance records locally
Browse files Browse the repository at this point in the history
Currently a request for multiple instances issent to the scheduler,
where it is written to the database. It appears that this was done so
that more advanced schedulers could handle the request as one
batch, but the result is the scheduler is sometimes slow enough
that the call will timeout.

Instead this converts to always creating the instance records
locally and making run_instance into a casting instead of a call.

This made a small change to the rpc api for run instance, so the
version was bumped. Legacy messages are still handled properly.

Fixes bug 1036911

Co-authored-by: Chris Behrens <cbehrens@codestud.com>

Change-Id: I63bbc98c285faebec53f8e62857c01548807db68
  • Loading branch information
vishvananda committed Aug 16, 2012
1 parent 574a78f commit 8718f8e
Show file tree
Hide file tree
Showing 9 changed files with 180 additions and 191 deletions.
148 changes: 43 additions & 105 deletions nova/compute/api.py
Expand Up @@ -373,8 +373,7 @@ def _create_instance(self, context, instance_type,
access_ip_v4, access_ip_v6,
requested_networks, config_drive,
block_device_mapping, auto_disk_config,
reservation_id=None, create_instance_here=False,
scheduler_hints=None):
reservation_id=None, scheduler_hints=None):
"""Verify all the input parameters regardless of the provisioning
strategy being performed and schedule the instance(s) for
creation."""
Expand Down Expand Up @@ -495,45 +494,48 @@ def _create_instance(self, context, instance_type,

LOG.debug(_("Going to run %s instances...") % num_instances)

if create_instance_here:
instance = self.create_db_entry_for_new_instance(
context, instance_type, image, base_options,
security_group, block_device_mapping,
quota_reservations)

# Reservations committed; don't double-commit
quota_reservations = None

# Tells scheduler we created the instance already.
base_options['uuid'] = instance['uuid']
use_call = False
else:
# We need to wait for the scheduler to create the instance
# DB entries, because the instance *could* be # created in
# a child zone.
use_call = True

filter_properties = dict(scheduler_hints=scheduler_hints)
if context.is_admin and forced_host:
filter_properties['force_hosts'] = [forced_host]

# TODO(comstud): We should use rpc.multicall when we can
# retrieve the full instance dictionary from the scheduler.
# Otherwise, we could exceed the AMQP max message size limit.
# This would require the schedulers' schedule_run_instances
# methods to return an iterator vs a list.
instances = self._schedule_run_instance(
use_call,
context, base_options,
instance_type,
availability_zone, injected_files,
admin_password, image,
num_instances, requested_networks,
block_device_mapping, security_group,
filter_properties, quota_reservations)

if create_instance_here:
return ([instance], reservation_id)
instances = []
instance_uuids = []
try:
for i in xrange(num_instances):
options = base_options.copy()
instance = self.create_db_entry_for_new_instance(
context, instance_type, image, options,
security_group, block_device_mapping)
instances.append(instance)
instance_uuids.append(instance['uuid'])
except Exception:
# Clean up as best we can.
with excutils.save_and_reraise_exception():
try:
for instance_uuid in instance_uuids:
self.db.instance_destroy(context,
instance_uuid)
finally:
QUOTAS.rollback(context, quota_reservations)

# Commit the reservations
QUOTAS.commit(context, quota_reservations)

request_spec = {
'image': jsonutils.to_primitive(image),
'instance_properties': base_options,
'instance_type': instance_type,
'instance_uuids': instance_uuids,
'block_device_mapping': block_device_mapping,
'security_group': security_group,
}

self.scheduler_rpcapi.run_instance(context,
request_spec=request_spec,
admin_password=admin_password, injected_files=injected_files,
requested_networks=requested_networks, is_first_time=True,
filter_properties=filter_properties)

return (instances, reservation_id)

@staticmethod
Expand Down Expand Up @@ -698,7 +700,7 @@ def _populate_instance_for_create(self, base_options, image,
#NOTE(bcwaldon): No policy check since this is only used by scheduler and
# the compute api. That should probably be cleaned up, though.
def create_db_entry_for_new_instance(self, context, instance_type, image,
base_options, security_group, block_device_mapping, reservations):
base_options, security_group, block_device_mapping):
"""Create an entry in the DB for this new instance,
including any related table updates (such as security group,
etc).
Expand All @@ -724,48 +726,8 @@ def create_db_entry_for_new_instance(self, context, instance_type, image,
notifications.send_update_with_states(context, instance, None,
vm_states.BUILDING, None, None, service="api")

# Commit the reservations
if reservations:
QUOTAS.commit(context, reservations)

return instance

def _schedule_run_instance(self,
use_call,
context, base_options,
instance_type,
availability_zone, injected_files,
admin_password, image,
num_instances,
requested_networks,
block_device_mapping,
security_group,
filter_properties,
quota_reservations):
"""Send a run_instance request to the schedulers for processing."""

pid = context.project_id
uid = context.user_id

LOG.debug(_("Sending create to scheduler for %(pid)s/%(uid)s's") %
locals())

request_spec = {
'image': jsonutils.to_primitive(image),
'instance_properties': base_options,
'instance_type': instance_type,
'num_instances': num_instances,
'block_device_mapping': block_device_mapping,
'security_group': security_group,
}

return self.scheduler_rpcapi.run_instance(context,
request_spec=request_spec,
admin_password=admin_password, injected_files=injected_files,
requested_networks=requested_networks, is_first_time=True,
filter_properties=filter_properties,
reservations=quota_reservations, call=use_call)

def _check_create_policies(self, context, availability_zone,
requested_networks, block_device_mapping):
"""Check policies for create()."""
Expand Down Expand Up @@ -795,21 +757,13 @@ def create(self, context, instance_type,
scheduler. The scheduler will determine where the instance(s)
go and will handle creating the DB entries.
Returns a tuple of (instances, reservation_id) where instances
could be 'None' or a list of instance dicts depending on if
we waited for information from the scheduler or not.
Returns a tuple of (instances, reservation_id)
"""

self._check_create_policies(context, availability_zone,
requested_networks, block_device_mapping)

# We can create the DB entry for the instance here if we're
# only going to create 1 instance.
# This speeds up API responses for builds
# as we don't need to wait for the scheduler.
create_instance_here = max_count == 1 or max_count is None

(instances, reservation_id) = self._create_instance(
return self._create_instance(
context, instance_type,
image_href, kernel_id, ramdisk_id,
min_count, max_count,
Expand All @@ -820,24 +774,8 @@ def create(self, context, instance_type,
access_ip_v4, access_ip_v6,
requested_networks, config_drive,
block_device_mapping, auto_disk_config,
create_instance_here=create_instance_here,
scheduler_hints=scheduler_hints)

if create_instance_here or instances is None:
return (instances, reservation_id)

inst_ret_list = []
for instance in instances:
if instance.get('_is_precooked', False):
inst_ret_list.append(instance)
else:
# Scheduler only gives us the 'id'. We need to pull
# in the created instances from the DB
instance = self.db.instance_get(context, instance['id'])
inst_ret_list.append(dict(instance.iteritems()))

return (inst_ret_list, reservation_id)

def trigger_provider_fw_rules_refresh(self, context):
"""Called when a rule is added/removed from a provider firewall"""

Expand Down Expand Up @@ -1561,7 +1499,7 @@ def resize(self, context, instance, flavor_id=None, **kwargs):

request_spec = {
'instance_type': new_instance_type,
'num_instances': 1,
'instance_uuids': instance['uuid'],
'instance_properties': instance}

filter_properties = {'ignore_hosts': []}
Expand Down
2 changes: 1 addition & 1 deletion nova/compute/manager.py
Expand Up @@ -594,7 +594,7 @@ def _reschedule(self, context, instance_uuid, requested_networks,
instance_uuid=instance_uuid)
return

request_spec['num_instances'] = 1
request_spec['instance_uuids'] = [instance_uuid]

LOG.debug(_("Re-scheduling instance: attempt %d"),
retry['num_attempts'], instance_uuid=instance_uuid)
Expand Down
26 changes: 26 additions & 0 deletions nova/scheduler/chance.py
Expand Up @@ -65,6 +65,32 @@ def schedule_run_instance(self, context, request_spec,
requested_networks, is_first_time,
filter_properties, reservations):
"""Create and run an instance or instances"""
if 'instance_uuids' not in request_spec:
return self._legacy_schedule_run_instance(context, request_spec,
admin_password, injected_files, requested_networks,
is_first_time, filter_properties, reservations)
instances = []
instance_uuids = request_spec.get('instance_uuids')
for num, instance_uuid in enumerate(instance_uuids):
host = self._schedule(context, 'compute', request_spec,
filter_properties)
request_spec['instance_properties']['launch_index'] = num
updated_instance = driver.instance_update_db(context,
instance_uuid, host)
self.compute_rpcapi.run_instance(context,
instance=updated_instance, host=host,
requested_networks=requested_networks,
injected_files=injected_files,
admin_password=admin_password, is_first_time=is_first_time,
request_spec=request_spec,
filter_properties=filter_properties)
instances.append(driver.encode_instance(updated_instance))
return instances

def _legacy_schedule_run_instance(self, context, request_spec,
admin_password, injected_files,
requested_networks, is_first_time,
filter_properties, reservations):
num_instances = request_spec.get('num_instances', 1)
instances = []
for num in xrange(num_instances):
Expand Down

0 comments on commit 8718f8e

Please sign in to comment.