Skip to content

Commit

Permalink
Fix scheduler error handler
Browse files Browse the repository at this point in the history
Fixes bug 904971

Scheduler error handler was looking for instance_id when it may or may
not exist.  Added the proper code for it to determine whether the
instance was actually created in the DB or not and how to find its ID.

Note: there's some pretty nasty stuff in here, but unavoidable without
larger changes.  I'd like to hold off on these larger changes, because
the problem should be solved with some of the scalability work coming.

Tests included.

Change-Id: Ief5fde8128437c9dc257af9c4d0c2950d0962ce5
  • Loading branch information
comstud committed Dec 16, 2011
1 parent 3f7353d commit baf7e02
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 16 deletions.
2 changes: 1 addition & 1 deletion nova/compute/api.py
Expand Up @@ -310,7 +310,7 @@ def _create_instance(self, context, instance_type,
context, instance_type, image, base_options,
security_group, block_device_mapping)
# Tells scheduler we created the instance already.
base_options['id'] = instance['id']
base_options['uuid'] = instance['uuid']
rpc_method = rpc.cast
else:
# We need to wait for the scheduler to create the instance
Expand Down
5 changes: 4 additions & 1 deletion nova/scheduler/chance.py
Expand Up @@ -74,7 +74,10 @@ def schedule_run_instance(self, context, request_spec, *_args, **kwargs):
driver.cast_to_compute_host(context, host,
'run_instance', instance_uuid=instance['uuid'], **kwargs)
instances.append(driver.encode_instance(instance))

# So if we loop around, create_instance_db_entry will actually
# create a new entry, instead of assume it's been created
# already
del request_spec['instance_properties']['uuid']
return instances

def schedule_prep_resize(self, context, request_spec, *args, **kwargs):
Expand Down
7 changes: 6 additions & 1 deletion nova/scheduler/distributed_scheduler.py
Expand Up @@ -173,7 +173,12 @@ def _provision_resource_locally(self, context, weighted_host, request_spec,
instance = self.create_instance_db_entry(context, request_spec)
driver.cast_to_compute_host(context, weighted_host.host,
'run_instance', instance_uuid=instance['uuid'], **kwargs)
return driver.encode_instance(instance, local=True)
inst = driver.encode_instance(instance, local=True)
# So if another instance is created, create_instance_db_entry will
# actually create a new entry, instead of assume it's been created
# already
del request_spec['instance_properties']['uuid']
return inst

def _make_weighted_host_from_blob(self, blob):
"""Returns the decrypted blob as a WeightedHost object
Expand Down
8 changes: 6 additions & 2 deletions nova/scheduler/driver.py
Expand Up @@ -147,9 +147,9 @@ def hosts_up(self, context, topic):
def create_instance_db_entry(self, context, request_spec):
"""Create instance DB entry based on request_spec"""
base_options = request_spec['instance_properties']
if base_options.get('id'):
if base_options.get('uuid'):
# Instance was already created before calling scheduler
return db.instance_get(context, base_options['id'])
return db.instance_get_by_uuid(context, base_options['uuid'])
image = request_spec['image']
instance_type = request_spec.get('instance_type')
security_group = request_spec.get('security_group', 'default')
Expand All @@ -158,6 +158,10 @@ def create_instance_db_entry(self, context, request_spec):
instance = self.compute_api.create_db_entry_for_new_instance(
context, instance_type, image, base_options,
security_group, block_device_mapping)
# NOTE(comstud): This needs to be set for the generic exception
# checking in scheduler manager, so that it'll set this instance
# to ERROR properly.
base_options['uuid'] = instance['uuid']
return instance

def schedule(self, context, topic, method, *_args, **_kwargs):
Expand Down
29 changes: 20 additions & 9 deletions nova/scheduler/manager.py
Expand Up @@ -108,18 +108,29 @@ def _schedule(self, method, context, topic, *args, **kwargs):
with utils.save_and_reraise_exception():
self._set_instance_error(method, context, ex, *args, **kwargs)

# NOTE (David Subiros) : If the exception is raised ruing run_instance
# method, the DB record probably does not exist yet.
# NOTE (David Subiros) : If the exception is raised during run_instance
# method, we may or may not have an instance_id
def _set_instance_error(self, method, context, ex, *args, **kwargs):
"""Sets VM to Error state"""
LOG.warning(_("Failed to schedule_%(method)s: %(ex)s") % locals())
if method == "start_instance" or method == "run_instance":
instance_id = kwargs['instance_id']
if instance_id:
LOG.warning(_("Setting instance %(instance_id)s to "
"ERROR state.") % locals())
db.instance_update(context, instance_id,
{'vm_state': vm_states.ERROR})
if method != "start_instance" and method != "run_instance":
return
# FIXME(comstud): Clean this up after fully on UUIDs.
instance_id = kwargs.get('instance_uuid', kwargs.get('instance_id'))
if not instance_id:
# FIXME(comstud): We should make this easier. run_instance
# only sends a request_spec, and an instance may or may not
# have been created in the API (or scheduler) already. If it
# was created, there's a 'uuid' set in the instance_properties
# of the request_spec.
request_spec = kwargs.get('request_spec', {})
properties = request_spec.get('instance_properties', {})
instance_id = properties.get('uuid', {})
if instance_id:
LOG.warning(_("Setting instance %(instance_id)s to "
"ERROR state.") % locals())
db.instance_update(context, instance_id,
{'vm_state': vm_states.ERROR})

# NOTE (masumotok) : This method should be moved to nova.api.ec2.admin.
# Based on bexar design summit discussion,
Expand Down
4 changes: 4 additions & 0 deletions nova/scheduler/simple.py
Expand Up @@ -82,6 +82,10 @@ def schedule_run_instance(self, context, request_spec, *_args, **_kwargs):
driver.cast_to_compute_host(context, host, 'run_instance',
instance_uuid=instance_ref['uuid'], **_kwargs)
instances.append(driver.encode_instance(instance_ref))
# So if we loop around, create_instance_db_entry will actually
# create a new entry, instead of assume it's been created
# already
del request_spec['instance_properties']['uuid']
return instances

def schedule_start_instance(self, context, instance_id, *_args, **_kwargs):
Expand Down
31 changes: 29 additions & 2 deletions nova/tests/scheduler/test_scheduler.py
Expand Up @@ -113,6 +113,7 @@ def _fake_create_instance_db_entry(simple_self, context, request_spec):
instance = _create_instance_from_spec(request_spec)
global instance_uuids
instance_uuids.append(instance['uuid'])
request_spec['instance_properties']['uuid'] = instance['uuid']
return instance


Expand Down Expand Up @@ -236,15 +237,41 @@ def NoValidHost_raiser(context, topic, *args, **kwargs):
scheduler = manager.SchedulerManager()
ins_ref = _create_instance(task_state=task_states.STARTING,
vm_state=vm_states.STOPPED)
self.stubs = stubout.StubOutForTesting()
self.stubs.Set(TestDriver, 'schedule', NoValidHost_raiser)
self.mox.StubOutWithMock(rpc, 'cast', use_mock_anything=True)
ctxt = context.get_admin_context()
scheduler.start_instance(ctxt, 'topic', instance_id=ins_ref['id'])
# assert that the instance goes to ERROR state
self._assert_state({'vm_state': vm_states.ERROR,
'task_state': task_states.STARTING})

def test_no_valid_host_exception_on_run_with_id(self):
"""check the vm goes to ERROR state if run_instance fails"""

def NoValidHost_raiser(context, topic, *args, **kwargs):
raise exception.NoValidHost(_("Test NoValidHost exception"))
scheduler = manager.SchedulerManager()
ins_ref = _create_instance(task_state=task_states.STARTING,
vm_state=vm_states.STOPPED)
self.stubs.Set(TestDriver, 'schedule', NoValidHost_raiser)
ctxt = context.get_admin_context()
request_spec = {'instance_properties': {'uuid': ins_ref['uuid']}}
scheduler.run_instance(ctxt, 'topic', request_spec=request_spec)
# assert that the instance goes to ERROR state
self._assert_state({'vm_state': vm_states.ERROR,
'task_state': task_states.STARTING})

def test_no_valid_host_exception_on_run_without_id(self):
"""check error handler doesn't raise if instance wasn't created"""

def NoValidHost_raiser(context, topic, *args, **kwargs):
raise exception.NoValidHost(_("Test NoValidHost exception"))
scheduler = manager.SchedulerManager()
self.stubs.Set(TestDriver, 'schedule', NoValidHost_raiser)
ctxt = context.get_admin_context()
request_spec = {'instance_properties': {}}
scheduler.run_instance(ctxt, 'topic', request_spec=request_spec)
# No error

def test_show_host_resources_no_project(self):
"""No instance are running on the given host."""

Expand Down

0 comments on commit baf7e02

Please sign in to comment.