From 284f6ea64b0837981a24251825b19abef354a9a8 Mon Sep 17 00:00:00 2001
From: Brian Elliott
Date: Mon, 8 Oct 2012 18:35:14 +0000
Subject: [PATCH] Set instance host field after resource claim

Set the 'host' field on the instance after the resource tracker on the
compute node has accepted the build.  The field is set after resources
are confirmed to be available, while the COMPUTE_RESOURCES_SEMAPHORE is
held.  The semaphore ensures the resource usage values stay consistent
even if the update_available_resource periodic task audit runs.

bug 1060255

(cherry picked from commit 5fd7a9dba127bae812333196a5fa48a933212aeb)

Also includes the following bugfix to the original patch:

  Set host prior to allocating network information.

  Make sure the 'host' field on the instance is set before allocating
  network information.

  bug 1065004

  (cherry picked from commit 2649f14673f8ef5ca257583f1cdf5fe57d4734b9)

Change-Id: I92105ec14924960ac8ef7ca8c810783085314e10
---
 nova/compute/manager.py                       | 11 +++---
 nova/compute/resource_tracker.py              | 24 +++++++++++++
 nova/scheduler/chance.py                      |  2 +-
 nova/scheduler/driver.py                      |  8 ++---
 nova/scheduler/filter_scheduler.py            |  3 +-
 nova/tests/compute/test_resource_tracker.py   | 36 +++++++++++--------
 nova/tests/scheduler/test_chance_scheduler.py |  8 ++---
 nova/tests/scheduler/test_scheduler.py        |  2 +-
 8 files changed, 60 insertions(+), 34 deletions(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index ebf329e2aea..c875da64201 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -479,17 +479,14 @@ def _run_instance(self, context, request_spec,
             self._notify_about_instance_usage(
                     context, instance, "create.start",
                     extra_usage_info=extra_usage_info)
-            network_info = self._allocate_network(context, instance,
-                    requested_networks)
+            network_info = None
             try:
                 limits = filter_properties.get('limits', {})
                 with self.resource_tracker.resource_claim(context, instance,
                         limits):
-                    # Resources are available to build this instance here,
-                    # mark it as belonging to this host:
-                    self._instance_update(context, instance['uuid'],
-                            host=self.host, launched_on=self.host)
+                    network_info = self._allocate_network(context, instance,
+                            requested_networks)

                     block_device_info = self._prep_block_device(context,
                             instance)
                     instance = self._spawn(context, instance, image_meta,
@@ -534,7 +531,7 @@ def _log_original_error():
                 try:
                     self._deallocate_network(context, instance)
                 except Exception:
-                    # do not attempt retry if network de-allocation occurs:
+                    # do not attempt retry if network de-allocation failed:
                     _log_original_error()
                     raise

diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py
index e5e1194a57b..fe56b1dad18 100644
--- a/nova/compute/resource_tracker.py
+++ b/nova/compute/resource_tracker.py
@@ -23,6 +23,7 @@
 from nova import db
 from nova import exception
 from nova import flags
+from nova import notifications
 from nova.openstack.common import cfg
 from nova.openstack.common import importutils
 from nova.openstack.common import jsonutils
@@ -152,8 +153,18 @@ def begin_resource_claim(self, context, instance_ref, limits=None,
                   failed.
""" if self.disabled: + # compute_driver doesn't support resource tracking, just + # set the 'host' field and continue the build: + instance_ref = self._set_instance_host(context, + instance_ref['uuid']) return + # sanity check: + if instance_ref['host']: + LOG.warning(_("Host field should be not be set on the instance " + "until resources have been claimed."), + instance=instance_ref) + if not limits: limits = {} @@ -184,6 +195,8 @@ def begin_resource_claim(self, context, instance_ref, limits=None, if not self._can_claim_cpu(vcpus, vcpu_limit): return + instance_ref = self._set_instance_host(context, instance_ref['uuid']) + # keep track of this claim until we know whether the compute operation # was successful/completed: claim = Claim(instance_ref, timeout) @@ -196,6 +209,17 @@ def begin_resource_claim(self, context, instance_ref, limits=None, self._update(context, self.compute_node) return claim + def _set_instance_host(self, context, instance_uuid): + """Tag the instance as belonging to this host. This should be done + while the COMPUTE_RESOURCES_SEMPAHORE is being held so the resource + claim will not be lost if the audit process starts. + """ + values = {'host': self.host, 'launched_on': self.host} + (old_ref, instance_ref) = db.instance_update_and_get_original(context, + instance_uuid, values) + notifications.send_update(context, old_ref, instance_ref) + return instance_ref + def _can_claim_memory(self, memory_mb, memory_mb_limit): """Test if memory needed for a claim can be safely allocated""" # Installed memory and usage info: diff --git a/nova/scheduler/chance.py b/nova/scheduler/chance.py index f6a289f26f9..65806d4d517 100644 --- a/nova/scheduler/chance.py +++ b/nova/scheduler/chance.py @@ -68,7 +68,7 @@ def schedule_run_instance(self, context, request_spec, host = self._schedule(context, 'compute', request_spec, filter_properties) updated_instance = driver.instance_update_db(context, - instance_uuid, host) + instance_uuid) self.compute_rpcapi.run_instance(context, instance=updated_instance, host=host, requested_networks=requested_networks, diff --git a/nova/scheduler/driver.py b/nova/scheduler/driver.py index a72f3c26d16..0ca1d7b89ff 100644 --- a/nova/scheduler/driver.py +++ b/nova/scheduler/driver.py @@ -101,13 +101,13 @@ def cast_to_volume_host(context, host, method, **kwargs): LOG.debug(_("Casted '%(method)s' to volume '%(host)s'") % locals()) -def instance_update_db(context, instance_uuid, host): - '''Set the host and scheduled_at fields of an Instance. +def instance_update_db(context, instance_uuid): + '''Clear the host and set the scheduled_at field of an Instance. :returns: An Instance with the updated fields set properly. 
     '''
     now = timeutils.utcnow()
-    values = {'host': host, 'scheduled_at': now}
+    values = {'host': None, 'scheduled_at': now}
     return db.instance_update(context, instance_uuid, values)


@@ -116,7 +116,7 @@ def cast_to_compute_host(context, host, method, **kwargs):
     instance_uuid = kwargs.get('instance_uuid', None)

     if instance_uuid:
-        instance_update_db(context, instance_uuid, host)
+        instance_update_db(context, instance_uuid)

     rpc.cast(context,
              rpc.queue_get_for(context, 'compute', host),
diff --git a/nova/scheduler/filter_scheduler.py b/nova/scheduler/filter_scheduler.py
index b2928177df7..e45fcc09945 100644
--- a/nova/scheduler/filter_scheduler.py
+++ b/nova/scheduler/filter_scheduler.py
@@ -142,8 +142,7 @@ def _provision_resource(self, context, weighted_host, request_spec,
                 'scheduler.run_instance.scheduled', notifier.INFO,
                 payload)

-        updated_instance = driver.instance_update_db(context,
-                instance_uuid, weighted_host.host_state.host)
+        updated_instance = driver.instance_update_db(context, instance_uuid)

         self.compute_rpcapi.run_instance(context, instance=updated_instance,
                 host=weighted_host.host_state.host,
diff --git a/nova/tests/compute/test_resource_tracker.py b/nova/tests/compute/test_resource_tracker.py
index 2dc01e415c7..0dccb0d15d5 100644
--- a/nova/tests/compute/test_resource_tracker.py
+++ b/nova/tests/compute/test_resource_tracker.py
@@ -22,6 +22,7 @@
 from nova.compute import resource_tracker
 from nova.compute import task_states
 from nova.compute import vm_states
+from nova import context
 from nova import db
 from nova import exception
 from nova.openstack.common import log as logging
@@ -32,14 +33,6 @@
 LOG = logging.getLogger(__name__)


-class FakeContext(object):
-    def __init__(self, is_admin=False):
-        self.is_admin = is_admin
-
-    def elevated(self):
-        return FakeContext(is_admin=True)
-
-
 class UnsupportedVirtDriver(driver.ComputeDriver):
     """Pretend version of a lame virt driver"""
     def get_available_resource(self):
@@ -81,11 +74,13 @@ def setUp(self):
         self.flags(reserved_host_disk_mb=0,
                    reserved_host_memory_mb=0)

-        self.context = FakeContext()
+        self.context = context.RequestContext('fake', 'fake')

-        self._instances = []
+        self._instances = {}
         self.stubs.Set(db, 'instance_get_all_by_host',
-                lambda c, h: self._instances)
+                lambda c, h: self._instances.values())
+        self.stubs.Set(db, 'instance_update_and_get_original',
+                self._fake_instance_update_and_get_original)

     def _create_compute_node(self, values=None):
         compute = {
@@ -122,8 +117,10 @@ def _create_service(self, host="fakehost", compute=None):
         return service

     def _fake_instance(self, *args, **kwargs):
+
+        instance_uuid = str(uuid.uuid1())
         instance = {
-            'uuid': str(uuid.uuid1()),
+            'uuid': instance_uuid,
             'vm_state': vm_states.BUILDING,
             'task_state': None,
             'memory_mb': 2,
@@ -136,9 +133,17 @@ def _fake_instance(self, *args, **kwargs):
         }
         instance.update(kwargs)

-        self._instances.append(instance)
+        self._instances[instance_uuid] = instance
         return instance

+    def _fake_instance_update_and_get_original(self, context, instance_uuid,
+            values):
+        instance = self._instances[instance_uuid]
+        instance.update(values)
+        # the test doesn't care what the original instance values are, it's
+        # only used in the subsequent notification:
+        return (instance, instance)
+
     def _tracker(self, unsupported=False):
         host = "fakehost"

@@ -168,7 +173,8 @@ def testDisabled(self):

     def testDisabledClaim(self):
         # basic claim:
-        claim = self.tracker.begin_resource_claim(self.context, 1, 1)
+        instance = self._fake_instance()
+        claim = self.tracker.begin_resource_claim(self.context, instance)
         self.assertEqual(None, claim)

     def testDisabledInstanceClaim(self):
@@ -200,7 +206,7 @@ def testDisabledUpdateUsage(self):
 class MissingServiceTestCase(BaseTestCase):
     def setUp(self):
         super(MissingServiceTestCase, self).setUp()
-        self.context = FakeContext(is_admin=True)
+        self.context = context.get_admin_context()
         self.tracker = self._tracker()

     def testMissingService(self):
diff --git a/nova/tests/scheduler/test_chance_scheduler.py b/nova/tests/scheduler/test_chance_scheduler.py
index 4fb9ab617ce..26cde055b2d 100644
--- a/nova/tests/scheduler/test_chance_scheduler.py
+++ b/nova/tests/scheduler/test_chance_scheduler.py
@@ -90,8 +90,8 @@ def inc_launch_index(*args):
         self.driver.hosts_up(ctxt_elevated, 'compute').AndReturn(
                 ['host1', 'host2', 'host3', 'host4'])
         random.random().AndReturn(.5)
-        driver.instance_update_db(ctxt, instance1['uuid'],
-                'host3').WithSideEffects(inc_launch_index).AndReturn(instance1)
+        driver.instance_update_db(ctxt, instance1['uuid']).WithSideEffects(
+                inc_launch_index).AndReturn(instance1)
         compute_rpcapi.ComputeAPI.run_instance(ctxt, host='host3',
                 instance=instance1, requested_networks=None,
                 injected_files=None, admin_password=None, is_first_time=None,
@@ -102,8 +102,8 @@ def inc_launch_index(*args):
         self.driver.hosts_up(ctxt_elevated, 'compute').AndReturn(
                 ['host1', 'host2', 'host3', 'host4'])
         random.random().AndReturn(.2)
-        driver.instance_update_db(ctxt, instance2['uuid'],
-                'host1').WithSideEffects(inc_launch_index).AndReturn(instance2)
+        driver.instance_update_db(ctxt, instance2['uuid']).WithSideEffects(
+                inc_launch_index).AndReturn(instance2)
         compute_rpcapi.ComputeAPI.run_instance(ctxt, host='host1',
                 instance=instance2, requested_networks=None,
                 injected_files=None, admin_password=None, is_first_time=None,
diff --git a/nova/tests/scheduler/test_scheduler.py b/nova/tests/scheduler/test_scheduler.py
index 83e9cffc8c5..af297b58985 100644
--- a/nova/tests/scheduler/test_scheduler.py
+++ b/nova/tests/scheduler/test_scheduler.py
@@ -712,7 +712,7 @@ def test_cast_to_compute_host_update_db_with_instance_uuid(self):
         timeutils.utcnow().AndReturn('fake-now')
         db.instance_update(self.context, 'fake_uuid',
-                {'host': host, 'scheduled_at': 'fake-now'})
+                {'host': None, 'scheduled_at': 'fake-now'})
         rpc.queue_get_for(self.context, 'compute', host).AndReturn(queue)
         rpc.cast(self.context, queue,
                 {'method': method,
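
Note for reviewers: the snippet below is a minimal, self-contained sketch of the ordering this patch enforces, not Nova code. MiniResourceTracker, COMPUTE_RESOURCES_LOCK, audit_memory_used and the dict-based "instances table" are illustrative stand-ins for the real ResourceTracker, the COMPUTE_RESOURCES_SEMAPHORE and the update_available_resource audit. The point it demonstrates is only that the instance is tagged with the host inside the claim, under the same lock the audit takes, so the audit never sees an instance on the host whose resources are unaccounted for.

import threading
from contextlib import contextmanager

# Stand-in for the COMPUTE_RESOURCES_SEMAPHORE used by the real tracker.
COMPUTE_RESOURCES_LOCK = threading.Lock()


class MiniResourceTracker(object):
    """Accept a resource claim, then tag the instance with the host,
    all under the same lock that the usage audit takes."""

    def __init__(self, host, memory_mb_free):
        self.host = host
        self.memory_mb_free = memory_mb_free
        self.instances = {}     # stands in for the instances table

    def _set_instance_host(self, instance):
        # Only ever called while COMPUTE_RESOURCES_LOCK is held, mirroring
        # _set_instance_host() in the patch above.
        instance['host'] = self.host
        instance['launched_on'] = self.host
        self.instances[instance['uuid']] = instance

    @contextmanager
    def resource_claim(self, instance):
        with COMPUTE_RESOURCES_LOCK:
            if instance['memory_mb'] > self.memory_mb_free:
                raise RuntimeError("insufficient memory for claim")
            self.memory_mb_free -= instance['memory_mb']
            # The instance is marked as belonging to this host only after
            # the claim succeeds and before the lock is released, so an
            # audit never sees a "hosted" instance whose usage is untracked.
            self._set_instance_host(instance)
        yield

    def audit_memory_used(self):
        # Mirrors the update_available_resource() periodic task: recompute
        # usage from the instances recorded against this host.
        with COMPUTE_RESOURCES_LOCK:
            return sum(i['memory_mb'] for i in self.instances.values())


tracker = MiniResourceTracker('compute1', memory_mb_free=2048)
instance = {'uuid': 'fake-uuid', 'memory_mb': 512}

with tracker.resource_claim(instance):
    # network allocation, block device setup and spawn would happen here,
    # after the instance already carries host='compute1'
    assert instance['host'] == 'compute1'

print(tracker.audit_memory_used())     # 512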