diff --git a/nova/compute/api.py b/nova/compute/api.py index 531aa441488..bc51e9b7303 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -33,6 +33,7 @@ from nova.compute import power_state from nova.compute import rpcapi as compute_rpcapi from nova.compute import task_states +from nova.compute import utils as compute_utils from nova.compute import vm_states from nova.consoleauth import rpcapi as consoleauth_rpcapi from nova import crypto @@ -906,21 +907,23 @@ def _delete(self, context, instance): host=src_host, cast=False, reservations=downsize_reservations) - services = self.db.service_get_all_compute_by_host( - context.elevated(), instance['host']) is_up = False + bdms = self.db.block_device_mapping_get_all_by_instance( + context, instance["uuid"]) #Note(jogo): db allows for multiple compute services per host + try: + services = self.db.service_get_all_compute_by_host( + context.elevated(), instance['host']) + except exception.ComputeHostNotFound: + services = [] for service in services: if utils.service_is_up(service): is_up = True self.compute_rpcapi.terminate_instance(context, instance) break - if is_up == False: + if not is_up: # If compute node isn't up, just delete from DB - LOG.warning(_('host for instance is down, deleting from ' - 'database'), instance=instance) - self.db.instance_destroy(context, instance['uuid']) - + self._local_delete(context, instance, bdms) if reservations: QUOTAS.commit(context, reservations) except exception.InstanceNotFound: @@ -932,6 +935,45 @@ def _delete(self, context, instance): if reservations: QUOTAS.rollback(context, reservations) + def _local_delete(self, context, instance, bdms): + LOG.warning(_('host for instance is down, deleting from ' + 'database'), instance=instance) + instance_uuid = instance['uuid'] + self.db.instance_info_cache_delete(context, instance_uuid) + compute_utils.notify_about_instance_usage( + context, instance, "delete.start") + + elevated = context.elevated() + 
self.network_api.deallocate_for_instance(elevated,
+                instance)
+        system_meta = self.db.instance_system_metadata_get(context,
+                instance_uuid)
+
+        # cleanup volumes
+        for bdm in bdms:
+            if bdm['volume_id']:
+                volume = self.volume_api.get(context, bdm['volume_id'])
+                # NOTE(vish): We don't have access to correct volume
+                #             connector info, so just pass a fake
+                #             connector. This can be improved when we
+                #             expose get_volume_connector to rpc.
+                connector = {'ip': '127.0.0.1', 'initiator': 'iqn.fake'}
+                self.volume_api.terminate_connection(context,
+                                                     volume,
+                                                     connector)
+                self.volume_api.detach(elevated, volume)
+                if bdm['delete_on_termination']:
+                    self.volume_api.delete(context, volume)
+            self.db.block_device_mapping_destroy(context, bdm['id'])
+        instance = self._instance_update(context,
+                                         instance_uuid,
+                                         vm_state=vm_states.DELETED,
+                                         task_state=None,
+                                         terminated_at=timeutils.utcnow())
+        self.db.instance_destroy(context, instance_uuid)  # keep after update
+        compute_utils.notify_about_instance_usage(
+            context, instance, "delete.end", system_metadata=system_meta)
+
     # NOTE(maoy): we allow delete to be called no matter what vm_state says.
@wrap_check_policy @check_instance_lock diff --git a/nova/network/manager.py b/nova/network/manager.py index 02bd4097d02..6599184ca3f 100644 --- a/nova/network/manager.py +++ b/nova/network/manager.py @@ -223,7 +223,7 @@ def _rpc_allocate_fixed_ip(self, context, instance_id, network_id, network = self._get_network_by_id(context, network_id) return self.allocate_fixed_ip(context, instance_id, network, **kwargs) - def deallocate_fixed_ip(self, context, address, host=None): + def deallocate_fixed_ip(self, context, address, host=None, teardown=True): """Call the superclass deallocate_fixed_ip if i'm the correct host otherwise call to the correct host""" fixed_ip = self.db.fixed_ip_get_by_address(context, address) @@ -233,18 +233,27 @@ def deallocate_fixed_ip(self, context, address, host=None): # NOTE(tr3buchet): but if we are, host came from instance['host'] if not network['multi_host']: host = network['host'] - if host != self.host: - # need to call deallocate_fixed_ip on correct network host - topic = rpc.queue_get_for(context, FLAGS.network_topic, host) - args = {'address': address, - 'host': host} - rpc.call(context, topic, - {'method': 'deallocate_fixed_ip', - 'args': args}) - else: - # i am the correct host, run here - super(RPCAllocateFixedIP, self).deallocate_fixed_ip(context, - address) + if host == self.host: + # NOTE(vish): deallocate the fixed ip locally + return super(RPCAllocateFixedIP, self).deallocate_fixed_ip(context, + address) + + if network['multi_host']: + service = self.db.service_get_by_host_and_topic(context, + host, + 'network') + if not service or not utils.service_is_up(service): + # NOTE(vish): deallocate the fixed ip locally but don't + # teardown network devices + return super(RPCAllocateFixedIP, self).deallocate_fixed_ip( + context, address, teardown=False) + + topic = rpc.queue_get_for(context, FLAGS.network_topic, host) + args = {'address': address, + 'host': host} + rpc.call(context, topic, + {'method': 'deallocate_fixed_ip', + 'args': 
args}) def wrap_check_policy(func): @@ -594,14 +603,24 @@ def disassociate_floating_ip(self, context, address, # send to correct host, unless i'm the correct host network = self._get_network_by_id(context, fixed_ip['network_id']) + interface = FLAGS.public_interface or floating_ip['interface'] if network['multi_host']: instance = self.db.instance_get_by_uuid(context, fixed_ip['instance_uuid']) - host = instance['host'] + service = self.db.service_get_by_host_and_topic( + context, instance['host'], 'network') + if service and utils.service_is_up(service): + host = instance['host'] + else: + # NOTE(vish): if the service is down just deallocate the data + # locally. Set the host to local so the call will + # not go over rpc and set interface to None so the + # teardown in the driver does not happen. + host = self.host + interface = None else: host = network['host'] - interface = FLAGS.public_interface or floating_ip['interface'] if host == self.host: # i'm the correct host self._disassociate_floating_ip(context, address, interface) @@ -618,8 +637,9 @@ def _disassociate_floating_ip(self, context, address, interface): # disassociate floating ip fixed_address = self.db.floating_ip_disassociate(context, address) - # go go driver time - self.l3driver.remove_floating_ip(address, fixed_address, interface) + if interface: + # go go driver time + self.l3driver.remove_floating_ip(address, fixed_address, interface) payload = dict(project_id=context.project_id, floating_ip=address) notifier.notify(context, notifier.publisher_id("network"), @@ -1284,7 +1304,7 @@ def allocate_fixed_ip(self, context, instance_id, network, **kwargs): self._setup_network_on_host(context, network) return address - def deallocate_fixed_ip(self, context, address, host=None): + def deallocate_fixed_ip(self, context, address, host=None, teardown=True): """Returns a fixed ip to the pool.""" fixed_ip_ref = self.db.fixed_ip_get_by_address(context, address) vif_id = fixed_ip_ref['virtual_interface_id'] @@ 
-1301,29 +1321,31 @@ def deallocate_fixed_ip(self, context, address, host=None): self.instance_dns_manager.delete_entry(n, self.instance_dns_domain) - network = self._get_network_by_id(context, fixed_ip_ref['network_id']) - self._teardown_network_on_host(context, network) - - if FLAGS.force_dhcp_release: - dev = self.driver.get_dev(network) - # NOTE(vish): The below errors should never happen, but there may - # be a race condition that is causing them per - # https://code.launchpad.net/bugs/968457, so we log - # an error to help track down the possible race. - msg = _("Unable to release %s because vif doesn't exist.") - if not vif_id: - LOG.error(msg % address) - return - - vif = self.db.virtual_interface_get(context, vif_id) - - if not vif: - LOG.error(msg % address) - return - - # NOTE(vish): This forces a packet so that the release_fixed_ip - # callback will get called by nova-dhcpbridge. - self.driver.release_dhcp(dev, address, vif['address']) + if teardown: + network = self._get_network_by_id(context, + fixed_ip_ref['network_id']) + self._teardown_network_on_host(context, network) + + if FLAGS.force_dhcp_release: + dev = self.driver.get_dev(network) + # NOTE(vish): The below errors should never happen, but there + # may be a race condition that is causing them per + # https://code.launchpad.net/bugs/968457, so we log + # an error to help track down the possible race. + msg = _("Unable to release %s because vif doesn't exist.") + if not vif_id: + LOG.error(msg % address) + return + + vif = self.db.virtual_interface_get(context, vif_id) + + if not vif: + LOG.error(msg % address) + return + + # NOTE(vish): This forces a packet so that the release_fixed_ip + # callback will get called by nova-dhcpbridge. 
+ self.driver.release_dhcp(dev, address, vif['address']) self.db.fixed_ip_update(context, address, {'allocated': False, @@ -1831,9 +1853,10 @@ def _allocate_fixed_ips(self, context, instance_id, host, networks, self.allocate_fixed_ip(context, instance_id, network, address=address) - def deallocate_fixed_ip(self, context, address, host=None): + def deallocate_fixed_ip(self, context, address, host=None, teardown=True): """Returns a fixed ip to the pool.""" - super(FlatManager, self).deallocate_fixed_ip(context, address) + super(FlatManager, self).deallocate_fixed_ip(context, address, host, + teardown) self.db.fixed_ip_disassociate(context, address) def _setup_network_on_host(self, context, network): diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py index 14e830c7107..8bebdf88a05 100644 --- a/nova/tests/compute/test_compute.py +++ b/nova/tests/compute/test_compute.py @@ -3002,6 +3002,32 @@ def test_delete_in_resized(self): db.instance_destroy(self.context, instance['uuid']) + def test_delete_with_down_host(self): + self.network_api_called = False + + def dummy(*args, **kwargs): + self.network_api_called = True + pass + self.stubs.Set(self.compute_api.network_api, 'deallocate_for_instance', + dummy) + + #use old time to disable machine + old_time = datetime.datetime(2012, 4, 1) + + instance, instance_uuid = self._run_instance(params={ + 'host': FLAGS.host}) + timeutils.set_time_override(old_time) + self.compute_api.delete(self.context, instance) + timeutils.clear_time_override() + + self.assertEqual(instance['task_state'], None) + self.assertTrue(self.network_api_called) + + #local delete, so db should be clean + self.assertRaises(exception.InstanceNotFound, db.instance_destroy, + self.context, + instance['uuid']) + def test_repeated_delete_quota(self): in_use = {'instances': 1}