From 20294279ee1d6d82dbb87c4c29e3a8b9fd0cb8bd Mon Sep 17 00:00:00 2001 From: Jian Wen Date: Mon, 14 Jan 2013 19:13:24 +0800 Subject: [PATCH] Handle compute node not available for live migration This patch handles exception.ComputeServiceUnavailable by restoring instance's vm_state and instance's task_state after live migration failure caused by unavailable source/dest compute node. Raises detailed HTTPBadRequest explanation for this exception. Fixes bug 973393 and bug 1051881 Conflicts: nova/scheduler/driver.py nova/scheduler/manager.py nova/tests/api/openstack/compute/contrib/test_admin_actions.py nova/tests/scheduler/test_scheduler.py Change-Id: If825b61fad9c4e3030f2e6c5002907255eaf3661 --- .../compute/contrib/admin_actions.py | 2 + nova/exception.py | 2 +- nova/scheduler/driver.py | 7 ++- nova/scheduler/manager.py | 11 ++++ .../compute/contrib/test_admin_actions.py | 61 ++++++++++++++++--- nova/tests/scheduler/test_scheduler.py | 55 +++++++++++++++++ 6 files changed, 125 insertions(+), 13 deletions(-) diff --git a/nova/api/openstack/compute/contrib/admin_actions.py b/nova/api/openstack/compute/contrib/admin_actions.py index fb8916cbff2..0350334ab55 100644 --- a/nova/api/openstack/compute/contrib/admin_actions.py +++ b/nova/api/openstack/compute/contrib/admin_actions.py @@ -286,6 +286,8 @@ def _migrate_live(self, req, id, body): instance = self.compute_api.get(context, id) self.compute_api.live_migrate(context, instance, block_migration, disk_over_commit, host) + except exception.ComputeServiceUnavailable as ex: + raise exc.HTTPBadRequest(explanation=str(ex)) except Exception: msg = _("Live migration of instance %(id)s to host %(host)s" " failed") % locals() diff --git a/nova/exception.py b/nova/exception.py index 2eeef046a9a..64d31a1b1e2 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -359,7 +359,7 @@ class ComputeResourcesUnavailable(ServiceUnavailable): class ComputeServiceUnavailable(ServiceUnavailable): - message = _("Compute service is unavailable at this time.") + message = _("Compute service of %(host)s is unavailable at this time.") class UnableToMigrateToSelf(Invalid): diff --git a/nova/scheduler/driver.py b/nova/scheduler/driver.py index 0ca1d7b89ff..91d05a944df 100644 --- a/nova/scheduler/driver.py +++ b/nova/scheduler/driver.py @@ -274,8 +274,11 @@ def _live_migration_dest_check(self, context, instance_ref, dest): """ # Checking dest exists and compute node. - dservice_refs = db.service_get_all_compute_by_host(context, dest) - dservice_ref = dservice_refs[0] + try: + dservice_refs = db.service_get_all_compute_by_host(context, dest) + dservice_ref = dservice_refs[0] + except exception.NotFound: + raise exception.ComputeServiceUnavailable(host=dest) # Checking dest host is alive. if not utils.service_is_up(dservice_ref): diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py index 09eb7c23586..b186f7d6c89 100644 --- a/nova/scheduler/manager.py +++ b/nova/scheduler/manager.py @@ -24,6 +24,7 @@ import functools import sys +from nova.compute import task_states from nova.compute import utils as compute_utils from nova.compute import vm_states from nova import db @@ -88,6 +89,16 @@ def live_migration(self, context, instance, dest, return self.driver.schedule_live_migration( context, instance, dest, block_migration, disk_over_commit) + except exception.ComputeServiceUnavailable as ex: + request_spec = {'instance_properties': { + 'uuid': instance['uuid'], }, + } + with excutils.save_and_reraise_exception(): + self._set_vm_state_and_notify('live_migration', + dict(vm_state=instance['vm_state'], + task_state=None, + expected_task_state=task_states.MIGRATING,), + context, ex, request_spec) except Exception as ex: with excutils.save_and_reraise_exception(): self._set_vm_state_and_notify('live_migration', diff --git a/nova/tests/api/openstack/compute/contrib/test_admin_actions.py b/nova/tests/api/openstack/compute/contrib/test_admin_actions.py index 22fac7e6a83..49da60cb66c 100644 --- a/nova/tests/api/openstack/compute/contrib/test_admin_actions.py +++ b/nova/tests/api/openstack/compute/contrib/test_admin_actions.py @@ -62,13 +62,6 @@ def fake_compute_api_get(self, context, instance_id): 'task_state': None} -def fake_scheduler_api_live_migration(self, context, dest, - block_migration=False, - disk_over_commit=False, instance=None, - instance_id=None, topic=None): - return None - - class AdminActionsTest(test.TestCase): _actions = ('pause', 'unpause', 'suspend', 'resume', 'migrate', @@ -91,9 +84,10 @@ def setUp(self): self.UUID = utils.gen_uuid() for _method in self._methods: self.stubs.Set(compute.API, _method, fake_compute_api) - self.stubs.Set(scheduler_rpcapi.SchedulerAPI, - 'live_migration', - fake_scheduler_api_live_migration) + self.flags( + osapi_compute_extension=[ + 'nova.api.openstack.compute.contrib.select_extensions'], + osapi_compute_ext_list=['Admin_actions']) def test_admin_api_actions(self): app = fakes.wsgi_app() @@ -144,7 +138,16 @@ def fake_update(inst, context, instance, task_state, expected_task_state): return None + def fake_scheduler_api_live_migration(self, context, dest, + block_migration=False, + disk_over_commit=False, instance=None, + instance_id=None, topic=None): + return None + self.stubs.Set(compute.API, 'update', fake_update) + self.stubs.Set(scheduler_rpcapi.SchedulerAPI, + 'live_migration', + fake_scheduler_api_live_migration) res = req.get_response(app) self.assertEqual(res.status_int, 202) @@ -168,6 +171,44 @@ def test_migrate_live_missing_dict_param(self): res = req.get_response(app) self.assertEqual(res.status_int, 400) + def test_migrate_live_compute_service_unavailable(self): + ctxt = context.get_admin_context() + ctxt.user_id = 'fake' + ctxt.project_id = 'fake' + ctxt.is_admin = True + app = fakes.wsgi_app(fake_auth_context=ctxt) + req = webob.Request.blank('/v2/fake/servers/%s/action' % self.UUID) + req.method = 'POST' + req.body = jsonutils.dumps({ + 'os-migrateLive': { + 'host': 'hostname', + 'block_migration': False, + 'disk_over_commit': False, + } + }) + req.content_type = 'application/json' + + def fake_update(inst, context, instance, + task_state, expected_task_state): + return None + + def fake_scheduler_api_live_migration(context, dest, + block_migration=False, + disk_over_commit=False, instance=None, + instance_id=None, topic=None): + raise exception.ComputeServiceUnavailable(host='host') + + self.stubs.Set(compute.API, 'update', fake_update) + self.stubs.Set(scheduler_rpcapi.SchedulerAPI, + 'live_migration', + fake_scheduler_api_live_migration) + + res = req.get_response(app) + self.assertEqual(res.status_int, 400) + self.assertIn( + unicode(exception.ComputeServiceUnavailable(host='host')), + res.body) + class CreateBackupTests(test.TestCase): diff --git a/nova/tests/scheduler/test_scheduler.py b/nova/tests/scheduler/test_scheduler.py index af297b58985..0d16b8e437f 100644 --- a/nova/tests/scheduler/test_scheduler.py +++ b/nova/tests/scheduler/test_scheduler.py @@ -24,6 +24,7 @@ from nova.compute import api as compute_api from nova.compute import power_state from nova.compute import rpcapi as compute_rpcapi +from nova.compute import task_states from nova.compute import utils as compute_utils from nova.compute import vm_states from nova import context @@ -187,6 +188,37 @@ def test_create_volume_no_valid_host_puts_volume_in_error(self): self.assertRaises(exception.NoValidHost, self.manager.create_volume, self.context, '1', '2') + def test_live_migration_compute_service_notavailable(self): + inst = {"uuid": "fake-instance-id", + "vm_state": vm_states.ACTIVE, + "task_state": task_states.MIGRATING, } + + dest = 'fake_host' + block_migration = False + disk_over_commit = False + + self._mox_schedule_method_helper('schedule_live_migration') + self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc') + self.mox.StubOutWithMock(db, 'instance_update_and_get_original') + + self.manager.driver.schedule_live_migration(self.context, + inst, dest, block_migration, disk_over_commit).AndRaise( + exception.ComputeServiceUnavailable(host="src")) + db.instance_update_and_get_original(self.context, inst["uuid"], + {"vm_state": inst['vm_state'], + "task_state": None, + "expected_task_state": task_states.MIGRATING, + }).AndReturn((inst, inst)) + compute_utils.add_instance_fault_from_exc(self.context, inst["uuid"], + mox.IsA(exception.ComputeServiceUnavailable), + mox.IgnoreArg()) + + self.mox.ReplayAll() + self.assertRaises(exception.ComputeServiceUnavailable, + self.manager.live_migration, + self.context, inst, dest, block_migration, + disk_over_commit) + def test_prep_resize_no_valid_host_back_in_active_state(self): fake_instance_uuid = 'fake-instance-id' inst = {"vm_state": "", "task_state": ""} @@ -477,6 +509,29 @@ def test_live_migration_compute_src_not_alive(self): block_migration=block_migration, disk_over_commit=disk_over_commit) + def test_live_migration_compute_dest_not_exist(self): + # Raise exception when dest compute node does not exist. + + self.mox.StubOutWithMock(self.driver, '_live_migration_src_check') + self.mox.StubOutWithMock(db, 'service_get_all_compute_by_host') + + dest = 'fake_host2' + block_migration = False + disk_over_commit = False + instance = self._live_migration_instance() + + self.driver._live_migration_src_check(self.context, instance) + # Compute down + db.service_get_all_compute_by_host(self.context, + dest).AndRaise(exception.NotFound()) + + self.mox.ReplayAll() + self.assertRaises(exception.ComputeServiceUnavailable, + self.driver.schedule_live_migration, self.context, + instance=instance, dest=dest, + block_migration=block_migration, + disk_over_commit=disk_over_commit) + def test_live_migration_compute_dest_not_alive(self): """Raise exception when dest compute node is not alive."""