Skip to content

Commit

Permalink
bug 944145: race condition causes VM's state to be SHUTOFF
Browse files Browse the repository at this point in the history
ensure we close down the contention window between _sync_power_states
and live migration/host evacuation.

Change-Id: Ie6cbd9bf2eee206b4a821a4b77a6dced409f3983
  • Loading branch information
Armando Migliaccio committed Mar 7, 2012
1 parent 0bd3b47 commit ec20076
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 8 deletions.
46 changes: 42 additions & 4 deletions nova/compute/manager.py
Expand Up @@ -2265,10 +2265,48 @@ def _sync_power_states(self, context):
vm_instance = self.driver.get_info(db_instance)
vm_power_state = vm_instance['state']
except exception.InstanceNotFound:
LOG.warn(_("Instance found in database but not known by "
"hypervisor. Setting power state to NOSTATE"),
locals(), instance=db_instance)
vm_power_state = power_state.NOSTATE
# This exception might have been caused by a race condition
# between _sync_power_states and live migrations. Two cases
# are possible, as documented below. To tell them apart,
# refresh the DB instance state.
try:
u = self.db.instance_get_by_uuid(context,
db_instance['uuid'])
if self.host != u['host']:
# On the sending end of nova-compute, _sync_power_states
# may have yielded to the greenthread performing a live
# migration, which in turn has changed the resident host
# of the VM. However, the instance is still active; it
# is just in the process of migrating to another host.
# This implies that the source compute must relinquish
# control to the destination compute.
LOG.info(_("During the sync_power process the "
"instance %(uuid)s has moved from "
"host %(src)s to host %(dst)s") %
{'uuid': db_instance['uuid'],
'src': self.host,
'dst': u['host']})
elif (u['host'] == self.host and
u['vm_state'] == vm_states.MIGRATING):
# On the receiving end of nova-compute, it could happen
# that the DB instance already reports the new resident
# host but the actual VM has not shown up on the hypervisor
# yet. In this case, let's allow the loop to continue
# and run the state sync in a later round.
LOG.info(_("Instance %s is in the process of "
"migrating to this host. Wait next "
"sync_power cycle before setting "
"power state to NOSTATE")
% db_instance['uuid'])
else:
LOG.warn(_("Instance found in database but not "
"known by hypervisor. Setting power "
"state to NOSTATE"), locals(),
instance=db_instance)
vm_power_state = power_state.NOSTATE
except exception.InstanceNotFound:
# no need to update vm_state for deleted instances
continue

if vm_power_state == db_power_state:
continue
Expand Down
14 changes: 10 additions & 4 deletions nova/virt/xenapi/host.py
Expand Up @@ -23,6 +23,7 @@
import json
import random

from nova.compute import vm_states
from nova import context
from nova import db
from nova import exception
Expand Down Expand Up @@ -73,17 +74,22 @@ def host_maintenance_mode(self, host, mode):
instance = db.instance_get_by_uuid(ctxt, uuid)
vm_counter = vm_counter + 1

dest = _host_find(ctxt, self._session, host, host_ref)
db.instance_update(ctxt, instance.id,
{'host': dest,
'vm_state': vm_states.MIGRATING})
self._session.call_xenapi('VM.pool_migrate',
vm_ref, host_ref, {})
new_host = _host_find(ctxt, self._session,
host, host_ref)
db.instance_update(ctxt,
instance.id, {'host': new_host})
migrations_counter = migrations_counter + 1
db.instance_update(ctxt, instance.id,
{'vm_state': vm_states.ACTIVE})
break
except self.XenAPI.Failure:
LOG.exception('Unable to migrate VM %(vm_ref)s'
'from %(host)s' % locals())
db.instance_update(ctxt, instance.id,
{'host': host,
'vm_state': vm_states.ACTIVE})
if vm_counter == migrations_counter:
return 'on_maintenance'
else:
Expand Down

0 comments on commit ec20076

Please sign in to comment.