Skip to content

Commit

Permalink
Ensures that when the executor returns a mutex wait cancellation error,
Browse files Browse the repository at this point in the history
the resolver loop returns ErrRestart.

This ensures that when the migration-inactive flag drops upon migration
commencement, we do not cause the unit to go into an error state, in
turn causing migration validation to fail.

Instead we mimic the behaviour for errors determined to be due to
fortress lockdown.
  • Loading branch information
manadart committed Jun 16, 2021
1 parent 35015d4 commit bec7c38
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 4 deletions.
2 changes: 1 addition & 1 deletion worker/uniter/operation/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func (x *executor) Run(op Operation, remoteStateChange <-chan remotestate.Snapsh
if op.NeedsGlobalMachineLock() {
releaser, err := x.acquireMachineLock(op.String())
if err != nil {
return errors.Annotatef(err, "could not acquire %q lock for %s", op, x.unitName)
return errors.Annotatef(err, "acquiring %q lock for %s", op, x.unitName)
}
defer x.logger.Debugf("lock released for %s", x.unitName)
defer releaser()
Expand Down
15 changes: 15 additions & 0 deletions worker/uniter/resolver/loop.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package resolver
import (
"github.com/juju/charm/v7/hooks"
"github.com/juju/errors"
"github.com/juju/mutex"

"github.com/juju/juju/core/lxdprofile"
"github.com/juju/juju/worker/fortress"
Expand All @@ -20,6 +21,7 @@ var logger interface{}
// Logger represents the logging methods used in this package.
type Logger interface {
Errorf(string, ...interface{})
Warningf(string, ...interface{})
Debugf(string, ...interface{})
Tracef(string, ...interface{})
}
Expand Down Expand Up @@ -114,6 +116,19 @@ func Loop(cfg LoopConfig, localState *LocalState) error {
cfg.Logger.Tracef("running op: %v", op)
if err := cfg.Executor.Run(op, remoteStateChanged); err != nil {
close(done)

if errors.Cause(err) == mutex.ErrCancelled {
// If the lock acquisition was cancelled (such as when the
// migration-inactive flag drops, we do not want the
// resolver to surface that error. This puts the agent into
// the "failed" state, which causes the initial migration
// validation phase to fail.
// The safest thing to do is to bounce the loop and
// reevaluate our state, which is what happens upon a
// fortress error anyway (uniter.TranslateFortressErrors).
cfg.Logger.Warningf("executor lock acquisition cancelled")
return ErrRestart
}
return errors.Trace(err)
}
close(done)
Expand Down
31 changes: 28 additions & 3 deletions worker/uniter/resolver/loop_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"github.com/juju/charm/v7"
"github.com/juju/charm/v7/hooks"
"github.com/juju/loggo"

"github.com/juju/mutex"
envtesting "github.com/juju/testing"
jc "github.com/juju/testing/checkers"
gc "gopkg.in/check.v1"
Expand Down Expand Up @@ -259,7 +259,7 @@ func (s *LoopSuite) TestLoopWithChange(c *gc.C) {
}

func (s *LoopSuite) TestRunFails(c *gc.C) {
s.executor.SetErrors(errors.New("Run fails"))
s.executor.SetErrors(errors.New("run fails"))
s.resolver = resolver.ResolverFunc(func(
_ resolver.LocalState,
_ remotestate.Snapshot,
Expand All @@ -268,7 +268,7 @@ func (s *LoopSuite) TestRunFails(c *gc.C) {
return mockOp{}, nil
})
_, err := s.loop()
c.Assert(err, gc.ErrorMatches, "Run fails")
c.Assert(err, gc.ErrorMatches, "run fails")
}

func (s *LoopSuite) TestNextOpFails(c *gc.C) {
Expand Down Expand Up @@ -412,6 +412,31 @@ func (s *LoopSuite) testCheckCharmUpgradeCallsRun(c *gc.C) {
s.executor.CheckCallNames(c, "State", "State", "Run", "State")
}

func (s *LoopSuite) TestCancelledLockAcquisitionCausesRestart(c *gc.C) {
s.executor = &mockOpExecutor{
Executor: nil,
Stub: envtesting.Stub{},
st: operation.State{
Started: true,
Kind: operation.Continue,
},
run: func(operation.Operation, <-chan remotestate.Snapshot) error {
return mutex.ErrCancelled
},
}

s.resolver = resolver.ResolverFunc(func(
_ resolver.LocalState,
_ remotestate.Snapshot,
_ operation.Factory,
) (operation.Operation, error) {
return &mockOp{}, nil
})

_, err := s.loop()
c.Assert(err, gc.Equals, resolver.ErrRestart)
}

func waitChannel(c *gc.C, ch <-chan interface{}, activity string) interface{} {
select {
case v := <-ch:
Expand Down

0 comments on commit bec7c38

Please sign in to comment.