This repository has been archived by the owner on Sep 12, 2022. It is now read-only.
/
supervisor_handlers.go
66 lines (59 loc) · 2.2 KB
/
supervisor_handlers.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
package supervisor
import (
"context"
"github.com/palantir/stacktrace"
"go.uber.org/atomic"
"go.uber.org/zap"
"github.com/Raphy42/weekend/core/errors"
"github.com/Raphy42/weekend/core/logger"
"github.com/Raphy42/weekend/core/scheduler"
)
// handleSchedule schedules the manifest referenced by payload.ManifestID and
// records the resulting handle in s.children, keyed by the handle's ID.
//
// The spec is resolved before any supervisor state is modified, so a message
// carrying an unknown manifest ID is rejected without leaving a restart
// counter behind in s.restarts. For a known manifest the counter is created
// (starting at zero) the first time the manifest is seen, and it is kept even
// if the subsequent call to s.scheduler.Schedule fails.
//
// It returns an error when no spec is registered for payload.ManifestID or
// when s.scheduler.Schedule returns an error.
func (s *Supervisor) handleSchedule(ctx context.Context, payload *scheduler.ScheduleMessagePayload) error {
	// Validate first: only manifests with a registered spec may touch state.
	spec, ok := s.specLut[payload.ManifestID]
	if !ok {
		return stacktrace.NewError("no spec associated with manifest ManifestID '%s'", payload.ManifestID)
	}

	// Lazily create the restart counter; an existing counter is left untouched
	// so that re-scheduling does not reset the restart count.
	if _, tracked := s.restarts[payload.ManifestID]; !tracked {
		s.restarts[payload.ManifestID] = atomic.NewInt32(0)
	}

	handle, err := s.scheduler.Schedule(ctx, spec.Manifest, spec.Args)
	if err != nil {
		return stacktrace.Propagate(err, "could not schedule manifest '%s'", spec.Manifest.Name)
	}

	s.children[handle.ID] = handle
	return nil
}
// handleFailure reacts to a failure message according to the restart strategy
// of the spec registered for payload.ManifestID.
//
//   - A payload with a nil Error requires no action and yields nil.
//   - PermanentRestartStrategy: the failure is always handed to s.restart.
//   - TransientRestartStrategy: the failure is handed to s.restart only when
//     errors.IsTransient reports true; otherwise the error is propagated to
//     the caller.
//   - TemporaryRestartStrategy: nothing is restarted and nil is returned.
//
// It returns an error when no spec is registered for the manifest, when the
// error is not transient under the transient strategy, or when s.restart
// fails. It panics if the spec carries a restart strategy that is none of the
// three above.
func (s *Supervisor) handleFailure(ctx context.Context, payload *scheduler.FailureMessagePayload) error {
	log := logger.FromContext(ctx)

	// Nothing to supervise when the message carries no error.
	if payload.Error == nil {
		return nil
	}

	spec, found := s.specLut[payload.ManifestID]
	if !found {
		return stacktrace.NewError("no spec associated with manifest ManifestID '%s'", payload.ManifestID)
	}

	manifestField := zap.Stringer("wk.manifest.id", payload.ManifestID)

	switch mode := spec.Strategy.Restart; mode {
	case PermanentRestartStrategy, TransientRestartStrategy:
		// Only the transient strategy inspects the error before restarting.
		if mode == TransientRestartStrategy && !errors.IsTransient(payload.Error) {
			log.Debug("not re-scheduling due to restart strategy", manifestField)
			return stacktrace.Propagate(payload.Error,
				"error was not transient, terminating supervision",
			)
		}
		log.Debug("re-scheduling", manifestField)
		return s.restart(ctx, payload.ManifestID, payload.HandleID, payload.Error)

	case TemporaryRestartStrategy:
		// The failure is deliberately swallowed here: under this strategy an
		// error does not terminate the whole supervision tree.
		log.Debug("not re-scheduling due to restart strategy", manifestField)
		return nil

	default:
		panic(stacktrace.NewErrorWithCode(errors.EUnreachable, "invalid supervision strategy"))
	}
}