diff --git a/services/chaospod.go b/services/chaospod.go index f5839262d..016794a96 100644 --- a/services/chaospod.go +++ b/services/chaospod.go @@ -310,8 +310,11 @@ func (m *chaosPodService) GenerateChaosPodOfDisruption(disruption *chaosv1beta1. // to give time for cleaning activeDeadlineSeconds := int64(disruption.RemainingDuration().Seconds()) + 10 + // It can cause abnormalities in the status of a Disruption if injector pods consider themselves complete in the ~second before the chaos-controller believes the disruption is complete. + // To avoid making our termination state machine logic more complicated, we will pad the injector pod duration by two seconds. + // See https://github.com/DataDog/chaos-controller/issues/748 args = append(args, - "--deadline", time.Now().Add(disruption.RemainingDuration()).Format(time.RFC3339)) + "--deadline", time.Now().Add(chaostypes.InjectorPadDuration).Add(disruption.RemainingDuration()).Format(time.RFC3339)) chaosPod = corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ diff --git a/services/chaospod_test.go b/services/chaospod_test.go index 4e077b362..a68fb9246 100644 --- a/services/chaospod_test.go +++ b/services/chaospod_test.go @@ -728,7 +728,7 @@ var _ = Describe("Chaos Pod Service", func() { args.NotInjectedBefore = notInjectedBefore expectedArgs = args.CreateCmdArgs(subSpec.GenerateArgs()) - expectedArgs = append(expectedArgs, "--deadline", time.Now().Add(disruption.RemainingDuration()).Format(time.RFC3339)) + expectedArgs = append(expectedArgs, "--deadline", time.Now().Add(chaostypes.InjectorPadDuration).Add(disruption.RemainingDuration()).Format(time.RFC3339)) // Action chaosPods, err = chaosPodService.GenerateChaosPodsOfDisruption(&disruption, DefaultTargetName, DefaultTargetNodeName, targetContainers, DefaultTargetPodIp) diff --git a/types/types.go b/types/types.go index 86f3405be..bacdca05d 100644 --- a/types/types.go +++ b/types/types.go @@ -125,6 +125,9 @@ const ( ChaosPodFinalizer = finalizerPrefix + "/chaos-pod" PulsingDisruptionMinimumDuration = 500 * time.Millisecond + // InjectorPadDuration is the length of time we extend the injector's duration on top of the disruption's duration, + // in order to ensure the manager stops the disruption prior to the injectors dying + InjectorPadDuration = 2 * time.Second // InjectorCgroupClassID is linked to the TC tree in the injector network disruption. // Also used in the DNS Disruption to allow combined Network + DNS Disruption