Skip to content

Commit

Permalink
Fix missing pause reason in status
Browse files Browse the repository at this point in the history
* Add more `ExtendedDaemonSetStatusReason`
* Check also Init and Ephemeral container state
  • Loading branch information
clamoriniere committed Jan 25, 2021
1 parent 2c7a85b commit cb836bd
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 10 deletions.
12 changes: 12 additions & 0 deletions api/v1alpha1/extendeddaemonset_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,18 @@ const (
ExtendedDaemonSetStatusRestartsTimeoutExceeded ExtendedDaemonSetStatusReason = "RestartsTimeoutExceeded"
// ExtendedDaemonSetStatusSlowStartTimeoutExceeded represents timeout on slow starts as the reason for the ExtendedDaemonSet status
ExtendedDaemonSetStatusSlowStartTimeoutExceeded ExtendedDaemonSetStatusReason = "SlowStartTimeoutExceeded"
// ExtendedDaemonSetStatusReasonErrImagePull represents ErrImagePull as the reason for the ExtendedDaemonSet status state
ExtendedDaemonSetStatusReasonErrImagePull ExtendedDaemonSetStatusReason = "ErrImagePull"
// ExtendedDaemonSetStatusReasonImagePullBackOff represents ImagePullBackOff as the reason for the ExtendedDaemonSet status state
ExtendedDaemonSetStatusReasonImagePullBackOff ExtendedDaemonSetStatusReason = "ImagePullBackOff"
// ExtendedDaemonSetStatusReasonCreateContainerConfigError represents CreateContainerConfigError as the reason for the ExtendedDaemonSet status state
ExtendedDaemonSetStatusReasonCreateContainerConfigError ExtendedDaemonSetStatusReason = "CreateContainerConfigError"
// ExtendedDaemonSetStatusReasonCreateContainerError represents CreateContainerError as the reason for the ExtendedDaemonSet status state
ExtendedDaemonSetStatusReasonCreateContainerError ExtendedDaemonSetStatusReason = "CreateContainerError"
// ExtendedDaemonSetStatusReasonPreStartHookError represents PreStartHookError as the reason for the ExtendedDaemonSet status state
ExtendedDaemonSetStatusReasonPreStartHookError ExtendedDaemonSetStatusReason = "PreStartHookError"
// ExtendedDaemonSetStatusReasonPostStartHookError represents PostStartHookError as the reason for the ExtendedDaemonSet status state
ExtendedDaemonSetStatusReasonPostStartHookError ExtendedDaemonSetStatusReason = "PostStartHookError"
// ExtendedDaemonSetStatusReasonUnknown represents an Unknown reason for the status state
ExtendedDaemonSetStatusReasonUnknown ExtendedDaemonSetStatusReason = "Unknown"
)
Expand Down
10 changes: 5 additions & 5 deletions controllers/extendeddaemonsetreplicaset/strategy/canary.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ func manageCanaryPodFailures(pods []*v1.Pod, params *Parameters, result *Result,
restartingPodStatus string

cannotStart bool
cannotStartPodReason string
cannotStartPodReason v1alpha1.ExtendedDaemonSetStatusReason
cannotStartPodStatus string
)

Expand All @@ -167,7 +167,7 @@ func manageCanaryPodFailures(pods []*v1.Pod, params *Parameters, result *Result,
cannotStart, cannotStartReason = podUtils.CannotStart(pod)
if cannotStart {
cannotStartPodStatus = fmt.Sprintf("Pod %s cannot start with reason: %s", pod.ObjectMeta.Name, string(cannotStartReason))
cannotStartPodReason = string(cannotStartReason)
cannotStartPodReason = cannotStartReason
} else if autoPauseEnabled && podUtils.PendingCreate(pod) && params.Strategy.Canary.AutoPause.MaxSlowStartDuration != nil {
if time.Now().After(pod.Status.StartTime.Time.Add(params.Strategy.Canary.AutoPause.MaxSlowStartDuration.Duration)) {
params.Logger.Info(
Expand All @@ -178,7 +178,7 @@ func manageCanaryPodFailures(pods []*v1.Pod, params *Parameters, result *Result,
cannotStart = true
cannotStartReason = v1alpha1.ExtendedDaemonSetStatusSlowStartTimeoutExceeded
cannotStartPodStatus = fmt.Sprintf("Pod %s cannot start with reason: %s", pod.ObjectMeta.Name, cannotStartReason)
cannotStartPodReason = string(cannotStartReason)
cannotStartPodReason = cannotStartReason
}
}

Expand Down Expand Up @@ -240,7 +240,7 @@ func manageCanaryPodFailures(pods []*v1.Pod, params *Parameters, result *Result,
metav1.NewTime(newRestartTime),
v1alpha1.ConditionTypePodRestarting,
v1.ConditionTrue,
cannotStartPodReason,
string(cannotStartPodReason),
restartingPodStatus,
false,
true,
Expand All @@ -263,7 +263,7 @@ func manageCanaryPodFailures(pods []*v1.Pod, params *Parameters, result *Result,
metav1.NewTime(now),
v1alpha1.ConditionTypePodCannotStart,
conditionStatus,
cannotStartPodReason,
string(cannotStartPodReason),
cannotStartPodStatus,
false,
true,
Expand Down
34 changes: 29 additions & 5 deletions pkg/controller/utils/pod/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ func GetPodConditionFromList(conditions []v1.PodCondition, conditionType v1.PodC
return -1, nil
}

// containerStatusList returns the pod's regular, init and ephemeral container
// statuses, in that order, as a single slice.
//
// The result is always backed by a freshly allocated array. Appending directly
// onto pod.Status.ContainerStatuses could write into spare capacity of the
// pod's own backing array, which is unsafe when the Pod object is shared with
// other readers. A nil slice is returned when the pod has no container status.
func containerStatusList(pod *v1.Pod) []v1.ContainerStatus {
	status := &pod.Status
	total := len(status.ContainerStatuses) + len(status.InitContainerStatuses) + len(status.EphemeralContainerStatuses)
	if total == 0 {
		return nil
	}

	all := make([]v1.ContainerStatus, 0, total)
	all = append(all, status.ContainerStatuses...)
	all = append(all, status.InitContainerStatuses...)
	return append(all, status.EphemeralContainerStatuses...)
}

// HighestRestartCount checks if a pod in the Canary deployment is restarting
// This returns the count and the "reason" for the pod with the most restarts
func HighestRestartCount(pod *v1.Pod) (int, datadoghqv1alpha1.ExtendedDaemonSetStatusReason) {
Expand All @@ -108,7 +113,7 @@ func HighestRestartCount(pod *v1.Pod) (int, datadoghqv1alpha1.ExtendedDaemonSetS
reason datadoghqv1alpha1.ExtendedDaemonSetStatusReason
)

for _, s := range pod.Status.ContainerStatuses {
for _, s := range containerStatusList(pod) {
if s.RestartCount > restartCount {
restartCount = s.RestartCount
reason = datadoghqv1alpha1.ExtendedDaemonSetStatusReasonUnknown
Expand All @@ -126,7 +131,7 @@ func MostRecentRestart(pod *v1.Pod) (time.Time, datadoghqv1alpha1.ExtendedDaemon
restartTime time.Time
reason datadoghqv1alpha1.ExtendedDaemonSetStatusReason
)
for _, s := range pod.Status.ContainerStatuses {
for _, s := range containerStatusList(pod) {
if s.RestartCount != 0 && s.LastTerminationState != (v1.ContainerState{}) && s.LastTerminationState.Terminated != (&v1.ContainerStateTerminated{}) {
if s.LastTerminationState.Terminated.FinishedAt.After(restartTime) {
restartTime = s.LastTerminationState.Terminated.FinishedAt.Time
Expand Down Expand Up @@ -158,17 +163,36 @@ func IsCannotStartReason(reason string) bool {

// CannotStart reports whether the Pod is currently experiencing an abnormal start condition.
// It returns true, together with the reason, as soon as one container status is in a
// Waiting state whose reason is accepted by IsCannotStartReason. Otherwise it returns
// false and ExtendedDaemonSetStatusReasonUnknown.
func CannotStart(pod *v1.Pod) (bool, datadoghqv1alpha1.ExtendedDaemonSetStatusReason) {
for _, s := range pod.Status.ContainerStatuses {
for _, s := range containerStatusList(pod) {
if s.State.Waiting != nil && IsCannotStartReason(s.State.Waiting.Reason) {
return true, datadoghqv1alpha1.ExtendedDaemonSetStatusReason(s.State.Waiting.Reason)
return true, convertReasonToEDSStatusReason(s.State.Waiting.Reason)
}
}
return false, datadoghqv1alpha1.ExtendedDaemonSetStatusReasonUnknown
}

// convertReasonToEDSStatusReason maps a raw reason string onto the matching
// ExtendedDaemonSetStatusReason constant. Any string that is not one of the
// reasons listed below yields ExtendedDaemonSetStatusReasonUnknown.
func convertReasonToEDSStatusReason(reason string) datadoghqv1alpha1.ExtendedDaemonSetStatusReason {
	switch candidate := datadoghqv1alpha1.ExtendedDaemonSetStatusReason(reason); candidate {
	case
		datadoghqv1alpha1.ExtendedDaemonSetStatusReasonCLB,
		datadoghqv1alpha1.ExtendedDaemonSetStatusReasonOOM,
		datadoghqv1alpha1.ExtendedDaemonSetStatusRestartsTimeoutExceeded,
		datadoghqv1alpha1.ExtendedDaemonSetStatusSlowStartTimeoutExceeded,
		datadoghqv1alpha1.ExtendedDaemonSetStatusReasonErrImagePull,
		datadoghqv1alpha1.ExtendedDaemonSetStatusReasonImagePullBackOff,
		datadoghqv1alpha1.ExtendedDaemonSetStatusReasonCreateContainerConfigError,
		datadoghqv1alpha1.ExtendedDaemonSetStatusReasonCreateContainerError,
		datadoghqv1alpha1.ExtendedDaemonSetStatusReasonPreStartHookError,
		datadoghqv1alpha1.ExtendedDaemonSetStatusReasonPostStartHookError:
		// The string is a recognized reason: hand back its typed form.
		return candidate
	}

	return datadoghqv1alpha1.ExtendedDaemonSetStatusReasonUnknown
}

// PendingCreate returns true if the Pod is pending create (may be an eventually resolving state)
func PendingCreate(pod *v1.Pod) bool {
for _, s := range pod.Status.ContainerStatuses {
for _, s := range containerStatusList(pod) {
if s.State.Waiting != nil && s.State.Waiting.Reason == "ContainerCreating" {
return true
}
Expand Down
60 changes: 60 additions & 0 deletions pkg/controller/utils/pod/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package pod

import (
"reflect"
"testing"
"time"

Expand Down Expand Up @@ -278,3 +279,62 @@ func newPod(now metav1.Time, ready bool, beforeSec int) *v1.Pod {
},
}
}

// Test_convertReasonToEDSStatusReason checks that each recognized reason string
// is converted to its ExtendedDaemonSetStatusReason constant and that an
// unrecognized string falls back to ExtendedDaemonSetStatusReasonUnknown.
func Test_convertReasonToEDSStatusReason(t *testing.T) {
	type testCase struct {
		reason string
		want   datadoghqv1alpha1.ExtendedDaemonSetStatusReason
	}

	cases := []testCase{
		{reason: "CrashLoopBackOff", want: datadoghqv1alpha1.ExtendedDaemonSetStatusReasonCLB},
		{reason: "OOMKilled", want: datadoghqv1alpha1.ExtendedDaemonSetStatusReasonOOM},
		{reason: "RestartsTimeoutExceeded", want: datadoghqv1alpha1.ExtendedDaemonSetStatusRestartsTimeoutExceeded},
		{reason: "SlowStartTimeoutExceeded", want: datadoghqv1alpha1.ExtendedDaemonSetStatusSlowStartTimeoutExceeded},
		{reason: "ErrImagePull", want: datadoghqv1alpha1.ExtendedDaemonSetStatusReasonErrImagePull},
		{reason: "ImagePullBackOff", want: datadoghqv1alpha1.ExtendedDaemonSetStatusReasonImagePullBackOff},
		{reason: "CreateContainerConfigError", want: datadoghqv1alpha1.ExtendedDaemonSetStatusReasonCreateContainerConfigError},
		{reason: "CreateContainerError", want: datadoghqv1alpha1.ExtendedDaemonSetStatusReasonCreateContainerError},
		{reason: "PreStartHookError", want: datadoghqv1alpha1.ExtendedDaemonSetStatusReasonPreStartHookError},
		{reason: "PostStartHookError", want: datadoghqv1alpha1.ExtendedDaemonSetStatusReasonPostStartHookError},
		// Anything outside the known set must map to Unknown.
		{reason: "does not exist", want: datadoghqv1alpha1.ExtendedDaemonSetStatusReasonUnknown},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.reason, func(t *testing.T) {
			got := convertReasonToEDSStatusReason(tc.reason)
			if reflect.DeepEqual(got, tc.want) {
				return
			}
			t.Errorf("convertReasonToEDSStatusReason() = %v, want %v", got, tc.want)
		})
	}
}

0 comments on commit cb836bd

Please sign in to comment.