Skip to content

Commit

Permalink
Improve controller logic + fix tests flakiness
Browse files Browse the repository at this point in the history
  • Loading branch information
clamoriniere committed Dec 29, 2020
1 parent abbd9aa commit 8918039
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 25 deletions.
7 changes: 4 additions & 3 deletions chart/extendeddaemonset/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
replicaCount: 1
image:
repository: datadog/extendeddaemonset
tag: v0.3.0
#tag: 0.5.0-rc.1
tag: latest
pullPolicy: IfNotPresent
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
logLevel: "info"
logLevel: "debug"
# Allow ExtendedDaemonset controller to watch all namespaces
# Defaulted to false
clusterScope: false
Expand All @@ -29,7 +30,7 @@ podSecurityContext: {}
# fsGroup: 2000

securityContext: {}
# capabilities:
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
Expand Down
8 changes: 1 addition & 7 deletions controllers/extendeddaemonset/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,14 +277,8 @@ func (r *Reconciler) updateInstanceWithCurrentRS(logger logr.Logger, daemonset *
logger.Error(err, "Failed to update ExtendedDaemonSet")
return newDaemonsetCopy, reconcile.Result{}, err
}
// Not need to update the status, `r.client.Update()` update both

// This ensures that the first client update respects the desired new status
newDaemonsetCopy.Status = newDaemonset.Status
logger.Info("Updating ExtendedDaemonSet status")
if err := r.client.Status().Update(context.TODO(), newDaemonsetCopy); err != nil {
logger.Error(err, "Failed to update ExtendedDaemonSet status")
return newDaemonsetCopy, reconcile.Result{}, err
}
return newDaemonsetCopy, reconcile.Result{}, nil
}

Expand Down
11 changes: 6 additions & 5 deletions controllers/extendeddaemonset/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
test "github.com/DataDog/extendeddaemonset/api/v1alpha1/test"
commontest "github.com/DataDog/extendeddaemonset/pkg/controller/test"
"github.com/DataDog/extendeddaemonset/pkg/controller/utils/comparison"
"github.com/google/go-cmp/cmp"

"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -643,7 +644,7 @@ func TestReconcileExtendedDaemonSet_updateInstanceWithCurrentRS(t *testing.T) {
}
daemonsetWithCanaryFailedNewStatus := daemonsetWithCanaryFailedOldStatus.DeepCopy()
{
daemonsetWithCanaryFailedNewStatus.ResourceVersion = "3"
daemonsetWithCanaryFailedNewStatus.ResourceVersion = "2"
daemonsetWithCanaryFailedNewStatus.Status.State = datadoghqv1alpha1.ExtendedDaemonSetStatusStateCanaryFailed
daemonsetWithCanaryFailedNewStatus.Status.Canary = nil
}
Expand Down Expand Up @@ -776,11 +777,11 @@ func TestReconcileExtendedDaemonSet_updateInstanceWithCurrentRS(t *testing.T) {
t.Errorf("ReconcileExtendedDaemonSet.updateInstanceWithCurrentRS() error = %v, wantErr %v", err, tt.wantErr)
return
}
if !apiequality.Semantic.DeepEqual(got, tt.want) {
t.Errorf("ReconcileExtendedDaemonSet.updateInstanceWithCurrentRS() got = %#v, \n want %#v", got, tt.want)
if diff := cmp.Diff(tt.want, got); diff != "" {
t.Errorf("ReconcileExtendedDaemonSet.updateInstanceWithCurrentRS() (-want +got):\n%s", diff)
}
if !reflect.DeepEqual(got1, tt.wantResult) {
t.Errorf("ReconcileExtendedDaemonSet.updateInstanceWithCurrentRS() gotResult = %v, \n wantResult %v", got1, tt.wantResult)
if diff := cmp.Diff(tt.wantResult, got1); diff != "" {
t.Errorf("ReconcileExtendedDaemonSet.updateInstanceWithCurrentRS().result (-want +got):\n%s", diff)
}
})
}
Expand Down
12 changes: 8 additions & 4 deletions controllers/extendeddaemonset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ var _ = Describe("ExtendedDaemonSet Controller", func() {
const timeout = time.Second * 30
const interval = time.Second * 2

intString2 := intstr.FromInt(2)
intString1 := intstr.FromInt(1)
intString10 := intstr.FromInt(10)
namespace := testConfig.namespace
ctx := context.Background()
Expand All @@ -53,7 +53,7 @@ var _ = Describe("ExtendedDaemonSet Controller", func() {
edsOptions := &testutils.NewExtendedDaemonsetOptions{
CanaryStrategy: &datadoghqv1alpha1.ExtendedDaemonSetSpecStrategyCanary{
Duration: &metav1.Duration{Duration: 1 * time.Minute},
Replicas: &intString2,
Replicas: &intString1,
},
RollingUpdate: &datadoghqv1alpha1.ExtendedDaemonSetSpecStrategyRollingUpdate{
MaxUnavailable: &intString10,
Expand All @@ -74,7 +74,10 @@ var _ = Describe("ExtendedDaemonSet Controller", func() {
Name: eds.Status.ActiveReplicaSet,
}
Eventually(withERS(ersKey, ers, func() bool {
return ers.Status.Status == "active" && int(ers.Status.Available) == len(nodeList.Items)
// Info: we use ers.Status.Desired or ers.Status.Current because the pod status is not updated with the test FWK
// and so available and ready will never be updated
fmt.Fprintf(GinkgoWriter, "ERS status:\n%s\n", spew.Sdump(ers.Status))
return ers.Status.Status == "active" && int(ers.Status.Current) == len(nodeList.Items)
}), timeout, interval).Should(BeTrue())
})

Expand All @@ -100,7 +103,8 @@ var _ = Describe("ExtendedDaemonSet Controller", func() {
},
}
Eventually(withList(listOptions, canaryPods, "canary pods", func() bool {
return len(canaryPods.Items) == 2
fmt.Fprintf(GinkgoWriter, "canary pods nb: %d ", len(canaryPods.Items))
return len(canaryPods.Items) == 1
}), timeout, interval).Should(BeTrue())
})

Expand Down
22 changes: 18 additions & 4 deletions controllers/extendeddaemonsetreplicaset/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,16 @@ func (r *Reconciler) Reconcile(ctx context.Context, request reconcile.Request) (
}

if !datadoghqv1alpha1.IsDefaultedExtendedDaemonSet(daemonsetInstance) {
reqLogger.Info("Parent ExtendedDaemonSet is not defaulted, requeuing")
return reconcile.Result{RequeueAfter: time.Second}, nil
message := "Parent ExtendedDaemonSet is not defaulted, requeuing"
reqLogger.Info(message)
err = fmt.Errorf("parent ExtendedDaemonSet is not defaulted")
newStatus := replicaSetInstance.Status.DeepCopy()
// Updating the status with a new condition will trigger a new Event on the ExtendedDaemonSet-controller
// and so the ExtendedDamonset will be defaulted.
// It is better to only update a Resource Kind from its own controller, to avoid conccurent update.
conditions.UpdateErrorCondition(newStatus, now, err, message)
err = r.updateReplicaSet(replicaSetInstance, newStatus)
return reconcile.Result{RequeueAfter: time.Second}, err
}

lastResyncTimeStampCond := conditions.GetExtendedDaemonSetReplicaSetStatusCondition(&replicaSetInstance.Status, datadoghqv1alpha1.ConditionTypeLastFullSync)
Expand Down Expand Up @@ -123,9 +131,14 @@ func (r *Reconciler) Reconcile(ctx context.Context, request reconcile.Request) (
conditions.UpdateExtendedDaemonSetReplicaSetStatusCondition(newStatus, now, datadoghqv1alpha1.ConditionTypeUnschedule, status, desc, false, false)

// start actions on pods
requeueAfter := 5 * time.Second
if daemonsetInstance.Spec.Strategy.ReconcileFrequency != nil {
requeueAfter = daemonsetInstance.Spec.Strategy.ReconcileFrequency.Duration
}

lastPodDeletionCondition := conditions.GetExtendedDaemonSetReplicaSetStatusCondition(newStatus, datadoghqv1alpha1.ConditionTypePodDeletion)
if lastPodDeletionCondition != nil && now.Sub(lastPodDeletionCondition.LastUpdateTime.Time) < 5*time.Second {
result.RequeueAfter = 5 * time.Second
if lastPodDeletionCondition != nil && now.Sub(lastPodDeletionCondition.LastUpdateTime.Time) < requeueAfter {
result.RequeueAfter = requeueAfter
} else {
errs = append(errs, deletePods(reqLogger, r.client, strategyParams.PodByNodeName, strategyResult.PodsToDelete)...)
if len(strategyResult.PodsToDelete) > 0 {
Expand Down Expand Up @@ -267,6 +280,7 @@ func (r *Reconciler) updateReplicaSet(replicaset *datadoghqv1alpha1.ExtendedDaem
newRS.Status = *newStatus
return r.client.Status().Update(context.TODO(), newRS)
}

return nil
}

Expand Down
1 change: 1 addition & 0 deletions examples/foo-eds_v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ spec:
rollingUpdate:
maxParallelPodCreation: 1
slowStartIntervalDuration: 1m
maxUnavailable: 1
template:
spec:
containers:
Expand Down
6 changes: 4 additions & 2 deletions examples/foo-eds_v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,19 @@ spec:
strategy:
canary:
replicas: 1
duration: 5m
duration: 10m
autoPause:
enabled: true
maxRestarts: 5
rollingUpdate:
maxParallelPodCreation: 1
slowStartIntervalDuration: 1m
maxUnavailable: 1
template:
spec:
containers:
- name: daemon
image: k8s.gcr.io/pause:3.1
image: k8s.gcr.io/pause:42
command: ["does", "-not", "-exist"]
tolerations:
- operator: Exists
1 change: 1 addition & 0 deletions examples/kind-cluster-configuration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ nodes:
- role: control-plane
- role: worker
- role: worker
- role: worker

0 comments on commit 8918039

Please sign in to comment.