Skip to content
This repository has been archived by the owner on Dec 11, 2023. It is now read-only.

Add handling for reaching the Dynatrace API quotas #216

Merged
merged 8 commits into from
Mar 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* No longer change the OneAgent .spec section to set defaults ([#206](https://github.com/Dynatrace/dynatrace-oneagent-operator/pull/206))
* Added a setting to configure a proxy via the CR ([#207](https://github.com/Dynatrace/dynatrace-oneagent-operator/pull/207))
* Added a setting to add custom CA certificates via the CR - These changes are only done for the Operator image as of now, and the changes in the OneAgent image are in progress ([#208](https://github.com/Dynatrace/dynatrace-oneagent-operator/pull/208))
* Added proper error handling for Dynatrace API quota limit ([#216](https://github.com/Dynatrace/dynatrace-oneagent-operator/pull/216))

### Bug fixes
* Handle sporadic (and benign) race conditions where the error below would appear ([#194](https://github.com/Dynatrace/dynatrace-oneagent-operator/pull/194)),
Expand Down
35 changes: 30 additions & 5 deletions pkg/controller/oneagent/oneagent_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,22 @@ func (r *ReconcileOneAgent) Reconcile(request reconcile.Request) (reconcile.Resu
return reconcile.Result{}, errClient
}
if err != nil {
var serr dtclient.ServerError
if ok := errors.As(err, &serr); ok && serr.Code == http.StatusTooManyRequests {
logger.Info("Request limit for Dynatrace API reached! Next reconcile in one minute")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
}
return reconcile.Result{}, err
}

return reconcile.Result{RequeueAfter: 5 * time.Minute}, nil
} else if err != nil {
var serr dtclient.ServerError
if ok := errors.As(err, &serr); ok && serr.Code == http.StatusTooManyRequests {
logger.Info("Request limit for Dynatrace API reached! Next reconcile in one minute")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
}
DTMad marked this conversation as resolved.
Show resolved Hide resolved
return reconcile.Result{}, err
}

if instance.Spec.DisableAgentUpdate {
Expand All @@ -191,10 +203,22 @@ func (r *ReconcileOneAgent) Reconcile(request reconcile.Request) (reconcile.Resu
return reconcile.Result{}, errClient
}
if err != nil {
var serr dtclient.ServerError
if ok := errors.As(err, &serr); ok && serr.Code == http.StatusTooManyRequests {
logger.Info("Request limit for Dynatrace API reached! Next reconcile in one minute")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
}
return reconcile.Result{}, err
}

return reconcile.Result{RequeueAfter: 5 * time.Minute}, nil
} else if err != nil {
var serr dtclient.ServerError
if ok := errors.As(err, &serr); ok && serr.Code == http.StatusTooManyRequests {
logger.Info("Request limit for Dynatrace API reached! Next reconcile in one minute")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
}
DTMad marked this conversation as resolved.
Show resolved Hide resolved
return reconcile.Result{}, err
}

// finally we have to determine the correct non error phase
Expand Down Expand Up @@ -245,8 +269,7 @@ func (r *ReconcileOneAgent) reconcileRollout(logger logr.Logger, instance *dynat
if instance.Status.Version == "" {
desired, err := dtc.GetLatestAgentVersion(dtclient.OsUnix, dtclient.InstallerTypeDefault)
if err != nil {
logger.Error(err, "failed to get desired version")
return updateCR, nil
return updateCR, fmt.Errorf("failed to get desired version: %w", err)
DTMad marked this conversation as resolved.
Show resolved Hide resolved
}

instance.Status.Version = desired
Expand Down Expand Up @@ -289,8 +312,7 @@ func (r *ReconcileOneAgent) reconcileVersion(logger logr.Logger, instance *dynat
// get desired version
desired, err := dtc.GetLatestAgentVersion(dtclient.OsUnix, dtclient.InstallerTypeDefault)
if err != nil {
logger.Error(err, "failed to get desired version")
return false, nil
return false, fmt.Errorf("failed to get desired version: %w", err)
} else if desired != "" && instance.Status.Version != desired {
logger.Info("new version available", "actual", instance.Status.Version, "desired", desired)
instance.Status.Version = desired
Expand All @@ -310,7 +332,10 @@ func (r *ReconcileOneAgent) reconcileVersion(logger logr.Logger, instance *dynat
}

// determine pods to restart
podsToDelete, instances := getPodsToRestart(podList.Items, dtc, instance)
podsToDelete, instances, err := getPodsToRestart(podList.Items, dtc, instance)
if err != nil {
return updateCR, err
}

// Workaround: 'instances' can be nil, making DeepEqual() return false when comparing against an empty map instance.
// So, only compare as long as there is data.
Expand Down
9 changes: 7 additions & 2 deletions pkg/controller/oneagent/oneagent_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package oneagent

import (
"errors"
"net/http"
"reflect"
"strings"

Expand Down Expand Up @@ -96,7 +97,7 @@ func hasSpecChanged(dsSpec, dsExpSpec *appsv1.DaemonSetSpec) bool {

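// getPodsToRestart determines which pods need to be restarted in order to get the desired agent version.
// Returns a slice of pods to restart and a map of OneAgentInstance objects, keyed by node name, for the status update.
// An error is returned when the Dynatrace API request limit has been reached.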
func getPodsToRestart(pods []corev1.Pod, dtc dtclient.Client, instance *dynatracev1alpha1.OneAgent) ([]corev1.Pod, map[string]dynatracev1alpha1.OneAgentInstance) {
func getPodsToRestart(pods []corev1.Pod, dtc dtclient.Client, instance *dynatracev1alpha1.OneAgent) ([]corev1.Pod, map[string]dynatracev1alpha1.OneAgentInstance, error) {
var doomedPods []corev1.Pod
instances := make(map[string]dynatracev1alpha1.OneAgentInstance)

Expand All @@ -107,6 +108,10 @@ func getPodsToRestart(pods []corev1.Pod, dtc dtclient.Client, instance *dynatrac
}
ver, err := dtc.GetAgentVersionForIP(pod.Status.HostIP)
if err != nil {
var serr dtclient.ServerError
if ok := errors.As(err, &serr); ok && serr.Code == http.StatusTooManyRequests {
return nil, nil, err
}
// use last known version if available
if i, ok := instance.Status.Instances[pod.Spec.NodeName]; ok {
item.Version = i.Version
Expand All @@ -120,5 +125,5 @@ func getPodsToRestart(pods []corev1.Pod, dtc dtclient.Client, instance *dynatrac
instances[pod.Spec.NodeName] = item
}

return doomedPods, instances
return doomedPods, instances, nil
}
3 changes: 2 additions & 1 deletion pkg/controller/oneagent/oneagent_utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,11 +183,12 @@ func TestGetPodsToRestart(t *testing.T) {
oa := newOneAgent()
oa.Status.Version = "1.2.3"
oa.Status.Instances = map[string]api.OneAgentInstance{"node-3": {Version: "outdated"}}
doomed, instances := getPodsToRestart(pods, dtc, oa)
doomed, instances, err := getPodsToRestart(pods, dtc, oa)
assert.Lenf(t, doomed, 1, "list of pods to restart")
assert.Equalf(t, doomed[0], pods[1], "list of pods to restart")
assert.Lenf(t, instances, 3, "list of instances")
assert.Equalf(t, instances["node-3"].Version, oa.Status.Instances["node-3"].Version, "determine agent version from dynatrace server")
assert.Equal(t, nil, err)
}

func TestNotifyDynatraceAboutMarkForTerminationEvent(t *testing.T) {
Expand Down
3 changes: 1 addition & 2 deletions pkg/dtclient/dynatrace_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,7 @@ func (dc *dynatraceClient) getHostInfoForIP(ip string) (*hostInfo, error) {
if len(dc.hostCache) == 0 {
err := dc.buildHostCache()
if err != nil {
logger.Error(err, "error building hostcache from dynatrace cluster")
return nil, err
return nil, fmt.Errorf("error building hostcache from dynatrace cluster: %w", err)
}
}

Expand Down