Skip to content
This repository has been archived by the owner on Dec 11, 2023. It is now read-only.

Commit

Permalink
Add handling for reaching the Dynatrace API quotas (#216)
Browse files Browse the repository at this point in the history
* Add handling for reaching request limit
  • Loading branch information
DTMad committed Mar 17, 2020
1 parent 61e0514 commit aaf1bc7
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* No longer change the OneAgent .spec section to set defaults ([#206](https://github.com/Dynatrace/dynatrace-oneagent-operator/pull/206))
* Added a setting to configure a proxy via the CR ([#207](https://github.com/Dynatrace/dynatrace-oneagent-operator/pull/207))
* Added a setting to add custom CA certificates via the CR - These changes are only done for the Operator image as of now; the changes in the OneAgent image are in progress ([#208](https://github.com/Dynatrace/dynatrace-oneagent-operator/pull/208))
* Added proper error handling for Dynatrace API quota limit ([#216](https://github.com/Dynatrace/dynatrace-oneagent-operator/pull/216))

### Bug fixes
* Handle sporadic (and benign) race conditions where the error below would appear ([#194](https://github.com/Dynatrace/dynatrace-oneagent-operator/pull/194)),
Expand Down
35 changes: 30 additions & 5 deletions pkg/controller/oneagent/oneagent_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,22 @@ func (r *ReconcileOneAgent) Reconcile(request reconcile.Request) (reconcile.Resu
return reconcile.Result{}, errClient
}
if err != nil {
var serr dtclient.ServerError
if ok := errors.As(err, &serr); ok && serr.Code == http.StatusTooManyRequests {
logger.Info("Request limit for Dynatrace API reached! Next reconcile in one minute")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
}
return reconcile.Result{}, err
}

return reconcile.Result{RequeueAfter: 5 * time.Minute}, nil
} else if err != nil {
var serr dtclient.ServerError
if ok := errors.As(err, &serr); ok && serr.Code == http.StatusTooManyRequests {
logger.Info("Request limit for Dynatrace API reached! Next reconcile in one minute")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
}
return reconcile.Result{}, err
}

if instance.Spec.DisableAgentUpdate {
Expand All @@ -191,10 +203,22 @@ func (r *ReconcileOneAgent) Reconcile(request reconcile.Request) (reconcile.Resu
return reconcile.Result{}, errClient
}
if err != nil {
var serr dtclient.ServerError
if ok := errors.As(err, &serr); ok && serr.Code == http.StatusTooManyRequests {
logger.Info("Request limit for Dynatrace API reached! Next reconcile in one minute")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
}
return reconcile.Result{}, err
}

return reconcile.Result{RequeueAfter: 5 * time.Minute}, nil
} else if err != nil {
var serr dtclient.ServerError
if ok := errors.As(err, &serr); ok && serr.Code == http.StatusTooManyRequests {
logger.Info("Request limit for Dynatrace API reached! Next reconcile in one minute")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
}
return reconcile.Result{}, err
}

// finally we have to determine the correct non error phase
Expand Down Expand Up @@ -245,8 +269,7 @@ func (r *ReconcileOneAgent) reconcileRollout(logger logr.Logger, instance *dynat
if instance.Status.Version == "" {
desired, err := dtc.GetLatestAgentVersion(dtclient.OsUnix, dtclient.InstallerTypeDefault)
if err != nil {
logger.Error(err, "failed to get desired version")
return updateCR, nil
return updateCR, fmt.Errorf("failed to get desired version: %w", err)
}

instance.Status.Version = desired
Expand Down Expand Up @@ -289,8 +312,7 @@ func (r *ReconcileOneAgent) reconcileVersion(logger logr.Logger, instance *dynat
// get desired version
desired, err := dtc.GetLatestAgentVersion(dtclient.OsUnix, dtclient.InstallerTypeDefault)
if err != nil {
logger.Error(err, "failed to get desired version")
return false, nil
return false, fmt.Errorf("failed to get desired version: %w", err)
} else if desired != "" && instance.Status.Version != desired {
logger.Info("new version available", "actual", instance.Status.Version, "desired", desired)
instance.Status.Version = desired
Expand All @@ -310,7 +332,10 @@ func (r *ReconcileOneAgent) reconcileVersion(logger logr.Logger, instance *dynat
}

// determine pods to restart
podsToDelete, instances := getPodsToRestart(podList.Items, dtc, instance)
podsToDelete, instances, err := getPodsToRestart(podList.Items, dtc, instance)
if err != nil {
return updateCR, err
}

// Workaround: 'instances' can be null, making DeepEqual() return false when comparing against an empty map instance.
// So, compare as long there is data.
Expand Down
9 changes: 7 additions & 2 deletions pkg/controller/oneagent/oneagent_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package oneagent

import (
"errors"
"net/http"
"reflect"
"strings"

Expand Down Expand Up @@ -96,7 +97,7 @@ func hasSpecChanged(dsSpec, dsExpSpec *appsv1.DaemonSetSpec) bool {

// getPodsToRestart determines if a pod needs to be restarted in order to get the desired agent version
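// Returns a slice of pods to restart, a map of OneAgentInstance objects keyed by node name for status update,
// and an error if the Dynatrace API request limit was reached while querying agent versions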
func getPodsToRestart(pods []corev1.Pod, dtc dtclient.Client, instance *dynatracev1alpha1.OneAgent) ([]corev1.Pod, map[string]dynatracev1alpha1.OneAgentInstance) {
func getPodsToRestart(pods []corev1.Pod, dtc dtclient.Client, instance *dynatracev1alpha1.OneAgent) ([]corev1.Pod, map[string]dynatracev1alpha1.OneAgentInstance, error) {
var doomedPods []corev1.Pod
instances := make(map[string]dynatracev1alpha1.OneAgentInstance)

Expand All @@ -107,6 +108,10 @@ func getPodsToRestart(pods []corev1.Pod, dtc dtclient.Client, instance *dynatrac
}
ver, err := dtc.GetAgentVersionForIP(pod.Status.HostIP)
if err != nil {
var serr dtclient.ServerError
if ok := errors.As(err, &serr); ok && serr.Code == http.StatusTooManyRequests {
return nil, nil, err
}
// use last known version if available
if i, ok := instance.Status.Instances[pod.Spec.NodeName]; ok {
item.Version = i.Version
Expand All @@ -120,5 +125,5 @@ func getPodsToRestart(pods []corev1.Pod, dtc dtclient.Client, instance *dynatrac
instances[pod.Spec.NodeName] = item
}

return doomedPods, instances
return doomedPods, instances, nil
}
3 changes: 2 additions & 1 deletion pkg/controller/oneagent/oneagent_utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,11 +183,12 @@ func TestGetPodsToRestart(t *testing.T) {
oa := newOneAgent()
oa.Status.Version = "1.2.3"
oa.Status.Instances = map[string]api.OneAgentInstance{"node-3": {Version: "outdated"}}
doomed, instances := getPodsToRestart(pods, dtc, oa)
doomed, instances, err := getPodsToRestart(pods, dtc, oa)
assert.Lenf(t, doomed, 1, "list of pods to restart")
assert.Equalf(t, doomed[0], pods[1], "list of pods to restart")
assert.Lenf(t, instances, 3, "list of instances")
assert.Equalf(t, instances["node-3"].Version, oa.Status.Instances["node-3"].Version, "determine agent version from dynatrace server")
assert.Equal(t, nil, err)
}

func TestNotifyDynatraceAboutMarkForTerminationEvent(t *testing.T) {
Expand Down
3 changes: 1 addition & 2 deletions pkg/dtclient/dynatrace_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,7 @@ func (dc *dynatraceClient) getHostInfoForIP(ip string) (*hostInfo, error) {
if len(dc.hostCache) == 0 {
err := dc.buildHostCache()
if err != nil {
logger.Error(err, "error building hostcache from dynatrace cluster")
return nil, err
return nil, fmt.Errorf("error building hostcache from dynatrace cluster: %w", err)
}
}

Expand Down

0 comments on commit aaf1bc7

Please sign in to comment.