Skip to content

Commit

Permalink
[external metrics] Add mgmt of missing resources for metrics provider (
Browse files Browse the repository at this point in the history
…#137)

* [external metrics server] Add management of missing resources for metrics provider

- Adds missing `ClusterRole` and `ClusterRoleBinding` to allow reading external metrics from HPA.
- Adds missing `APIService` registration for `v1beta1.external.metrics.k8s.io`.
- Operator vendoring and `ClusterRole` changes to allow management of `APIService` resources.

* [external metrics] use external-metrics-reader ClusterRole name on GKE

This mimicks special-casing logic that we have [here](https://github.com/DataDog/helm-charts/blob/c47dbc7935aedc9a568cdf98883db299c41a0d5f/charts/datadog/templates/hpa-external-metrics-rbac.yaml#L11).

This ensures HPA controller works with external metrics on GKE.

* Address review feedback

- Add logging around create/delete of APIService.
- Missing delete verb in RBAC to manage APIServices.

* Address code review comment - simplify and correct Service/APIService cleanup logic
  • Loading branch information
xornivore committed Aug 21, 2020
1 parent deea430 commit 7df4099
Show file tree
Hide file tree
Showing 29 changed files with 3,961 additions and 48 deletions.
2 changes: 2 additions & 0 deletions LICENSE-3rdparty.csv
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,8 @@ core,"k8s.io/gengo/namer",Apache-2.0
core,"k8s.io/gengo/parser",Apache-2.0
core,"k8s.io/gengo/types",Apache-2.0
core,"k8s.io/klog",Apache-2.0
core,"k8s.io/kube-aggregator/pkg/apis/apiregistration",Apache-2.0
core,"k8s.io/kube-aggregator/pkg/apis/apiregistration/v1",Apache-2.0
core,"k8s.io/kube-openapi/cmd/openapi-gen",Apache-2.0
core,"k8s.io/kube-openapi/cmd/openapi-gen/args",Apache-2.0
core,"k8s.io/kube-openapi/pkg/common",Apache-2.0
Expand Down
11 changes: 11 additions & 0 deletions chart/datadog-operator/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,15 @@ rules:
- 'cronjobs'
verbs:
- 'get'
- apiGroups:
- apiregistration.k8s.io
resources:
- apiservices
verbs:
- get
- list
- watch
- update
- create
- delete
{{- end -}}
11 changes: 11 additions & 0 deletions deploy/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,14 @@ rules:
- 'cronjobs'
verbs:
- 'get'
- apiGroups:
- apiregistration.k8s.io
resources:
- apiservices
verbs:
- get
- list
- watch
- update
- create
- delete
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ require (
github.com/olekukonko/tablewriter v0.0.2
github.com/operator-framework/operator-sdk v0.19.0
github.com/pierrec/lz4 v2.4.1+incompatible // indirect
github.com/pkg/errors v0.9.1
github.com/spf13/cobra v1.0.0
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.5.1
Expand All @@ -29,6 +30,7 @@ require (
k8s.io/client-go v12.0.0+incompatible
k8s.io/code-generator v0.18.2
k8s.io/gengo v0.0.0-20200114144118-36b2048a9120
k8s.io/kube-aggregator v0.17.3
k8s.io/kube-openapi v0.0.0-20200121204235-bf4fb3bd569c
sigs.k8s.io/controller-runtime v0.6.0
sigs.k8s.io/yaml v1.2.0
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1473,6 +1473,7 @@ k8s.io/klog v0.3.3/go.mod h1:Gq+BEi5rUBO/HRz0bTSXDUcqjScdoY3a9IHpCEIOOfk=
k8s.io/klog v0.4.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I=
k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8=
k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I=
k8s.io/kube-aggregator v0.17.3 h1:U7U/XHnKwQlvFmsEE6ubpjF0Y4AVhKtXo+9I3d0L6rY=
k8s.io/kube-aggregator v0.17.3/go.mod h1:1dMwMFQbmH76RKF0614L7dNenMl3dwnUJuOOyZ3GMXA=
k8s.io/kube-openapi v0.0.0-20190228160746-b3a7cee44a30/go.mod h1:BXM9ceUBTj2QnfH2MK1odQs778ajze1RxcmP6S8RVVc=
k8s.io/kube-openapi v0.0.0-20190320154901-5e45bb682580/go.mod h1:BXM9ceUBTj2QnfH2MK1odQs778ajze1RxcmP6S8RVVc=
Expand Down
42 changes: 42 additions & 0 deletions pkg/apis/datadoghq/v1alpha1/test/new.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (

datadoghqv1alpha1 "github.com/DataDog/datadog-operator/pkg/apis/datadoghq/v1alpha1"
edsdatadoghqv1alpha1 "github.com/datadog/extendeddaemonset/pkg/apis/datadoghq/v1alpha1"
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
)

var (
Expand Down Expand Up @@ -426,3 +427,44 @@ func NewService(ns, name string, opts *NewServiceOptions) *corev1.Service {

return service
}

// NewAPIServiceOptions used to provide options to the NewAPIService function
type NewAPIServiceOptions struct {
CreationTime *time.Time
Annotations map[string]string
Labels map[string]string
Spec *apiregistrationv1.APIServiceSpec
}

// NewAPIService returns new apiregistrationv1.APIService instance
func NewAPIService(ns, name string, opts *NewAPIServiceOptions) *apiregistrationv1.APIService {
apiService := &apiregistrationv1.APIService{
TypeMeta: metav1.TypeMeta{
Kind: "APIService",
APIVersion: "v1",
},
ObjectMeta: metav1.ObjectMeta{
Namespace: ns,
Name: name,
Labels: map[string]string{},
Annotations: map[string]string{},
},
}

if opts != nil {
if opts.CreationTime != nil {
apiService.CreationTimestamp = metav1.NewTime(*opts.CreationTime)
}
if opts.Labels != nil {
apiService.Labels = opts.Labels
}
if opts.Annotations != nil {
apiService.Annotations = opts.Annotations
}
if opts.Spec != nil {
apiService.Spec = *opts.Spec
}
}

return apiService
}
61 changes: 52 additions & 9 deletions pkg/controller/datadogagent/clusteragent.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,8 @@ func (r *ReconcileDatadogAgent) reconcileClusterAgent(logger logr.Logger, dda *d
if clusterAgentDeployment.Status.AvailableReplicas == 0 {
return reconcile.Result{RequeueAfter: defaultRequeuePeriod}, fmt.Errorf("cluster agent deployment is not ready yet: 0 pods available out of %d", clusterAgentDeployment.Status.Replicas)
}

return reconcile.Result{}, nil
}

return reconcile.Result{}, nil
}

Expand Down Expand Up @@ -228,6 +227,11 @@ func (r *ReconcileDatadogAgent) manageClusterAgentDependencies(logger logr.Logge
return result, err
}

result, err = r.manageMetricsServerAPIService(logger, dda)
if shouldReturn(result, err) {
return result, err
}

result, err = r.manageAdmissionControllerService(logger, dda)
if shouldReturn(result, err) {
return result, err
Expand Down Expand Up @@ -595,20 +599,49 @@ func (r *ReconcileDatadogAgent) manageClusterAgentRBACs(logger logr.Logger, dda
return reconcile.Result{}, err
}

// Create or delete HPA ClusterRoleBindig
metricsProviderEnabled := isMetricsProviderEnabled(dda.Spec.ClusterAgent)
// Create or delete HPA ClusterRoleBinding
hpaClusterRoleBindingName := getHPAClusterRoleBindingName(dda)
hpaClusterRoleBinding := &rbacv1.ClusterRoleBinding{}
if isMetricsProviderEnabled(dda.Spec.ClusterAgent) {
if err := r.client.Get(context.TODO(), types.NamespacedName{Name: hpaClusterRoleBindingName}, hpaClusterRoleBinding); err != nil {
if errors.IsNotFound(err) {
if err := r.client.Get(context.TODO(), types.NamespacedName{Name: hpaClusterRoleBindingName}, hpaClusterRoleBinding); err != nil {
if errors.IsNotFound(err) {
if metricsProviderEnabled {
return r.createHPAClusterRoleBinding(logger, dda, hpaClusterRoleBindingName, clusterAgentVersion)
}
} else {
return reconcile.Result{}, err
}
} else {
if result, err := r.deleteIfNeededHpaClusterRoleBinding(logger, dda, hpaClusterRoleBindingName, clusterAgentVersion, hpaClusterRoleBinding); err != nil {
return result, err
} else if !metricsProviderEnabled {
return r.cleanupClusterRoleBinding(logger, r.client, dda, hpaClusterRoleBindingName)
}

// Create or delete external metrics reader ClusterRole and ClusterRoleBinding
metricsReaderClusterRoleName := getExternalMetricsReaderClusterRoleName(dda, r.versionInfo)

metricsReaderClusterRole := &rbacv1.ClusterRole{}
if err := r.client.Get(context.TODO(), types.NamespacedName{Name: metricsReaderClusterRoleName}, metricsReaderClusterRole); err != nil {
if errors.IsNotFound(err) {
if metricsProviderEnabled {
return r.createExternalMetricsReaderClusterRole(logger, dda, metricsReaderClusterRoleName, clusterAgentVersion)
}
} else {
return reconcile.Result{}, err
}
} else if !metricsProviderEnabled {
return r.cleanupClusterRole(logger, r.client, dda, metricsReaderClusterRoleName)
}

metricsReaderClusterRoleBinding := &rbacv1.ClusterRoleBinding{}
if err := r.client.Get(context.TODO(), types.NamespacedName{Name: metricsReaderClusterRoleName}, metricsReaderClusterRoleBinding); err != nil {
if errors.IsNotFound(err) {
if metricsProviderEnabled {
return r.createExternalMetricsReaderClusterRoleBinding(logger, dda, metricsReaderClusterRoleName, clusterAgentVersion)
}
} else {
return reconcile.Result{}, err
}
} else if !metricsProviderEnabled {
return r.cleanupClusterRoleBinding(logger, r.client, dda, metricsReaderClusterRoleName)
}

// Create ServiceAccount
Expand Down Expand Up @@ -739,6 +772,16 @@ func (r *ReconcileDatadogAgent) cleanupClusterAgentRbacResources(logger logr.Log
if result, err := r.cleanupClusterRoleBinding(logger, r.client, dda, hpaClusterRoleBindingName); err != nil {
return result, err
}

externalMetricsReaderName := getExternalMetricsReaderClusterRoleName(dda, r.versionInfo)
if result, err := r.cleanupClusterRoleBinding(logger, r.client, dda, externalMetricsReaderName); err != nil {
return result, err
}

if result, err := r.cleanupClusterRole(logger, r.client, dda, externalMetricsReaderName); err != nil {
return result, err
}

// Delete Service Account
if result, err := r.cleanupServiceAccount(logger, r.client, dda, rbacResourcesName); err != nil {
return result, err
Expand Down
97 changes: 78 additions & 19 deletions pkg/controller/datadogagent/clusteragent_metricsserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ import (
"github.com/DataDog/datadog-operator/pkg/controller/utils/datadog"
"github.com/go-logr/logr"
rbacv1 "k8s.io/api/rbac/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)
Expand Down Expand Up @@ -43,34 +41,95 @@ func buildMetricsServerClusterRoleBinding(dda *datadoghqv1alpha1.DatadogAgent, n
return nil
}

func (r *ReconcileDatadogAgent) deleteIfNeededHpaClusterRoleBinding(logger logr.Logger, dda *datadoghqv1alpha1.DatadogAgent, name, agentVersion string, clusterRoleBinding *rbacv1.ClusterRoleBinding) (reconcile.Result, error) {
newClusterRoleBinding := buildMetricsServerClusterRoleBinding(dda, name, agentVersion)
if newClusterRoleBinding != nil && clusterRoleBinding != nil && !apiequality.Semantic.DeepEqual(&rbacv1.ClusterRoleBinding{}, clusterRoleBinding.Subjects) {
// External Metrics Server used for HPA has been disabled
// Delete its ClusterRoleBinding
logger.V(1).Info("deleteClusterAgentHPARoleBinding", "clusterRoleBinding.name", clusterRoleBinding.Name)
event := buildEventInfo(clusterRoleBinding.Name, clusterRoleBinding.Namespace, clusterRoleBindingKind, datadog.DeletionEvent)
r.recordEvent(dda, event)
if err := r.client.Delete(context.TODO(), clusterRoleBinding); err != nil {
if errors.IsNotFound(err) {
return reconcile.Result{}, nil
}
return reconcile.Result{}, err
func (r *ReconcileDatadogAgent) createHPAClusterRoleBinding(logger logr.Logger, dda *datadoghqv1alpha1.DatadogAgent, name, agentVersion string) (reconcile.Result, error) {
clusterRoleBinding := buildMetricsServerClusterRoleBinding(dda, name, agentVersion)
if clusterRoleBinding == nil {
return reconcile.Result{}, nil
}
if err := SetOwnerReference(dda, clusterRoleBinding, r.scheme); err != nil {
return reconcile.Result{}, err
}
logger.V(1).Info("createClusterAgentHPARoleBinding", "clusterRoleBinding.name", clusterRoleBinding.Name)
event := buildEventInfo(clusterRoleBinding.Name, clusterRoleBinding.Namespace, clusterRoleBindingKind, datadog.UpdateEvent)
r.recordEvent(dda, event)
return reconcile.Result{Requeue: true}, r.client.Create(context.TODO(), clusterRoleBinding)
}

// buildExternalMetricsReaderClusterRoleBinding creates a ClusterRoleBinding for the HPA controller to be able to read external metrics
func buildExternalMetricsReaderClusterRoleBinding(dda *datadoghqv1alpha1.DatadogAgent, name, agentVersion string) *rbacv1.ClusterRoleBinding {
if isMetricsProviderEnabled(dda.Spec.ClusterAgent) {
return &rbacv1.ClusterRoleBinding{
ObjectMeta: metav1.ObjectMeta{
Labels: getDefaultLabels(dda, name, agentVersion),
Name: name,
},
RoleRef: rbacv1.RoleRef{
APIGroup: datadoghqv1alpha1.RbacAPIGroup,
Kind: datadoghqv1alpha1.ClusterRoleKind,
Name: name, // Cluster role has the same name as its binding
},
Subjects: []rbacv1.Subject{
{
Kind: datadoghqv1alpha1.ServiceAccountKind,
Name: "horizontal-pod-autoscaler",
Namespace: "kube-system",
},
},
}
}
return reconcile.Result{}, nil
return nil
}

func (r *ReconcileDatadogAgent) createHPAClusterRoleBinding(logger logr.Logger, dda *datadoghqv1alpha1.DatadogAgent, name, agentVersion string) (reconcile.Result, error) {
clusterRoleBinding := buildMetricsServerClusterRoleBinding(dda, name, agentVersion)
func (r *ReconcileDatadogAgent) createExternalMetricsReaderClusterRoleBinding(logger logr.Logger, dda *datadoghqv1alpha1.DatadogAgent, name, agentVersion string) (reconcile.Result, error) {
clusterRoleBinding := buildExternalMetricsReaderClusterRoleBinding(dda, name, agentVersion)
if clusterRoleBinding == nil {
return reconcile.Result{}, nil
}
if err := SetOwnerReference(dda, clusterRoleBinding, r.scheme); err != nil {
return reconcile.Result{}, err
}
logger.V(1).Info("createClusterAgentHPARoleBinding", "clusterRoleBinding.name", clusterRoleBinding.Name)
logger.V(1).Info("createExternalMetricsClusterRoleBinding", "clusterRoleBinding.name", clusterRoleBinding.Name)
event := buildEventInfo(clusterRoleBinding.Name, clusterRoleBinding.Namespace, clusterRoleBindingKind, datadog.UpdateEvent)
r.recordEvent(dda, event)
return reconcile.Result{Requeue: true}, r.client.Create(context.TODO(), clusterRoleBinding)
}

func (r *ReconcileDatadogAgent) createExternalMetricsReaderClusterRole(logger logr.Logger, dda *datadoghqv1alpha1.DatadogAgent, name, agentVersion string) (reconcile.Result, error) {
clusterRole := buildExternalMetricsReaderClusterRole(dda, name, agentVersion)
if clusterRole == nil {
return reconcile.Result{}, nil
}
if err := SetOwnerReference(dda, clusterRole, r.scheme); err != nil {
return reconcile.Result{}, err
}
logger.V(1).Info("createExternalMetricsClusterRole", "clusterRole.name", clusterRole.Name)
event := buildEventInfo(clusterRole.Name, clusterRole.Namespace, clusterRoleKind, datadog.CreationEvent)
r.recordEvent(dda, event)
return reconcile.Result{Requeue: true}, r.client.Create(context.TODO(), clusterRole)
}

// buildExternalMetricsReaderClusterRole creates a ClusterRole object for access to external metrics resources
func buildExternalMetricsReaderClusterRole(dda *datadoghqv1alpha1.DatadogAgent, name, agentVersion string) *rbacv1.ClusterRole {
if isMetricsProviderEnabled(dda.Spec.ClusterAgent) {
return &rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{
Labels: getDefaultLabels(dda, name, agentVersion),
Name: name,
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{
"external.metrics.k8s.io",
},
Resources: []string{"*"},
Verbs: []string{
datadoghqv1alpha1.GetVerb,
datadoghqv1alpha1.ListVerb,
datadoghqv1alpha1.WatchVerb,
},
},
},
}
}
return nil
}
1 change: 1 addition & 0 deletions pkg/controller/datadogagent/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,5 @@ const (
podDisruptionBudgetKind = "PodDisruptionBudget"
secretKind = "Secret"
serviceKind = "Service"
apiServiceKind = "APIService"
)

0 comments on commit 7df4099

Please sign in to comment.