From 19b5a47d3f8f44ff1f29bed89eaec1947138d4be Mon Sep 17 00:00:00 2001 From: raffis Date: Fri, 5 May 2023 13:56:39 +0200 Subject: [PATCH] feat: add resume profiles (#47) * feat: add resume profiles * feat: update chart and admission * refactory: remove file --- .- | 12 -- Makefile | 9 ++ README.md | 29 ++++ api/v1beta1/doc.go | 20 +++ api/v1beta1/groupversion_info.go | 36 +++++ api/v1beta1/patchrule_types.go | 55 +++++++ api/v1beta1/zz_generated.deepcopy.go | 107 ++++++++++++++ chart/k8s-pause/Chart.yaml | 2 +- ...pause.infra.doodle.com_resumeprofiles.yaml | 106 ++++++++++++++ .../k8s-pause/templates/clusterrole-edit.yaml | 28 ++++ .../k8s-pause/templates/clusterrole-view.yaml | 23 +++ chart/k8s-pause/templates/clusterrole.yaml | 8 + .../k8s-pause/templates/mutatingwebhook.yaml | 2 +- chart/k8s-pause/templates/role.yaml | 2 +- ...pause.infra.doodle.com_resumeprofiles.yaml | 106 ++++++++++++++ config/base/crd/kustomization.yaml | 5 + .../manager/controller_manager_config.yaml | 2 +- config/base/manager/kustomization.yaml | 3 +- config/base/rbac/role.yaml | 8 + config/base/webhook/manifests.yaml | 2 +- .../webhook/namespace_selector_patch.yaml | 2 +- config/default/kustomization.yaml | 1 + config/dev/certmanager/kustomization.yaml | 4 + config/kind/cluster.yaml | 10 ++ config/webhook/manifests.yaml | 2 +- controllers/namespace_controller.go | 138 ++++++++++++++---- controllers/pod_admission_controller.go | 20 ++- main.go | 2 + 28 files changed, 692 insertions(+), 52 deletions(-) delete mode 100644 .- create mode 100644 api/v1beta1/doc.go create mode 100644 api/v1beta1/groupversion_info.go create mode 100644 api/v1beta1/patchrule_types.go create mode 100644 api/v1beta1/zz_generated.deepcopy.go create mode 100644 chart/k8s-pause/crds/pause.infra.doodle.com_resumeprofiles.yaml create mode 100644 chart/k8s-pause/templates/clusterrole-edit.yaml create mode 100644 chart/k8s-pause/templates/clusterrole-view.yaml create mode 100644 
config/base/crd/bases/pause.infra.doodle.com_resumeprofiles.yaml create mode 100644 config/base/crd/kustomization.yaml create mode 100644 config/dev/certmanager/kustomization.yaml create mode 100644 config/kind/cluster.yaml diff --git a/.- b/.- deleted file mode 100644 index eb42570..0000000 --- a/.- +++ /dev/null @@ -1,12 +0,0 @@ -## Release process - -### Controller release -1. Merge all pr's to master which need to be part of the new release -2. Create pr to master and bump the kustomization base -3. Push a tag following semantic versioning prefixed by 'v'. Do not create a github release, this is done automatically. -4. Create a new pr and add the following changes: - 1. Bump chart version - 2. Bump charts app version - -### Helm chart change only -1. Bump the helm chart version in the pr \ No newline at end of file diff --git a/Makefile b/Makefile index 011bbe7..8f3605b 100644 --- a/Makefile +++ b/Makefile @@ -42,6 +42,7 @@ help: ## Display this help. .PHONY: manifests manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. $(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/base/crd/bases + cp config/base/crd/bases/* chart/k8s-pause/crds/ .PHONY: generate generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. @@ -118,6 +119,14 @@ deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. 
$(KUSTOMIZE) build config/default | kubectl delete --ignore-not-found=$(ignore-not-found) -f - +CLUSTER=kind + +.PHONY: kind-test +kind-test: docker-build ## Deploy including test + kind load docker-image ${IMG} --name ${CLUSTER} + kubectl --context kind-${CLUSTER} delete pods -n k8s-pause-system --all --force + $(KUSTOMIZE) build config/default | kubectl --context kind-${CLUSTER} apply -f - + CONTROLLER_GEN = $(GOBIN)/controller-gen .PHONY: controller-gen controller-gen: ## Download controller-gen locally if necessary. diff --git a/README.md b/README.md index 0982a8a..6c19e24 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,35 @@ Resume: kubectl annotate ns/my-namespace k8s-pause/suspend=false --overwrite ``` +## Resume profiles + +It is possible to define a set of pods which are allowed to start while a namespace is not paused. +The active profile can be annotated on the namespace just like the suspend annotation. +If no profile is defined all pods in the namespace are suspended if `k8s-pause/suspend=true` is set. + +**Note**: The podSelector rules are or conditions. One ResumeProfile may match multiple pods by matchLabels or matchExpressions. +```yaml +apiVersion: pause.infra.doodle.com/v1beta1 +kind: ResumeProfile +metadata: + name: garden-services +spec: + podSelector: + - matchLabels: + app: garden + service: backend + - matchLabels: + app: garden + service: frontend + - matchLabels: + app: postgres +``` + +Set resume profile: +``` +kubectl annotate ns/my-namespace k8s-pause/profile=garden-services --overwrite +``` + ## Details The suspend flag on namespace level will affect only but any pods. It will not touch any resources besides pods. diff --git a/api/v1beta1/doc.go b/api/v1beta1/doc.go new file mode 100644 index 0000000..7bb579a --- /dev/null +++ b/api/v1beta1/doc.go @@ -0,0 +1,20 @@ +/* +Copyright 2023 Doodle. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package v1beta1 contains API Schema definitions for the pause.infra.doodle.com v1beta1 API group. +// +kubebuilder:object:generate=true +// +groupName=pause.infra.doodle.com +package v1beta1 diff --git a/api/v1beta1/groupversion_info.go b/api/v1beta1/groupversion_info.go new file mode 100644 index 0000000..ccf669a --- /dev/null +++ b/api/v1beta1/groupversion_info.go @@ -0,0 +1,36 @@ +/* +Copyright 2023 Doodle. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// Package v1beta1 contains API Schema definitions for the pause.infra.doodle.com v1beta1 API group +// +kubebuilder:object:generate=true +// +groupName=pause.infra.doodle.com +package v1beta1 + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/scheme" +) + +var ( + // GroupVersion is group version used to register these objects + GroupVersion = schema.GroupVersion{Group: "pause.infra.doodle.com", Version: "v1beta1"} + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme + SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} + + // AddToScheme adds the types in this group-version to the given scheme. + AddToScheme = SchemeBuilder.AddToScheme +) diff --git a/api/v1beta1/patchrule_types.go b/api/v1beta1/patchrule_types.go new file mode 100644 index 0000000..099014c --- /dev/null +++ b/api/v1beta1/patchrule_types.go @@ -0,0 +1,55 @@ +/* +Copyright 2023 Doodle. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1beta1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// ResumeProfileSpec defines the desired state of ResumeProfile +type ResumeProfileSpec struct { + // PodSelector lists label selectors; a pod belongs to the profile if it matches at least one of them + // +required + PodSelector []metav1.LabelSelector `json:"podSelector"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Active",type="string",JSONPath=".status.conditions[?(@.type==\"Active\")].status",description="" +// +kubebuilder:printcolumn:name="Reason",type="string",JSONPath=".status.conditions[?(@.type==\"Active\")].reason",description="" +// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp",description="" + +// ResumeProfile is the Schema for the resumeprofiles API +type ResumeProfile struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec ResumeProfileSpec `json:"spec,omitempty"` +} + +//+kubebuilder:object:root=true + +// ResumeProfileList contains a list of ResumeProfile +type ResumeProfileList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []ResumeProfile `json:"items"` +} + +func init() { + SchemeBuilder.Register(&ResumeProfile{}, &ResumeProfileList{}) +} diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go new file mode 100644 index 0000000..7fb441c --- /dev/null +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -0,0 +1,107 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* +Copyright 2022 Doodle. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1beta1 + +import ( + "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ResumeProfile) DeepCopyInto(out *ResumeProfile) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResumeProfile. +func (in *ResumeProfile) DeepCopy() *ResumeProfile { + if in == nil { + return nil + } + out := new(ResumeProfile) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *ResumeProfile) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ResumeProfileList) DeepCopyInto(out *ResumeProfileList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]ResumeProfile, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResumeProfileList. 
+func (in *ResumeProfileList) DeepCopy() *ResumeProfileList { + if in == nil { + return nil + } + out := new(ResumeProfileList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *ResumeProfileList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ResumeProfileSpec) DeepCopyInto(out *ResumeProfileSpec) { + *out = *in + if in.PodSelector != nil { + in, out := &in.PodSelector, &out.PodSelector + *out = make([]v1.LabelSelector, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResumeProfileSpec. +func (in *ResumeProfileSpec) DeepCopy() *ResumeProfileSpec { + if in == nil { + return nil + } + out := new(ResumeProfileSpec) + in.DeepCopyInto(out) + return out +} diff --git a/chart/k8s-pause/Chart.yaml b/chart/k8s-pause/Chart.yaml index 799a6d0..b3e839c 100644 --- a/chart/k8s-pause/Chart.yaml +++ b/chart/k8s-pause/Chart.yaml @@ -14,4 +14,4 @@ keywords: name: k8s-pause sources: - https://github.com/DoodleScheduling/k8s-pause -version: 0.2.2 +version: 0.2.3 diff --git a/chart/k8s-pause/crds/pause.infra.doodle.com_resumeprofiles.yaml b/chart/k8s-pause/crds/pause.infra.doodle.com_resumeprofiles.yaml new file mode 100644 index 0000000..e0e371d --- /dev/null +++ b/chart/k8s-pause/crds/pause.infra.doodle.com_resumeprofiles.yaml @@ -0,0 +1,106 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.10.0 + creationTimestamp: null + name: resumeprofiles.pause.infra.doodle.com +spec: + group: pause.infra.doodle.com + names: + kind: ResumeProfile + listKind: ResumeProfileList + plural: 
resumeprofiles + singular: resumeprofile + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.conditions[?(@.type=="Active")].status + name: Active + type: string + - jsonPath: .status.conditions[?(@.type=="Active")].reason + name: Reason + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: ResumeProfile is the Schema for the patchrules API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: ResumeProfileSpec defines the desired state of ResumeProfile + properties: + podSelector: + description: Prometheus holds information about where to find prometheus + items: + description: A label selector is a label query over a set of resources. + The result of matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that + contains values, a key, and an operator that relates the + key and values. + properties: + key: + description: key is the label key that the selector applies + to. 
+ type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of string values. If the + operator is In or NotIn, the values array must be non-empty. + If the operator is Exists or DoesNotExist, the values + array must be empty. This array is replaced during a + strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator + is "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + type: array + required: + - podSelector + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/chart/k8s-pause/templates/clusterrole-edit.yaml b/chart/k8s-pause/templates/clusterrole-edit.yaml new file mode 100644 index 0000000..8ab7b9d --- /dev/null +++ b/chart/k8s-pause/templates/clusterrole-edit.yaml @@ -0,0 +1,28 @@ +{{- if .Values.clusterRBAC.enabled -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "k8s-pause.fullname" . }}-edit + labels: + app.kubernetes.io/name: {{ include "k8s-pause.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + helm.sh/chart: {{ include "k8s-pause.chart" . 
}} + rbac.authorization.k8s.io/aggregate-to-edit: "true" + rbac.authorization.k8s.io/aggregate-to-admin: "true" + annotations: + {{- toYaml .Values.annotations | nindent 4 }} +rules: +- apiGroups: + - pause.infra.doodle.com + resources: + - resumeprofiles + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +{{- end }} diff --git a/chart/k8s-pause/templates/clusterrole-view.yaml b/chart/k8s-pause/templates/clusterrole-view.yaml new file mode 100644 index 0000000..0193211 --- /dev/null +++ b/chart/k8s-pause/templates/clusterrole-view.yaml @@ -0,0 +1,23 @@ +{{- if .Values.clusterRBAC.enabled -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "k8s-pause.fullname" . }}-view + labels: + app.kubernetes.io/name: {{ include "k8s-pause.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + helm.sh/chart: {{ include "k8s-pause.chart" . }} + rbac.authorization.k8s.io/aggregate-to-view: "true" + annotations: + {{- toYaml .Values.annotations | nindent 4 }} +rules: +- apiGroups: + - pause.infra.doodle.com + resources: + - resumeprofiles + verbs: + - get + - list + - watch +{{- end }} diff --git a/chart/k8s-pause/templates/clusterrole.yaml b/chart/k8s-pause/templates/clusterrole.yaml index 3114355..3dcaf0c 100644 --- a/chart/k8s-pause/templates/clusterrole.yaml +++ b/chart/k8s-pause/templates/clusterrole.yaml @@ -51,4 +51,12 @@ rules: - patch - update - watch +- apiGroups: + - "pause.infra.doodle.com" + resources: + - resumeprofiles + verbs: + - get + - watch + - list {{- end }} diff --git a/chart/k8s-pause/templates/mutatingwebhook.yaml b/chart/k8s-pause/templates/mutatingwebhook.yaml index 0db2cfe..d035462 100644 --- a/chart/k8s-pause/templates/mutatingwebhook.yaml +++ b/chart/k8s-pause/templates/mutatingwebhook.yaml @@ -21,7 +21,7 @@ webhooks: namespace: {{ .Release.Namespace }} path: /mutate-v1-pod failurePolicy: Fail - name: 
k8s-pause.infra.doodle.com + name: pause.infra.doodle.com rules: - apiGroups: - "" diff --git a/chart/k8s-pause/templates/role.yaml b/chart/k8s-pause/templates/role.yaml index f609b07..1661633 100644 --- a/chart/k8s-pause/templates/role.yaml +++ b/chart/k8s-pause/templates/role.yaml @@ -35,4 +35,4 @@ rules: - create - delete - update - - get + - get \ No newline at end of file diff --git a/config/base/crd/bases/pause.infra.doodle.com_resumeprofiles.yaml b/config/base/crd/bases/pause.infra.doodle.com_resumeprofiles.yaml new file mode 100644 index 0000000..e0e371d --- /dev/null +++ b/config/base/crd/bases/pause.infra.doodle.com_resumeprofiles.yaml @@ -0,0 +1,106 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.10.0 + creationTimestamp: null + name: resumeprofiles.pause.infra.doodle.com +spec: + group: pause.infra.doodle.com + names: + kind: ResumeProfile + listKind: ResumeProfileList + plural: resumeprofiles + singular: resumeprofile + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.conditions[?(@.type=="Active")].status + name: Active + type: string + - jsonPath: .status.conditions[?(@.type=="Active")].reason + name: Reason + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: ResumeProfile is the Schema for the patchrules API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. 
Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: ResumeProfileSpec defines the desired state of ResumeProfile + properties: + podSelector: + description: Prometheus holds information about where to find prometheus + items: + description: A label selector is a label query over a set of resources. + The result of matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that + contains values, a key, and an operator that relates the + key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of string values. If the + operator is In or NotIn, the values array must be non-empty. + If the operator is Exists or DoesNotExist, the values + array must be empty. This array is replaced during a + strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator + is "In", and the values array contains only "value". The requirements + are ANDed. 
+ type: object + type: object + x-kubernetes-map-type: atomic + type: array + required: + - podSelector + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/base/crd/kustomization.yaml b/config/base/crd/kustomization.yaml new file mode 100644 index 0000000..2ea9ced --- /dev/null +++ b/config/base/crd/kustomization.yaml @@ -0,0 +1,5 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- bases/pause.infra.doodle.com_resumeprofiles.yaml +#+kubebuilder:scaffold:crdkustomizeresource diff --git a/config/base/manager/controller_manager_config.yaml b/config/base/manager/controller_manager_config.yaml index 9d66be4..b4b462e 100644 --- a/config/base/manager/controller_manager_config.yaml +++ b/config/base/manager/controller_manager_config.yaml @@ -8,4 +8,4 @@ webhook: port: 9443 leaderElection: leaderElect: true - resourceName: k8s-pause.infra.doodle.com + resourceName: pause.infra.doodle.com diff --git a/config/base/manager/kustomization.yaml b/config/base/manager/kustomization.yaml index 21870a7..008748a 100644 --- a/config/base/manager/kustomization.yaml +++ b/config/base/manager/kustomization.yaml @@ -10,4 +10,5 @@ configMapGenerator: name: manager-config images: - name: ghcr.io/doodlescheduling/k8s-pause - newTag: v0.2.2 + newName: k8s-pause + newTag: latest diff --git a/config/base/rbac/role.yaml b/config/base/rbac/role.yaml index bba48a4..aac5f45 100644 --- a/config/base/rbac/role.yaml +++ b/config/base/rbac/role.yaml @@ -57,3 +57,11 @@ rules: - get - patch - update +- apiGroups: + - "pause.infra.doodle.com" + resources: + - resumeprofiles + verbs: + - get + - watch + - list \ No newline at end of file diff --git a/config/base/webhook/manifests.yaml b/config/base/webhook/manifests.yaml index e4f26f6..27edb87 100644 --- a/config/base/webhook/manifests.yaml +++ b/config/base/webhook/manifests.yaml @@ -13,7 +13,7 @@ webhooks: namespace: system path: /mutate-v1-pod failurePolicy: Fail - 
name: k8s-pause.infra.doodle.com + name: pause.infra.doodle.com rules: - apiGroups: - "" diff --git a/config/base/webhook/namespace_selector_patch.yaml b/config/base/webhook/namespace_selector_patch.yaml index 5b8616a..56873e6 100644 --- a/config/base/webhook/namespace_selector_patch.yaml +++ b/config/base/webhook/namespace_selector_patch.yaml @@ -3,7 +3,7 @@ kind: MutatingWebhookConfiguration metadata: name: mutating-webhook-configuration webhooks: -- name: k8s-pause.infra.doodle.com +- name: pause.infra.doodle.com namespaceSelector: matchExpressions: - key: control-plane diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 85dba8b..ae55bf2 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -14,6 +14,7 @@ namePrefix: k8s-pause- bases: - ../base/rbac +- ../base/crd - ../base/manager - ../base/webhook # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. 
diff --git a/config/dev/certmanager/kustomization.yaml b/config/dev/certmanager/kustomization.yaml new file mode 100644 index 0000000..70c1b50 --- /dev/null +++ b/config/dev/certmanager/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- https://github.com/cert-manager/cert-manager/releases/download/v1.9.1/cert-manager.yaml \ No newline at end of file diff --git a/config/kind/cluster.yaml b/config/kind/cluster.yaml new file mode 100644 index 0000000..44de613 --- /dev/null +++ b/config/kind/cluster.yaml @@ -0,0 +1,10 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: +- role: control-plane + kubeadmConfigPatches: + - | + kind: ClusterConfiguration + apiServer: + extraArgs: + enable-admission-plugins: NodeRestriction,MutatingAdmissionWebhook,ValidatingAdmissionWebhook diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml index e4f26f6..27edb87 100644 --- a/config/webhook/manifests.yaml +++ b/config/webhook/manifests.yaml @@ -13,7 +13,7 @@ webhooks: namespace: system path: /mutate-v1-pod failurePolicy: Fail - name: k8s-pause.infra.doodle.com + name: pause.infra.doodle.com rules: - apiGroups: - "" diff --git a/controllers/namespace_controller.go b/controllers/namespace_controller.go index 8d4fbc8..4a181b7 100644 --- a/controllers/namespace_controller.go +++ b/controllers/namespace_controller.go @@ -20,12 +20,16 @@ import ( "context" "fmt" + "github.com/doodlescheduling/k8s-pause/api/v1beta1" "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/watch" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -85,6 +89,19 @@ func (r *NamespaceReconciler) Reconcile(ctx 
context.Context, req ctrl.Request) ( } } + var profile *v1beta1.ResumeProfile + if p, ok := ns.Annotations[profileAnnotation]; ok { + profile = &v1beta1.ResumeProfile{} + err := r.Client.Get(ctx, client.ObjectKey{ + Name: p, + Namespace: req.Name, + }, profile) + + if err != nil { + return ctrl.Result{}, err + } + } + var res ctrl.Result if suspend { @@ -92,21 +109,49 @@ func (r *NamespaceReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( res, err = r.suspend(ctx, ns, logger) } else { logger.Info("make sure namespace is resumed") - res, err = r.resume(ctx, ns, logger) + res, err = r.resume(ctx, ns, profile, logger) + if err != nil { + return res, err + } + + // suspend all non matching pods from profile + if profile != nil { + return r.suspendNotInProfile(ctx, ns, *profile, logger) + } } return res, err } -func (r *NamespaceReconciler) resume(ctx context.Context, ns corev1.Namespace, logger logr.Logger) (ctrl.Result, error) { +func matchesResumeProfile(pod corev1.Pod, profile v1beta1.ResumeProfile) bool { + for _, match := range profile.Spec.PodSelector { + selector, err := metav1.LabelSelectorAsSelector(&match) + if err != nil { + continue + } + + if selector.Matches(labels.Set(pod.Labels)) { + return true + } + } + + return false +} + +func (r *NamespaceReconciler) resume(ctx context.Context, ns corev1.Namespace, profile *v1beta1.ResumeProfile, logger logr.Logger) (ctrl.Result, error) { var list corev1.PodList if err := r.Client.List(ctx, &list, client.InNamespace(ns.Name)); err != nil { return ctrl.Result{}, err } for _, pod := range list.Items { - if pod.Status.Phase == phaseSuspended && pod.Spec.SchedulerName == schedulerName { + if profile != nil { + if !matchesResumeProfile(pod, *profile) { + continue + } + } + if pod.Status.Phase == phaseSuspended && pod.Spec.SchedulerName == schedulerName { if len(pod.ObjectMeta.OwnerReferences) > 0 { err := r.Client.Delete(ctx, &pod) if err != nil { @@ -181,42 +226,73 @@ func (r *NamespaceReconciler) 
suspend(ctx context.Context, ns corev1.Namespace, } for _, pod := range list.Items { - if pod.Spec.SchedulerName != schedulerName { - // We assume the pod is managed by another controller if there is an existing owner ref - if len(pod.ObjectMeta.OwnerReferences) > 0 { - err := r.Client.Delete(ctx, &pod) - if err != nil { - logger.Error(err, "failed to delete pod while suspending", "pod", pod.Name) - } + if err := r.suspendPod(ctx, pod, logger); err != nil { + logger.Error(err, "failed to suspend pod", "pod", pod.Name) + continue + } + } - // If there is no owner lets clone the pod and swap the scheduler - } else { - clone := pod.DeepCopy() - // We won't be able to create the object with the same resource version - clone.ObjectMeta.ResourceVersion = "" + return ctrl.Result{}, nil +} - // Remove assigned node to avoid scheduling - clone.Spec.NodeName = "" +func (r *NamespaceReconciler) suspendNotInProfile(ctx context.Context, ns corev1.Namespace, profile v1beta1.ResumeProfile, logger logr.Logger) (ctrl.Result, error) { + var list corev1.PodList + if err := r.Client.List(ctx, &list, client.InNamespace(ns.Name)); err != nil { + return ctrl.Result{}, err + } - // Reset status, not needed as its ignored but nice - clone.Status = corev1.PodStatus{} + for _, pod := range list.Items { + if matchesResumeProfile(pod, profile) { + continue + } - // Assign our own scheduler to avoid the default scheduler interfer with the workload - clone.Spec.SchedulerName = schedulerName + if err := r.suspendPod(ctx, pod, logger); err != nil { + logger.Error(err, "failed to suspend pod", "pod", pod.Name) + continue + } + } - if clone.Annotations == nil { - clone.Annotations = make(map[string]string) - } + return ctrl.Result{}, nil +} - clone.Annotations[previousSchedulerName] = pod.Spec.SchedulerName +func (r *NamespaceReconciler) suspendPod(ctx context.Context, pod corev1.Pod, logger logr.Logger) error { + if pod.Spec.SchedulerName == schedulerName { + return nil + } - err := 
r.recreatePod(ctx, pod, clone) - if err != nil { - logger.Error(err, "recrete unowned pod failed", "pod", pod.Name) - } - } + // An existing owner reference is taken to mean another controller manages the pod, so it is only deleted + if len(pod.ObjectMeta.OwnerReferences) > 0 { + err := r.Client.Delete(ctx, &pod) + if err != nil { + return err + } + + // Without an owner, clone the pod and swap in our scheduler + } else { + clone := pod.DeepCopy() + // We won't be able to create the object with the same resource version + clone.ObjectMeta.ResourceVersion = "" + + // Remove assigned node to avoid scheduling + clone.Spec.NodeName = "" + + // Reset status; not strictly needed since it is ignored, but it keeps the clone clean + clone.Status = corev1.PodStatus{} + + // Assign our own scheduler so the default scheduler does not interfere with the workload + clone.Spec.SchedulerName = schedulerName + + if clone.Annotations == nil { + clone.Annotations = make(map[string]string) + } + + clone.Annotations[previousSchedulerName] = pod.Spec.SchedulerName + + err := r.recreatePod(ctx, pod, clone) + if err != nil { + return fmt.Errorf("recrete unowned pod `%s` failed: %w", pod.Name, err) } } - return ctrl.Result{}, nil + return nil } diff --git a/controllers/pod_admission_controller.go b/controllers/pod_admission_controller.go index 161d0bf..3edd69c 100644 --- a/controllers/pod_admission_controller.go +++ b/controllers/pod_admission_controller.go @@ -5,6 +5,7 @@ import ( "encoding/json" "net/http" + "github.com/doodlescheduling/k8s-pause/api/v1beta1" admissionv1 "k8s.io/api/admission/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" @@ -12,9 +13,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook/admission" ) -// +kubebuilder:webhook:path=/mutate-v1-pod,mutating=true,failurePolicy=fail,groups="",resources=pods,verbs=create;update,versions=v1,name=k8s-pause.infra.doodle.com,admissionReviewVersions=v1,sideEffects=None +// 
+kubebuilder:webhook:path=/mutate-v1-pod,mutating=true,failurePolicy=fail,groups="",resources=pods,verbs=create;update,versions=v1,name=pause.infra.doodle.com,admissionReviewVersions=v1,sideEffects=None const ( + profileAnnotation = "k8s-pause/profile" suspendedAnnotation = "k8s-pause/suspend" schedulerName = "k8s-pause" ) @@ -50,6 +52,22 @@ func (a *Scheduler) Handle(ctx context.Context, req admission.Request) admission } } + if p, ok := ns.Annotations[profileAnnotation]; ok { + var profile v1beta1.ResumeProfile + err := a.Client.Get(ctx, client.ObjectKey{ + Name: p, + Namespace: pod.Namespace, + }, &profile) + + if err != nil { + return admission.Errored(http.StatusBadRequest, err) + } + + if !matchesResumeProfile(*pod, profile) { + suspend = true + } + } + if !suspend { return admission.Response{ AdmissionResponse: admissionv1.AdmissionResponse{ diff --git a/main.go b/main.go index cfa80fd..50092f3 100644 --- a/main.go +++ b/main.go @@ -37,6 +37,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/controller-runtime/pkg/webhook" + "github.com/doodlescheduling/k8s-pause/api/v1beta1" "github.com/doodlescheduling/k8s-pause/controllers" //+kubebuilder:scaffold:imports ) @@ -48,6 +49,7 @@ var ( func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(v1beta1.AddToScheme(scheme)) //+kubebuilder:scaffold:scheme }