From 2ffab6f64a2f8828a8fadf36df041718ba206c6b Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Thu, 28 Aug 2025 14:15:00 -0700 Subject: [PATCH 01/26] feat(ci): add Checkov security scanning workflow --- .github/workflows/security-checkov.yaml | 37 +++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/security-checkov.yaml diff --git a/.github/workflows/security-checkov.yaml b/.github/workflows/security-checkov.yaml new file mode 100644 index 00000000..a44c0599 --- /dev/null +++ b/.github/workflows/security-checkov.yaml @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Checkov Security Scan + +on: + pull_request: + branches: [main] + push: + branches: [main] + +jobs: + checkov: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Run Checkov + uses: bridgecrewio/checkov-action@master + with: + directory: . + framework: dockerfile,kubernetes,helm + skip_path: k8s-tests/ + output_format: cli From 780ab52c05bfdd6ccfc1cfadfa3156733e08b329 Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Thu, 28 Aug 2025 14:28:10 -0700 Subject: [PATCH 02/26] fix(ci) checkov only in chart dir --- .github/workflows/security-checkov.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/security-checkov.yaml b/.github/workflows/security-checkov.yaml index a44c0599..edf7f65b 100644 --- a/.github/workflows/security-checkov.yaml +++ b/.github/workflows/security-checkov.yaml @@ -19,8 +19,12 @@ name: Checkov Security Scan on: pull_request: branches: [main] + paths: + - 'chart/**' push: branches: [main] + paths: + - 'chart/**' jobs: checkov: @@ -31,7 +35,6 @@ jobs: - name: Run Checkov uses: bridgecrewio/checkov-action@master with: - directory: . - framework: dockerfile,kubernetes,helm - skip_path: k8s-tests/ + directory: chart + framework: helm output_format: cli From 7e0c7b38603d815e9fd04e29595fa0b29619e221 Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Wed, 27 Aug 2025 10:29:54 -0700 Subject: [PATCH 03/26] fix(chart): add skyhook namespace for CKV_K8S_21 --- chart/templates/cleanup-webhook-job.yaml | 4 ++++ chart/templates/deployment.yaml | 4 ++++ chart/templates/leader-election-rbac.yaml | 5 +++++ chart/templates/metrics-service.yaml | 4 ++++ chart/templates/serviceaccount.yaml | 4 ++++ chart/templates/webhook-service.yaml | 4 ++++ 6 files changed, 25 insertions(+) diff --git a/chart/templates/cleanup-webhook-job.yaml b/chart/templates/cleanup-webhook-job.yaml index e85fd612..c3947a40 100644 --- a/chart/templates/cleanup-webhook-job.yaml +++ b/chart/templates/cleanup-webhook-job.yaml @@ -1,8 +1,12 @@ {{- if .Values.webhook.enable }} +{{- if eq .Release.Namespace "default" }} +{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} +{{- end }} apiVersion: batch/v1 kind: Job metadata: name: "{{ include "chart.fullname" . }}-webhook-cleanup" + namespace: "{{ .Release.Namespace }}" annotations: "helm.sh/hook": pre-delete "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 5ab5295f..baa7a8d9 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -1,7 +1,11 @@ +{{- if eq .Release.Namespace "default" }} +{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} +{{- end }} apiVersion: apps/v1 kind: Deployment metadata: name: {{ include "chart.fullname" . }}-controller-manager + namespace: "{{ .Release.Namespace }}" labels: app: {{ include "chart.fullname" . }}-controller-manager app.kubernetes.io/component: manager diff --git a/chart/templates/leader-election-rbac.yaml b/chart/templates/leader-election-rbac.yaml index fd7731b6..79032bbc 100644 --- a/chart/templates/leader-election-rbac.yaml +++ b/chart/templates/leader-election-rbac.yaml @@ -1,7 +1,11 @@ +{{- if eq .Release.Namespace "default" }} +{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} +{{- end }} apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: skyhook-operator-leader-election-role + namespace: "{{ .Release.Namespace }}" labels: app.kubernetes.io/component: rbac app.kubernetes.io/created-by: skyhook-operator @@ -44,6 +48,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: skyhook-operator-leader-election-rolebinding + namespace: "{{ .Release.Namespace }}" labels: app.kubernetes.io/component: rbac app.kubernetes.io/created-by: skyhook-operator diff --git a/chart/templates/metrics-service.yaml b/chart/templates/metrics-service.yaml index 472474ce..46d0c18c 100644 --- a/chart/templates/metrics-service.yaml +++ b/chart/templates/metrics-service.yaml @@ -1,7 +1,11 @@ +{{- if eq .Release.Namespace "default" }} +{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} +{{- end }} apiVersion: v1 kind: Service metadata: name: skyhook-operator-controller-manager-metrics-service + namespace: "{{ .Release.Namespace }}" labels: app.kubernetes.io/component: kube-rbac-proxy app.kubernetes.io/created-by: skyhook-operator diff --git a/chart/templates/serviceaccount.yaml b/chart/templates/serviceaccount.yaml index 6db143ec..9ac987b1 100644 --- a/chart/templates/serviceaccount.yaml +++ b/chart/templates/serviceaccount.yaml @@ -1,7 +1,11 @@ +{{- if eq .Release.Namespace "default" }} +{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} +{{- end }} apiVersion: v1 kind: ServiceAccount metadata: name: {{ include "chart.fullname" . }}-controller-manager + namespace: "{{ .Release.Namespace }}" labels: app.kubernetes.io/component: rbac app.kubernetes.io/created-by: skyhook-operator diff --git a/chart/templates/webhook-service.yaml b/chart/templates/webhook-service.yaml index fa8dd51b..7339abbe 100644 --- a/chart/templates/webhook-service.yaml +++ b/chart/templates/webhook-service.yaml @@ -1,7 +1,11 @@ +{{- if eq .Release.Namespace "default" }} +{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} +{{- end }} apiVersion: v1 kind: Service metadata: name: {{ .Values.webhook.serviceName }} + namespace: "{{ .Release.Namespace }}" labels: app.kubernetes.io/component: webhook app.kubernetes.io/created-by: skyhook-operator From 423c8c7144ada6fa539a03d04440d0a530e36a5b Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Wed, 27 Aug 2025 10:47:38 -0700 Subject: [PATCH 04/26] fix(chart) cpu/memory limits for BC_K8S_[9-12] --- chart/templates/cleanup-webhook-job.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/chart/templates/cleanup-webhook-job.yaml b/chart/templates/cleanup-webhook-job.yaml index c3947a40..74414fdc 100644 --- a/chart/templates/cleanup-webhook-job.yaml +++ b/chart/templates/cleanup-webhook-job.yaml @@ -18,6 +18,13 @@ spec: containers: - name: cleanup image: {{ .Values.webhook.removalImage | default "bitnami/kubectl" }}:{{ .Values.webhook.removalTag | default "latest" }} + resources: + limits: + cpu: {{ .Values.limitRange.default.cpu }} + memory: {{ .Values.limitRange.default.memory }} + requests: + cpu: {{ .Values.limitRange.defaultRequest.cpu }} + memory: {{ .Values.limitRange.defaultRequest.memory }} command: - /bin/sh - -c From 1eb42ec87d3aa8774790b5e3a7c2d98c426b924a Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Wed, 27 Aug 2025 11:02:22 -0700 Subject: [PATCH 05/26] fix(chart): update security context for admission BC_K8S_[22,27,34] --- chart/templates/cleanup-webhook-job.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/chart/templates/cleanup-webhook-job.yaml b/chart/templates/cleanup-webhook-job.yaml index 74414fdc..c950456b 100644 --- a/chart/templates/cleanup-webhook-job.yaml +++ b/chart/templates/cleanup-webhook-job.yaml @@ -18,6 +18,11 @@ spec: containers: - name: cleanup image: {{ .Values.webhook.removalImage | default "bitnami/kubectl" }}:{{ .Values.webhook.removalTag | default "latest" }} + securityContext: + runAsNonRoot: true + drop: + - NET_RAW + - ALL resources: limits: cpu: {{ .Values.limitRange.default.cpu }} From 65f354b9246a9ce758b82bd8a9c7609fcb5fdcb9 Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Wed, 27 Aug 2025 11:12:56 -0700 Subject: [PATCH 06/26] fix(chart): update security context for CKV_K8S_[19,37] --- chart/templates/cleanup-webhook-job.yaml | 2 ++ chart/templates/deployment.yaml | 1 + 2 files changed, 3 insertions(+) diff --git a/chart/templates/cleanup-webhook-job.yaml b/chart/templates/cleanup-webhook-job.yaml index c950456b..79e25c8c 100644 --- a/chart/templates/cleanup-webhook-job.yaml +++ b/chart/templates/cleanup-webhook-job.yaml @@ -20,6 +20,8 @@ spec: image: {{ .Values.webhook.removalImage | default "bitnami/kubectl" }}:{{ .Values.webhook.removalTag | default "latest" }} securityContext: runAsNonRoot: true + runAsUser: 10001 + allowPrivilegeEscalation: false drop: - NET_RAW - ALL diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index baa7a8d9..0a9753da 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -145,6 +145,7 @@ spec: - name: {{ quote .Values.imagePullSecret }} securityContext: runAsNonRoot: true + runAsUser: 10001 serviceAccountName: {{ include "chart.fullname" . }}-controller-manager terminationGracePeriodSeconds: 10 {{ if ((.Values.controllerManager.podDisruptionBudget).minAvailable) }} From 874b0fed8352b9a9b310b10ad27a65fe0512b201 Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Wed, 27 Aug 2025 12:08:39 -0700 Subject: [PATCH 07/26] fix(chart): image issues for BC_K8S_[13,14,39] --- chart/templates/cleanup-webhook-job.yaml | 2 +- chart/templates/deployment.yaml | 9 +++++---- chart/values.yaml | 6 +++++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/chart/templates/cleanup-webhook-job.yaml b/chart/templates/cleanup-webhook-job.yaml index 79e25c8c..649a7547 100644 --- a/chart/templates/cleanup-webhook-job.yaml +++ b/chart/templates/cleanup-webhook-job.yaml @@ -17,7 +17,7 @@ spec: serviceAccountName: {{ include "chart.fullname" . }}-controller-manager containers: - name: cleanup - image: {{ .Values.webhook.removalImage | default "bitnami/kubectl" }}:{{ .Values.webhook.removalTag | default "latest" }} + image: {{ .Values.webhook.removalImage | default "bitnami/kubectl" }}{{- if .Values.webhook.removalDigest }}@{{ .Values.webhook.removalDigest }}{{- else }}:{{ .Values.webhook.removalTag | default "1.33.1" }}{{- end }} securityContext: runAsNonRoot: true runAsUser: 10001 diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 0a9753da..ca69b208 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -93,10 +93,11 @@ spec: - name: PAUSE_IMAGE value: {{ quote .Values.controllerManager.manager.env.pauseImage }} - name: AGENT_IMAGE - value: {{ .Values.controllerManager.manager.agent.repository }}:{{ .Values.controllerManager.manager.agent.tag}} + value: {{ .Values.controllerManager.manager.agent.repository }}{{- if .Values.controllerManager.manager.agent.digest }}@{{ .Values.controllerManager.manager.agent.digest }}{{- else }}:{{ .Values.controllerManager.manager.agent.tag}}{{- end }} - name: KUBERNETES_CLUSTER_DOMAIN value: {{ quote .Values.kubernetesClusterDomain }} - image: {{ .Values.controllerManager.manager.image.repository }}:{{ .Values.controllerManager.manager.image.tag | default .Chart.AppVersion }} + image: {{ .Values.controllerManager.manager.image.repository }}{{- if .Values.controllerManager.manager.image.digest }}@{{ .Values.controllerManager.manager.image.digest }}{{- else }}:{{ .Values.controllerManager.manager.image.tag | default .Chart.AppVersion }}{{- end }} + imagePullPolicy: Always livenessProbe: httpGet: path: /healthz @@ -131,8 +132,8 @@ spec: env: - name: KUBERNETES_CLUSTER_DOMAIN value: {{ quote .Values.kubernetesClusterDomain }} - image: {{ .Values.controllerManager.kubeRbacProxy.image.repository }}:{{ .Values.controllerManager.kubeRbacProxy.image.tag - | default .Chart.AppVersion }} + image: {{ .Values.controllerManager.kubeRbacProxy.image.repository }}{{- if .Values.controllerManager.kubeRbacProxy.image.digest }}@{{ .Values.controllerManager.kubeRbacProxy.image.digest }}{{- else }}:{{ .Values.controllerManager.kubeRbacProxy.image.tag | default .Chart.AppVersion }}{{- end }} + imagePullPolicy: Always name: kube-rbac-proxy ports: - containerPort: 8443 diff --git a/chart/values.yaml b/chart/values.yaml index 38269b63..384e557e 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -43,6 +43,7 @@ controllerManager: image: repository: quay.io/brancz/kube-rbac-proxy tag: v0.15.0 + digest: "sha256:b6c3624aedb4b785b3f92ac0fbb5efb0b0572b00cebde6c752e8aac522f9669c" resources: limits: cpu: 500m @@ -82,10 +83,12 @@ controllerManager: image: repository: nvcr.io/nvidia/skyhook/operator tag: "" ## if omitted, default to the chart appVersion + digest: "sha256:928585ca0a0d4b314d121a738506f52e13dd520b3426d7a1d9685e76eee0ca9a" ## agentImage: is the image used for the agent container. This image is the default for this install, but can be overridden in the CR at package level. agent: repository: nvcr.io/nvidia/skyhook/agent tag: "v6.3.1" + digest: "sha256:b70d345b254e926a15320a3ba3095fbf3fb8249634cd39e5d88ed6dec7fd4d13" # resources: If this is defined it will override the default calculation for resources # from estimatedNodeCount and estimatedPackageCount. The below values are @@ -153,7 +156,8 @@ webhook: ## uninstall image for cleaning up webhook resources removalImage: bitnami/kubectl - removalTag: latest + removalTag: 1.33.1 + removalDigest: "sha256:2af8ed9feaeada845f4d60f1fe4db951df2e5334ea01bec4b5ef4f191ad20d65" metrics: addServiceAccountBinding: false From 8a7a1a0859461b6ee6f174d1896b8d86dad554f5 Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Wed, 27 Aug 2025 12:13:53 -0700 Subject: [PATCH 08/26] fix(chart): add livenessProbe for BC_K8S_7 --- chart/templates/deployment.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index ca69b208..4bf0e6b7 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -134,6 +134,12 @@ spec: value: {{ quote .Values.kubernetesClusterDomain }} image: {{ .Values.controllerManager.kubeRbacProxy.image.repository }}{{- if .Values.controllerManager.kubeRbacProxy.image.digest }}@{{ .Values.controllerManager.kubeRbacProxy.image.digest }}{{- else }}:{{ .Values.controllerManager.kubeRbacProxy.image.tag | default .Chart.AppVersion }}{{- end }} imagePullPolicy: Always + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 name: kube-rbac-proxy ports: - containerPort: 8443 From 861e54f7f29874b3f48874866ebafda1e4cc71bf Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Wed, 27 Aug 2025 12:22:49 -0700 Subject: [PATCH 09/26] fix(chart): updates for BC_K8S_[7,8,29,35] --- chart/templates/cleanup-webhook-job.yaml | 4 ++++ chart/templates/deployment.yaml | 14 ++++++++++++++ chart/values.yaml | 2 ++ 3 files changed, 20 insertions(+) diff --git a/chart/templates/cleanup-webhook-job.yaml b/chart/templates/cleanup-webhook-job.yaml index 649a7547..c246bb7b 100644 --- a/chart/templates/cleanup-webhook-job.yaml +++ b/chart/templates/cleanup-webhook-job.yaml @@ -14,6 +14,7 @@ spec: template: spec: restartPolicy: Never + automountServiceAccountToken: false serviceAccountName: {{ include "chart.fullname" . }}-controller-manager containers: - name: cleanup @@ -22,9 +23,12 @@ spec: runAsNonRoot: true runAsUser: 10001 allowPrivilegeEscalation: false + readOnlyRootFilesystem: true drop: - NET_RAW - ALL + seccompProfile: + type: RuntimeDefault resources: limits: cpu: {{ .Values.limitRange.default.cpu }} diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 4bf0e6b7..d5d6359c 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -31,6 +31,7 @@ spec: {{- if and .Values.controllerManager.selectors .Values.controllerManager.nodeAffinity.matchExpressions }} {{- fail "Error: Cannot specify both controllerManager.selectors and controllerManager.nodeAffinity.matchExpressions. Use nodeAffinity.matchExpressions for complex node selection or selectors for simple key-value matching." }} {{- end }} + automountServiceAccountToken: false affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -140,6 +141,15 @@ spec: port: 8081 initialDelaySeconds: 15 periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 20 + successThreshold: 1 + failureThreshold: 2 + timeoutSeconds: 3 name: kube-rbac-proxy ports: - containerPort: 8443 @@ -153,6 +163,9 @@ spec: securityContext: runAsNonRoot: true runAsUser: 10001 + readOnlyRootFilesystem: true + seccompProfile: + type: RuntimeDefault serviceAccountName: {{ include "chart.fullname" . }}-controller-manager terminationGracePeriodSeconds: 10 {{ if ((.Values.controllerManager.podDisruptionBudget).minAvailable) }} @@ -166,6 +179,7 @@ metadata: name: {{ include "chart.fullname" . }}-controller-manager-pdb spec: minAvailable: {{ .Values.controllerManager.podDisruptionBudget.minAvailable }} + automountServiceAccountToken: false selector: matchLabels: app: {{ include "chart.fullname" . }}-controller-manager diff --git a/chart/values.yaml b/chart/values.yaml index 384e557e..4fe8ca18 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -37,6 +37,7 @@ controllerManager: - --v=0 containerSecurityContext: allowPrivilegeEscalation: false + readOnlyRootFilesystem: true capabilities: drop: - ALL @@ -54,6 +55,7 @@ controllerManager: manager: containerSecurityContext: allowPrivilegeEscalation: false + readOnlyRootFilesystem: true capabilities: drop: - ALL From 2c3c9d2ad99316becf5e6b0ea2f83e78b1ce832b Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Wed, 27 Aug 2025 12:27:11 -0700 Subject: [PATCH 10/26] fix(chart): update security context for BC_K8S_[28,43] --- chart/templates/cleanup-webhook-job.yaml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/chart/templates/cleanup-webhook-job.yaml b/chart/templates/cleanup-webhook-job.yaml index c246bb7b..92840774 100644 --- a/chart/templates/cleanup-webhook-job.yaml +++ b/chart/templates/cleanup-webhook-job.yaml @@ -16,19 +16,21 @@ spec: restartPolicy: Never automountServiceAccountToken: false serviceAccountName: {{ include "chart.fullname" . }}-controller-manager + securityContext: + runAsNonRoot: true + runAsUser: 10001 + seccompProfile: + type: RuntimeDefault containers: - name: cleanup image: {{ .Values.webhook.removalImage | default "bitnami/kubectl" }}{{- if .Values.webhook.removalDigest }}@{{ .Values.webhook.removalDigest }}{{- else }}:{{ .Values.webhook.removalTag | default "1.33.1" }}{{- end }} securityContext: - runAsNonRoot: true - runAsUser: 10001 allowPrivilegeEscalation: false readOnlyRootFilesystem: true - drop: - - NET_RAW - - ALL - seccompProfile: - type: RuntimeDefault + capabilities: + drop: + - NET_RAW + - ALL resources: limits: cpu: {{ .Values.limitRange.default.cpu }} From b1b21e3d0b88813939e8f03f1ee7be36b4ac20b6 Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Thu, 28 Aug 2025 10:49:26 -0700 Subject: [PATCH 11/26] fix(chart): fixes for ci tests --- chart/templates/cleanup-webhook-job.yaml | 29 ++++++++++++++- chart/templates/deployment.yaml | 37 ++++++++++++++++++- .../helm-chart-test/assert-no-schedule.yaml | 2 +- .../helm-chart-test/assert-scheduled.yaml | 3 +- .../chainsaw/helm/helm-chart-test/values.yaml | 1 + .../values-conflict-test.yaml | 1 + .../helm-node-affinity-test/values-match.yaml | 1 + .../values-no-match.yaml | 1 + .../helm/helm-scale-test/values-scale.yaml | 1 + .../helm/helm-webhook-test/values.yaml | 1 + 10 files changed, 71 insertions(+), 6 deletions(-) diff --git a/chart/templates/cleanup-webhook-job.yaml b/chart/templates/cleanup-webhook-job.yaml index 92840774..439d5d89 100644 --- a/chart/templates/cleanup-webhook-job.yaml +++ b/chart/templates/cleanup-webhook-job.yaml @@ -21,6 +21,25 @@ spec: runAsUser: 10001 seccompProfile: type: RuntimeDefault + volumes: + - name: kube-api-access + projected: + defaultMode: 420 + sources: + - serviceAccountToken: + path: token + expirationSeconds: 3607 + - configMap: + items: + - key: ca.crt + path: ca.crt + name: kube-root-ca.crt + - downwardAPI: + items: + - fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + path: namespace containers: - name: cleanup image: {{ .Values.webhook.removalImage | default "bitnami/kubectl" }}{{- if .Values.webhook.removalDigest }}@{{ .Values.webhook.removalDigest }}{{- else }}:{{ .Values.webhook.removalTag | default "1.33.1" }}{{- end }} @@ -29,8 +48,14 @@ spec: readOnlyRootFilesystem: true capabilities: drop: - - NET_RAW - - ALL + - NET_RAW + - ALL + seccompProfile: + type: RuntimeDefault + volumeMounts: + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: kube-api-access + readOnly: true resources: limits: cpu: {{ .Values.limitRange.default.cpu }} diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index d5d6359c..b0355d27 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -110,6 +110,14 @@ spec: - containerPort: 9443 name: webhook-server protocol: TCP + volumeMounts: + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: kube-api-access + readOnly: true + {{- if .Values.webhook.enable }} + - mountPath: /tmp + name: webhook-certs + {{- end }} readinessProbe: httpGet: path: /readyz @@ -155,15 +163,41 @@ spec: - containerPort: 8443 name: https protocol: TCP + volumeMounts: + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: kube-api-access + readOnly: true resources: {{- toYaml .Values.controllerManager.kubeRbacProxy.resources | nindent 10 }} securityContext: {{- toYaml .Values.controllerManager.kubeRbacProxy.containerSecurityContext | nindent 10 }} imagePullSecrets: - name: {{ quote .Values.imagePullSecret }} + volumes: + - name: kube-api-access + projected: + defaultMode: 420 + sources: + - serviceAccountToken: + path: token + expirationSeconds: 3607 + - configMap: + items: + - key: ca.crt + path: ca.crt + name: kube-root-ca.crt + - downwardAPI: + items: + - fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + path: namespace + {{- if .Values.webhook.enable }} + - name: webhook-certs + emptyDir: {} + {{- end }} securityContext: runAsNonRoot: true runAsUser: 10001 - readOnlyRootFilesystem: true seccompProfile: type: RuntimeDefault serviceAccountName: {{ include "chart.fullname" . }}-controller-manager @@ -179,7 +213,6 @@ metadata: name: {{ include "chart.fullname" . }}-controller-manager-pdb spec: minAvailable: {{ .Values.controllerManager.podDisruptionBudget.minAvailable }} - automountServiceAccountToken: false selector: matchLabels: app: {{ include "chart.fullname" . }}-controller-manager diff --git a/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml b/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml index 113b9aec..57e55ca4 100644 --- a/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml +++ b/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml @@ -97,7 +97,7 @@ spec: env: - name: KUBERNETES_CLUSTER_DOMAIN value: cluster.local - image: quay.io/brancz/kube-rbac-proxy:v0.15.0 + image: quay.io/brancz/kube-rbac-proxy@sha256:b6c3624aedb4b785b3f92ac0fbb5efb0b0572b00cebde6c752e8aac522f9669c name: kube-rbac-proxy ports: - containerPort: 8443 diff --git a/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml b/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml index 808b9f96..bb074243 100644 --- a/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml +++ b/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml @@ -49,6 +49,7 @@ spec: - command: - /manager ((env[?name == 'RUNTIME_REQUIRED_TAINT'].value)[0] == 'skyhook.nvidia.com=runtime-required:NoSchedule'): true + image: ghcr.io/nvidia/skyhook/operator:latest livenessProbe: failureThreshold: 3 httpGet: @@ -96,7 +97,7 @@ spec: env: - name: KUBERNETES_CLUSTER_DOMAIN value: cluster.local - image: quay.io/brancz/kube-rbac-proxy:v0.15.0 + image: quay.io/brancz/kube-rbac-proxy@sha256:b6c3624aedb4b785b3f92ac0fbb5efb0b0572b00cebde6c752e8aac522f9669c name: kube-rbac-proxy ports: - containerPort: 8443 diff --git a/k8s-tests/chainsaw/helm/helm-chart-test/values.yaml b/k8s-tests/chainsaw/helm/helm-chart-test/values.yaml index d73aa355..6ea40ddb 100644 --- a/k8s-tests/chainsaw/helm/helm-chart-test/values.yaml +++ b/k8s-tests/chainsaw/helm/helm-chart-test/values.yaml @@ -27,5 +27,6 @@ controllerManager: image: repository: ghcr.io/nvidia/skyhook/operator tag: latest ## THIS should change to be like a tag so it can point at a specific commit + digest: "" webhook: enable: false diff --git a/k8s-tests/chainsaw/helm/helm-node-affinity-test/values-conflict-test.yaml b/k8s-tests/chainsaw/helm/helm-node-affinity-test/values-conflict-test.yaml index a27053c2..9cd621df 100644 --- a/k8s-tests/chainsaw/helm/helm-node-affinity-test/values-conflict-test.yaml +++ b/k8s-tests/chainsaw/helm/helm-node-affinity-test/values-conflict-test.yaml @@ -29,5 +29,6 @@ controllerManager: image: repository: ghcr.io/nvidia/skyhook/operator tag: latest + digest: "" webhook: enable: false diff --git a/k8s-tests/chainsaw/helm/helm-node-affinity-test/values-match.yaml b/k8s-tests/chainsaw/helm/helm-node-affinity-test/values-match.yaml index 1dd8d920..825b7bcd 100644 --- a/k8s-tests/chainsaw/helm/helm-node-affinity-test/values-match.yaml +++ b/k8s-tests/chainsaw/helm/helm-node-affinity-test/values-match.yaml @@ -31,5 +31,6 @@ controllerManager: image: repository: ghcr.io/nvidia/skyhook/operator tag: latest ## THIS should change to be like a tag so it can point at a specific commit + digest: "" webhook: enable: false diff --git a/k8s-tests/chainsaw/helm/helm-node-affinity-test/values-no-match.yaml b/k8s-tests/chainsaw/helm/helm-node-affinity-test/values-no-match.yaml index fdd71a1f..667cc23c 100644 --- a/k8s-tests/chainsaw/helm/helm-node-affinity-test/values-no-match.yaml +++ b/k8s-tests/chainsaw/helm/helm-node-affinity-test/values-no-match.yaml @@ -31,5 +31,6 @@ controllerManager: image: repository: ghcr.io/nvidia/skyhook/operator tag: latest ## THIS should change to be like a tag so it can point at a specific commit + digest: "" webhook: enable: false diff --git a/k8s-tests/chainsaw/helm/helm-scale-test/values-scale.yaml b/k8s-tests/chainsaw/helm/helm-scale-test/values-scale.yaml index 095c0659..bb47aa52 100644 --- a/k8s-tests/chainsaw/helm/helm-scale-test/values-scale.yaml +++ b/k8s-tests/chainsaw/helm/helm-scale-test/values-scale.yaml @@ -21,6 +21,7 @@ controllerManager: image: repository: ghcr.io/nvidia/skyhook/operator tag: latest ## THIS should change to be like a tag so it can point at a specific commit + digest: "" estimatedNodeCount: 400 estimatedPackageCount: 5 webhook: diff --git a/k8s-tests/chainsaw/helm/helm-webhook-test/values.yaml b/k8s-tests/chainsaw/helm/helm-webhook-test/values.yaml index eb1a87e6..e8c9afd8 100644 --- a/k8s-tests/chainsaw/helm/helm-webhook-test/values.yaml +++ b/k8s-tests/chainsaw/helm/helm-webhook-test/values.yaml @@ -21,5 +21,6 @@ controllerManager: image: repository: ghcr.io/nvidia/skyhook/operator tag: v0.7.6-1ec0890 ## TODO: update this to latest onces this is merged + digest: "" webhook: enable: true From c79e21ec29d6cd9da1a4b271a77bf78a44f5d25e Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Fri, 29 Aug 2025 10:35:03 -0700 Subject: [PATCH 12/26] chore(chart): update digest --- chart/templates/deployment.yaml | 10 ++++------ chart/values.yaml | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index b0355d27..163c1c78 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -144,15 +144,13 @@ spec: image: {{ .Values.controllerManager.kubeRbacProxy.image.repository }}{{- if .Values.controllerManager.kubeRbacProxy.image.digest }}@{{ .Values.controllerManager.kubeRbacProxy.image.digest }}{{- else }}:{{ .Values.controllerManager.kubeRbacProxy.image.tag | default .Chart.AppVersion }}{{- end }} imagePullPolicy: Always livenessProbe: - httpGet: - path: /healthz - port: 8081 + tcpSocket: + port: 8443 initialDelaySeconds: 15 periodSeconds: 20 readinessProbe: - httpGet: - path: /readyz - port: 8081 + tcpSocket: + port: 8443 initialDelaySeconds: 5 periodSeconds: 20 successThreshold: 1 diff --git a/chart/values.yaml b/chart/values.yaml index 4fe8ca18..acb46631 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -85,12 +85,12 @@ controllerManager: image: repository: nvcr.io/nvidia/skyhook/operator tag: "" ## if omitted, default to the chart appVersion - digest: "sha256:928585ca0a0d4b314d121a738506f52e13dd520b3426d7a1d9685e76eee0ca9a" + digest: "sha256:69b01f2b644c5128515afe6288f30f16da5265882ba775f2f2bd56315d599098" ## agentImage: is the image used for the agent container. This image is the default for this install, but can be overridden in the CR at package level. agent: repository: nvcr.io/nvidia/skyhook/agent tag: "v6.3.1" - digest: "sha256:b70d345b254e926a15320a3ba3095fbf3fb8249634cd39e5d88ed6dec7fd4d13" + digest: "sha256:0aab1af6cc0432bae11ef72b4aea71dc65aa17561f5a37b49ae16e50b3f54166" # resources: If this is defined it will override the default calculation for resources # from estimatedNodeCount and estimatedPackageCount. The below values are From 5c48e70c389a17352773863813c29e034331c4ee Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Fri, 29 Aug 2025 12:37:17 -0700 Subject: [PATCH 13/26] fix(chart): using multi-arch digests --- chart/values.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chart/values.yaml b/chart/values.yaml index acb46631..de12f91d 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -44,7 +44,7 @@ controllerManager: image: repository: quay.io/brancz/kube-rbac-proxy tag: v0.15.0 - digest: "sha256:b6c3624aedb4b785b3f92ac0fbb5efb0b0572b00cebde6c752e8aac522f9669c" + digest: "sha256:2c7b120590cbe9f634f5099f2cbb91d0b668569023a81505ca124a5c437e7663" # manifest list digest (multi-arch) resources: limits: cpu: 500m @@ -85,12 +85,12 @@ controllerManager: image: repository: nvcr.io/nvidia/skyhook/operator tag: "" ## if omitted, default to the chart appVersion - digest: "sha256:69b01f2b644c5128515afe6288f30f16da5265882ba775f2f2bd56315d599098" + digest: "sha256:412880d97eab314275590068f993a371c772b19a1cb4b965fd6b9ca101f21b43" # manifest list digest (multi-arch) ## agentImage: is the image used for the agent container. This image is the default for this install, but can be overridden in the CR at package level. agent: repository: nvcr.io/nvidia/skyhook/agent tag: "v6.3.1" - digest: "sha256:0aab1af6cc0432bae11ef72b4aea71dc65aa17561f5a37b49ae16e50b3f54166" + digest: "sha256:9be67d86d06b2999937bacbfb5878ad54f34a25b2e3c0eca79c2f8a7dafc71f8" # manifest list digest (multi-arch) # resources: If this is defined it will override the default calculation for resources # from estimatedNodeCount and estimatedPackageCount. The below values are From c56f2924c7354fd024731400f8f61141d14a55f7 Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Fri, 29 Aug 2025 13:15:35 -0700 Subject: [PATCH 14/26] fix(chart): update tests --- k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml | 2 +- k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml b/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml index 57e55ca4..a9807c44 100644 --- a/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml +++ b/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml @@ -97,7 +97,7 @@ spec: env: - name: KUBERNETES_CLUSTER_DOMAIN value: cluster.local - image: quay.io/brancz/kube-rbac-proxy@sha256:b6c3624aedb4b785b3f92ac0fbb5efb0b0572b00cebde6c752e8aac522f9669c + image: quay.io/brancz/kube-rbac-proxy@sha256:2c7b120590cbe9f634f5099f2cbb91d0b668569023a81505ca124a5c437e7663 name: kube-rbac-proxy ports: - containerPort: 8443 diff --git a/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml b/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml index bb074243..16e14183 100644 --- a/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml +++ b/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml @@ -97,7 +97,7 @@ spec: env: - name: KUBERNETES_CLUSTER_DOMAIN value: cluster.local - image: quay.io/brancz/kube-rbac-proxy@sha256:b6c3624aedb4b785b3f92ac0fbb5efb0b0572b00cebde6c752e8aac522f9669c + image: quay.io/brancz/kube-rbac-proxy@sha256:2c7b120590cbe9f634f5099f2cbb91d0b668569023a81505ca124a5c437e7663 name: kube-rbac-proxy ports: - containerPort: 8443 From 602464bd93f51256e3e004a7041e53cc66b1ef26 Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Fri, 29 Aug 2025 13:37:31 -0700 Subject: [PATCH 15/26] fix(chart): probes --- chart/templates/deployment.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 163c1c78..b0355d27 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -144,13 +144,15 @@ spec: image: {{ .Values.controllerManager.kubeRbacProxy.image.repository }}{{- if .Values.controllerManager.kubeRbacProxy.image.digest }}@{{ .Values.controllerManager.kubeRbacProxy.image.digest }}{{- else }}:{{ .Values.controllerManager.kubeRbacProxy.image.tag | default .Chart.AppVersion }}{{- end }} imagePullPolicy: Always livenessProbe: - tcpSocket: - port: 8443 + httpGet: + path: /healthz + port: 8081 initialDelaySeconds: 15 periodSeconds: 20 readinessProbe: - tcpSocket: - port: 8443 + httpGet: + path: /readyz + port: 8081 initialDelaySeconds: 5 periodSeconds: 20 successThreshold: 1 From 6054382166736e5bc5586d2ba0d0a3f5d7a1e17b Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Mon, 1 Sep 2025 16:15:48 -0700 Subject: [PATCH 16/26] test --- chart/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chart/values.yaml b/chart/values.yaml index de12f91d..1b867017 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -159,7 +159,7 @@ webhook: ## uninstall image for cleaning up webhook resources removalImage: bitnami/kubectl removalTag: 1.33.1 - removalDigest: "sha256:2af8ed9feaeada845f4d60f1fe4db951df2e5334ea01bec4b5ef4f191ad20d65" + removalDigest: "" metrics: addServiceAccountBinding: false From 1594b8c5472066a65e3bdaa5c9cf94246c373541 Mon Sep 17 00:00:00 2001 From: Thomas Lam Date: Mon, 1 Sep 2025 19:27:18 -0700 Subject: [PATCH 17/26] add manifest list digest for cleanup --- chart/templates/cleanup-webhook-job.yaml | 1 + chart/values.yaml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/chart/templates/cleanup-webhook-job.yaml b/chart/templates/cleanup-webhook-job.yaml index 439d5d89..d3414838 100644 --- a/chart/templates/cleanup-webhook-job.yaml +++ b/chart/templates/cleanup-webhook-job.yaml @@ -43,6 +43,7 @@ spec: containers: - name: cleanup image: {{ .Values.webhook.removalImage | default "bitnami/kubectl" }}{{- if .Values.webhook.removalDigest }}@{{ .Values.webhook.removalDigest }}{{- else }}:{{ .Values.webhook.removalTag | default "1.33.1" }}{{- end }} + imagePullPolicy: Always securityContext: allowPrivilegeEscalation: false readOnlyRootFilesystem: true diff --git a/chart/values.yaml b/chart/values.yaml index 1b867017..16c39946 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -90,7 +90,7 @@ controllerManager: agent: repository: nvcr.io/nvidia/skyhook/agent tag: "v6.3.1" - digest: "sha256:9be67d86d06b2999937bacbfb5878ad54f34a25b2e3c0eca79c2f8a7dafc71f8" # manifest list digest (multi-arch) + digest: "sha256:c034866d1382c3372989c09839e5a0c0837cf2836d2a20a038649469a9dae18b" # manifest list digest (multi-arch) # resources: If this is defined it will override the default calculation for resources # from estimatedNodeCount and estimatedPackageCount. The below values are @@ -159,7 +159,7 @@ webhook: ## uninstall image for cleaning up webhook resources removalImage: bitnami/kubectl removalTag: 1.33.1 - removalDigest: "" + removalDigest: "sha256:9081a6f83f4febf47369fc46b6f0f7683c7db243df5b43fc9defe51b0471a950" metrics: addServiceAccountBinding: false From 3aee5a0263d090ed5a4e474f56072e862b170caf Mon Sep 17 00:00:00 2001 From: Tommy Lam Date: Thu, 4 Sep 2025 12:37:47 -0700 Subject: [PATCH 18/26] fix web secret checkov issue --- chart/templates/cleanup-webhook-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chart/templates/cleanup-webhook-job.yaml b/chart/templates/cleanup-webhook-job.yaml index d3414838..8a44d59a 100644 --- a/chart/templates/cleanup-webhook-job.yaml +++ b/chart/templates/cleanup-webhook-job.yaml @@ -72,7 +72,7 @@ spec: WEBHOOK_SECRET_NAME="{{ .Values.webhook.secretName | default "webhook-cert" }}" VALIDATING_WEBHOOK_CONFIGURATION_NAME="skyhook-operator-validating-webhook" MUTATING_WEBHOOK_CONFIGURATION_NAME="skyhook-operator-mutating-webhook" - kubectl delete secret -n $NAMESPACE $WEBHOOK_SECRET_NAME || true + kubectl delete secret -n $NAMESPACE "{{ .Values.webhook.secretName | default "webhook-cert" }}" || true kubectl delete validatingwebhookconfiguration $VALIDATING_WEBHOOK_CONFIGURATION_NAME || true kubectl delete mutatingwebhookconfiguration $MUTATING_WEBHOOK_CONFIGURATION_NAME || true {{- end }} \ No newline at end of file From 974a2ce0981fe9897f39ab4d51179186c1fc8780 Mon Sep 17 00:00:00 2001 From: Tommy Lam Date: Thu, 4 Sep 2025 12:51:18 -0700 Subject: [PATCH 19/26] use tag and digest --- chart/README.md | 3 +++ chart/templates/deployment.yaml | 8 ++++---- chart/values.yaml | 2 +- docs/release-process.md | 19 +++++++++++++++++++ docs/versioning.md | 4 ++++ .../helm-chart-test/assert-no-schedule.yaml | 2 +- .../helm-chart-test/assert-scheduled.yaml | 2 +- 7 files changed, 33 insertions(+), 7 deletions(-) diff --git a/chart/README.md b/chart/README.md index 111355ac..ac7a8c66 100644 --- a/chart/README.md +++ b/chart/README.md @@ -30,8 +30,10 @@ Settings | Description | Default | | controllerManager.manager.env.runtimeRequiredTaint | This feature assumes nodes are added to the cluster with `--register-with-taints` kubelet flag. This taint is assume to be all new nodes, and skyhook pods will tolerate this taint, and remove it one the nodes packages are complete. | skyhook.nvidia.com=runtime-required:NoSchedule | | controllerManager.manager.image.repository | Where to get the image from | "ghcr.io/nvidia/skyhook/operator" | | controllerManager.manager.image.tag | what version of the operator to run | defaults to appVersion | +| controllerManager.manager.image.digest | content-addressable pin for the operator image. If set, the digest determines the pulled image. If both tag and digest are provided, the digest takes precedence; the rendered image may include `tag@digest` but the digest controls selection. | "" | | controllerManager.manager.agent.repository | Where to get the image from | "ghcr.io/nvidia/skyhook/agent" | | controllerManager.manager.agent.tag | what version of the agent to run | defaults to the current latest, but is not latest example v6.1.5 | +| controllerManager.manager.agent.digest | content-addressable pin for the agent image. Same precedence rules as above: if both tag and digest are provided, the digest controls which image is pulled. | "" | | imagePullSecret | the secret used to pull the operator controller image, agent image, and package images. | node-init-secret | | estimatedPackageCount | estimated number of packages to be installed on the cluster, this is used to calculate the resources for the operator controller. | 1 | | estimatedNodeCount | estimated number of nodes in the cluster, this is used to calculate the resources for the operator controller | 1 | @@ -40,6 +42,7 @@ Settings | Description | Default | - **estimatedPackageCount** and **estimatedNodeCount** are used to size the resource requirements. Default setting should be good for nodes > 1000 and packages 1-2 or nodes > 500 and packages >= 4. If your approaching this size deployment it would make sense to set these. You can also override them by explicitly with `controllerManager.manager.resources` the values file has an example. - **runtimeRequired**: If your systems nodes have this taint make sure to add the toleration to the controllerManager.tolerations - **CRD**: This project currently has one CRD and its not managed the ["recommended" way](https://helm.sh/docs/chart_best_practices/custom_resource_definitions/). Its part of the templates. Meaning it will be updated with the `helm upgrade`. We decided it was better do it this way for this project. Doing it either way has consequences and this route has worked well for upgrades so far our deployments. +- **Image pinning (tag vs digest)**: You can set either an image tag or a digest. If both are set, the digest is prioritized; the tag is ignored for selection and may appear as `tag@digest` only for readability. This applies to both operator and agent images. ### Resource Management Skyhook uses Kubernetes LimitRange to set default CPU/memory requests/limits for all containers in the namespace. You can override these per-package in your Skyhook CR. Strict validation is enforced. See [../docs/resource_management.md](../docs/resource_management.md) for details and examples. diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index b0355d27..8ee04e13 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -94,11 +94,11 @@ spec: - name: PAUSE_IMAGE value: {{ quote .Values.controllerManager.manager.env.pauseImage }} - name: AGENT_IMAGE - value: {{ .Values.controllerManager.manager.agent.repository }}{{- if .Values.controllerManager.manager.agent.digest }}@{{ .Values.controllerManager.manager.agent.digest }}{{- else }}:{{ .Values.controllerManager.manager.agent.tag}}{{- end }} + value: {{ .Values.controllerManager.manager.agent.repository }}{{- if and (.Values.controllerManager.manager.agent.tag) (.Values.controllerManager.manager.agent.digest) }}:{{ .Values.controllerManager.manager.agent.tag }}@{{ .Values.controllerManager.manager.agent.digest }}{{- else if .Values.controllerManager.manager.agent.digest }}@{{ .Values.controllerManager.manager.agent.digest }}{{- else }}:{{ .Values.controllerManager.manager.agent.tag }}{{- end }} - name: KUBERNETES_CLUSTER_DOMAIN value: {{ quote .Values.kubernetesClusterDomain }} - image: {{ .Values.controllerManager.manager.image.repository }}{{- if .Values.controllerManager.manager.image.digest }}@{{ .Values.controllerManager.manager.image.digest }}{{- else }}:{{ .Values.controllerManager.manager.image.tag | default .Chart.AppVersion }}{{- end }} - imagePullPolicy: Always + image: {{ .Values.controllerManager.manager.image.repository }}{{- if .Values.controllerManager.manager.image.digest }}{{- if .Values.controllerManager.manager.image.tag }}:{{ .Values.controllerManager.manager.image.tag }}@{{ .Values.controllerManager.manager.image.digest }}{{- else }}@{{ .Values.controllerManager.manager.image.digest }}{{- end }}{{- else }}:{{ .Values.controllerManager.manager.image.tag | default .Chart.AppVersion }}{{- end }} + imagePullPolicy: IfNotPresent livenessProbe: httpGet: path: /healthz @@ -141,7 +141,7 @@ spec: env: - name: KUBERNETES_CLUSTER_DOMAIN value: {{ quote .Values.kubernetesClusterDomain }} - image: {{ .Values.controllerManager.kubeRbacProxy.image.repository }}{{- if .Values.controllerManager.kubeRbacProxy.image.digest }}@{{ .Values.controllerManager.kubeRbacProxy.image.digest }}{{- else }}:{{ .Values.controllerManager.kubeRbacProxy.image.tag | default .Chart.AppVersion }}{{- end }} + image: {{ .Values.controllerManager.kubeRbacProxy.image.repository }}{{- if .Values.controllerManager.kubeRbacProxy.image.digest }}{{- if .Values.controllerManager.kubeRbacProxy.image.tag }}:{{ .Values.controllerManager.kubeRbacProxy.image.tag }}@{{ .Values.controllerManager.kubeRbacProxy.image.digest }}{{- else }}@{{ .Values.controllerManager.kubeRbacProxy.image.digest }}{{- end }}{{- else }}:{{ .Values.controllerManager.kubeRbacProxy.image.tag | default .Chart.AppVersion }}{{- end }} imagePullPolicy: Always livenessProbe: httpGet: diff --git a/chart/values.yaml b/chart/values.yaml index 16c39946..4e0c4d66 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -84,7 +84,7 @@ controllerManager: pauseImage: registry.k8s.io/pause:3.10 image: repository: nvcr.io/nvidia/skyhook/operator - tag: "" ## if omitted, default to the chart appVersion + tag: "" ## if both tag and digest are omitted, defaults to the chart appVersion digest: "sha256:412880d97eab314275590068f993a371c772b19a1cb4b965fd6b9ca101f21b43" # manifest list digest (multi-arch) ## agentImage: is the image used for the agent container. This image is the default for this install, but can be overridden in the CR at package level. agent: diff --git a/docs/release-process.md b/docs/release-process.md index d551d13c..8043d09e 100644 --- a/docs/release-process.md +++ b/docs/release-process.md @@ -115,6 +115,25 @@ git push origin chart/v1.2.3 - [ ] Tests passing - [ ] Documentation updated +### Pin multi-arch image digests in the chart + +Starting with digest pinning, the chart references images using tag@digest (or digest-only where applicable). For each image, fetch the multi-arch manifest digest and update `chart/values.yaml` so our releases are reproducible across architectures. + +Prerequisites: + +- Docker buildx (`docker-buildx version`) + +Fetch a multi-arch digest (example for bitnami/kubectl used by the webhook cleanup job): + +```bash +docker-buildx imagetools inspect bitnami/kubectl:1.33.1 +``` + +Update the digest in `chart/values.yaml` for kube-rbac-proxy, operator, and agent images: + +Note: +- Always use the multi-arch manifest digest (top-level Digest from imagetools), not a single-arch child manifest digest. + **After tagging:** - [ ] CI/CD pipeline completes - [ ] Images published successfully diff --git a/docs/versioning.md b/docs/versioning.md index 1822b9c0..e558b918 100644 --- a/docs/versioning.md +++ b/docs/versioning.md @@ -47,6 +47,10 @@ image: image: "ghcr.io/nvidia/skyhook/operator:0.7.0" ``` +### Image Pinning: Tag vs Digest +- You can specify either a tag or a digest for images. +- If both are provided, the **digest takes precedence** and determines the image pulled. The rendered image reference may display as `:tag@sha256:...`, but the digest controls selection. + ## Release Branching Strategy Skyhook uses **release branches** to manage patches and maintenance releases: diff --git a/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml b/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml index a9807c44..65420488 100644 --- a/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml +++ b/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml @@ -97,7 +97,7 @@ spec: env: - name: KUBERNETES_CLUSTER_DOMAIN value: cluster.local - image: quay.io/brancz/kube-rbac-proxy@sha256:2c7b120590cbe9f634f5099f2cbb91d0b668569023a81505ca124a5c437e7663 + image: quay.io/brancz/kube-rbac-proxy:v0.15.0@sha256:2c7b120590cbe9f634f5099f2cbb91d0b668569023a81505ca124a5c437e7663 name: kube-rbac-proxy ports: - containerPort: 8443 diff --git a/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml b/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml index 16e14183..6f6d38fa 100644 --- a/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml +++ b/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml @@ -97,7 +97,7 @@ spec: env: - name: KUBERNETES_CLUSTER_DOMAIN value: cluster.local - image: quay.io/brancz/kube-rbac-proxy@sha256:2c7b120590cbe9f634f5099f2cbb91d0b668569023a81505ca124a5c437e7663 + image: quay.io/brancz/kube-rbac-proxy:v0.15.0@sha256:2c7b120590cbe9f634f5099f2cbb91d0b668569023a81505ca124a5c437e7663 name: kube-rbac-proxy ports: - containerPort: 8443 From 85993643da1aec02fbf9f97d9fa426f38d52a30b Mon Sep 17 00:00:00 2001 From: Tommy Lam Date: Thu, 4 Sep 2025 12:52:44 -0700 Subject: [PATCH 20/26] template caBundle and update field for webhooks to minimize rbac --- chart/templates/cleanup-webhook-job.yaml | 3 +- chart/templates/deployment.yaml | 10 +-- chart/templates/manager-rbac.yaml | 16 +++- chart/templates/mutating-webhook.yaml | 38 +++++++++ chart/templates/validating-webhook.yaml | 37 ++++++++ .../internal/controller/webhook_controller.go | 84 +++++++++++-------- 6 files changed, 141 insertions(+), 47 deletions(-) create mode 100644 chart/templates/mutating-webhook.yaml create mode 100644 chart/templates/validating-webhook.yaml diff --git a/chart/templates/cleanup-webhook-job.yaml b/chart/templates/cleanup-webhook-job.yaml index 8a44d59a..e74a6bb0 100644 --- a/chart/templates/cleanup-webhook-job.yaml +++ b/chart/templates/cleanup-webhook-job.yaml @@ -42,7 +42,7 @@ spec: path: namespace containers: - name: cleanup - image: {{ .Values.webhook.removalImage | default "bitnami/kubectl" }}{{- if .Values.webhook.removalDigest }}@{{ .Values.webhook.removalDigest }}{{- else }}:{{ .Values.webhook.removalTag | default "1.33.1" }}{{- end }} + image: {{ .Values.webhook.removalImage | default "bitnami/kubectl" }}{{- if .Values.webhook.removalDigest }}{{- if .Values.webhook.removalTag }}:{{ .Values.webhook.removalTag | default "1.33.1" }}@{{ .Values.webhook.removalDigest }}{{- else }}@{{ .Values.webhook.removalDigest }}{{- end }}{{- else }}:{{ .Values.webhook.removalTag | default "1.33.1" }}{{- end }} imagePullPolicy: Always securityContext: allowPrivilegeEscalation: false @@ -69,7 +69,6 @@ spec: - -c - | NAMESPACE="{{ .Release.Namespace }}" - WEBHOOK_SECRET_NAME="{{ .Values.webhook.secretName | default "webhook-cert" }}" VALIDATING_WEBHOOK_CONFIGURATION_NAME="skyhook-operator-validating-webhook" MUTATING_WEBHOOK_CONFIGURATION_NAME="skyhook-operator-mutating-webhook" kubectl delete secret -n $NAMESPACE "{{ .Values.webhook.secretName | default "webhook-cert" }}" || true diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 8ee04e13..439faac6 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -144,15 +144,13 @@ spec: image: {{ .Values.controllerManager.kubeRbacProxy.image.repository }}{{- if .Values.controllerManager.kubeRbacProxy.image.digest }}{{- if .Values.controllerManager.kubeRbacProxy.image.tag }}:{{ .Values.controllerManager.kubeRbacProxy.image.tag }}@{{ .Values.controllerManager.kubeRbacProxy.image.digest }}{{- else }}@{{ .Values.controllerManager.kubeRbacProxy.image.digest }}{{- end }}{{- else }}:{{ .Values.controllerManager.kubeRbacProxy.image.tag | default .Chart.AppVersion }}{{- end }} imagePullPolicy: Always livenessProbe: - httpGet: - path: /healthz - port: 8081 + tcpSocket: + port: 8443 initialDelaySeconds: 15 periodSeconds: 20 readinessProbe: - httpGet: - path: /readyz - port: 8081 + tcpSocket: + port: 8443 initialDelaySeconds: 5 periodSeconds: 20 successThreshold: 1 diff --git a/chart/templates/manager-rbac.yaml b/chart/templates/manager-rbac.yaml index 9fcf1b35..aee633f5 100644 --- a/chart/templates/manager-rbac.yaml +++ b/chart/templates/manager-rbac.yaml @@ -2,6 +2,8 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: skyhook-operator-manager-role + annotations: + checkov.io/skip1: CKV_K8S_155=Operator must manage webhook configs for cert rotation labels: {{- include "chart.labels" . | nindent 4 }} rules: @@ -10,13 +12,19 @@ rules: resources: - mutatingwebhookconfigurations - validatingwebhookconfigurations + resourceNames: + - skyhook-operator-validating-webhook + - skyhook-operator-mutating-webhook verbs: - - create - - delete - get - - list - - patch - update +- apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + - validatingwebhookconfigurations + verbs: + - list - watch - apiGroups: - "" diff --git a/chart/templates/mutating-webhook.yaml b/chart/templates/mutating-webhook.yaml new file mode 100644 index 00000000..3a56aa0e --- /dev/null +++ b/chart/templates/mutating-webhook.yaml @@ -0,0 +1,38 @@ +{{- if .Values.webhook.enable }} +{{- if eq .Release.Namespace "default" }} +{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} +{{- end }} +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: skyhook-operator-mutating-webhook +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + caBundle: "" + service: + name: skyhook-operator-webhook-service + namespace: {{ .Release.Namespace }} + path: /mutate-skyhook-nvidia-com-v1alpha1-skyhook + port: 443 + failurePolicy: Fail + matchPolicy: Equivalent + name: mutate-skyhook.nvidia.com + namespaceSelector: {} + objectSelector: {} + reinvocationPolicy: Never + rules: + - apiGroups: + - skyhook.nvidia.com + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - skyhooks + scope: '*' + sideEffects: None + timeoutSeconds: 10 +{{- end }} \ No newline at end of file diff --git a/chart/templates/validating-webhook.yaml b/chart/templates/validating-webhook.yaml new file mode 100644 index 00000000..daf6e9d2 --- /dev/null +++ b/chart/templates/validating-webhook.yaml @@ -0,0 +1,37 @@ +{{- if .Values.webhook.enable }} +{{- if eq .Release.Namespace "default" }} +{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} +{{- end }} +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: skyhook-operator-validating-webhook +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + caBundle: "" + service: + name: skyhook-operator-webhook-service + namespace: {{ .Release.Namespace }} + path: /validate-skyhook-nvidia-com-v1alpha1-skyhook + port: 443 + failurePolicy: Fail + matchPolicy: Equivalent + name: validate-skyhook.nvidia.com + namespaceSelector: {} + objectSelector: {} + rules: + - apiGroups: + - skyhook.nvidia.com + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - skyhooks + scope: '*' + sideEffects: None + timeoutSeconds: 10 +{{- end }} \ No newline at end of file diff --git a/operator/internal/controller/webhook_controller.go b/operator/internal/controller/webhook_controller.go index 8f7e7053..c0890443 100644 --- a/operator/internal/controller/webhook_controller.go +++ b/operator/internal/controller/webhook_controller.go @@ -216,53 +216,61 @@ func (r *WebhookController) CheckOrUpdateWebhookCertSecret(ctx context.Context, } func (r *WebhookController) CheckOrUpdateWebhookConfigurations(ctx context.Context, secret *corev1.Secret) (bool, error) { + // Update only CABundle fields of existing webhook configurations created by Helm + caBundle := secret.Data["ca.crt"] + changed := false + // ValidatingWebhookConfiguration - validatingWebhookConfiguration := webhookValidatingWebhookConfiguration(r.namespace, r.opts.ServiceName, secret) - existingValidatingWebhookConfiguration := &admissionregistrationv1.ValidatingWebhookConfiguration{} - err := r.Get(ctx, types.NamespacedName{Name: validatingWebhookConfiguration.Name}, existingValidatingWebhookConfiguration) - if err != nil { + validatingName := webhookValidatingWebhookConfiguration(r.namespace, r.opts.ServiceName, secret).GetName() + existingValidating := &admissionregistrationv1.ValidatingWebhookConfiguration{} + if err := r.Get(ctx, types.NamespacedName{Name: validatingName}, existingValidating); err != nil { if errors.IsNotFound(err) { - err := r.Create(ctx, validatingWebhookConfiguration) - if err != nil && !errors.IsAlreadyExists(err) { // race condition, ignore - return false, err - } - } else { - return false, err + return false, nil } - } else { - if compareValidatingWebhookConfigurations(existingValidatingWebhookConfiguration, validatingWebhookConfiguration) { - existingValidatingWebhookConfiguration.Webhooks = validatingWebhookConfiguration.Webhooks - err := r.Update(ctx, existingValidatingWebhookConfiguration) - if err != nil { - return false, err - } + return false, err + } + + needUpdate := false + for i := range existingValidating.Webhooks { + if len(existingValidating.Webhooks[i].ClientConfig.CABundle) == 0 { + existingValidating.Webhooks[i].ClientConfig.CABundle = caBundle + needUpdate = true + } + } + if needUpdate { + if err := r.Update(ctx, existingValidating); err != nil { + return false, err + } else { + changed = true } } // MutatingWebhookConfiguration - mutatingWebhookConfiguration := webhookMutatingWebhookConfiguration(r.namespace, r.opts.ServiceName, secret) - existingMutatingWebhookConfiguration := &admissionregistrationv1.MutatingWebhookConfiguration{} - err = r.Get(ctx, types.NamespacedName{Name: mutatingWebhookConfiguration.Name}, existingMutatingWebhookConfiguration) - if err != nil { + mutatingName := webhookMutatingWebhookConfiguration(r.namespace, r.opts.ServiceName, secret).GetName() + existingMutating := &admissionregistrationv1.MutatingWebhookConfiguration{} + if err := r.Get(ctx, types.NamespacedName{Name: mutatingName}, existingMutating); err != nil { if errors.IsNotFound(err) { - err := r.Create(ctx, mutatingWebhookConfiguration) - if err != nil && !errors.IsAlreadyExists(err) { // race condition, ignore - return false, err - } - } else { - return false, err + return changed, nil } - } else { - if compareMutatingWebhookConfigurations(existingMutatingWebhookConfiguration, mutatingWebhookConfiguration) { - existingMutatingWebhookConfiguration.Webhooks = mutatingWebhookConfiguration.Webhooks - err := r.Update(ctx, existingMutatingWebhookConfiguration) - if err != nil { - return false, err - } + return false, err + } + + needUpdate = false + for i := range existingMutating.Webhooks { + if len(existingMutating.Webhooks[i].ClientConfig.CABundle) == 0 { + existingMutating.Webhooks[i].ClientConfig.CABundle = caBundle + needUpdate = true + } + } + if needUpdate { + if err := r.Update(ctx, existingMutating); err != nil { + return false, err + } else { + changed = true } } - return false, nil + return changed, nil } // webhookValidatingWebhookConfiguration returns a new validating webhook configuration. @@ -383,6 +391,9 @@ func (r *WebhookController) WebhookSecretReadyzCheck(_ *http.Request) error { validatingWebhookConfiguration := &admissionregistrationv1.ValidatingWebhookConfiguration{} err = r.Get(context.Background(), types.NamespacedName{Name: validatingWebhookName}, validatingWebhookConfiguration) if err != nil { + if errors.IsNotFound(err) { + return fmt.Errorf("ValidatingWebhookConfiguration %q not found. Either disable webhooks (not recommended) or reinstall the operator via the Helm chart to provision webhooks", validatingWebhookName) + } return err } @@ -393,6 +404,9 @@ func (r *WebhookController) WebhookSecretReadyzCheck(_ *http.Request) error { mutatingWebhookConfiguration := webhookMutatingWebhookConfiguration(r.namespace, r.opts.ServiceName, secret) err = r.Get(context.Background(), types.NamespacedName{Name: mutatingWebhookConfiguration.Name}, mutatingWebhookConfiguration) if err != nil { + if errors.IsNotFound(err) { + return fmt.Errorf("MutatingWebhookConfiguration %q not found. Either disable webhooks (not recommended) or reinstall the operator via the Helm chart to provision webhooks", mutatingWebhookConfiguration.Name) + } return err } From d3154771946db76506ad6c350f2917413898f812 Mon Sep 17 00:00:00 2001 From: Tommy Lam Date: Thu, 4 Sep 2025 13:32:00 -0700 Subject: [PATCH 21/26] test --- .github/workflows/security-checkov.yaml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/security-checkov.yaml b/.github/workflows/security-checkov.yaml index edf7f65b..094779fd 100644 --- a/.github/workflows/security-checkov.yaml +++ b/.github/workflows/security-checkov.yaml @@ -22,7 +22,7 @@ on: paths: - 'chart/**' push: - branches: [main] + branches: [main, checkov-testing] paths: - 'chart/**' @@ -31,10 +31,16 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - - name: Run Checkov + + - name: Set up Helm + uses: azure/setup-helm@v4 + with: + version: v3.14.4 + + - name: Run Checkov (Helm) uses: bridgecrewio/checkov-action@master with: directory: chart framework: helm output_format: cli + bc_cli_args: --helm-args '--namespace skyhook -f chart/values.yaml' From c41cedc77efa921baa65cae2ed46b5e50c7438dd Mon Sep 17 00:00:00 2001 From: Tommy Lam Date: Thu, 4 Sep 2025 14:01:06 -0700 Subject: [PATCH 22/26] fix checkov --- .github/workflows/security-checkov.yaml | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/.github/workflows/security-checkov.yaml b/.github/workflows/security-checkov.yaml index 094779fd..c7292f19 100644 --- a/.github/workflows/security-checkov.yaml +++ b/.github/workflows/security-checkov.yaml @@ -27,20 +27,17 @@ on: - 'chart/**' jobs: - checkov: + build: + name: checkov runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - - name: Set up Helm - uses: azure/setup-helm@v4 + - uses: actions/setup-python@v5 with: - version: v3.14.4 - - - name: Run Checkov (Helm) + python-version: "3.x" + - name: Test with Checkov + id: checkov uses: bridgecrewio/checkov-action@master with: - directory: chart + directory: ./chart framework: helm - output_format: cli - bc_cli_args: --helm-args '--namespace skyhook -f chart/values.yaml' From 2790927a89a62c0bf710f9477afaa4bf04b22a60 Mon Sep 17 00:00:00 2001 From: Tommy Lam Date: Thu, 4 Sep 2025 14:02:46 -0700 Subject: [PATCH 23/26] cli output --- .github/workflows/security-checkov.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/security-checkov.yaml b/.github/workflows/security-checkov.yaml index c7292f19..1a792f92 100644 --- a/.github/workflows/security-checkov.yaml +++ b/.github/workflows/security-checkov.yaml @@ -41,3 +41,4 @@ jobs: with: directory: ./chart framework: helm + output_format: cli From c372cfae6ed1b625e471ece28e53cfec64b91d14 Mon Sep 17 00:00:00 2001 From: Tommy Lam Date: Thu, 4 Sep 2025 14:47:18 -0700 Subject: [PATCH 24/26] fix license symlink --- .github/workflows/security-checkov.yaml | 5 +- chart/LICENSE | 203 +++++++++++++++++++++++- 2 files changed, 206 insertions(+), 2 deletions(-) mode change 120000 => 100644 chart/LICENSE diff --git a/.github/workflows/security-checkov.yaml b/.github/workflows/security-checkov.yaml index 1a792f92..802e19c9 100644 --- a/.github/workflows/security-checkov.yaml +++ b/.github/workflows/security-checkov.yaml @@ -38,7 +38,10 @@ jobs: - name: Test with Checkov id: checkov uses: bridgecrewio/checkov-action@master + env: + HELM_NAMESPACE: skyhook with: - directory: ./chart + directory: chart framework: helm output_format: cli + skip_check: CKV2_K8S_6 # not in nspect or local checkov diff --git a/chart/LICENSE b/chart/LICENSE deleted file mode 120000 index ea5b6064..00000000 --- a/chart/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../LICENSE \ No newline at end of file diff --git a/chart/LICENSE b/chart/LICENSE new file mode 100644 index 00000000..371c80cd --- /dev/null +++ b/chart/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright (c) NVIDIA CORPORATION. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file From 7fb11803a8df7ee12777de0836277e27238cdcec Mon Sep 17 00:00:00 2001 From: Tommy Lam Date: Mon, 8 Sep 2025 10:00:58 -0700 Subject: [PATCH 25/26] address comments --- .github/workflows/security-checkov.yaml | 1 - chart/templates/cleanup-webhook-job.yaml | 4 ---- chart/templates/deployment.yaml | 3 --- chart/templates/leader-election-rbac.yaml | 3 --- chart/templates/metrics-service.yaml | 3 --- chart/templates/mutating-webhook.yaml | 3 --- chart/templates/networkpolicy.yaml | 20 +++++++++++++++++++ chart/templates/serviceaccount.yaml | 3 --- chart/templates/validating-webhook.yaml | 3 --- chart/templates/validations.yaml | 5 +++++ chart/templates/webhook-service.yaml | 3 --- docs/release-process.md | 17 ++++++++++++++++ .../internal/controller/webhook_controller.go | 8 ++++---- 13 files changed, 46 insertions(+), 30 deletions(-) create mode 100644 chart/templates/networkpolicy.yaml create mode 100644 chart/templates/validations.yaml diff --git a/.github/workflows/security-checkov.yaml b/.github/workflows/security-checkov.yaml index 802e19c9..948d3cd1 100644 --- a/.github/workflows/security-checkov.yaml +++ b/.github/workflows/security-checkov.yaml @@ -44,4 +44,3 @@ jobs: directory: chart framework: helm output_format: cli - skip_check: CKV2_K8S_6 # not in nspect or local checkov diff --git a/chart/templates/cleanup-webhook-job.yaml b/chart/templates/cleanup-webhook-job.yaml index e74a6bb0..c0557bba 100644 --- a/chart/templates/cleanup-webhook-job.yaml +++ b/chart/templates/cleanup-webhook-job.yaml @@ -1,7 +1,4 @@ {{- if .Values.webhook.enable }} -{{- if eq .Release.Namespace "default" }} -{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} -{{- end }} apiVersion: batch/v1 kind: Job metadata: @@ -49,7 +46,6 @@ spec: readOnlyRootFilesystem: true capabilities: drop: - - NET_RAW - ALL seccompProfile: type: RuntimeDefault diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 439faac6..8046e266 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -1,6 +1,3 @@ -{{- if eq .Release.Namespace "default" }} -{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} -{{- end }} apiVersion: apps/v1 kind: Deployment metadata: diff --git a/chart/templates/leader-election-rbac.yaml b/chart/templates/leader-election-rbac.yaml index 79032bbc..d01862e6 100644 --- a/chart/templates/leader-election-rbac.yaml +++ b/chart/templates/leader-election-rbac.yaml @@ -1,6 +1,3 @@ -{{- if eq .Release.Namespace "default" }} -{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} -{{- end }} apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: diff --git a/chart/templates/metrics-service.yaml b/chart/templates/metrics-service.yaml index 46d0c18c..708dd615 100644 --- a/chart/templates/metrics-service.yaml +++ b/chart/templates/metrics-service.yaml @@ -1,6 +1,3 @@ -{{- if eq .Release.Namespace "default" }} -{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} -{{- end }} apiVersion: v1 kind: Service metadata: diff --git a/chart/templates/mutating-webhook.yaml b/chart/templates/mutating-webhook.yaml index 3a56aa0e..ff819deb 100644 --- a/chart/templates/mutating-webhook.yaml +++ b/chart/templates/mutating-webhook.yaml @@ -1,7 +1,4 @@ {{- if .Values.webhook.enable }} -{{- if eq .Release.Namespace "default" }} -{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} -{{- end }} apiVersion: admissionregistration.k8s.io/v1 kind: MutatingWebhookConfiguration metadata: diff --git a/chart/templates/networkpolicy.yaml b/chart/templates/networkpolicy.yaml new file mode 100644 index 00000000..5a11abe2 --- /dev/null +++ b/chart/templates/networkpolicy.yaml @@ -0,0 +1,20 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "chart.fullname" . }}-controller-manager-allow-all + namespace: "{{ .Release.Namespace }}" + labels: + {{- include "chart.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + control-plane: controller-manager + {{- include "chart.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + - Egress + ingress: + - {} + egress: + - {} + diff --git a/chart/templates/serviceaccount.yaml b/chart/templates/serviceaccount.yaml index 9ac987b1..7a785749 100644 --- a/chart/templates/serviceaccount.yaml +++ b/chart/templates/serviceaccount.yaml @@ -1,6 +1,3 @@ -{{- if eq .Release.Namespace "default" }} -{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} -{{- end }} apiVersion: v1 kind: ServiceAccount metadata: diff --git a/chart/templates/validating-webhook.yaml b/chart/templates/validating-webhook.yaml index daf6e9d2..cdb75d9f 100644 --- a/chart/templates/validating-webhook.yaml +++ b/chart/templates/validating-webhook.yaml @@ -1,7 +1,4 @@ {{- if .Values.webhook.enable }} -{{- if eq .Release.Namespace "default" }} -{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} -{{- end }} apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: diff --git a/chart/templates/validations.yaml b/chart/templates/validations.yaml new file mode 100644 index 00000000..bb4e2185 --- /dev/null +++ b/chart/templates/validations.yaml @@ -0,0 +1,5 @@ +{{- if eq .Release.Namespace "default" }} +{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} +{{- end }} + + diff --git a/chart/templates/webhook-service.yaml b/chart/templates/webhook-service.yaml index 7339abbe..8cdc2e12 100644 --- a/chart/templates/webhook-service.yaml +++ b/chart/templates/webhook-service.yaml @@ -1,6 +1,3 @@ -{{- if eq .Release.Namespace "default" }} -{{- fail "Deployment to 'default' namespace is not allowed for security reasons. Please specify a different namespace." }} -{{- end }} apiVersion: v1 kind: Service metadata: diff --git a/docs/release-process.md b/docs/release-process.md index 8043d09e..b31dce2e 100644 --- a/docs/release-process.md +++ b/docs/release-process.md @@ -129,6 +129,23 @@ Fetch a multi-arch digest (example for bitnami/kubectl used by the webhook clean docker-buildx imagetools inspect bitnami/kubectl:1.33.1 ``` +Example output (look for the top-level Digest): + +``` +Name: docker.io/bitnami/kubectl:1.33.1 +MediaType: application/vnd.docker.distribution.manifest.list.v2+json +Digest: sha256:9081a6f83f4febf47369fc46b6f0f7683c7db243df5b43fc9defe51b0471a950 + +Manifests: + Name: docker.io/bitnami/kubectl:1.33.1@sha256:c8efec87588c7a2d84c760d54446b2e081e607a709f16f19283774d5612191b7 + MediaType: application/vnd.docker.distribution.manifest.v2+json + Platform: linux/amd64 + + Name: docker.io/bitnami/kubectl:1.33.1@sha256:2af8ed9feaeada845f4d60f1fe4db951df2e5334ea01bec4b5ef4f191ad20d65 + MediaType: application/vnd.docker.distribution.manifest.v2+json + Platform: linux/arm64 +``` + Update the digest in `chart/values.yaml` for kube-rbac-proxy, operator, and agent images: Note: diff --git a/operator/internal/controller/webhook_controller.go b/operator/internal/controller/webhook_controller.go index c0890443..dd64f3af 100644 --- a/operator/internal/controller/webhook_controller.go +++ b/operator/internal/controller/webhook_controller.go @@ -225,9 +225,9 @@ func (r *WebhookController) CheckOrUpdateWebhookConfigurations(ctx context.Conte existingValidating := &admissionregistrationv1.ValidatingWebhookConfiguration{} if err := r.Get(ctx, types.NamespacedName{Name: validatingName}, existingValidating); err != nil { if errors.IsNotFound(err) { - return false, nil + return false, fmt.Errorf("ValidatingWebhookConfiguration %q not found; creation is handled by the Helm chart. Ensure the chart is installed and webhooks are enabled: %w", validatingName, err) } - return false, err + return false, fmt.Errorf("failed to get ValidatingWebhookConfiguration %q: %w", validatingName, err) } needUpdate := false @@ -250,9 +250,9 @@ func (r *WebhookController) CheckOrUpdateWebhookConfigurations(ctx context.Conte existingMutating := &admissionregistrationv1.MutatingWebhookConfiguration{} if err := r.Get(ctx, types.NamespacedName{Name: mutatingName}, existingMutating); err != nil { if errors.IsNotFound(err) { - return changed, nil + return changed, fmt.Errorf("MutatingWebhookConfiguration %q not found; creation is handled by the Helm chart. Ensure the chart is installed and webhooks are enabled: %w", mutatingName, err) } - return false, err + return false, fmt.Errorf("failed to get MutatingWebhookConfiguration %q: %w", mutatingName, err) } needUpdate = false From 5a50017e5a296e042d48df4cb17c4dc03038ba9f Mon Sep 17 00:00:00 2001 From: Tommy Lam Date: Mon, 8 Sep 2025 10:44:06 -0700 Subject: [PATCH 26/26] move all validations in deployment to specific file --- chart/templates/deployment.yaml | 7 +------ chart/templates/validations.yaml | 13 +++++++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 8046e266..bedca2a7 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -25,9 +25,7 @@ spec: annotations: kubectl.kubernetes.io/default-container: manager spec: - {{- if and .Values.controllerManager.selectors .Values.controllerManager.nodeAffinity.matchExpressions }} - {{- fail "Error: Cannot specify both controllerManager.selectors and controllerManager.nodeAffinity.matchExpressions. Use nodeAffinity.matchExpressions for complex node selection or selectors for simple key-value matching." }} - {{- end }} + automountServiceAccountToken: false affinity: nodeAffinity: @@ -198,9 +196,6 @@ spec: serviceAccountName: {{ include "chart.fullname" . }}-controller-manager terminationGracePeriodSeconds: 10 {{ if ((.Values.controllerManager.podDisruptionBudget).minAvailable) }} -{{ if ge .Values.controllerManager.podDisruptionBudget.minAvailable .Values.controllerManager.replicas }} -{{- $_ := required "minAvailable to be less than replicas" .nil }} -{{ end }} --- apiVersion: policy/v1 kind: PodDisruptionBudget diff --git a/chart/templates/validations.yaml b/chart/templates/validations.yaml index bb4e2185..e56f14ad 100644 --- a/chart/templates/validations.yaml +++ b/chart/templates/validations.yaml @@ -3,3 +3,16 @@ {{- end }} +{{- /* Prevent conflicting node selection config */ -}} +{{- if and .Values.controllerManager.selectors .Values.controllerManager.nodeAffinity.matchExpressions }} +{{- fail "Error: Cannot specify both controllerManager.selectors and controllerManager.nodeAffinity.matchExpressions. Use nodeAffinity.matchExpressions for complex node selection or selectors for simple key-value matching." }} +{{- end }} + +{{- /* Validate PodDisruptionBudget: minAvailable must be < replicas */ -}} +{{- if ((.Values.controllerManager.podDisruptionBudget).minAvailable) -}} + {{- if ge .Values.controllerManager.podDisruptionBudget.minAvailable .Values.controllerManager.replicas -}} + {{- fail "Error: controllerManager.podDisruptionBudget.minAvailable must be less than controllerManager.replicas" -}} + {{- end -}} +{{- end -}} + +