From 4830eb6215ad32814a5f8d7a7bb2afbf8ef82127 Mon Sep 17 00:00:00 2001 From: Tommy Lam Date: Wed, 24 Sep 2025 11:30:21 -0700 Subject: [PATCH 1/2] feat(crd): add deployment policy --- chart/templates/deploymentpolicy-crd.yaml | 278 ++++++++++++++++++ chart/templates/skyhook-crd.yaml | 15 + .../api/v1alpha1/deployment_policy_types.go | 156 ++++++++++ operator/api/v1alpha1/skyhook_types.go | 4 + .../api/v1alpha1/zz_generated.deepcopy.go | 196 +++++++++++- ...skyhook.nvidia.com_deploymentpolicies.yaml | 278 ++++++++++++++++++ .../bases/skyhook.nvidia.com_skyhooks.yaml | 4 + 7 files changed, 929 insertions(+), 2 deletions(-) create mode 100644 chart/templates/deploymentpolicy-crd.yaml create mode 100644 operator/api/v1alpha1/deployment_policy_types.go create mode 100644 operator/config/crd/bases/skyhook.nvidia.com_deploymentpolicies.yaml diff --git a/chart/templates/deploymentpolicy-crd.yaml b/chart/templates/deploymentpolicy-crd.yaml new file mode 100644 index 00000000..538dd204 --- /dev/null +++ b/chart/templates/deploymentpolicy-crd.yaml @@ -0,0 +1,278 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.18.0 + name: deploymentpolicies.skyhook.nvidia.com +spec: + group: skyhook.nvidia.com + names: + kind: DeploymentPolicy + listKind: DeploymentPolicyList + plural: deploymentpolicies + singular: deploymentpolicy + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: DeploymentPolicy configures safe rollout defaults and compartment + overrides + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: DeploymentPolicySpec defines rollout ceilings/strategy by + default and per-compartment + properties: + compartments: + description: Compartments, each with selector and budget; optional + strategy + items: + description: Compartment defines a named selector with its own ceiling + and optional strategy + properties: + budget: + description: Exactly one of percent or count + properties: + count: + minimum: 1 + type: integer + percent: + maximum: 100 + minimum: 1 + type: integer + type: object + name: + description: Unique name within the policy + minLength: 1 + type: string + selector: + description: Selector defining the nodes in this compartment + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + strategy: + description: Optional per-compartment strategy override + properties: + exponential: + properties: + batchThreshold: + maximum: 100 + minimum: 1 + type: integer + failureThreshold: + minimum: 1 + type: integer + growthFactor: + minimum: 2 + type: integer + initialBatch: + minimum: 1 + type: integer + safetyLimit: + maximum: 100 + minimum: 1 + type: integer + type: object + fixed: + description: Strategy parameters + properties: + batchThreshold: + maximum: 100 + minimum: 1 + type: integer + failureThreshold: + minimum: 1 + type: integer + initialBatch: + minimum: 1 + type: integer + safetyLimit: + maximum: 100 + minimum: 1 + type: integer + type: object + linear: + properties: + batchThreshold: + maximum: 100 + minimum: 1 + type: integer + delta: + minimum: 1 + type: integer + failureThreshold: + minimum: 1 + type: integer + initialBatch: + minimum: 1 + type: integer + safetyLimit: + maximum: 100 + minimum: 1 + type: integer + type: object + type: object + required: + - budget + - name + - selector + type: object + type: array + default: + description: Default budget/strategy for unmatched nodes + properties: + budget: + description: Exactly one of percent or count + properties: + count: + minimum: 1 + type: integer + percent: + maximum: 100 + minimum: 1 + type: integer + type: object + strategy: + description: Strategy to use + properties: + exponential: + properties: + batchThreshold: + maximum: 100 + minimum: 1 + type: integer + failureThreshold: + minimum: 1 + type: integer + growthFactor: + minimum: 2 + type: integer + initialBatch: + minimum: 1 + type: integer + safetyLimit: + maximum: 100 + minimum: 1 + type: integer + type: object + fixed: + description: Strategy parameters + properties: + batchThreshold: + maximum: 100 + minimum: 1 + type: integer + failureThreshold: + minimum: 1 + type: integer + initialBatch: + minimum: 1 + type: integer + safetyLimit: + maximum: 100 + minimum: 1 + type: integer + type: object + linear: + properties: + batchThreshold: + maximum: 100 + minimum: 1 + type: integer + delta: + minimum: 1 + type: integer + failureThreshold: + minimum: 1 + type: integer + initialBatch: + minimum: 1 + type: integer + safetyLimit: + maximum: 100 + minimum: 1 + type: integer + type: object + type: object + type: object + required: + - default + type: object + type: object + served: true + storage: true diff --git a/chart/templates/skyhook-crd.yaml b/chart/templates/skyhook-crd.yaml index bbe1d664..6147596c 100644 --- a/chart/templates/skyhook-crd.yaml +++ b/chart/templates/skyhook-crd.yaml @@ -105,6 +105,21 @@ spec: type: string type: object type: array + deploymentPolicy: + description: DeploymentPolicy is the name of a DeploymentPolicy for + rollout settings + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic interruptionBudget: description: InterruptionBudget configures how many nodes that match node selectors that allowed to be interrupted at once. diff --git a/operator/api/v1alpha1/deployment_policy_types.go b/operator/api/v1alpha1/deployment_policy_types.go new file mode 100644 index 00000000..6ebf3d1e --- /dev/null +++ b/operator/api/v1alpha1/deployment_policy_types.go @@ -0,0 +1,156 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// Strategy parameters +type FixedStrategy struct { + // +kubebuilder:validation:Minimum=1 + // +optional + InitialBatch int `json:"initialBatch,omitempty"` + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=100 + // +optional + BatchThreshold int `json:"batchThreshold,omitempty"` + // +kubebuilder:validation:Minimum=1 + // +optional + FailureThreshold int `json:"failureThreshold,omitempty"` + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=100 + // +optional + SafetyLimit int `json:"safetyLimit,omitempty"` +} + +type LinearStrategy struct { + // +kubebuilder:validation:Minimum=1 + // +optional + InitialBatch int `json:"initialBatch,omitempty"` + // +kubebuilder:validation:Minimum=1 + // +optional + Delta int `json:"delta,omitempty"` + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=100 + // +optional + BatchThreshold int `json:"batchThreshold,omitempty"` + // +kubebuilder:validation:Minimum=1 + // +optional + FailureThreshold int `json:"failureThreshold,omitempty"` + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=100 + // +optional + SafetyLimit int `json:"safetyLimit,omitempty"` +} + +type ExponentialStrategy struct { + // +kubebuilder:validation:Minimum=1 + // +optional + InitialBatch int `json:"initialBatch,omitempty"` + // +kubebuilder:validation:Minimum=2 + // +optional + GrowthFactor int `json:"growthFactor,omitempty"` + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=100 + // +optional + BatchThreshold int `json:"batchThreshold,omitempty"` + // +kubebuilder:validation:Minimum=1 + // +optional + FailureThreshold int `json:"failureThreshold,omitempty"` + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=100 + // +optional + SafetyLimit int `json:"safetyLimit,omitempty"` +} + +// DeploymentStrategy is a single-key sum-type: exactly one of fixed|linear|exponential must be set +type DeploymentStrategy struct { + // +optional + Fixed *FixedStrategy `json:"fixed,omitempty"` + // +optional + Linear *LinearStrategy `json:"linear,omitempty"` + // +optional + Exponential *ExponentialStrategy `json:"exponential,omitempty"` +} + +// Budget ceiling either in percent or count +type DeploymentBudget struct { + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=100 + // +optional + Percent *int `json:"percent,omitempty"` + // +kubebuilder:validation:Minimum=1 + // +optional + Count *int `json:"count,omitempty"` +} + +// PolicyDefault defines default budget and strategy for unmatched nodes +type PolicyDefault struct { + // Exactly one of percent or count + Budget DeploymentBudget `json:"budget,omitempty"` + // Strategy to use + // +optional + Strategy *DeploymentStrategy `json:"strategy,omitempty"` +} + +// Compartment defines a named selector with its own ceiling and optional strategy +type Compartment struct { + // Unique name within the policy + // +kubebuilder:validation:MinLength=1 + Name string `json:"name"` + // Selector defining the nodes in this compartment + Selector metav1.LabelSelector `json:"selector"` + // Exactly one of percent or count + Budget DeploymentBudget `json:"budget"` + // Optional per-compartment strategy override + // +optional + Strategy *DeploymentStrategy `json:"strategy,omitempty"` +} + +// DeploymentPolicySpec defines rollout ceilings/strategy by default and per-compartment +type DeploymentPolicySpec struct { + // Default budget/strategy for unmatched nodes + Default PolicyDefault `json:"default"` + // Compartments, each with selector and budget; optional strategy + // +optional + Compartments []Compartment `json:"compartments,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope=Cluster + +// DeploymentPolicy configures safe rollout defaults and compartment overrides +type DeploymentPolicy struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec DeploymentPolicySpec `json:"spec,omitempty"` +} + +// +kubebuilder:object:root=true + +func init() { + SchemeBuilder.Register(&DeploymentPolicy{}) +} diff --git a/operator/api/v1alpha1/skyhook_types.go b/operator/api/v1alpha1/skyhook_types.go index 77ac7823..9cb4489d 100644 --- a/operator/api/v1alpha1/skyhook_types.go +++ b/operator/api/v1alpha1/skyhook_types.go @@ -56,6 +56,10 @@ type SkyhookSpec struct { // NodeSelector are a set of labels we want to monitor nodes for applying packages too NodeSelector metav1.LabelSelector `json:"nodeSelectors,omitempty"` + // DeploymentPolicy is the name of a DeploymentPolicy for rollout settings + // +optional + DeploymentPolicy string `json:"deploymentPolicy,omitempty"` + // InterruptionBudget configures how many nodes that match node selectors that allowed to be interrupted at once. InterruptionBudget InterruptionBudget `json:"interruptionBudget,omitempty"` diff --git a/operator/api/v1alpha1/zz_generated.deepcopy.go b/operator/api/v1alpha1/zz_generated.deepcopy.go index 86a58578..86acb6ae 100644 --- a/operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/operator/api/v1alpha1/zz_generated.deepcopy.go @@ -1,5 +1,3 @@ -//go:build !ignore_autogenerated - /* * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 @@ -18,6 +16,8 @@ * limitations under the License. */ +//go:build !ignore_autogenerated + // Code generated by controller-gen. DO NOT EDIT. package v1alpha1 @@ -28,6 +28,162 @@ import ( "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Compartment) DeepCopyInto(out *Compartment) { + *out = *in + in.Selector.DeepCopyInto(&out.Selector) + in.Budget.DeepCopyInto(&out.Budget) + if in.Strategy != nil { + in, out := &in.Strategy, &out.Strategy + *out = new(DeploymentStrategy) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Compartment. +func (in *Compartment) DeepCopy() *Compartment { + if in == nil { + return nil + } + out := new(Compartment) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DeploymentBudget) DeepCopyInto(out *DeploymentBudget) { + *out = *in + if in.Percent != nil { + in, out := &in.Percent, &out.Percent + *out = new(int) + **out = **in + } + if in.Count != nil { + in, out := &in.Count, &out.Count + *out = new(int) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentBudget. +func (in *DeploymentBudget) DeepCopy() *DeploymentBudget { + if in == nil { + return nil + } + out := new(DeploymentBudget) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DeploymentPolicy) DeepCopyInto(out *DeploymentPolicy) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentPolicy. +func (in *DeploymentPolicy) DeepCopy() *DeploymentPolicy { + if in == nil { + return nil + } + out := new(DeploymentPolicy) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *DeploymentPolicy) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DeploymentPolicySpec) DeepCopyInto(out *DeploymentPolicySpec) { + *out = *in + in.Default.DeepCopyInto(&out.Default) + if in.Compartments != nil { + in, out := &in.Compartments, &out.Compartments + *out = make([]Compartment, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentPolicySpec. +func (in *DeploymentPolicySpec) DeepCopy() *DeploymentPolicySpec { + if in == nil { + return nil + } + out := new(DeploymentPolicySpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DeploymentStrategy) DeepCopyInto(out *DeploymentStrategy) { + *out = *in + if in.Fixed != nil { + in, out := &in.Fixed, &out.Fixed + *out = new(FixedStrategy) + **out = **in + } + if in.Linear != nil { + in, out := &in.Linear, &out.Linear + *out = new(LinearStrategy) + **out = **in + } + if in.Exponential != nil { + in, out := &in.Exponential, &out.Exponential + *out = new(ExponentialStrategy) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentStrategy. +func (in *DeploymentStrategy) DeepCopy() *DeploymentStrategy { + if in == nil { + return nil + } + out := new(DeploymentStrategy) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExponentialStrategy) DeepCopyInto(out *ExponentialStrategy) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExponentialStrategy. +func (in *ExponentialStrategy) DeepCopy() *ExponentialStrategy { + if in == nil { + return nil + } + out := new(ExponentialStrategy) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FixedStrategy) DeepCopyInto(out *FixedStrategy) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FixedStrategy. +func (in *FixedStrategy) DeepCopy() *FixedStrategy { + if in == nil { + return nil + } + out := new(FixedStrategy) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Interrupt) DeepCopyInto(out *Interrupt) { *out = *in @@ -73,6 +229,21 @@ func (in *InterruptionBudget) DeepCopy() *InterruptionBudget { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LinearStrategy) DeepCopyInto(out *LinearStrategy) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LinearStrategy. +func (in *LinearStrategy) DeepCopy() *LinearStrategy { + if in == nil { + return nil + } + out := new(LinearStrategy) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in NodeState) DeepCopyInto(out *NodeState) { { @@ -204,6 +375,27 @@ func (in Packages) DeepCopy() Packages { return *out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PolicyDefault) DeepCopyInto(out *PolicyDefault) { + *out = *in + in.Budget.DeepCopyInto(&out.Budget) + if in.Strategy != nil { + in, out := &in.Strategy, &out.Strategy + *out = new(DeploymentStrategy) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PolicyDefault. +func (in *PolicyDefault) DeepCopy() *PolicyDefault { + if in == nil { + return nil + } + out := new(PolicyDefault) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ResourceRequirements) DeepCopyInto(out *ResourceRequirements) { *out = *in diff --git a/operator/config/crd/bases/skyhook.nvidia.com_deploymentpolicies.yaml b/operator/config/crd/bases/skyhook.nvidia.com_deploymentpolicies.yaml new file mode 100644 index 00000000..538dd204 --- /dev/null +++ b/operator/config/crd/bases/skyhook.nvidia.com_deploymentpolicies.yaml @@ -0,0 +1,278 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.18.0 + name: deploymentpolicies.skyhook.nvidia.com +spec: + group: skyhook.nvidia.com + names: + kind: DeploymentPolicy + listKind: DeploymentPolicyList + plural: deploymentpolicies + singular: deploymentpolicy + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: DeploymentPolicy configures safe rollout defaults and compartment + overrides + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: DeploymentPolicySpec defines rollout ceilings/strategy by + default and per-compartment + properties: + compartments: + description: Compartments, each with selector and budget; optional + strategy + items: + description: Compartment defines a named selector with its own ceiling + and optional strategy + properties: + budget: + description: Exactly one of percent or count + properties: + count: + minimum: 1 + type: integer + percent: + maximum: 100 + minimum: 1 + type: integer + type: object + name: + description: Unique name within the policy + minLength: 1 + type: string + selector: + description: Selector defining the nodes in this compartment + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + strategy: + description: Optional per-compartment strategy override + properties: + exponential: + properties: + batchThreshold: + maximum: 100 + minimum: 1 + type: integer + failureThreshold: + minimum: 1 + type: integer + growthFactor: + minimum: 2 + type: integer + initialBatch: + minimum: 1 + type: integer + safetyLimit: + maximum: 100 + minimum: 1 + type: integer + type: object + fixed: + description: Strategy parameters + properties: + batchThreshold: + maximum: 100 + minimum: 1 + type: integer + failureThreshold: + minimum: 1 + type: integer + initialBatch: + minimum: 1 + type: integer + safetyLimit: + maximum: 100 + minimum: 1 + type: integer + type: object + linear: + properties: + batchThreshold: + maximum: 100 + minimum: 1 + type: integer + delta: + minimum: 1 + type: integer + failureThreshold: + minimum: 1 + type: integer + initialBatch: + minimum: 1 + type: integer + safetyLimit: + maximum: 100 + minimum: 1 + type: integer + type: object + type: object + required: + - budget + - name + - selector + type: object + type: array + default: + description: Default budget/strategy for unmatched nodes + properties: + budget: + description: Exactly one of percent or count + properties: + count: + minimum: 1 + type: integer + percent: + maximum: 100 + minimum: 1 + type: integer + type: object + strategy: + description: Strategy to use + properties: + exponential: + properties: + batchThreshold: + maximum: 100 + minimum: 1 + type: integer + failureThreshold: + minimum: 1 + type: integer + growthFactor: + minimum: 2 + type: integer + initialBatch: + minimum: 1 + type: integer + safetyLimit: + maximum: 100 + minimum: 1 + type: integer + type: object + fixed: + description: Strategy parameters + properties: + batchThreshold: + maximum: 100 + minimum: 1 + type: integer + failureThreshold: + minimum: 1 + type: integer + initialBatch: + minimum: 1 + type: integer + safetyLimit: + maximum: 100 + minimum: 1 + type: integer + type: object + linear: + properties: + batchThreshold: + maximum: 100 + minimum: 1 + type: integer + delta: + minimum: 1 + type: integer + failureThreshold: + minimum: 1 + type: integer + initialBatch: + minimum: 1 + type: integer + safetyLimit: + maximum: 100 + minimum: 1 + type: integer + type: object + type: object + type: object + required: + - default + type: object + type: object + served: true + storage: true diff --git a/operator/config/crd/bases/skyhook.nvidia.com_skyhooks.yaml b/operator/config/crd/bases/skyhook.nvidia.com_skyhooks.yaml index 6062b4d9..43df1e9f 100644 --- a/operator/config/crd/bases/skyhook.nvidia.com_skyhooks.yaml +++ b/operator/config/crd/bases/skyhook.nvidia.com_skyhooks.yaml @@ -110,6 +110,10 @@ spec: type: string type: object type: array + deploymentPolicy: + description: DeploymentPolicy is the name of a DeploymentPolicy for + rollout settings + type: string interruptionBudget: description: InterruptionBudget configures how many nodes that match node selectors that allowed to be interrupted at once. From 3326f54855ba2d7543306af324273e4ac3c247dc Mon Sep 17 00:00:00 2001 From: Tommy Lam Date: Wed, 24 Sep 2025 15:07:49 -0700 Subject: [PATCH 2/2] add deployment policy to project file --- chart/templates/deploymentpolicy-crd.yaml | 2 +- operator/PROJECT | 8 ++++++++ operator/api/v1alpha1/deployment_policy_types.go | 2 +- .../crd/bases/skyhook.nvidia.com_deploymentpolicies.yaml | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/chart/templates/deploymentpolicy-crd.yaml b/chart/templates/deploymentpolicy-crd.yaml index 538dd204..59297763 100644 --- a/chart/templates/deploymentpolicy-crd.yaml +++ b/chart/templates/deploymentpolicy-crd.yaml @@ -28,7 +28,7 @@ spec: listKind: DeploymentPolicyList plural: deploymentpolicies singular: deploymentpolicy - scope: Cluster + scope: Namespaced versions: - name: v1alpha1 schema: diff --git a/operator/PROJECT b/operator/PROJECT index c06c3408..080268d5 100644 --- a/operator/PROJECT +++ b/operator/PROJECT @@ -26,4 +26,12 @@ resources: kind: Pod path: k8s.io/api/core/v1 version: v1 +- api: + crdVersion: v1 + namespaced: true + domain: nvidia.com + group: skyhook + kind: DeploymentPolicy + path: github.com/NVIDIA/skyhook/operator/api/v1alpha1 + version: v1alpha1 version: "3" diff --git a/operator/api/v1alpha1/deployment_policy_types.go b/operator/api/v1alpha1/deployment_policy_types.go index 6ebf3d1e..629cec15 100644 --- a/operator/api/v1alpha1/deployment_policy_types.go +++ b/operator/api/v1alpha1/deployment_policy_types.go @@ -139,7 +139,7 @@ type DeploymentPolicySpec struct { } // +kubebuilder:object:root=true -// +kubebuilder:resource:scope=Cluster +// +kubebuilder:resource:scope=Namespaced // DeploymentPolicy configures safe rollout defaults and compartment overrides type DeploymentPolicy struct { diff --git a/operator/config/crd/bases/skyhook.nvidia.com_deploymentpolicies.yaml b/operator/config/crd/bases/skyhook.nvidia.com_deploymentpolicies.yaml index 538dd204..59297763 100644 --- a/operator/config/crd/bases/skyhook.nvidia.com_deploymentpolicies.yaml +++ b/operator/config/crd/bases/skyhook.nvidia.com_deploymentpolicies.yaml @@ -28,7 +28,7 @@ spec: listKind: DeploymentPolicyList plural: deploymentpolicies singular: deploymentpolicy - scope: Cluster + scope: Namespaced versions: - name: v1alpha1 schema: