diff --git a/chart/templates/skyhook-crd.yaml b/chart/templates/skyhook-crd.yaml index 89112945..4eb332ea 100644 --- a/chart/templates/skyhook-crd.yaml +++ b/chart/templates/skyhook-crd.yaml @@ -231,6 +231,13 @@ spec: The keys stored in Data must not overlap with the keys in the BinaryData field, this is enforced during validation process. type: object + containerSHA: + description: |- + ContainerSHA is the SHA256 digest of the container image for verification purposes. + When specified, this will be used instead of the version tag to pull the exact image. + Format: sha256:1234567890abcdef... + example: sha256:1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef + type: string dependsOn: additionalProperties: type: string @@ -694,6 +701,10 @@ spec: additionalProperties: additionalProperties: properties: + containerSHA: + description: ContainerSHA is the SHA256 digest that was actually + deployed + type: string image: description: Image for the package type: string diff --git a/k8s-tests/chainsaw/skyhook/simple-skyhook/assert.yaml b/k8s-tests/chainsaw/skyhook/simple-skyhook/assert.yaml index 1740cf02..2872fe9b 100644 --- a/k8s-tests/chainsaw/skyhook/simple-skyhook/assert.yaml +++ b/k8s-tests/chainsaw/skyhook/simple-skyhook/assert.yaml @@ -34,6 +34,7 @@ metadata: "name": "foobar", "version": "1.2", "image": "ghcr.io/nvidia/skyhook/agentless", + "containerSHA": "sha256:a9cc86f68d73e874bf2dfbabab5e8a8f710ad78411a03764d1b42aa1b5cda7dd", "stage": "config", "state": "complete" }, @@ -74,6 +75,7 @@ status: state: complete version: '1.2' image: ghcr.io/nvidia/skyhook/agentless + containerSHA: sha256:a9cc86f68d73e874bf2dfbabab5e8a8f710ad78411a03764d1b42aa1b5cda7dd stage: config spencer|3.2.3: name: spencer diff --git a/k8s-tests/chainsaw/skyhook/simple-skyhook/skyhook.yaml b/k8s-tests/chainsaw/skyhook/simple-skyhook/skyhook.yaml index 9918ff26..69ffeeff 100644 --- a/k8s-tests/chainsaw/skyhook/simple-skyhook/skyhook.yaml +++ b/k8s-tests/chainsaw/skyhook/simple-skyhook/skyhook.yaml @@ -42,6 +42,7 @@ spec: foobar: version: "1.2" image: ghcr.io/nvidia/skyhook/agentless + containerSHA: "sha256:a9cc86f68d73e874bf2dfbabab5e8a8f710ad78411a03764d1b42aa1b5cda7dd" dependsOn: dexter: "1.2.3" env: diff --git a/operator/api/v1alpha1/skyhook_types.go b/operator/api/v1alpha1/skyhook_types.go index 9375be25..47518af1 100644 --- a/operator/api/v1alpha1/skyhook_types.go +++ b/operator/api/v1alpha1/skyhook_types.go @@ -196,6 +196,13 @@ type Package struct { //+kubebuilder:validation:Required Image string `json:"image"` + // ContainerSHA is the SHA256 digest of the container image for verification purposes. + // When specified, this will be used instead of the version tag to pull the exact image. + // Format: sha256:1234567890abcdef... + //+kubebuilder:example="sha256:1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef" + //+optional + ContainerSHA string `json:"containerSHA,omitempty"` + // Agent Image Override is the container image to override at the package level. Full qualified image with tag. // This overrides the image provided via ENV to the operator. //+kubebuilder:example="alpine:3.21.0" @@ -365,19 +372,20 @@ type SkyhookStatus struct { type NodeState map[string]PackageStatus // Upsert adds or updates specified state for package in the node state -func (ns *NodeState) Upsert(_package PackageRef, image string, state State, stage Stage, restarts int32) bool { +func (ns *NodeState) Upsert(_package PackageRef, image string, state State, stage Stage, restarts int32, containerSHA string) bool { if *ns == nil { *ns = make(map[string]PackageStatus) } status := PackageStatus{ - Name: _package.Name, - Version: _package.Version, - State: state, - Image: image, - Stage: stage, - Restarts: restarts, + Name: _package.Name, + Version: _package.Version, + State: state, + Image: image, + Stage: stage, + Restarts: restarts, + ContainerSHA: containerSHA, } existing, ok := (*ns)[_package.GetUniqueName()] @@ -567,6 +575,10 @@ type PackageStatus struct { //+kubebuilder:validation:Required Image string `json:"image"` + // ContainerSHA is the SHA256 digest that was actually deployed + //+optional + ContainerSHA string `json:"containerSHA,omitempty"` + // Stage is where in the package install process is currently for a node. // these stages encapsulate checks. Both Apply and PostInterrupt also run checks, // these are all or nothing, meaning both need to be successful in order to transition @@ -583,10 +595,11 @@ type PackageStatus struct { Restarts int32 `json:"restarts,omitempty"` } -// Equal checks name, version, state, state (not restarts) +// Equal checks name, version, image, containerSHA, stage, state, and restarts func (left *PackageStatus) Equal(right *PackageStatus) bool { return left.Name == right.Name && left.Version == right.Version && + left.ContainerSHA == right.ContainerSHA && left.Stage == right.Stage && left.State == right.State && left.Restarts == right.Restarts diff --git a/operator/api/v1alpha1/skyhook_types_test.go b/operator/api/v1alpha1/skyhook_types_test.go index 097b7f15..b82d09de 100644 --- a/operator/api/v1alpha1/skyhook_types_test.go +++ b/operator/api/v1alpha1/skyhook_types_test.go @@ -298,19 +298,19 @@ var _ = Describe("Skyhook Types", func() { Expect(nodeState.Upsert(PackageRef{ Name: "foo", Version: "1.2.3", - }, "", StateComplete, stage, 2)).To(BeTrue()) + }, "", StateComplete, stage, 2, "")).To(BeTrue()) Expect(nodeState.Upsert(PackageRef{ Name: "bar", Version: "2.3", - }, "", StateComplete, stage, 2)).To(BeTrue()) + }, "", StateComplete, stage, 2, "")).To(BeTrue()) Expect(nodeState.Upsert(PackageRef{ // replace Name: "bar", Version: "2", - }, "", StateComplete, stage, 2)).To(BeTrue()) + }, "", StateComplete, stage, 2, "")).To(BeTrue()) Expect(nodeState.Upsert(PackageRef{ // exists Name: "bar", Version: "2", - }, "", StateComplete, stage, 2)).To(BeFalse()) + }, "", StateComplete, stage, 2, "")).To(BeFalse()) interrupts := map[string][]*Interrupt{} configUpdates := map[string][]string{} diff --git a/operator/config/crd/bases/skyhook.nvidia.com_skyhooks.yaml b/operator/config/crd/bases/skyhook.nvidia.com_skyhooks.yaml index bbecc69c..d273abfd 100644 --- a/operator/config/crd/bases/skyhook.nvidia.com_skyhooks.yaml +++ b/operator/config/crd/bases/skyhook.nvidia.com_skyhooks.yaml @@ -225,6 +225,13 @@ spec: The keys stored in Data must not overlap with the keys in the BinaryData field, this is enforced during validation process. type: object + containerSHA: + description: |- + ContainerSHA is the SHA256 digest of the container image for verification purposes. + When specified, this will be used instead of the version tag to pull the exact image. + Format: sha256:1234567890abcdef... + example: sha256:1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef + type: string dependsOn: additionalProperties: type: string @@ -695,6 +702,10 @@ spec: additionalProperties: additionalProperties: properties: + containerSHA: + description: ContainerSHA is the SHA256 digest that was actually + deployed + type: string image: description: Image for the package type: string diff --git a/operator/internal/controller/annotations.go b/operator/internal/controller/annotations.go index 35a5c1a2..a754e941 100644 --- a/operator/internal/controller/annotations.go +++ b/operator/internal/controller/annotations.go @@ -31,6 +31,7 @@ type PackageSkyhook struct { Skyhook string `json:"skyhook"` Stage v1alpha1.Stage `json:"stage"` Image string `json:"image"` + ContainerSHA string `json:"containerSHA,omitempty"` Invalid bool `json:"invalid,omitempty"` } @@ -59,10 +60,11 @@ func SetPackages(pod *corev1.Pod, skyhook *v1alpha1.Skyhook, image string, stage } strk := &PackageSkyhook{ - Skyhook: skyhook.Name, - Stage: stage, - PackageRef: _package.PackageRef, - Image: image, + Skyhook: skyhook.Name, + Stage: stage, + PackageRef: _package.PackageRef, + Image: image, + ContainerSHA: _package.ContainerSHA, } data, err := json.Marshal(strk) diff --git a/operator/internal/controller/pod_controller.go b/operator/internal/controller/pod_controller.go index 26ce6790..f36c62ae 100644 --- a/operator/internal/controller/pod_controller.go +++ b/operator/internal/controller/pod_controller.go @@ -164,7 +164,7 @@ func (r *SkyhookReconciler) UpdateNodeState(ctx context.Context, pod *corev1.Pod } if !updated { - err = skyhookNode.Upsert(packagePtr.PackageRef, packagePtr.Image, state, packagePtr.Stage, restarts) + err = skyhookNode.Upsert(packagePtr.PackageRef, packagePtr.Image, state, packagePtr.Stage, restarts, packagePtr.ContainerSHA) if err != nil { return false, err } @@ -240,7 +240,7 @@ func (r *SkyhookReconciler) HandleCompletePod(ctx context.Context, skyhookNode w if exists { // If the uninstall was caused by a version changed progress forward the new version that was waiting // on the uninstall to finish - err = skyhookNode.Upsert(_package.PackageRef, _package.Image, v1alpha1.StateComplete, v1alpha1.StageUninstall, 0) + err = skyhookNode.Upsert(_package.PackageRef, _package.Image, v1alpha1.StateComplete, v1alpha1.StageUninstall, 0, _package.ContainerSHA) if err != nil { return false, fmt.Errorf("error updating node status: %w", err) } diff --git a/operator/internal/controller/skyhook_controller.go b/operator/internal/controller/skyhook_controller.go index 94413db9..11aa2b69 100644 --- a/operator/internal/controller/skyhook_controller.go +++ b/operator/internal/controller/skyhook_controller.go @@ -707,7 +707,7 @@ func HandleVersionChange(skyhook SkyhookNodes) ([]*v1alpha1.Package, error) { if !exists && packageStatus.Stage != v1alpha1.StageUninstall { // Start uninstall of old package - err := node.Upsert(packageStatusRef, packageStatus.Image, v1alpha1.StateInProgress, v1alpha1.StageUninstall, 0) + err := node.Upsert(packageStatusRef, packageStatus.Image, v1alpha1.StateInProgress, v1alpha1.StageUninstall, 0, "") if err != nil { return nil, fmt.Errorf("error updating node status: %w", err) } @@ -724,7 +724,7 @@ func HandleVersionChange(skyhook SkyhookNodes) ([]*v1alpha1.Package, error) { } // start upgrade of package - err := node.Upsert(_package.PackageRef, _package.Image, v1alpha1.StateInProgress, v1alpha1.StageUpgrade, 0) + err := node.Upsert(_package.PackageRef, _package.Image, v1alpha1.StateInProgress, v1alpha1.StageUpgrade, 0, _package.ContainerSHA) if err != nil { return nil, fmt.Errorf("error updating node status: %w", err) } @@ -732,13 +732,13 @@ func HandleVersionChange(skyhook SkyhookNodes) ([]*v1alpha1.Package, error) { upgrade = true } else if comparison == -1 && packageStatus.Stage != v1alpha1.StageUninstall { // Start uninstall of old package - err := node.Upsert(packageStatusRef, packageStatus.Image, v1alpha1.StateInProgress, v1alpha1.StageUninstall, 0) + err := node.Upsert(packageStatusRef, packageStatus.Image, v1alpha1.StateInProgress, v1alpha1.StageUninstall, 0, "") if err != nil { return nil, fmt.Errorf("error updating node status: %w", err) } // If version changed then update new version to wait - err = node.Upsert(_package.PackageRef, _package.Image, v1alpha1.StateSkipped, v1alpha1.StageUninstall, 0) + err = node.Upsert(_package.PackageRef, _package.Image, v1alpha1.StateSkipped, v1alpha1.StageUninstall, 0, _package.ContainerSHA) if err != nil { return nil, fmt.Errorf("error updating node status: %w", err) } @@ -949,7 +949,7 @@ func (r *SkyhookReconciler) HandleConfigUpdates(ctx context.Context, clusterStat skyhook.GetSkyhook().AddConfigUpdates(_package.Name, newConfigUpdates...) for _, node := range skyhook.GetNodes() { - err := node.Upsert(_package.PackageRef, _package.Image, v1alpha1.StateInProgress, v1alpha1.StageConfig, 0) + err := node.Upsert(_package.PackageRef, _package.Image, v1alpha1.StateInProgress, v1alpha1.StageConfig, 0, _package.ContainerSHA) if err != nil { return false, fmt.Errorf("error upserting node status [%s]: %w", node.GetNode().Name, err) } @@ -1291,7 +1291,7 @@ func (r *SkyhookReconciler) Interrupt(ctx context.Context, skyhookNode wrapper.S return fmt.Errorf("error creating interruption pod: %w", err) } - _ = skyhookNode.Upsert(_package.PackageRef, _package.Image, v1alpha1.StateInProgress, v1alpha1.StageInterrupt, 0) + _ = skyhookNode.Upsert(_package.PackageRef, _package.Image, v1alpha1.StateInProgress, v1alpha1.StageInterrupt, 0, _package.ContainerSHA) r.recorder.Eventf(skyhookNode.GetSkyhook().Skyhook, EventTypeNormal, EventsReasonSkyhookInterrupt, "Interrupting node [%s] package [%s:%s] from [skyhook:%s]", @@ -1603,6 +1603,16 @@ func getAgentImage(opts SkyhookOperatorOptions, _package *v1alpha1.Package) stri return opts.AgentImage } +// getPackageImage returns the full image reference for a package, using the digest if specified +func getPackageImage(_package *v1alpha1.Package) string { + if _package.ContainerSHA != "" { + // When containerSHA is specified, use it instead of the version tag for immutable image reference + return fmt.Sprintf("%s@%s", _package.Image, _package.ContainerSHA) + } + // Fall back to version tag + return fmt.Sprintf("%s:%s", _package.Image, _package.Version) +} + func getAgentConfigEnvVars(opts SkyhookOperatorOptions, packageName string, packageVersion string, resourceID string, skyhookName string) []corev1.EnvVar { return []corev1.EnvVar{ { @@ -1712,7 +1722,7 @@ func createPodFromPackage(opts SkyhookOperatorOptions, _package *v1alpha1.Packag InitContainers: []corev1.Container{ { Name: fmt.Sprintf("%s-init", trunstr(_package.Name, 43)), - Image: fmt.Sprintf("%s:%s", _package.Image, _package.Version), + Image: getPackageImage(_package), ImagePullPolicy: "Always", Command: []string{"/bin/sh"}, Args: []string{ @@ -2070,7 +2080,7 @@ func (r *SkyhookReconciler) ProcessInterrupt(ctx context.Context, skyhookNode wr //skipping if stage == v1alpha1.StageInterrupt && !runInterrupt { - err := skyhookNode.Upsert(_package.PackageRef, _package.Image, v1alpha1.StateSkipped, stage, 0) + err := skyhookNode.Upsert(_package.PackageRef, _package.Image, v1alpha1.StateSkipped, stage, 0, _package.ContainerSHA) if err != nil { return false, fmt.Errorf("error upserting to skip interrupt: %w", err) } @@ -2185,7 +2195,7 @@ func (r *SkyhookReconciler) ApplyPackage(ctx context.Context, logger logr.Logger return fmt.Errorf("error creating pod: %w", err) } - if err = skyhookNode.Upsert(_package.PackageRef, _package.Image, v1alpha1.StateInProgress, stage, 0); err != nil { + if err = skyhookNode.Upsert(_package.PackageRef, _package.Image, v1alpha1.StateInProgress, stage, 0, _package.ContainerSHA); err != nil { err = fmt.Errorf("error upserting package: %w", err) // want to keep going in this case, but don't want to lose the err } diff --git a/operator/internal/wrapper/node.go b/operator/internal/wrapper/node.go index 8e6e8a45..9dd6576a 100644 --- a/operator/internal/wrapper/node.go +++ b/operator/internal/wrapper/node.go @@ -62,7 +62,7 @@ type SkyhookNodeOnly interface { State() (v1alpha1.NodeState, error) SetState(state v1alpha1.NodeState) error RemoveState(_package v1alpha1.PackageRef) error - Upsert(_package v1alpha1.PackageRef, image string, state v1alpha1.State, stage v1alpha1.Stage, restarts int32) error + Upsert(_package v1alpha1.PackageRef, image string, state v1alpha1.State, stage v1alpha1.Stage, restarts int32, containerSHA string) error GetNode() *corev1.Node Taint(key string) RemoveTaint(key string) @@ -283,8 +283,8 @@ func (node *skyhookNode) RemoveState(_package v1alpha1.PackageRef) error { return nil } -func (node *skyhookNode) Upsert(_package v1alpha1.PackageRef, image string, state v1alpha1.State, stage v1alpha1.Stage, restarts int32) error { - changed := node.nodeState.Upsert(_package, image, state, stage, restarts) +func (node *skyhookNode) Upsert(_package v1alpha1.PackageRef, image string, state v1alpha1.State, stage v1alpha1.Stage, restarts int32, containerSHA string) error { + changed := node.nodeState.Upsert(_package, image, state, stage, restarts, containerSHA) if changed { if node.skyhook != nil { node.skyhook.Updated = true diff --git a/operator/internal/wrapper/node_test.go b/operator/internal/wrapper/node_test.go index ea5e2bec..6ffde60d 100644 --- a/operator/internal/wrapper/node_test.go +++ b/operator/internal/wrapper/node_test.go @@ -84,7 +84,7 @@ var _ = Describe("SkyhookNode", func() { Expect(pkgs[1].Name).To(Equal("b-package")) // Complete b-package - err = skyhookNode.Upsert(v1alpha1.PackageRef{Name: "b-package", Version: "1.0"}, "image", v1alpha1.StateComplete, v1alpha1.StageConfig, 0) + err = skyhookNode.Upsert(v1alpha1.PackageRef{Name: "b-package", Version: "1.0"}, "image", v1alpha1.StateComplete, v1alpha1.StageConfig, 0, "") Expect(err).NotTo(HaveOccurred()) // Should still get a-package since c-package depends on both a and b pkgs, err = skyhookNode.RunNext() @@ -93,7 +93,7 @@ var _ = Describe("SkyhookNode", func() { Expect(pkgs[0].Name).To(Equal("a-package")) // Complete a-package - err = skyhookNode.Upsert(v1alpha1.PackageRef{Name: "a-package", Version: "1.0"}, "image", v1alpha1.StateComplete, v1alpha1.StageConfig, 0) + err = skyhookNode.Upsert(v1alpha1.PackageRef{Name: "a-package", Version: "1.0"}, "image", v1alpha1.StateComplete, v1alpha1.StageConfig, 0, "") Expect(err).NotTo(HaveOccurred()) // Now should get c-package since both dependencies are complete pkgs, err = skyhookNode.RunNext() @@ -102,7 +102,7 @@ var _ = Describe("SkyhookNode", func() { Expect(pkgs[0].Name).To(Equal("c-package")) // Complete c-package - err = skyhookNode.Upsert(v1alpha1.PackageRef{Name: "c-package", Version: "1.0"}, "image", v1alpha1.StateComplete, v1alpha1.StageConfig, 0) + err = skyhookNode.Upsert(v1alpha1.PackageRef{Name: "c-package", Version: "1.0"}, "image", v1alpha1.StateComplete, v1alpha1.StageConfig, 0, "") Expect(err).NotTo(HaveOccurred()) // Now should get d-package since c-package is complete pkgs, err = skyhookNode.RunNext() @@ -112,7 +112,7 @@ var _ = Describe("SkyhookNode", func() { Expect(pkgs[1].Name).To(Equal("e-package")) // Complete e-package - err = skyhookNode.Upsert(v1alpha1.PackageRef{Name: "e-package", Version: "1.0"}, "image", v1alpha1.StateComplete, v1alpha1.StageConfig, 0) + err = skyhookNode.Upsert(v1alpha1.PackageRef{Name: "e-package", Version: "1.0"}, "image", v1alpha1.StateComplete, v1alpha1.StageConfig, 0, "") Expect(err).NotTo(HaveOccurred()) // Now should get d-package since c-package and e-package are complete pkgs, err = skyhookNode.RunNext() @@ -121,7 +121,7 @@ var _ = Describe("SkyhookNode", func() { Expect(pkgs[0].Name).To(Equal("d-package")) // Complete d-package - err = skyhookNode.Upsert(v1alpha1.PackageRef{Name: "d-package", Version: "1.0"}, "image", v1alpha1.StateComplete, v1alpha1.StageConfig, 0) + err = skyhookNode.Upsert(v1alpha1.PackageRef{Name: "d-package", Version: "1.0"}, "image", v1alpha1.StateComplete, v1alpha1.StageConfig, 0, "") Expect(err).NotTo(HaveOccurred()) // Now should get f-package since both d-package and e-package are complete pkgs, err = skyhookNode.RunNext() @@ -130,7 +130,7 @@ var _ = Describe("SkyhookNode", func() { Expect(pkgs[0].Name).To(Equal("f-package")) // Complete f-package - err = skyhookNode.Upsert(v1alpha1.PackageRef{Name: "f-package", Version: "1.0"}, "image", v1alpha1.StateComplete, v1alpha1.StageConfig, 0) + err = skyhookNode.Upsert(v1alpha1.PackageRef{Name: "f-package", Version: "1.0"}, "image", v1alpha1.StateComplete, v1alpha1.StageConfig, 0, "") Expect(err).NotTo(HaveOccurred()) // Now should get nothing since all packages are complete pkgs, err = skyhookNode.RunNext() diff --git a/operator/internal/wrapper/zz.migration.0.5.0.go b/operator/internal/wrapper/zz.migration.0.5.0.go index 26e615d4..0eedfd7b 100644 --- a/operator/internal/wrapper/zz.migration.0.5.0.go +++ b/operator/internal/wrapper/zz.migration.0.5.0.go @@ -42,7 +42,7 @@ func migrateNodeTo_0_5_0(node *skyhookNode, logger logr.Logger) error { if exists && packageStatus.Version == _package.Version { // upsert to migrate - err := node.Upsert(packageStatusRef, _package.Image, packageStatus.State, packageStatus.Stage, packageStatus.Restarts) + err := node.Upsert(packageStatusRef, _package.Image, packageStatus.State, packageStatus.Stage, packageStatus.Restarts, "") if err != nil { return err }