diff --git a/.github/actions/kwok-test/action.yml b/.github/actions/kwok-test/action.yml
index 02c2f8a3d..76f9f46e5 100644
--- a/.github/actions/kwok-test/action.yml
+++ b/.github/actions/kwok-test/action.yml
@@ -19,6 +19,10 @@ inputs:
   recipe:
     description: 'Recipe name to test'
     required: true
+  deployer:
+    description: 'Deployer to exercise: helm | argocd-oci | argocd-helm-oci | flux-oci'
+    required: false
+    default: 'helm'
   go_version:
     description: 'Go version to install'
     required: true
@@ -108,7 +112,9 @@ runs:
       env:
         KIND_NODE_IMAGE: ${{ inputs.kind_node_image }}
       run: |
-        KWOK_CLUSTER=aicr-kwok-test bash kwok/scripts/run-all-recipes.sh ${{ inputs.recipe }}
+        KWOK_CLUSTER=aicr-kwok-test bash kwok/scripts/run-all-recipes.sh \
+          --deployer ${{ inputs.deployer }} \
+          ${{ inputs.recipe }}
 
     - name: Collect debug artifacts
       if: failure()
@@ -122,6 +128,14 @@ runs:
           kubectl get nodes -o wide > "/tmp/kwok-debug-artifacts/${cluster}-nodes.txt" || true
           kubectl get events --all-namespaces --sort-by='.lastTimestamp' > "/tmp/kwok-debug-artifacts/${cluster}-events.txt" || true
           kubectl get pods --all-namespaces -o wide > "/tmp/kwok-debug-artifacts/${cluster}-pods.txt" || true
+          kubectl get applications -n argocd -o yaml > "/tmp/kwok-debug-artifacts/${cluster}-argo-apps.yaml" 2>/dev/null || true
+          kubectl logs -n argocd deploy/argocd-repo-server --tail=500 > "/tmp/kwok-debug-artifacts/${cluster}-argo-reposerver.log" 2>/dev/null || true
+          kubectl logs -n argocd statefulset/argocd-application-controller --tail=500 > "/tmp/kwok-debug-artifacts/${cluster}-argo-appcontroller.log" 2>/dev/null || true
+          kubectl get ocirepositories,kustomizations,helmreleases -A -o yaml > "/tmp/kwok-debug-artifacts/${cluster}-flux-resources.yaml" 2>/dev/null || true
+          kubectl logs -n flux-system deploy/source-controller --tail=500 > "/tmp/kwok-debug-artifacts/${cluster}-flux-source-controller.log" 2>/dev/null || true
+          kubectl logs -n flux-system deploy/kustomize-controller --tail=500 > "/tmp/kwok-debug-artifacts/${cluster}-flux-kustomize-controller.log" 2>/dev/null || true
+          kubectl logs -n flux-system deploy/helm-controller --tail=500 > "/tmp/kwok-debug-artifacts/${cluster}-flux-helm-controller.log" 2>/dev/null || true
+          kubectl logs -n aicr-registry deploy/registry --tail=200 > "/tmp/kwok-debug-artifacts/${cluster}-registry.log" 2>/dev/null || true
         done
 
     - name: Export Kind logs
@@ -137,7 +151,7 @@ runs:
       if: failure() && inputs.upload_artifacts == 'true'
       uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f  # v6.0.0
       with:
-        name: kwok-debug-${{ inputs.recipe }}-${{ github.run_id }}
+        name: kwok-debug-${{ inputs.recipe }}-${{ inputs.deployer }}-${{ github.run_id }}
         path: |
           /tmp/kwok-debug-artifacts/
           /tmp/kwok-kind-logs/
diff --git a/.github/actions/load-versions/action.yml b/.github/actions/load-versions/action.yml
index 5ca4be25d..eb43e66c2 100644
--- a/.github/actions/load-versions/action.yml
+++ b/.github/actions/load-versions/action.yml
@@ -85,6 +85,12 @@ outputs:
   yq:
     description: 'yq version'
     value: ${{ steps.versions.outputs.yq }}
+  registry_image:
+    description: 'OCI registry image (in-cluster registry for KWOK deployer-matrix CI)'
+    value: ${{ steps.versions.outputs.registry_image }}
+  argocd_chart:
+    description: 'Argo CD Helm chart version (for KWOK deployer-matrix CI)'
+    value: ${{ steps.versions.outputs.argocd_chart }}
   coverage_threshold:
     description: 'Minimum test coverage percentage'
     value: ${{ steps.versions.outputs.coverage_threshold }}
@@ -148,6 +154,8 @@ runs:
         echo "chainsaw_sha256_linux_amd64=$(yq eval '.testing_tools.chainsaw_checksums.linux_amd64' .settings.yaml)" >> $GITHUB_OUTPUT
         echo "chainsaw_sha256_linux_arm64=$(yq eval '.testing_tools.chainsaw_checksums.linux_arm64' .settings.yaml)" >> $GITHUB_OUTPUT
         echo "yq=$(yq eval '.testing_tools.yq' .settings.yaml)" >> $GITHUB_OUTPUT
+        echo "registry_image=$(yq eval '.testing_tools.registry_image' .settings.yaml)" >> $GITHUB_OUTPUT
+        echo "argocd_chart=$(yq eval '.testing_tools.argocd_chart' .settings.yaml)" >> $GITHUB_OUTPUT
 
         # Quality thresholds
         echo "coverage_threshold=$(yq eval '.quality.coverage_threshold' .settings.yaml)" >> $GITHUB_OUTPUT
@@ -188,6 +196,8 @@ runs:
         echo "  chainsaw_sha256_linux_amd64: ${{ steps.versions.outputs.chainsaw_sha256_linux_amd64 }}"
         echo "  chainsaw_sha256_linux_arm64: ${{ steps.versions.outputs.chainsaw_sha256_linux_arm64 }}"
         echo "  yq: ${{ steps.versions.outputs.yq }}"
+        echo "  registry_image: ${{ steps.versions.outputs.registry_image }}"
+        echo "  argocd_chart: ${{ steps.versions.outputs.argocd_chart }}"
         echo "  coverage_threshold: ${{ steps.versions.outputs.coverage_threshold }}"
         echo "  lint_timeout: ${{ steps.versions.outputs.lint_timeout }}"
         echo "  test_timeout: ${{ steps.versions.outputs.test_timeout }}"
diff --git a/.github/workflows/kwok-recipes.yaml b/.github/workflows/kwok-recipes.yaml
index ee09381e0..f5adffee8 100644
--- a/.github/workflows/kwok-recipes.yaml
+++ b/.github/workflows/kwok-recipes.yaml
@@ -224,7 +224,7 @@ jobs:
 
   # ── Tier 1: PR gate — generic overlays (PR + push, skip on schedule) ──
   test-tier1:
-    name: 'Tier 1: ${{ matrix.recipe }}'
+    name: 'Tier 1: ${{ matrix.recipe }} (${{ matrix.deployer }})'
     needs: discover
     if: >-
       github.event_name != 'schedule' &&
@@ -236,6 +236,7 @@ jobs:
       fail-fast: false
       matrix:
         recipe: ${{ fromJSON(needs.discover.outputs.tier1) }}
+        deployer: [helm, argocd-oci, argocd-helm-oci, flux-oci]
     steps:
       - name: Checkout Code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
@@ -250,6 +251,7 @@ jobs:
         uses: ./.github/actions/kwok-test
         with:
           recipe: ${{ matrix.recipe }}
+          deployer: ${{ matrix.deployer }}
           go_version: ${{ steps.versions.outputs.go }}
           goreleaser_version: ${{ steps.versions.outputs.goreleaser }}
           kind_version: ${{ steps.versions.outputs.kind }}
@@ -300,7 +302,7 @@ jobs:
   # Per ADR-003: separate concurrency group per SHA so successive merges
   # to main never cancel in-flight Tier 3 runs.
   test-tier3:
-    name: 'Tier 3: ${{ matrix.recipe }}'
+    name: 'Tier 3: ${{ matrix.recipe }} (${{ matrix.deployer }})'
     needs: discover
     concurrency:
       group: kwok-tier3-${{ github.sha }}
@@ -315,6 +317,7 @@ jobs:
       fail-fast: false
       matrix:
         recipe: ${{ fromJSON(needs.discover.outputs.tier3) }}
+        deployer: [helm, argocd-oci, argocd-helm-oci, flux-oci]
     steps:
       - name: Checkout Code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
@@ -329,6 +332,7 @@ jobs:
         uses: ./.github/actions/kwok-test
         with:
           recipe: ${{ matrix.recipe }}
+          deployer: ${{ matrix.deployer }}
           go_version: ${{ steps.versions.outputs.go }}
           goreleaser_version: ${{ steps.versions.outputs.goreleaser }}
           kind_version: ${{ steps.versions.outputs.kind }}
diff --git a/.settings.yaml b/.settings.yaml
index 448444cfc..130a04b43 100644
--- a/.settings.yaml
+++ b/.settings.yaml
@@ -85,6 +85,19 @@ testing_tools:
   yq: 'v4.53.2'
   # renovate: datasource=github-releases depName=kubernetes-sigs/karpenter depType=testing_tools
   karpenter: 'v1.12.0'
+  # In-cluster OCI registry for KWOK deployer-matrix CI (issue #843).
+  # renovate: datasource=docker depName=registry depType=testing_tools
+  registry_image: 'registry:2.8.3'
+  # Argo CD Helm chart for KWOK deployer-matrix CI. v9.5.x pins app v3.4.x,
+  # the first stable line with native OCI artifact source (issue #843).
+  # renovate: datasource=helm depName=argo-cd registryUrl=https://argoproj.github.io/argo-helm depType=testing_tools
+  argocd_chart: '9.5.14'
+  # Flux 2 release for KWOK deployer-matrix CI flux-oci lane (issue #843).
+  # install.yaml is downloaded from the GitHub release; source-controller,
+  # kustomize-controller and helm-controller are the only controllers the
+  # bundle consumes (OCIRepository -> Kustomization -> HelmRelease).
+  # renovate: datasource=github-releases depName=fluxcd/flux2 depType=testing_tools
+  flux_version: 'v2.8.7'
 
 # Quality Thresholds
 quality:
diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
index 7b22113a7..d70008d31 100644
--- a/DEVELOPMENT.md
+++ b/DEVELOPMENT.md
@@ -522,10 +522,13 @@ make kwok-e2e RECIPE=gb200-eks-training # Test single recipe
 
 Recipes with `spec.criteria.service` defined are auto-discovered. KWOK validates scheduling (node selectors, tolerations, resource requests) but not runtime behavior (no container execution or GPU functionality).
 
+For the deployer matrix (argocd / argocd-helm OCI lanes), see [Deployer Matrix Testing](docs/contributor/kwok-testing.md).
+
 | Command | Description |
 |---------|-------------|
 | `make kwok-test-all` | Test all recipes in shared cluster (serial) |
 | `make kwok-e2e RECIPE=<name>` | Full e2e: cluster, nodes, validate |
+| `make kwok-test-deployer RECIPE=<name> DEPLOYER=<name>` | Validate single recipe under a specific deployer (`helm`, `argocd-oci`, `argocd-helm-oci`) |
 | `make kwok-cluster` | Create Kind cluster with KWOK |
 | `make kwok-status` | Show cluster and node status |
 | `make kwok-cluster-delete` | Delete cluster |
diff --git a/Makefile b/Makefile
index c126935ca..ab4f365ed 100644
--- a/Makefile
+++ b/Makefile
@@ -687,6 +687,20 @@ endif
 kwok-test-all: build ## Run all KWOK recipe tests in a shared cluster
 	@bash kwok/scripts/run-all-recipes.sh
 
+.PHONY: kwok-test-deployer
+kwok-test-deployer: build ## Validate scheduling under a specific deployer (RECIPE=… DEPLOYER=helm|argocd-oci|argocd-helm-oci|flux-oci)
+ifndef RECIPE
+	@echo "Error: RECIPE is required"
+	@echo "Usage: make kwok-test-deployer RECIPE=eks-training DEPLOYER=argocd-oci"
+	@exit 1
+endif
+ifndef DEPLOYER
+	@echo "Error: DEPLOYER is required (helm | argocd-oci | argocd-helm-oci | flux-oci)"
+	@exit 1
+endif
+	@echo "Validating $(RECIPE) under deployer=$(DEPLOYER)"
+	bash kwok/scripts/run-all-recipes.sh --deployer $(DEPLOYER) $(RECIPE)
+
 # =============================================================================
 # Talos local test harness
 # =============================================================================
diff --git a/docs/contributor/kwok-testing.md b/docs/contributor/kwok-testing.md
new file mode 100644
index 000000000..196bcd1de
--- /dev/null
+++ b/docs/contributor/kwok-testing.md
@@ -0,0 +1,209 @@
+# KWOK Deployer Matrix Testing
+
+KWOK (Kubernetes WithOut Kubelet) simulates a GPU cluster without real
+hardware so CI can validate scheduling shape — node selectors,
+tolerations, resource requests — for every recipe. The **deployer
+matrix** extends that coverage by re-running the same recipes through
+three additional output adapters (`argocd-oci`, `argocd-helm-oci`,
+`flux-oci`) in addition to the existing `helm` path. This catches
+Argo CD / Flux template regressions and OCI-source compatibility
+breaks that the `helm`-only lane could never see.
+
+For the design rationale and the spike findings that justify the chart
+pin and Repository-secret shape, see
+[ADR-008](https://github.com/NVIDIA/aicr/blob/main/docs/design/008-kwok-deployer-matrix.md).
+For the cluster-level KWOK setup (node profiles, recipe
+auto-discovery), see [kwok/README.md](https://github.com/NVIDIA/aicr/blob/main/kwok/README.md).
+
+## Deployer Coverage Matrix
+
+| Tier | Trigger | Deployers exercised |
+|------|---------|----------------------|
+| Tier 1 — generic overlays | every PR + push | `helm`, `argocd-oci`, `argocd-helm-oci`, `flux-oci` |
+| Tier 2 — diff-aware accelerator overlays | PR only, conditional on changed files | `helm` only |
+| Tier 3 — full overlay set | push to `main` + nightly schedule | `helm`, `argocd-oci`, `argocd-helm-oci`, `flux-oci` |
+
+### What each lane validates
+
+| Lane | Pull artifact | Apply manifests | Reconcile to Ready |
+|---|---|---|---|
+| `helm` | n/a (filesystem) | ✅ (`helm install`) | ✅ pods scheduled |
+| `argocd-oci` | ✅ (repo-server OCI pull) | ✅ (Argo CD sync) | ✅ `Synced+Healthy` (or documented partial-pass states) |
+| `argocd-helm-oci` | ✅ (`helm pull` OCI) | ✅ (wrapper chart install) | ✅ `Synced+Healthy` (or documented partial-pass states) |
+| `flux-oci` | ✅ (source-controller OCI pull) | ✅ (kustomize-controller apply) | ⏳ **deferred** — see [#964](https://github.com/NVIDIA/aicr/issues/964) |
+
+`flux-oci` intentionally stops at "bundle applied + HelmRelease CRs created" because the bundler's flux deployer emits local-chart HelmReleases with `GitRepository` sources pointing at the placeholder URL `https://github.com/YOUR_ORG/YOUR_REPO.git`. Under OCI consumption helm-controller cannot fetch these charts, and Flux's `dependsOn` cascade then stalls every downstream HelmRelease. Issue [#964](https://github.com/NVIDIA/aicr/issues/964) tracks the proper fix (render local-chart manifests at bundle time OR push each local chart as its own OCI helm artifact).
+
+For filesystem (Git-source) round-trip coverage of `argocd` / `flux`, see [#963](https://github.com/NVIDIA/aicr/issues/963).
+
+Tier 2 stays `helm`-only because its job is to verify that an
+accelerator-specific overlay still produces a correct bundle when the
+diff touches that overlay's inputs. The deployer shape is orthogonal
+to that question — re-running the same recipe under Argo CD would only
+re-exercise template rendering, which Tier 1 and Tier 3 already cover
+on the generic overlays.
+
+## Running Locally
+
+`make kwok-test-deployer` is the single entry point that mirrors what
+CI runs per matrix cell. It expects a KWOK cluster to already exist.
+
+```bash
+unset GITLAB_TOKEN
+make build
+make kwok-cluster
+
+# Single recipe + single deployer
+make kwok-test-deployer RECIPE=eks-training DEPLOYER=argocd-oci
+```
+
+Valid `DEPLOYER` values: `helm`, `argocd-oci`, `argocd-helm-oci`,
+`flux-oci`. The target invokes `kwok/scripts/run-all-recipes.sh
+--deployer <name> <recipe>`, which calls `install-infra.sh` once with
+`DEPLOYER` exported (in-cluster `registry:2` always; Argo CD for
+`argocd-*`; Flux 2 controllers for `flux-oci`), then runs
+`validate-scheduling.sh` for the recipe.
+
+### Registry host port
+
+The Kind cluster exposes the in-cluster `registry:2` Service on **host
+port 5500** (`kwok/kind-config.yaml`'s `extraPortMappings`). The
+unconventional choice avoids Apple ControlCenter (AirPlay / Handoff),
+which listens on host port 5000 by default on macOS and would otherwise
+fail `kind create cluster` with a port-bind error. Linux CI runners have
+5500 free as well, so the same default works everywhere.
+
+The in-cluster NodePort (`30500`) and the Service `containerPort`
+(`5000`) are hardcoded and independent of the host-side mapping —
+Argo CD's repo-server reaches the registry via Service DNS
+(`registry.aicr-registry.svc.cluster.local:5000`) regardless.
+
+Override `KWOK_REGISTRY_HOST_PORT` only when running against a
+non-standard cluster topology (e.g., port 5500 is already in use on
+your host). The variable adjusts the reachability probe; you still need
+to update `kwok/kind-config.yaml`'s `hostPort` to match.
+
+## Sweeping All Deployers Locally
+
+`make kwok-test-all` still defaults to `helm` and there is no
+matrix-aware make target — CI does the fan-out via the workflow
+matrix. To reproduce the matrix locally, loop in shell:
+
+```bash
+for d in helm argocd-oci argocd-helm-oci flux-oci; do
+  make kwok-test-deployer RECIPE=eks-training DEPLOYER="$d" || break
+done
+```
+
+For the full recipe set under a single deployer, call the script
+directly:
+
+```bash
+bash kwok/scripts/run-all-recipes.sh --deployer argocd-oci
+```
+
+## Failure Modes and Exit Codes
+
+The three scripts emit distinct exit codes so callers (CI, the Make
+target, local loops) can branch on failure mode rather than parsing
+logs.
+
+| Script | Code | Meaning |
+|--------|------|---------|
+| `install-infra.sh` | 10 | `yq` missing or required `.settings.yaml` field absent |
+| `install-infra.sh` | 20 | Registry Deployment not Ready within 120 s |
+| `install-infra.sh` | 21 | Registry not reachable on host port within 60 s |
+| `install-infra.sh` | 30 | Argo CD Helm install failed |
+| `install-infra.sh` | 31 | `applications.argoproj.io` CRD not Established within 120 s |
+| `install-infra.sh` | 40 | Repository secret apply failed |
+| `install-infra.sh` | 60 | Flux install manifest apply failed |
+| `install-infra.sh` | 61 | Flux controller (source/kustomize/helm) not Ready within 180 s |
+| `install-infra.sh` | 62 | Flux CRDs (OCIRepository/Kustomization/HelmRelease) not Established within 60 s |
+| `validate-scheduling.sh` | 50 | GitOps sync deadline hit (`KWOK_ARGOCD_SYNC_TIMEOUT` for `argocd-*`, `KWOK_FLUX_SYNC_TIMEOUT` for `flux-oci`); only GitOps lanes can return 50 |
+| `run-all-recipes.sh` | 50 | Three consecutive GitOps sync timeouts; tripped the ADR-008 3-strike rule and bailed the whole job |
+
+Exit code 50 from `validate-scheduling.sh` is intentionally distinct
+from generic non-zero exits: a sync-deadline timeout is qualitatively
+different from a bundle-render failure or a scheduling-shape mismatch,
+and the 3-strike rule in `run-all-recipes.sh` only counts code-50
+strikes.
+
+## Tuning the Sync Deadline
+
+Four environment variables shape how long the GitOps lanes wait before
+declaring a sync timeout. The Argo CD pair is independent of the Flux
+pair so the two GitOps lanes can be tuned separately when a recipe has
+deployer-specific reconciliation overhead.
+
+| Variable | Default | Purpose |
+|----------|---------|---------|
+| `KWOK_ARGOCD_SYNC_TIMEOUT` | `300` (seconds) | Total deadline for all child Argo CD Applications to reach `Synced` + `Healthy` |
+| `KWOK_ARGOCD_ROOT_GRACE` | `30` (seconds) | Grace period for the root Application to appear before deadline accounting starts |
+| `KWOK_FLUX_SYNC_TIMEOUT` | `300` (seconds) | Total deadline for the outer `OCIRepository` to fetch + `Kustomization` to apply + `HelmRelease` CRs to be created. `flux-oci` does NOT wait for `HelmRelease` reconciliation to terminal state — see [issue #964](https://github.com/NVIDIA/aicr/issues/964) |
+| `KWOK_FLUX_ROOT_GRACE` | `30` (seconds) | Grace period for the outer Kustomization to appear before deadline accounting starts |
+
+On a clean local Kind cluster the Phase-0 spike observed
+Synced+Healthy in roughly 30 seconds. CI runners are slower and
+contended, so the 300-second default exists to absorb that variance.
+If a local run trips code 50 but the cluster is otherwise healthy,
+raise `KWOK_ARGOCD_SYNC_TIMEOUT` / `KWOK_FLUX_SYNC_TIMEOUT` before
+assuming the recipe is broken — image pulls on a cold cluster are the
+most common cause.
+
+## Debugging CI Failures
+
+When the `kwok-test` composite action fails, it uploads an artifact
+named `kwok-debug-<recipe>-<deployer>-<run_id>` containing:
+
+- `<cluster>-resources.txt` — `kubectl get all --all-namespaces`
+- `<cluster>-nodes.txt` — `kubectl get nodes -o wide`
+- `<cluster>-events.txt` — events sorted by `lastTimestamp`
+- `<cluster>-pods.txt` — pod listing across all namespaces
+- `<cluster>-argo-apps.yaml` — `applications.argoproj.io` YAML in `argocd` namespace (argocd lanes only)
+- `<cluster>-argo-reposerver.log` — last 500 lines of `argocd-repo-server`
+- `<cluster>-argo-appcontroller.log` — last 500 lines of `argocd-application-controller`
+- `<cluster>-flux-resources.yaml` — YAML for `ocirepositories`, `kustomizations`, and `helmreleases` across all namespaces (flux-oci lane only)
+- `<cluster>-flux-source-controller.log` — last 500 lines of `source-controller`
+- `<cluster>-flux-kustomize-controller.log` — last 500 lines of `kustomize-controller`
+- `<cluster>-flux-helm-controller.log` — last 500 lines of `helm-controller`
+- `<cluster>-registry.log` — last 200 lines of the in-cluster `registry:2` Deployment
+
+The repo-server log (Argo CD) and source-controller log (Flux) are
+the first places to look for OCI-pull failures (plain-HTTP scheme
+errors, mediaType mismatches). The application-controller (Argo CD)
+and kustomize-controller (Flux) logs show reconciliation decisions
+and prune behavior; helm-controller logs surface per-`HelmRelease`
+install and upgrade outcomes.
+
+## Adding a New Deployer Value
+
+The deployer set is intentionally finite and matches what
+`pkg/bundler` emits. To add a new value (say, `argocd-git`):
+
+1. Add a `case` branch in `kwok/scripts/validate-scheduling.sh`'s
+   `resolve_argocd_root_app()` (or the equivalent `resolve_flux_root_names()`
+   if the new lane reconciles via Flux) mapping the new deployer to its
+   root-resource name, plus any new branches in `generate_bundle` and
+   `deploy_bundle`. Reuse the existing `argocd-oci` / `flux-oci` branches
+   as templates.
+2. Extend the `DEPLOYER` allowlist in `kwok/scripts/run-all-recipes.sh`
+   (the `case "$DEPLOYER" in` block in `main()`).
+3. Extend the `case "${DEPLOYER}"` branches in
+   `kwok/scripts/install-infra.sh`'s `main()` so the right controller
+   stack is installed for the new deployer.
+4. Extend the `deployer:` input description in
+   `.github/actions/kwok-test/action.yml` so callers see the new
+   value in the input docs.
+5. Add the value to the `deployer:` matrix in Tier 1 and Tier 3 of
+   `.github/workflows/kwok-recipes.yaml`. Leave Tier 2 alone — the
+   orthogonality rationale above still applies.
+6. Add a row to the [Deployer Coverage Matrix](#deployer-coverage-matrix)
+   table on this page so contributors can discover the new lane.
+
+If the new value requires changes to the in-cluster infra (a different
+registry, a different Argo CD chart, additional CRDs), update
+`install-infra.sh` and pin any new versions in `.settings.yaml` rather
+than hardcoding. The exit-code taxonomy in
+[Failure Modes and Exit Codes](#failure-modes-and-exit-codes) is
+contiguous — pick the next free code if a new distinct failure mode
+appears.
diff --git a/docs/design/008-kwok-deployer-matrix.md b/docs/design/008-kwok-deployer-matrix.md
index e4ee0fa96..f1e31d2c7 100644
--- a/docs/design/008-kwok-deployer-matrix.md
+++ b/docs/design/008-kwok-deployer-matrix.md
@@ -57,23 +57,37 @@ round-trip.
 ## Decision
 
 Add a `deployer` dimension to the existing `kwok-recipes.yaml` matrix
-with values `{helm, argocd-oci, argocd-helm-oci}`. Per matrix cell,
-boot a shared in-cluster OCI registry and Argo CD; push the bundle
-via OCI; observe Argo CD reconciliation to `Synced+Healthy`; then
-reuse the existing pod-Running verification.
+with values `{helm, argocd-oci, argocd-helm-oci, flux-oci}`. Per matrix
+cell, boot a shared in-cluster OCI registry plus the GitOps controllers
+needed by the selected deployer (Argo CD for `argocd-*`, Flux 2 for
+`flux-oci`); push the bundle via OCI; observe reconciliation to a
+controller-specific terminal-pass state; then reuse the existing
+pod-Running verification.
 
 | Tier | Trigger | Deployer values |
 |---|---|---|
-| Tier 1 — generic overlays | every PR + push | `{helm, argocd-oci, argocd-helm-oci}` |
+| Tier 1 — generic overlays | every PR + push | `{helm, argocd-oci, argocd-helm-oci, flux-oci}` |
 | Tier 2 — diff-aware accelerator overlays | PR only, conditional | `helm` only (unchanged) |
-| Tier 3 — full overlay set | push to main + nightly | `{helm, argocd-oci, argocd-helm-oci}` |
-
-**Rationale for tier scope.** Argo CD template regressions surface on
-generic overlays (Tier 1) — that is where the issue's bug class
-lives. Full accelerator-specific Argo CD coverage runs nightly and on
-push to main without inflating PR latency. Tier 2 stays helm-only
-because its purpose is diff-aware accelerator-config validation;
-Argo CD shape is orthogonal.
+| Tier 3 — full overlay set | push to main + nightly | `{helm, argocd-oci, argocd-helm-oci, flux-oci}` |
+
+**Rationale for tier scope.** Argo CD / Flux template regressions
+surface on generic overlays (Tier 1) — that is where the issue's bug
+class lives. Full accelerator-specific GitOps coverage runs nightly
+and on push to main without inflating PR latency. Tier 2 stays
+helm-only because its purpose is diff-aware accelerator-config
+validation; GitOps shape is orthogonal.
+
+**Rationale for adding `flux-oci`.** AICR ships a `flux` deployer
+alongside `argocd` / `argocd-helm`. Without an end-to-end CI lane,
+Flux-side bundle-format regressions ship to `main` undetected (same
+class of bug `argocd-oci` was added to catch). Flux's source-controller
+can consume the AICR generic OCI artifact via an outer `OCIRepository`
++ `Kustomization` wrapper (`spec.layerSelector.mediaType:
+application/vnd.oci.image.layer.v1.tar+gzip`, `operation: extract`);
+the inner `HelmRelease` resources then reconcile their per-component
+charts via helm-controller. The lane validates that bundle shape, not
+chart reconciliation completeness under KWOK fidelity gaps (see the
+terminal-pass framing below).
 
 **Rationale for OCI over Git source.** The user-confirmed production
 path is OCI. A local Gitea / dumb-HTTP git server would test the
@@ -111,9 +125,9 @@ One Kind cluster per matrix cell. Inside each cluster:
 │           │             │   (KWOK fake pods → Running)    │        │
 │           │             └─────────────────────────────────┘        │
 └───────────┼───────────────────────────────────────────────────────┘
-            │ kind extraPortMappings: host 5000 → node 30500
+            │ kind extraPortMappings: host 5500 → node 30500
    ┌────────┴────────┐
-   │   CI runner     │  aicr bundle --output oci://localhost:5000/...
+   │   CI runner     │  aicr bundle --output oci://localhost:5500/...
    └─────────────────┘
 ```
 
@@ -165,24 +179,45 @@ Per `(recipe, deployer)` matrix cell:
         └── if deployer in {argocd-oci, argocd-helm-oci}:
               aicr bundle --recipe ... \
                           --deployer <d> \
-                          --output oci://localhost:5000/aicr/<recipe>:<sha>-<deployer>
+                          --output oci://localhost:5500/aicr/<recipe>:<sha>-<deployer>
               kubectl apply -f bundle/app-of-apps.yaml          (argocd)
               # or: helm install <release> oci://...             (argocd-helm)
-              wait_for_argocd_sync                              (new helper)
+              wait_for_argocd_sync                              (helper)
+              verify_pods                                       (existing)
+
+        └── if deployer == flux-oci:
+              aicr bundle --recipe ... \
+                          --deployer flux \
+                          --output oci://localhost:5500/aicr/<recipe>:<sha>-flux-oci
+              kubectl apply -f -    # OCIRepository (layerSelector +
+                                    # spec.insecure for plain-HTTP)
+              kubectl apply -f -    # Kustomization (sourceRef -> OCIRepository,
+                                    # path: ./, wait: false)
+              wait_for_flux_sync                                (helper)
               verify_pods                                       (existing)
 
 3. Per-recipe teardown
-   └── cleanup_between_tests           [argocd/registry survive]
+   └── cleanup_between_tests           [argocd/flux-system/registry survive]
 ```
 
-**`wait_for_argocd_sync` (new helper)** polls
+**`wait_for_argocd_sync` (helper)** polls
 `kubectl get application -n argocd -o json` until every `Application`
 has `status.sync.status=Synced` and `status.health.status=Healthy`
 (or `Progressing` after a bounded retry window — KWOK pods may take
 a few stage-fast cycles to settle). On failure, dumps specific Argo
 CD conditions for actionable logs.
 
-**OCI tag convention:** `oci://localhost:5000/aicr/<recipe>:<short-sha>-<deployer>`
+**`wait_for_flux_sync` (helper)** polls the outer `Kustomization`
+status until `Ready=True`, then polls `kubectl get helmrelease -A -o
+json` until every `HelmRelease` reaches a terminal pass state:
+either `Ready=True` (canonical pass) or `Released=True` AND
+`status.history[0].status=="deployed"` AND `Stalled!=True` ("bundle
+applied; KWOK can't drive readiness probes" pass — analog of Argo CD's
+`Synced + Progressing` arm). Reuses the EXIT_ARGOCD_SYNC_TIMEOUT (50)
+exit code so `run-all-recipes.sh`'s 3-strike rule fires symmetrically
+across both GitOps lanes.
+
+**OCI tag convention:** `oci://localhost:5500/aicr/<recipe>:<short-sha>-<deployer>`
 avoids stale-tag confusion when running the same recipe across
 deployer values back-to-back in local development.
 
@@ -193,12 +228,12 @@ deployer values back-to-back in local development.
 | File | Change |
 |---|---|
 | `.settings.yaml` | Pin `argocd_chart` and `registry_image` under `versions:` |
-| `kwok/kind-config.yaml` | Add `extraPortMappings: {containerPort: 30500, hostPort: 5000}` |
+| `kwok/kind-config.yaml` | Add `extraPortMappings: {containerPort: 30500, hostPort: 5500}` (avoids macOS ControlCenter port-5000 collision) |
 | `kwok/scripts/install-infra.sh` (new) | Idempotent install of registry + Argo CD + repo-creds Secret |
 | `kwok/scripts/validate-scheduling.sh` | Add `--deployer <name>` flag; branch on deployer |
 | `kwok/scripts/run-all-recipes.sh` | Accept deployer arg; allowlist `aicr-registry` / `argocd` in cleanup |
 | `.github/actions/kwok-test/action.yml` | New input `deployer`; pass through to `run-all-recipes.sh` |
-| `.github/workflows/kwok-recipes.yaml` | Add `deployer: [helm, argocd-oci, argocd-helm-oci]` to Tier 1 and Tier 3 matrices |
+| `.github/workflows/kwok-recipes.yaml` | Add `deployer: [helm, argocd-oci, argocd-helm-oci, flux-oci]` to Tier 1 and Tier 3 matrices |
 | `Makefile` | New `kwok-test-deployer RECIPE=... DEPLOYER=...` target |
 | `docs/contributor/<area>.md` (KWOK testing page) | Document the matrix and local repro |
 
@@ -220,7 +255,7 @@ deployer values back-to-back in local development.
 
 | # | Failure | Detection | Remediation |
 |---|---|---|---|
-| 1 | Registry push fails | `aicr bundle --output oci://...` exits non-zero | `wait_for_registry_ready` polls `curl -sf http://localhost:5000/v2/` before any bundle step; dump registry Pod logs |
+| 1 | Registry push fails | `aicr bundle --output oci://...` exits non-zero | `wait_for_registry_ready` polls `curl -sf http://localhost:5500/v2/` before any bundle step; dump registry Pod logs |
 | 2 | Argo CD install / CRD not ready | `kubectl wait --for=condition=Established crd/applications.argoproj.io --timeout=120s` | Bounded wait; fail fast with `kubectl describe deploy -n argocd` |
 | 3 | Argo CD cannot pull from local OCI | `Application.status.conditions[].type=ComparisonError` | `dump_argocd_failures` runs `kubectl get applications -n argocd -o json \| jq '.items[].status.conditions'` + repo-server logs |
 | 4 | Application stuck `OutOfSync` / `Progressing` past deadline | `wait_for_argocd_sync` deadline (default 300 s, override via `KWOK_ARGOCD_SYNC_TIMEOUT`) | Dump each Application's `status.operationState.message` and `status.health.message` |
diff --git a/docs/index.yml b/docs/index.yml
index 45a87e58c..bacc90d46 100644
--- a/docs/index.yml
+++ b/docs/index.yml
@@ -76,3 +76,5 @@ navigation:
         path: contributor/validator.md
       - page: Maintaining Recipe Contributions
         path: contributor/maintaining.md
+      - page: KWOK Deployer Matrix Testing
+        path: contributor/kwok-testing.md
diff --git a/go.mod b/go.mod
index a8cc48df2..310b26720 100644
--- a/go.mod
+++ b/go.mod
@@ -4,6 +4,7 @@ go 1.26.3
 
 require (
 	github.com/CycloneDX/cyclonedx-go v0.11.0
+	github.com/Masterminds/semver/v3 v3.5.0
 	github.com/Masterminds/sprig/v3 v3.3.0
 	github.com/coreos/go-systemd/v22 v22.7.0
 	github.com/distribution/reference v0.6.0
@@ -42,7 +43,6 @@ require (
 	dario.cat/mergo v1.0.2 // indirect
 	github.com/IGLOU-EU/go-wildcard v1.0.3 // indirect
 	github.com/Masterminds/goutils v1.1.1 // indirect
-	github.com/Masterminds/semver/v3 v3.5.0 // indirect
 	github.com/antlr4-go/antlr/v4 v4.13.1 // indirect
 	github.com/aquilax/truncate v1.0.1 // indirect
 	github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
diff --git a/kwok/kind-config.yaml b/kwok/kind-config.yaml
index 66d896ab3..e31291d59 100644
--- a/kwok/kind-config.yaml
+++ b/kwok/kind-config.yaml
@@ -16,7 +16,10 @@
 #
 # Features:
 # - DRA (Dynamic Resource Allocation) enabled for GPU scheduling tests
-# - CoreDNS skipped - not needed for scheduling tests
+# - CoreDNS enabled — required by the Argo CD deployer-matrix lanes so
+#   in-cluster pods (argocd-server, repo-server, redis, etc.) can resolve
+#   each other via Service DNS. Costs ~15s extra boot vs the historical
+#   skip; trivial for KWOK CI. Issue #843.
 # - Control-plane tainted to prevent test workloads from scheduling there
 
 kind: Cluster
@@ -27,11 +30,27 @@ runtimeConfig:
   "resource.k8s.io/v1beta1": "true"
 nodes:
   - role: control-plane
+    # Host port 5500 → NodePort 30500 exposes the in-cluster OCI registry
+    # (registry.aicr-registry.svc) to the runner for `aicr bundle --output oci://...`
+    # pushes during the KWOK deployer-matrix lanes. Plain HTTP — gated by
+    # `--plain-http` on the bundler side and `insecureOCIForceHttp` on the
+    # Argo CD repo secret.
+    #
+    # Why 5500 and not 5000: on macOS, Apple ControlCenter (AirPlay /
+    # Handoff) listens on host port 5000 by default, and `kind create
+    # cluster` then fails to bind the extraPortMapping. Linux CI
+    # runners have port 5500 free as well, so the unconventional
+    # choice costs nothing there. The Service `containerPort: 5000`
+    # and NodePort `30500` are unchanged — Argo CD's repo-server
+    # reaches the registry in-cluster via Service DNS on port 5000
+    # regardless of the host-side mapping. Issue #843.
+    extraPortMappings:
+      - containerPort: 30500
+        hostPort: 5500
+        protocol: TCP
     kubeadmConfigPatches:
       - |
         kind: InitConfiguration
-        skipPhases:
-          - addon/coredns
         nodeRegistration:
           ignorePreflightErrors:
             - SystemVerification
diff --git a/kwok/scripts/install-infra.sh b/kwok/scripts/install-infra.sh
new file mode 100755
index 000000000..8c4cc33ed
--- /dev/null
+++ b/kwok/scripts/install-infra.sh
@@ -0,0 +1,544 @@
+#!/usr/bin/env bash
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# install-infra.sh - Install in-cluster OCI registry + the controllers the
+# selected DEPLOYER needs (Argo CD or Flux) for the KWOK deployer-matrix CI
+# lane (issue #843).
+#
+# Idempotent: re-running is a no-op when state already matches. All versions
+# are sourced from .settings.yaml (`testing_tools.registry_image`,
+# `testing_tools.argocd_chart`, `testing_tools.flux_version`) — never hardcoded.
+#
+# Components installed (controller installs branch on $DEPLOYER):
+#   1. ALWAYS: `registry:2` Deployment + NodePort Service in the
+#      `aicr-registry` namespace. Service is exposed on nodePort 30500;
+#      kwok/kind-config.yaml maps host port 5500 -> nodePort 30500 so
+#      `aicr bundle --output oci://localhost:5500/...` works from the
+#      runner. Host port 5500 is used (not the more conventional 5000)
+#      because Apple ControlCenter holds 5000 on macOS by default; Linux
+#      CI runners are unaffected.
+#   2. DEPLOYER=argocd-*: Argo CD via Helm, chart pinned to .settings.yaml.
+#      Server runs with `server.insecure=true` (no TLS termination needed
+#      inside the cluster). RepoCreds template secret
+#      `aicr-oci-repo-creds` with `insecureOCIForceHttp: "true"` so Argo
+#      CD's repo-server pulls the plain-HTTP in-cluster registry over
+#      HTTP. Phase 0 spike confirmed `repo-creds` shape does NOT propagate
+#      this field for OCI repos — see docs/plans/2026-05-18-kwok-deployer-
+#      matrix.md (F4).
+#   3. DEPLOYER=flux-oci: Flux 2 install manifest applied from the pinned
+#      GitHub release; we only need source-controller (OCIRepository),
+#      kustomize-controller (Kustomization wrapper) and helm-controller
+#      (HelmRelease per AICR component). notification-controller and
+#      image-reflector-controller are tolerated but unused. Flux does NOT
+#      require a separate repo-creds-style secret for the in-cluster
+#      registry: per-OCIRepository `spec.insecure: true` toggles plain-HTTP
+#      at the source level.
+#
+# Usage:
+#   DEPLOYER=helm|argocd-oci|argocd-helm-oci|flux-oci ./install-infra.sh
+#   (DEPLOYER defaults to "helm"; under helm only the registry is installed,
+#    which is harmless if unused.)
+#
+# Environment variables:
+#   KUBECTL_CONTEXT          (optional) - kubectl context to use; defaults to
+#                                         the current context if unset.
+#   KWOK_REGISTRY_HOST_PORT  (optional, default 5500) - Host port the script
+#                                         probes when verifying the registry is
+#                                         reachable. MUST match the
+#                                         `extraPortMappings[*].hostPort` in
+#                                         the Kind cluster config — this script
+#                                         does not edit Kind config, so the
+#                                         variable only adjusts the local
+#                                         reachability check, not the actual
+#                                         port mapping. The default matches
+#                                         `kwok/kind-config.yaml`; override
+#                                         only when running against a
+#                                         non-standard cluster topology. The
+#                                         in-cluster NodePort (30500) and the
+#                                         Service containerPort (5000) are
+#                                         hardcoded and not affected by this
+#                                         variable.
+#
+# Environment variables:
+#   DEPLOYER                 (optional, default "helm") - Selects which
+#                                         controller(s) to install on top of
+#                                         the registry. Recognized values:
+#                                           helm             - registry only
+#                                           argocd-oci       - registry + Argo CD
+#                                           argocd-helm-oci  - registry + Argo CD
+#                                           flux-oci         - registry + Flux 2
+#                                         Unknown values fall back to the
+#                                         registry-only path; the caller is
+#                                         expected to validate first.
+#
+# Exit codes (distinct so callers can branch on failure mode):
+#    0  success
+#   10  `yq` missing or required .settings.yaml field absent
+#   20  registry Deployment not Ready in 120s
+#   21  registry not reachable on host port within 60s
+#   30  Argo CD Helm install failed
+#   31  `applications.argoproj.io` CRD not Established in 120s
+#   40  Repository secret apply failed
+#   60  Flux install manifest apply failed
+#   61  Flux controller (source/kustomize/helm) not Ready in 180s
+#   62  Flux CRDs (OCIRepository/Kustomization/HelmRelease) not Established in 60s
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+KWOK_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
+REPO_ROOT="$(cd "${KWOK_DIR}/.." && pwd)"
+SETTINGS_FILE="${REPO_ROOT}/.settings.yaml"
+
+# Colors for output (match validate-scheduling.sh / install-karpenter-kwok.sh)
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m'
+
+log_info()  { echo -e "${GREEN}[INFO]${NC} $*"; }
+log_warn()  { echo -e "${YELLOW}[WARN]${NC} $*"; }
+log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
+log_debug() { echo -e "${BLUE}[DEBUG]${NC} $*"; }
+
+# Configuration
+REGISTRY_NAMESPACE="aicr-registry"
+REGISTRY_NAME="registry"
+REGISTRY_PORT=5000
+REGISTRY_NODEPORT=30500
+REGISTRY_HOST_PORT="${KWOK_REGISTRY_HOST_PORT:-5500}"
+
+ARGOCD_NAMESPACE="argocd"
+ARGOCD_RELEASE="argocd"
+ARGOCD_REPO_NAME="argo"
+ARGOCD_REPO_URL="https://argoproj.github.io/argo-helm"
+ARGOCD_HELM_TIMEOUT="5m"
+
+# Flux 2 configuration. install.yaml is fetched from the pinned GitHub
+# release; only source/kustomize/helm controllers are required by the
+# flux-oci lane, but the upstream manifest also installs notification-
+# controller and image-reflector-controller — they're tolerated.
+FLUX_NAMESPACE="flux-system"
+FLUX_CONTROLLERS=(source-controller kustomize-controller helm-controller)
+FLUX_REQUIRED_CRDS=(
+    ocirepositories.source.toolkit.fluxcd.io
+    kustomizations.kustomize.toolkit.fluxcd.io
+    helmreleases.helm.toolkit.fluxcd.io
+)
+
+# Selected deployer (env var). Branches controller install in main().
+DEPLOYER="${DEPLOYER:-helm}"
+
+# Wrapper that honors KUBECTL_CONTEXT when set; otherwise uses current context.
+kc() {
+    if [[ -n "${KUBECTL_CONTEXT:-}" ]]; then
+        kubectl --context="${KUBECTL_CONTEXT}" "$@"
+    else
+        kubectl "$@"
+    fi
+}
+
+hc() {
+    if [[ -n "${KUBECTL_CONTEXT:-}" ]]; then
+        helm --kube-context "${KUBECTL_CONTEXT}" "$@"
+    else
+        helm "$@"
+    fi
+}
+
+# Read a required field from .settings.yaml via yq. Exit 10 if yq is missing
+# or the field is empty / null / absent.
+read_setting() {
+    local field="$1"
+    if ! command -v yq &>/dev/null; then
+        log_error "yq is required but not installed"
+        log_error "Missing field: ${field}"
+        exit 10
+    fi
+    local value
+    value=$(yq eval "${field} // \"\"" "${SETTINGS_FILE}" 2>/dev/null || echo "")
+    if [[ -z "${value}" || "${value}" == "null" ]]; then
+        log_error "Required field ${field} is missing or empty in ${SETTINGS_FILE}"
+        exit 10
+    fi
+    echo "${value}"
+}
+
+# Diagnostic dump for registry failures.
+dump_registry_diagnostics() {
+    log_error "--- registry diagnostics ---"
+    kc describe deployment -n "${REGISTRY_NAMESPACE}" "${REGISTRY_NAME}" 2>&1 || true
+    log_error "--- registry pod logs (tail=200) ---"
+    kc logs -n "${REGISTRY_NAMESPACE}" "deploy/${REGISTRY_NAME}" --tail=200 2>&1 || true
+}
+
+# Diagnostic dump for Argo CD failures.
+dump_argocd_diagnostics() {
+    log_error "--- argocd deployments ---"
+    kc describe deployment -n "${ARGOCD_NAMESPACE}" 2>&1 || true
+    log_error "--- argocd events (last 20) ---"
+    kc get events -n "${ARGOCD_NAMESPACE}" --sort-by=.lastTimestamp 2>&1 | tail -20 || true
+}
+
+# Diagnostic dump for Flux controller failures.
+dump_flux_diagnostics() {
+    log_error "--- flux-system deployments ---"
+    kc describe deployment -n "${FLUX_NAMESPACE}" 2>&1 || true
+    log_error "--- flux-system pods ---"
+    kc get pods -n "${FLUX_NAMESPACE}" -o wide 2>&1 || true
+    log_error "--- flux-system events (last 20) ---"
+    kc get events -n "${FLUX_NAMESPACE}" --sort-by=.lastTimestamp 2>&1 | tail -20 || true
+    log_error "--- flux CRDs ---"
+    kc get crd -l app.kubernetes.io/instance=flux-system 2>&1 || true
+}
+
+# -------------------------------------------------------------------
+# Step 1: Install in-cluster registry:2
+# -------------------------------------------------------------------
+install_registry() {
+    local image="$1"
+
+    log_info "Installing in-cluster OCI registry (${image}) in namespace ${REGISTRY_NAMESPACE}..."
+
+    # Namespace — apply, not create, so re-runs upsert cleanly.
+    kc apply -f - <<EOF
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: ${REGISTRY_NAMESPACE}
+  labels:
+    app.kubernetes.io/name: aicr-registry
+    app.kubernetes.io/managed-by: install-infra.sh
+EOF
+
+    # Deployment + NodePort Service.
+    kc apply -f - <<EOF
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ${REGISTRY_NAME}
+  namespace: ${REGISTRY_NAMESPACE}
+  labels:
+    app.kubernetes.io/name: ${REGISTRY_NAME}
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: ${REGISTRY_NAME}
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: ${REGISTRY_NAME}
+    spec:
+      containers:
+        - name: ${REGISTRY_NAME}
+          image: ${image}
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: http
+              containerPort: ${REGISTRY_PORT}
+              protocol: TCP
+          env:
+            - name: REGISTRY_HTTP_ADDR
+              value: "0.0.0.0:${REGISTRY_PORT}"
+          readinessProbe:
+            httpGet:
+              path: /v2/
+              port: http
+            initialDelaySeconds: 2
+            periodSeconds: 5
+          livenessProbe:
+            httpGet:
+              path: /v2/
+              port: http
+            initialDelaySeconds: 10
+            periodSeconds: 10
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ${REGISTRY_NAME}
+  namespace: ${REGISTRY_NAMESPACE}
+  labels:
+    app.kubernetes.io/name: ${REGISTRY_NAME}
+spec:
+  type: NodePort
+  selector:
+    app.kubernetes.io/name: ${REGISTRY_NAME}
+  ports:
+    - name: http
+      port: ${REGISTRY_PORT}
+      targetPort: ${REGISTRY_PORT}
+      nodePort: ${REGISTRY_NODEPORT}
+      protocol: TCP
+EOF
+
+    log_info "Waiting for registry Deployment to become Ready (timeout 120s)..."
+    if ! kc rollout status deployment/"${REGISTRY_NAME}" \
+            -n "${REGISTRY_NAMESPACE}" --timeout=120s; then
+        log_error "Registry Deployment did not become Ready within 120s"
+        dump_registry_diagnostics
+        exit 20
+    fi
+
+    # Probe the host port that Kind maps to NodePort 30500. KWOK_REGISTRY_HOST_PORT
+    # only changes which port this probe waits on — it does NOT change Kind's
+    # port mapping (that's hardcoded in kwok/kind-config.yaml). If you override
+    # this variable, you must also have created the Kind cluster with a matching
+    # `extraPortMappings[*].hostPort`, otherwise this probe will hang for 60s.
+    log_info "Waiting for http://localhost:${REGISTRY_HOST_PORT}/v2/ to respond (timeout 60s)..."
+    local waited=0
+    local interval=2
+    while (( waited < 60 )); do
+        if curl -sf -o /dev/null --max-time 3 \
+                "http://localhost:${REGISTRY_HOST_PORT}/v2/"; then
+            log_info "Registry reachable at http://localhost:${REGISTRY_HOST_PORT}/v2/"
+            return 0
+        fi
+        sleep "${interval}"
+        waited=$(( waited + interval ))
+    done
+
+    log_error "Registry not reachable on host port ${REGISTRY_HOST_PORT} within 60s"
+    dump_registry_diagnostics
+    log_error "--- node port-mapping sanity ---"
+    kc get nodes -o wide 2>&1 || true
+    exit 21
+}
+
+# -------------------------------------------------------------------
+# Step 2: Install Argo CD via Helm
+# -------------------------------------------------------------------
+install_argocd() {
+    local chart_version="$1"
+
+    log_info "Installing Argo CD (chart ${chart_version}) in namespace ${ARGOCD_NAMESPACE}..."
+
+    if ! helm repo add "${ARGOCD_REPO_NAME}" "${ARGOCD_REPO_URL}" --force-update >/dev/null 2>&1; then
+        log_warn "helm repo add returned non-zero; continuing (repo may already exist)"
+    fi
+    helm repo update >/dev/null
+
+    # `server.insecure=true` disables TLS on the API server. Suitable for an
+    # in-cluster CI install — not for production. The `\.` escape in the key
+    # is required so Helm preserves the dot in `server.insecure`. Use
+    # `--set-string` (not `--set`) so the value renders as the literal string
+    # "true" in the `argocd-cmd-params-cm` ConfigMap — `configs.params` is a
+    # stringly-typed map and the chart's `tpl` step drops bool-typed values,
+    # which would leave the API server without `--insecure`.
+    if ! hc upgrade --install "${ARGOCD_RELEASE}" "${ARGOCD_REPO_NAME}/argo-cd" \
+            --namespace "${ARGOCD_NAMESPACE}" --create-namespace \
+            --version "${chart_version}" \
+            --set-string 'configs.params.server\.insecure=true' \
+            --wait --timeout "${ARGOCD_HELM_TIMEOUT}"; then
+        log_error "Argo CD Helm install failed"
+        dump_argocd_diagnostics
+        exit 30
+    fi
+
+    log_info "Waiting for applications.argoproj.io CRD to be Established (timeout 120s)..."
+    if ! kc wait --for=condition=Established \
+            crd/applications.argoproj.io --timeout=120s; then
+        log_error "applications.argoproj.io CRD did not reach Established within 120s"
+        kc describe crd applications.argoproj.io 2>&1 || true
+        exit 31
+    fi
+
+    log_info "Argo CD ready"
+}
+
+# -------------------------------------------------------------------
+# Step 3: Apply RepoCreds template for the in-cluster OCI registry.
+#
+# IMPORTANT: secret-type MUST be `repo-creds` (NOT `repository`).
+# Repository-type secrets in Argo CD use EXACT URL match
+# (util/db/repository_secrets.go::getRepositorySecret → git.SameURL).
+# Different recipes produce different repoURLs (oci://…/aicr/<recipe>),
+# so a single Repository secret cannot cover them all. RepoCreds-type
+# secrets use longest-prefix match
+# (util/db/repository_secrets.go::getRepositoryCredentialIndex →
+# strings.HasPrefix), so one template at the …/aicr prefix covers
+# every recipe pushed by validate-scheduling.sh. Argo CD enriches the
+# synthetic Repository for each Application with this template's
+# insecureOCIForceHttp=true, forcing plain HTTP for the in-cluster
+# registry. The Phase 0 spike on chart 7.7.0 (= app v2.13) saw
+# repo-creds + type=helm + enableOCI=true NOT propagate
+# insecureOCIForceHttp; chart 9.5.x (= app v3.x) with type=oci does.
+# -------------------------------------------------------------------
+apply_repo_secret() {
+    log_info "Applying Argo CD repo-creds template aicr-oci-repo-creds (type=oci, insecureOCIForceHttp=true)..."
+
+    # Best-effort cleanup of the older repository-typed secret from
+    # previous install-infra runs so an idempotent re-run converges
+    # cleanly on the prefix-match shape.
+    kc delete secret -n "${ARGOCD_NAMESPACE}" aicr-oci-repo --ignore-not-found >/dev/null 2>&1 || true
+
+    if ! kc apply -f - <<EOF
+apiVersion: v1
+kind: Secret
+metadata:
+  name: aicr-oci-repo-creds
+  namespace: ${ARGOCD_NAMESPACE}
+  labels:
+    argocd.argoproj.io/secret-type: repo-creds
+stringData:
+  type: oci
+  url: oci://registry.${REGISTRY_NAMESPACE}.svc.cluster.local:${REGISTRY_PORT}/aicr
+  insecureOCIForceHttp: "true"
+EOF
+    then
+        log_error "Failed to apply repo-creds template aicr-oci-repo-creds"
+        kc get secret -n "${ARGOCD_NAMESPACE}" aicr-oci-repo-creds -o yaml 2>&1 || true
+        exit 40
+    fi
+
+    log_info "Repo-creds template applied (prefix-matches every oci://…/aicr/<recipe>)"
+}
+
+# -------------------------------------------------------------------
+# Step 4 (flux-oci only): Install Flux 2 controllers via the pinned
+# release's install.yaml.
+#
+# Why install.yaml and not the community Helm chart: install.yaml is the
+# canonical, pre-rendered, version-locked manifest the Flux project ships
+# for every release. No third-party chart drift, no `helm install` step
+# (so no failure mode where Flux installs while helm-controller crashloops
+# searching for its own CRDs). Apply, then wait for the three controllers
+# the bundle pipeline actually uses to roll out.
+#
+# OCIRepository.spec.insecure handles plain-HTTP for the in-cluster
+# registry, so there's no Flux equivalent of apply_repo_secret().
+# -------------------------------------------------------------------
+install_flux() {
+    local version="$1"
+    local install_url="https://github.com/fluxcd/flux2/releases/download/${version}/install.yaml"
+
+    log_info "Installing Flux ${version} from ${install_url}..."
+    # GitHub releases CDN occasionally returns transient 502s on
+    # release-asset fetches (same flake class that motivated the
+    # kwok-stage-fast retry hardening in run-all-recipes.sh). install-
+    # infra.sh runs in its own bash and doesn't import the retry helper,
+    # so retry inline. 5 attempts with doubling backoff (5+10+20+40s
+    # cumulative sleep = 75s) covers the 30-60s typical CDN hiccup
+    # without slowing the happy path.
+    local max_attempts=5
+    local delay=5
+    local attempt=1
+    while true; do
+        if kc apply -f "${install_url}"; then
+            break
+        fi
+        if (( attempt >= max_attempts )); then
+            log_error "Failed to apply Flux install manifest after ${attempt} attempt(s)"
+            dump_flux_diagnostics
+            exit 60
+        fi
+        log_warn "Flux install manifest apply failed (attempt ${attempt}/${max_attempts}); retrying in ${delay}s..."
+        sleep "${delay}"
+        attempt=$(( attempt + 1 ))
+        delay=$(( delay * 2 ))
+    done
+
+    log_info "Waiting for Flux controllers (source/kustomize/helm) to roll out (timeout 180s each)..."
+    local ctrl
+    for ctrl in "${FLUX_CONTROLLERS[@]}"; do
+        if ! kc rollout status deployment/"${ctrl}" \
+                -n "${FLUX_NAMESPACE}" --timeout=180s; then
+            log_error "Flux controller ${ctrl} did not become Ready within 180s"
+            dump_flux_diagnostics
+            exit 61
+        fi
+    done
+
+    log_info "Waiting for Flux CRDs (OCIRepository / Kustomization / HelmRelease) to be Established..."
+    local crd
+    for crd in "${FLUX_REQUIRED_CRDS[@]}"; do
+        if ! kc wait --for=condition=Established \
+                "crd/${crd}" --timeout=60s; then
+            log_error "Flux CRD ${crd} did not reach Established within 60s"
+            kc describe crd "${crd}" 2>&1 || true
+            exit 62
+        fi
+    done
+
+    log_info "Flux ready"
+}
+
+# -------------------------------------------------------------------
+# Main
+# -------------------------------------------------------------------
+main() {
+    log_info "=========================================="
+    log_info "AICR KWOK lane infra install (issue #843)"
+    log_info "=========================================="
+
+    if [[ ! -f "${SETTINGS_FILE}" ]]; then
+        log_error "Settings file not found: ${SETTINGS_FILE}"
+        exit 10
+    fi
+
+    local registry_image
+    registry_image=$(read_setting '.testing_tools.registry_image')
+
+    log_info "registry_image:        ${registry_image}"
+    log_info "DEPLOYER:              ${DEPLOYER}"
+    log_info "registry host port:    ${REGISTRY_HOST_PORT}"
+    if [[ -n "${KUBECTL_CONTEXT:-}" ]]; then
+        log_info "kubectl context:       ${KUBECTL_CONTEXT}"
+    else
+        local current_ctx
+        current_ctx=$(kubectl config current-context 2>/dev/null || echo "<none>")
+        log_info "kubectl context:       ${current_ctx} (current)"
+    fi
+
+    log_debug "Step 1: install in-cluster registry"
+    install_registry "${registry_image}"
+
+    # Controller install branches on DEPLOYER. Unknown / "helm" values stop
+    # after the registry — they don't need a GitOps controller. This avoids
+    # the ~30-90s of unconditional Argo CD install the prior version paid
+    # on the helm lane.
+    case "${DEPLOYER}" in
+        argocd-oci|argocd-helm-oci)
+            local argocd_chart
+            argocd_chart=$(read_setting '.testing_tools.argocd_chart')
+            log_info "argocd_chart:          ${argocd_chart}"
+            log_debug "Step 2 (argocd): install Argo CD"
+            install_argocd "${argocd_chart}"
+            log_debug "Step 3 (argocd): apply repository secret"
+            apply_repo_secret
+            ;;
+        flux-oci)
+            local flux_version
+            flux_version=$(read_setting '.testing_tools.flux_version')
+            log_info "flux_version:          ${flux_version}"
+            log_debug "Step 2 (flux): install Flux 2"
+            install_flux "${flux_version}"
+            ;;
+        helm|"")
+            log_info "DEPLOYER=${DEPLOYER:-helm}: skipping GitOps controller install"
+            ;;
+        *)
+            log_warn "Unknown DEPLOYER=${DEPLOYER}; installing registry only"
+            ;;
+    esac
+
+    log_info "=========================================="
+    log_info "Infra install complete"
+    log_info "=========================================="
+}
+
+main "$@"
diff --git a/kwok/scripts/lib/cleanup.sh b/kwok/scripts/lib/cleanup.sh
new file mode 100644
index 000000000..14bf97854
--- /dev/null
+++ b/kwok/scripts/lib/cleanup.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+# shellcheck shell=bash
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Shared cleanup helpers for the KWOK deployer-matrix CI lane.
+#
+# Sourced by validate-scheduling.sh and run-all-recipes.sh so the
+# system-namespace allowlist (the only thing standing between the
+# force-finalize sweep and the install-infra-owned ns argocd /
+# flux-system / aicr-registry) and the KWOK-context safety guard
+# live in exactly one place. Drift in either across scripts has
+# silently broken the test infra before — keep it centralized.
+#
+# Source guard: this file uses constants and functions only, no
+# side effects at source time.
+
+# SYSTEM_NS_PATTERN matches the namespaces that MUST NOT be touched by
+# the test cleanup paths:
+#   - default, kube-*, local-path-storage, kwok-system: cluster
+#     control-plane and CNI / storage plumbing. Wiping any of them
+#     bricks the cluster.
+#   - argocd, flux-system, aicr-registry: owned by install-infra.sh,
+#     reused across recipes. Wiping any of them forces a full re-
+#     install (~3-5 min) on every subsequent recipe.
+#
+# Used in `grep -vE "^(${SYSTEM_NS_PATTERN})$"` and
+# `grep -vE "^(${SYSTEM_NS_PATTERN})\s"` call sites. Anchors are NOT
+# baked in here so callers can choose space-anchor vs eol-anchor
+# depending on the kubectl output they're filtering.
+# shellcheck disable=SC2034  # consumed by sourcing scripts
+readonly SYSTEM_NS_PATTERN="default|kube-node-lease|kube-public|kube-system|kwok-system|local-path-storage|argocd|aicr-registry|flux-system"
+
+# ensure_kwok_context_loose checks that the kubectl context is a known
+# KWOK Kind cluster name. Cheap check, no node lookup. Use this from
+# cold-start paths where kwok nodes may not exist yet (e.g. run-all-
+# recipes.sh's initial cleanup before any recipe has applied nodes).
+#
+# Hard-fail on mismatch — the cleanup paths the caller is about to run
+# force-finalize namespaces, and we'd rather refuse to act than risk
+# wiping a real cluster.
+ensure_kwok_context_loose() {
+    local ctx
+    ctx=$(kubectl config current-context 2>/dev/null || true)
+    case "$ctx" in
+        kind-aicr-kwok|kind-aicr-kwok-test)
+            return 0
+            ;;
+        *)
+            echo "[ERROR] ensure_kwok_context: refusing to run — current kubectl context '${ctx:-<none>}' is not a known KWOK Kind cluster (expected kind-aicr-kwok or kind-aicr-kwok-test)" >&2
+            echo "[ERROR] If you intended to run against a different cluster, double-check KUBECONFIG and the cluster's purpose — this script force-finalizes namespaces" >&2
+            exit 1
+            ;;
+    esac
+}
+
+# ensure_kwok_context strict-checks BOTH the context name AND that at
+# least one node carries `type=kwok` label. Use this from in-recipe
+# paths where apply-nodes.sh has already populated the cluster. The
+# node-label check is load-bearing: a developer could `kind create
+# cluster --name aicr-kwok-test` and then point at a different cluster
+# whose context name happens to be the same — only the kwok-typed
+# nodes prove apply-nodes.sh ran against THIS cluster.
+ensure_kwok_context() {
+    ensure_kwok_context_loose
+
+    if ! kubectl get nodes -l type=kwok -o name 2>/dev/null | grep -q . ; then
+        echo "[ERROR] ensure_kwok_context: refusing to run — current context has no kwok-typed nodes" >&2
+        echo "[ERROR] Run kwok/scripts/apply-nodes.sh <recipe> first, or verify you're against the right cluster" >&2
+        exit 1
+    fi
+}
diff --git a/kwok/scripts/run-all-recipes.sh b/kwok/scripts/run-all-recipes.sh
index b2c43fd31..753768270 100755
--- a/kwok/scripts/run-all-recipes.sh
+++ b/kwok/scripts/run-all-recipes.sh
@@ -15,16 +15,58 @@
 
 # Run all KWOK recipe tests sequentially in a shared cluster.
 # Usage:
-#   ./run-all-recipes.sh           # Run all testable recipes
-#   ./run-all-recipes.sh recipe1   # Run specific recipe(s)
+#   ./run-all-recipes.sh                           # Run all testable recipes (helm)
+#   ./run-all-recipes.sh recipe1                   # Run specific recipe(s)
+#   ./run-all-recipes.sh --deployer <name>         # Select deployer for all recipes
+#   ./run-all-recipes.sh --deployer=<name> recipe1 # `=` form supported
+#
+# Flags:
+#   --deployer <name>   Deployer to exercise for every recipe. One of:
+#                         helm             (default — original Helm path)
+#                         argocd-oci       (in-cluster registry + Argo CD
+#                                           app-of-apps)
+#                         argocd-helm-oci  (in-cluster registry + Argo CD
+#                                           via OCI Helm chart wrapper)
+#                         flux-oci         (in-cluster registry + Flux 2
+#                                           OCIRepository → Kustomization →
+#                                           HelmRelease)
+#                       When != helm, install-infra.sh is run ONCE before the
+#                       recipe loop with DEPLOYER exported so it installs the
+#                       shared in-cluster OCI registry plus the GitOps
+#                       controllers the selected deployer needs (Argo CD or
+#                       Flux). Their owning namespaces (`aicr-registry`,
+#                       `argocd`, `flux-system`) survive across recipe
+#                       iterations.
+#
+# Exit codes:
+#    0  all recipes passed
+#    1  one or more recipes failed (non-sync-timeout); install-infra.sh failed;
+#       or invalid arguments
+#   50  Argo CD sync deadline hit on 3 consecutive recipes (3-strike rule,
+#       ADR-008 §"Error Handling and Failure Modes"). Distinct so CI can
+#       distinguish infra/controller-wide failure from per-recipe issues.
+#       Mirrors validate-scheduling.sh's EXIT_ARGOCD_SYNC_TIMEOUT.
 
 set -euo pipefail
 
+# Mirrors validate-scheduling.sh's EXIT_ARGOCD_SYNC_TIMEOUT. Kept in sync by
+# the matching documentation in both script headers.
+readonly EXIT_ARGOCD_SYNC_TIMEOUT=50
+# Max consecutive sync timeouts before bailing the whole job (ADR-008
+# §"Error Handling and Failure Modes"). Only applied for argocd-* deployers.
+readonly ARGOCD_SYNC_STRIKE_LIMIT=3
+
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 KWOK_DIR="${SCRIPT_DIR}/.."
 REPO_ROOT="${KWOK_DIR}/.."
 OVERLAYS_DIR="${REPO_ROOT}/recipes/overlays"
 
+# Shared cleanup helpers (SYSTEM_NS_PATTERN, ensure_kwok_context). Same
+# source for validate-scheduling.sh so the system-ns allowlist regex
+# lives in one place.
+# shellcheck source=lib/cleanup.sh
+source "${SCRIPT_DIR}/lib/cleanup.sh"
+
 CLUSTER_NAME="${KWOK_CLUSTER:-aicr-kwok-test}"
 CONTEXT="kind-${CLUSTER_NAME}"
 
@@ -41,7 +83,11 @@ retry_command() {
     local description="$1"
     shift
 
-    local max_attempts="${KWOK_COMMAND_RETRIES:-3}"
+    # 5 attempts with doubling backoff: 5+10+20+40 = 75s cumulative sleep.
+    # The prior default (3 attempts → 15s cumulative) was not enough headroom
+    # for transient GitHub releases CDN 502s on kwok-stage-fast chart fetch,
+    # which observably persist 30-60s. Overridable via env for local tuning.
+    local max_attempts="${KWOK_COMMAND_RETRIES:-5}"
     local delay="${KWOK_COMMAND_RETRY_DELAY:-5}"
     local attempt=1
 
@@ -124,13 +170,59 @@ cleanup_between_tests() {
     # Clean up orphaned CRDs from cert-manager (cluster-scoped, not cleaned by ns delete)
     kubectl delete crd -l app.kubernetes.io/instance=aicr-test --ignore-not-found 2>/dev/null || true
 
-    # Wait for any still-terminating namespaces before next recipe
-    local system_ns="default|kube-node-lease|kube-public|kube-system|kwok-system|local-path-storage"
-    local terminating
-    terminating=$(kubectl get ns -o jsonpath='{range .items[?(@.status.phase=="Terminating")]}{.metadata.name}{"\n"}{end}' 2>/dev/null | grep -vE "^(${system_ns})$" || true)
-    for ns in $terminating; do
-        log_info "Waiting for namespace $ns to terminate..."
-        kubectl wait --for=delete "ns/$ns" --timeout=120s 2>/dev/null || true
+    # Force-finalize any still-terminating namespaces before the next recipe.
+    # `argocd`, `flux-system`, and `aicr-registry` are owned by install-
+    # infra.sh and MUST survive between recipes for the GitOps deployer
+    # lanes — installing them once and reusing avoids 5+ minutes of Helm
+    # install + CRD Established + controller-ready overhead per recipe.
+    # The three namespaces are listed unconditionally (even under --deployer
+    # helm) because they simply won't exist on the helm path, so the
+    # allowlist is harmless there. Do not gate by DEPLOYER.
+    #
+    # See the long comment in validate-scheduling.sh cleanup() for the
+    # rationale: KWOK cannot run real controller finalizers, so
+    # `kubectl wait --for=delete` against a Terminating namespace blocks
+    # the full --timeout=120s per namespace. Recipes touch ~10 namespaces;
+    # the previous version spent ~20 minutes of cleanup between recipes
+    # while doing zero useful work. Force-finalize bypasses the protocol
+    # by PATCHing spec.finalizers to []; safe in the ephemeral KWOK
+    # cluster (no real workloads to leak; cluster destroyed at job end).
+    local system_ns="${SYSTEM_NS_PATTERN}"  # see SYSTEM_NS_PATTERN in lib/cleanup.sh
+
+    # Two-phase cleanup, matching validate-scheduling.sh::cleanup_old_tests:
+    # the previous version only iterated namespaces ALREADY in
+    # status.phase=Terminating, which silently missed `Active` namespaces
+    # carrying a `deletionTimestamp` (Helm hook resources mid-uninstall,
+    # for example) and let them collide on the next recipe.
+    #
+    # Phase 1 — issue async deletes for every non-system namespace so the
+    # controller starts tearing down. Phase 2 — sleep briefly, then
+    # force-finalize anything still present. The two phases together
+    # match the in-recipe cleanup so the between-test path can't drift
+    # away from it over time.
+    local test_namespaces
+    test_namespaces=$(kubectl get ns -o jsonpath='{.items[*].metadata.name}' 2>/dev/null \
+        | tr ' ' '\n' \
+        | { grep -vE "^(${system_ns})$" || true; })
+    if [[ -z "$test_namespaces" ]]; then
+        return 0
+    fi
+
+    for ns in $test_namespaces; do
+        kubectl delete ns "$ns" --ignore-not-found --wait=false 2>/dev/null || true
+    done
+
+    # Brief grace for graceful deletion (real finalizers run in well
+    # under a second when there are no real workloads).
+    sleep 2
+    for ns in $test_namespaces; do
+        if kubectl get ns "$ns" >/dev/null 2>&1; then
+            log_info "Force-finalizing stuck namespace: $ns"
+            kubectl get ns "$ns" -o json 2>/dev/null \
+                | jq '.spec.finalizers = [] | .metadata.finalizers = []' \
+                | kubectl replace --raw "/api/v1/namespaces/${ns}/finalize" -f - >/dev/null 2>&1 \
+                || true
+        fi
     done
 }
 
@@ -138,7 +230,7 @@ run_recipe_test() {
     local recipe="$1"
     echo ""
     log_info "========================================"
-    log_info "Testing recipe: ${recipe}"
+    log_info "Testing recipe: ${recipe} (deployer=${DEPLOYER})"
     log_info "========================================"
 
     cleanup_between_tests
@@ -146,30 +238,171 @@ run_recipe_test() {
     # Create nodes (pass recipe name, script infers from overlay)
     bash "${SCRIPT_DIR}/apply-nodes.sh" "${recipe}" || return 1
 
-    # Run validation
-    bash "${SCRIPT_DIR}/validate-scheduling.sh" "${recipe}" || return 1
+    # Run validation. Preserve validate-scheduling.sh's exit code so callers
+    # can distinguish EXIT_ARGOCD_SYNC_TIMEOUT (50) from generic failures (1)
+    # for the 3-strike rule.
+    local rc=0
+    bash "${SCRIPT_DIR}/validate-scheduling.sh" --deployer "${DEPLOYER}" "${recipe}" || rc=$?
+    return "$rc"
+}
+
+# Print usage to stderr.
+usage() {
+    cat >&2 <<'EOF'
+Usage: run-all-recipes.sh [--deployer <name>] [recipe1 recipe2 ...]
+
+Flags:
+  --deployer <name>   Deployer to exercise for every recipe. One of:
+                        helm (default), argocd-oci, argocd-helm-oci, flux-oci
+
+Examples:
+  run-all-recipes.sh
+  run-all-recipes.sh eks-training
+  run-all-recipes.sh --deployer argocd-oci eks-training
+  run-all-recipes.sh --deployer=argocd-helm-oci
+  run-all-recipes.sh --deployer flux-oci eks-training
+EOF
 }
 
+# Deployer selection (issue #843). Set by --deployer in main(); default helm.
+# Under --deployer helm, this script's behavior is byte-identical to pre-#843
+# except the cleanup_between_tests system_ns allowlist gains two harmless
+# entries (argocd|aicr-registry) that never exist on the helm path.
+DEPLOYER="helm"
+
 main() {
     local recipes failed=() passed=()
+    local positional=()
+
+    # Parse flags + positional recipe names. Supports `--deployer X` and
+    # `--deployer=X`. Anything else with a leading `-` is rejected.
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            --deployer)
+                if [[ $# -lt 2 ]]; then
+                    log_error "--deployer requires a value"
+                    usage
+                    return 1
+                fi
+                DEPLOYER="$2"
+                shift 2
+                ;;
+            --deployer=*)
+                DEPLOYER="${1#--deployer=}"
+                shift
+                ;;
+            -h|--help)
+                usage
+                return 0
+                ;;
+            -*)
+                log_error "Unknown option: $1"
+                usage
+                return 1
+                ;;
+            *)
+                positional+=("$1")
+                shift
+                ;;
+        esac
+    done
 
-    if [[ $# -gt 0 ]]; then
-        recipes="$*"
+    case "$DEPLOYER" in
+        helm|argocd-oci|argocd-helm-oci|flux-oci)
+            ;;
+        *)
+            log_error "Invalid --deployer value: '${DEPLOYER}'"
+            log_error "Must be one of: helm, argocd-oci, argocd-helm-oci, flux-oci"
+            usage
+            return 1
+            ;;
+    esac
+
+    if [[ ${#positional[@]} -gt 0 ]]; then
+        recipes="${positional[*]}"
     else
         recipes=$(get_recipes)
     fi
 
-    log_info "Found $(echo "${recipes}" | wc -w | tr -d ' ') recipe(s) to test"
+    log_info "Found $(echo "${recipes}" | wc -w | tr -d ' ') recipe(s) to test (deployer=${DEPLOYER})"
     ensure_cluster
 
+    # Safety: refuse to start the cleanup sweep unless the kubectl
+    # context points at a known KWOK Kind cluster. ensure_cluster has
+    # just created/reused the cluster, but it does not validate the
+    # context name — a developer running locally with a stale
+    # KUBECONFIG could otherwise direct the upcoming force-finalize
+    # sweep at a real cluster. Loose check only: kwok nodes don't
+    # exist yet on the initial run (apply-nodes.sh runs per recipe
+    # inside run_recipe_test).
+    ensure_kwok_context_loose
+
     # Clean up any stale resources from previous runs
     cleanup_between_tests
 
+    # Install shared in-cluster registry + the controller(s) the selected
+    # deployer needs (Argo CD for argocd-*, Flux for flux-oci). install-
+    # infra.sh is idempotent but unnecessary work per recipe; a failure
+    # here is fatal — the lane cannot run without it. DEPLOYER is exported
+    # so install-infra.sh can branch on it. Its exit code map: 10=yq/
+    # settings, 20=registry Deployment not Ready, 21=registry not reachable
+    # on host port, 30=Argo CD Helm install failed, 31=Application CRD not
+    # Established, 40=Repository secret apply failed, 60=Flux install
+    # manifest apply failed, 61=Flux controller not Ready, 62=Flux CRDs not
+    # Established. Surface the raw rc in the log line so an operator can
+    # map it.
+    if [[ "${DEPLOYER}" != "helm" ]]; then
+        log_info "Installing shared infra (in-cluster registry + controllers for ${DEPLOYER})..."
+        local infra_rc=0
+        DEPLOYER="${DEPLOYER}" bash "${SCRIPT_DIR}/install-infra.sh" || infra_rc=$?
+        if (( infra_rc != 0 )); then
+            log_error "install-infra.sh failed (exit code ${infra_rc}); cannot run ${DEPLOYER} deployer lane"
+            log_error "See kwok/scripts/install-infra.sh header for exit-code taxonomy"
+            return 1
+        fi
+    fi
+
+    # 3-strike rule for Argo CD sync timeouts (ADR-008 §"Error Handling and
+    # Failure Modes"). Tracks CONSECUTIVE EXIT_ARGOCD_SYNC_TIMEOUT failures.
+    # The counter resets on any rc != 50 (pass OR generic failure) so only
+    # genuinely consecutive sync timeouts trip the bail. A pass-fail-pass-
+    # timeout-timeout-timeout sequence is 3 consecutive; a timeout-pass-
+    # timeout-timeout sequence is 2 consecutive.
+    # Only argocd-* deployers can hit 50; helm path never trips this.
+    local consecutive_sync_timeouts=0
+
     for recipe in ${recipes}; do
-        if run_recipe_test "${recipe}"; then
+        local rc=0
+        run_recipe_test "${recipe}" || rc=$?
+        if (( rc == 0 )); then
             passed+=("${recipe}")
+            consecutive_sync_timeouts=0
         else
             failed+=("${recipe}")
+            # 3-strike rule is GitOps-only: helm path never returns 50, so
+            # the gate is currently implicit. Make it explicit by checking
+            # DEPLOYER too — keeps the contract auditable from this site
+            # alone instead of a chain of "but only X can produce 50"
+            # comments scattered across files.
+            if (( rc == EXIT_ARGOCD_SYNC_TIMEOUT )) \
+                    && [[ "$DEPLOYER" == argocd-* || "$DEPLOYER" == flux-oci ]]; then
+                consecutive_sync_timeouts=$(( consecutive_sync_timeouts + 1 ))
+                log_warn "GitOps sync timeout strike ${consecutive_sync_timeouts}/${ARGOCD_SYNC_STRIKE_LIMIT} on recipe ${recipe}"
+                if (( consecutive_sync_timeouts >= ARGOCD_SYNC_STRIKE_LIMIT )); then
+                    log_error "========================================"
+                    log_error "3-strike rule tripped: ${consecutive_sync_timeouts} consecutive GitOps sync timeouts"
+                    log_error "Bailing remainder of recipe loop (partial coverage > no coverage is false here)"
+                    log_error "Failed recipes so far:"
+                    for r in "${failed[@]:-}"; do [[ -n "$r" ]] && log_error "  - ${r}"; done
+                    log_error "========================================"
+                    cleanup_between_tests
+                    return "$EXIT_ARGOCD_SYNC_TIMEOUT"
+                fi
+            else
+                # Any other failure (including a hypothetical rc=50 from a
+                # path that shouldn't produce one) breaks the streak.
+                consecutive_sync_timeouts=0
+            fi
         fi
     done
 
diff --git a/kwok/scripts/validate-scheduling.sh b/kwok/scripts/validate-scheduling.sh
index 8c54adb31..a740a31bd 100755
--- a/kwok/scripts/validate-scheduling.sh
+++ b/kwok/scripts/validate-scheduling.sh
@@ -16,22 +16,90 @@
 # validate-scheduling.sh - Validate bundle scheduling on KWOK cluster
 #
 # Usage:
-#   ./validate-scheduling.sh <recipe-name>
+#   ./validate-scheduling.sh [--keep-namespace] [--deployer <name>] <recipe-name>
 #   ./validate-scheduling.sh h100-eks-ubuntu-training-kubeflow
+#   ./validate-scheduling.sh --deployer argocd-oci eks-training
+#
+# Flags:
+#   --keep-namespace    Preserve releases and namespaces after the run for
+#                       inspection (skips cleanup).
+#   --deployer <name>   Deployer to exercise. One of:
+#                         helm             (default — original Helm path)
+#                         argocd-oci       (aicr bundle --deployer argocd ->
+#                                           OCI push -> kubectl apply
+#                                           app-of-apps.yaml)
+#                         argocd-helm-oci  (aicr bundle --deployer argocd-helm
+#                                           -> OCI push -> helm install OCI
+#                                           chart in argocd namespace)
+#                         flux-oci         (aicr bundle --deployer flux ->
+#                                           OCI push -> apply OCIRepository +
+#                                           Kustomization wrappers and let
+#                                           Flux reconcile)
+#                       For argocd-* values the in-cluster registry + Argo CD
+#                       must already be installed; for flux-oci the registry
+#                       + Flux 2 controllers must be installed. Both are
+#                       managed by `kwok/scripts/install-infra.sh` keyed on
+#                       the DEPLOYER env var.
+#
+# Environment variables (GitOps deployers only):
+#   KWOK_REGISTRY_HOST_PORT  Host port the in-cluster registry is reachable on
+#                            from the runner. MUST match the value used when
+#                            running install-infra.sh / kind-config.yaml.
+#                            Default: 5500 (avoids the macOS ControlCenter
+#                            port-5000 collision; Linux CI runners have 5500
+#                            free as well).
+#   KWOK_ARGOCD_SYNC_TIMEOUT Seconds to wait for all Argo CD Applications to
+#                            reach Synced + Healthy (or Progressing) before
+#                            failing. Default: 300.
+#   KWOK_ARGOCD_ROOT_GRACE   Seconds to wait for the root Argo CD Application
+#                            (nvidia-stack for argocd-oci, aicr-stack for
+#                            argocd-helm-oci) to appear in the argocd
+#                            namespace before failing. A missing root after
+#                            this grace window indicates `kubectl apply` /
+#                            `helm install` silently produced no Application
+#                            resource. Default: 30.
+#   KWOK_FLUX_SYNC_TIMEOUT   Seconds to wait for the outer Kustomization +
+#                            all HelmReleases to reach a terminal state
+#                            before failing. Default: 300.
+#   KWOK_FLUX_ROOT_GRACE     Seconds to wait for the outer Kustomization to
+#                            appear in the cluster before failing. A missing
+#                            Kustomization after the grace window indicates
+#                            kubectl apply silently produced no resource
+#                            (RBAC denial, CRD missing). Default: 30.
 #
 # This script:
 # 1. Generates a recipe from the cluster config
 # 2. Generates a bundle from the recipe
-# 3. Deploys the bundle to the KWOK cluster
+# 3. Deploys the bundle to the KWOK cluster (Helm, Argo CD, or Flux per --deployer)
 # 4. Verifies all pods reach Running state (KWOK auto-transitions them)
 # 5. Reports success/failure
+#
+# Exit codes:
+#    0  success
+#    1  generic failure (bundle gen, deploy, pod scheduling, RBAC, CRD missing,
+#       apiserver unreachable, etc.)
+#   50  GitOps sync deadline hit (KWOK_ARGOCD_SYNC_TIMEOUT for argocd-*,
+#       KWOK_FLUX_SYNC_TIMEOUT for flux-oci). Distinct so run-all-recipes.sh
+#       can apply the 3-strike rule (ADR-008 §"Error Handling and Failure
+#       Modes") without grepping stderr. Only emitted for GitOps deployers;
+#       the helm path never returns 50.
 
 set -euo pipefail
 
+# Distinct exit code for Argo CD sync timeout. Documented in the header above
+# AND in kwok/scripts/run-all-recipes.sh so callers can branch on it.
+readonly EXIT_ARGOCD_SYNC_TIMEOUT=50
+
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 KWOK_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
 REPO_ROOT="$(cd "${KWOK_DIR}/.." && pwd)"
 
+# Shared cleanup helpers (SYSTEM_NS_PATTERN constant, ensure_kwok_context
+# safety guard). The same constants and functions are sourced by
+# run-all-recipes.sh so the system-ns allowlist lives in one place.
+# shellcheck source=lib/cleanup.sh
+source "${SCRIPT_DIR}/lib/cleanup.sh"
+
 # Colors for output
 RED='\033[0;31m'
 GREEN='\033[0;32m'
@@ -51,6 +119,77 @@ WORK_DIR=""
 AICR_BIN=""
 KEEP_NAMESPACE=false
 
+# Deployer selection (issue #843). Set by --deployer flag in main().
+# When DEPLOYER != "helm", generate_bundle pushes to OCI and deploy_bundle
+# routes through Argo CD instead of `deploy.sh`. The helm path stays
+# byte-identical to pre-#843 behavior.
+DEPLOYER="helm"
+# OCI ref populated in generate_bundle when DEPLOYER != "helm"; consumed by
+# deploy_bundle (helm install path) and cleanup.
+OCI_REF=""
+# In-cluster pull URL for argocd-* deployers (no tag suffix). Populated in
+# generate_bundle and consumed by deploy_bundle for the argocd-helm-oci lane,
+# where the wrapper chart's `helm install` must be invoked with
+# `--set repoURL=<in-cluster URL>` so the Argo CD Applications it renders
+# point repo-server at the in-cluster registry Service DNS, not the
+# runner-side localhost:5500 (which is unreachable from inside the cluster).
+# The argocd-oci lane derives repoURL via `aicr bundle --repo` at bundle
+# time, so it doesn't read this variable. Kept as a script-global instead
+# of being re-derived in deploy_bundle so the runner→service-DNS rewrite
+# rule lives in exactly one place.
+OCI_IN_CLUSTER_REF=""
+# Root Argo CD Application name emitted by the bundler. Set per-deployer in
+# resolve_argocd_root_app():
+#   - argocd-oci      -> "nvidia-stack" (pkg/bundler/deployer/argocd/
+#                        templates/app-of-apps.yaml.tmpl)
+#   - argocd-helm-oci -> "aicr-stack"   (pkg/bundler/deployer/argocdhelm/
+#                        argocdhelm.go: parentAppTemplate)
+# Default empty when DEPLOYER=helm; cleanup guards against empty before
+# issuing `kubectl delete application`.
+ARGOCD_ROOT_APP=""
+
+# Resolve ARGOCD_ROOT_APP from DEPLOYER. Must be called before any code path
+# that consults ARGOCD_ROOT_APP (cleanup, wait_for_argocd_sync).
+resolve_argocd_root_app() {
+    case "$DEPLOYER" in
+        argocd-oci)      ARGOCD_ROOT_APP="nvidia-stack" ;;
+        argocd-helm-oci) ARGOCD_ROOT_APP="aicr-stack"   ;;
+        *)               ARGOCD_ROOT_APP=""             ;;
+    esac
+}
+
+# Flux-specific globals (set in generate_bundle / deploy_bundle for flux-oci).
+# Both live in the flux-system namespace (Flux's default control plane).
+#
+# FLUX_OCIREPOSITORY_NAME is fixed to "aicr-bundle" — the value the bundler
+# embeds into local-chart HelmRelease sourceRefs when --deployer flux is
+# paired with an OCI --output (see pkg/bundler/config/Config.bundleOCISourceName
+# and pkg/cli/bundle.go). Changing the name in one place WITHOUT the other
+# breaks chart resolution at reconcile time, so the constant is the contract.
+#
+# FLUX_KUSTOMIZATION_NAME varies by recipe so back-to-back recipes on the
+# same cluster don't collide (one Kustomization per recipe pointing at
+# the same shared OCIRepository over time).
+#
+# Default empty when DEPLOYER != flux-oci; cleanup guards against empty.
+FLUX_KUSTOMIZATION_NAME=""
+FLUX_OCIREPOSITORY_NAME=""
+
+# Resolve FLUX_* names from the recipe. Must be called before any code path
+# that consults them (cleanup, deploy_bundle, wait_for_flux_sync).
+resolve_flux_root_names() {
+    local recipe="$1"
+    if [[ "$DEPLOYER" == "flux-oci" ]]; then
+        # Fixed name — see comment above. Must match what
+        # pkg/cli/bundle.go sets as BundleOCISourceName.
+        FLUX_OCIREPOSITORY_NAME="aicr-bundle"
+        FLUX_KUSTOMIZATION_NAME="aicr-${recipe}"
+    else
+        FLUX_OCIREPOSITORY_NAME=""
+        FLUX_KUSTOMIZATION_NAME=""
+    fi
+}
+
 # Cleanup function
 cleanup() {
     local exit_code=$?
@@ -64,9 +203,87 @@ cleanup() {
         log_info "Preserving releases for inspection"
         log_info "Clean up with: helm list -A (then uninstall each release)"
     else
-        # Uninstall all Helm releases from component namespaces
+        # Argo CD deployers: delete the root Application FIRST so prune
+        # semantics tear down children before we touch the namespaces.
+        # The Helm-OCI release (argocd-helm-oci) also needs to come down so
+        # the next recipe gets a clean argocd namespace.
+        if [[ "$DEPLOYER" == "argocd-oci" || "$DEPLOYER" == "argocd-helm-oci" ]]; then
+            if [[ -n "$ARGOCD_ROOT_APP" ]]; then
+                log_info "Deleting Argo CD root Application ${ARGOCD_ROOT_APP} (prune cascade)..."
+                timeout 120s kubectl delete application "$ARGOCD_ROOT_APP" -n argocd \
+                    --ignore-not-found --wait=true 2>/dev/null || true
+            fi
+            if [[ "$DEPLOYER" == "argocd-helm-oci" ]]; then
+                # Defensive guard: the install-infra.sh-managed Argo CD release
+                # is named "argocd" in the same namespace. RELEASE_NAME-stack
+                # must never collide with that — uninstalling the infra release
+                # would tear down Argo CD itself and break subsequent recipes.
+                local wrapper="${RELEASE_NAME}-stack"
+                if [[ "$wrapper" == "argocd" ]]; then
+                    log_error "Refusing to uninstall wrapper release named 'argocd'"
+                    log_error "It collides with install-infra.sh's Argo CD release."
+                    log_error "Override KWOK_RELEASE to something other than empty/'argocd'."
+                else
+                    log_info "Uninstalling argocd-helm-oci wrapper release ${wrapper}..."
+                    log_info "Releases in argocd namespace before uninstall:"
+                    helm list -n argocd 2>/dev/null || true
+                    timeout 60s helm uninstall "$wrapper" -n argocd --wait 2>/dev/null || true
+                fi
+            fi
+        fi
+
+        # Flux deployer: delete the Kustomization FIRST so its prune
+        # GC removes the per-component HelmReleases before namespace
+        # cleanup races them. The OCIRepository then has nothing to feed
+        # and can be removed safely.
+        if [[ "$DEPLOYER" == "flux-oci" ]]; then
+            if [[ -n "$FLUX_KUSTOMIZATION_NAME" ]]; then
+                log_info "Deleting Flux Kustomization ${FLUX_KUSTOMIZATION_NAME} (prune cascade)..."
+                timeout 120s kubectl delete kustomization "$FLUX_KUSTOMIZATION_NAME" \
+                    -n flux-system --ignore-not-found --wait=true 2>/dev/null || true
+            fi
+            if [[ -n "$FLUX_OCIREPOSITORY_NAME" ]]; then
+                log_info "Deleting Flux OCIRepository ${FLUX_OCIREPOSITORY_NAME}..."
+                timeout 30s kubectl delete ocirepository "$FLUX_OCIREPOSITORY_NAME" \
+                    -n flux-system --ignore-not-found --wait=true 2>/dev/null || true
+            fi
+            # Best-effort: clear any HelmReleases the bundle declared in
+            # component namespaces. helm-controller's finalizer can hang
+            # under KWOK; force-remove if needed before the namespace
+            # sweep below force-finalizes the namespace.
+            local stale_hrs
+            stale_hrs=$(kubectl get helmrelease -A -o json 2>/dev/null \
+                | jq -r '.items[] | "\(.metadata.namespace) \(.metadata.name)"' \
+                || true)
+            if [[ -n "$stale_hrs" ]]; then
+                while IFS=' ' read -r ns name; do
+                    if [[ -n "$ns" && "$ns" != "flux-system" ]]; then
+                        log_info "Deleting HelmRelease ${name} in ${ns}..."
+                        timeout 30s kubectl delete helmrelease "$name" -n "$ns" \
+                            --ignore-not-found --wait=false 2>/dev/null || true
+                        # Strip finalizers if helm-controller's reconciler is
+                        # stuck. Merge patch (NOT --type=json remove): RFC 6902
+                        # remove errors when the target path is absent, and the
+                        # `|| true` would then silently swallow the exact case
+                        # this guard exists for — a HelmRelease whose
+                        # helm-controller crashed before attaching its
+                        # finalizer. Merge patch with `null` is idempotent
+                        # whether finalizers are present or absent.
+                        kubectl patch helmrelease "$name" -n "$ns" \
+                            --type=merge -p '{"metadata":{"finalizers":null}}' \
+                            >/dev/null 2>&1 || true
+                    fi
+                done <<< "$stale_hrs"
+            fi
+        fi
+
+        # Uninstall all Helm releases from component namespaces (excludes
+        # argocd because we want the install-infra.sh-managed release to
+        # survive recipe-to-recipe).
         local releases
-        releases=$(helm list -A -o json 2>/dev/null | jq -r '.[] | select(.namespace != "kube-system") | "\(.name) \(.namespace)"' || true)
+        releases=$(helm list -A -o json 2>/dev/null \
+            | jq -r '.[] | select(.namespace != "kube-system") | select(.namespace != "argocd") | "\(.name) \(.namespace)"' \
+            || true)
         if [[ -n "$releases" ]]; then
             while IFS=' ' read -r name ns; do
                 if [[ -n "$name" ]]; then
@@ -77,13 +294,51 @@ cleanup() {
         fi
         # Clean up stale APIServices before namespace deletion to prevent hangs
         cleanup_stale_apiservices
-        # Delete all non-system namespaces (dynamically covers any recipe)
-        local system_ns="default|kube-node-lease|kube-public|kube-system|kwok-system|local-path-storage"
+        # Delete all non-system namespaces (dynamically covers any recipe).
+        # `argocd`, `flux-system`, and `aicr-registry` are owned by
+        # install-infra.sh and must survive between recipes for the GitOps
+        # deployer matrix to work back-to-back. Listed unconditionally
+        # (harmless when the corresponding controller isn't installed on
+        # the current lane).
+        local system_ns="${SYSTEM_NS_PATTERN}"  # see SYSTEM_NS_PATTERN in lib/cleanup.sh
         local test_namespaces
         test_namespaces=$(kubectl get ns -o jsonpath='{.items[*].metadata.name}' 2>/dev/null | tr ' ' '\n' | grep -vE "^(${system_ns})$" || true)
+        # Issue all delete requests asynchronously (--wait=false), then
+        # force-finalize any namespace stuck in Terminating.
+        #
+        # Why force-finalize: KWOK fake controllers can drive Pods through
+        # phases but cannot run real finalizer reconcilers (Argo CD's
+        # resources-finalizer.argocd.argoproj.io, kubernetes.io/legacy-
+        # finalizer, operator-injected finalizers). With --wait=true a
+        # `kubectl delete ns` blocks until the namespace's finalizers are
+        # cleared — which never happens in KWOK — and the previous
+        # --timeout=120s loop spent the full 2 minutes per namespace,
+        # exhausting the 15-minute job budget on cleanup *after* the
+        # actual recipe validation had already PASSED. Observed: every
+        # argocd-oci CI lane on commit c459b51f cancelled in post-test
+        # cleanup with `##[error]The operation was canceled.`
+        #
+        # Force-finalize bypasses the controller protocol by PATCHing the
+        # namespace's spec.finalizers to []. This is safe in the KWOK
+        # ephemeral cluster: there are no real workloads to leak, and the
+        # cluster is destroyed at job end regardless. Do NOT port this
+        # pattern to non-KWOK / production cleanup.
         for ns in $test_namespaces; do
             log_info "Deleting namespace $ns..."
-            kubectl delete ns "$ns" --ignore-not-found --wait=true --timeout=120s 2>/dev/null || true
+            kubectl delete ns "$ns" --ignore-not-found --wait=false 2>/dev/null || true
+        done
+        # Give graceful deletion a brief window first (real finalizers run
+        # in well under a second in tests with no real workloads); then
+        # force-finalize anything left over.
+        sleep 2
+        for ns in $test_namespaces; do
+            if kubectl get ns "$ns" >/dev/null 2>&1; then
+                log_debug "Force-finalizing stuck namespace: $ns"
+                kubectl get ns "$ns" -o json 2>/dev/null \
+                    | jq '.spec.finalizers = [] | .metadata.finalizers = []' \
+                    | kubectl replace --raw "/api/v1/namespaces/${ns}/finalize" -f - >/dev/null 2>&1 \
+                    || true
+            fi
         done
     fi
 
@@ -223,9 +478,13 @@ cleanup_old_tests() {
         fi
     fi
 
-    # Find and uninstall old Helm releases from component namespaces
+    # Find and uninstall old Helm releases from component namespaces.
+    # Skip the argocd namespace so the install-infra.sh-managed Argo CD
+    # release survives recipe-to-recipe.
     local releases
-    releases=$(helm list -A -o json 2>/dev/null | jq -r '.[] | select(.namespace != "kube-system") | "\(.namespace) \(.name)"' || true)
+    releases=$(helm list -A -o json 2>/dev/null \
+        | jq -r '.[] | select(.namespace != "kube-system") | select(.namespace != "argocd") | "\(.namespace) \(.name)"' \
+        || true)
     if [[ -n "$releases" ]]; then
         log_info "Uninstalling old releases..."
         echo "$releases" | while read -r ns release; do
@@ -240,21 +499,51 @@ cleanup_old_tests() {
     # These can cause namespace deletion to hang with "stale GroupVersion discovery" errors
     cleanup_stale_apiservices
 
-    # Delete all non-system namespaces (dynamically covers any recipe)
-    local system_ns="default|kube-node-lease|kube-public|kube-system|kwok-system|local-path-storage"
+    # Delete all non-system namespaces (dynamically covers any recipe).
+    # `argocd` and `aicr-registry` are owned by install-infra.sh and must
+    # survive between recipes.
+    #
+    # See the long comment in cleanup() for the rationale: --wait=true on
+    # a KWOK cluster blocks the full --timeout=120s per namespace because
+    # KWOK can't run real finalizers (Argo CD's resources-finalizer,
+    # operator-injected finalizers, etc.). Issue async + force-finalize.
+    local system_ns="${SYSTEM_NS_PATTERN}"  # see SYSTEM_NS_PATTERN in lib/cleanup.sh
     local test_namespaces
     test_namespaces=$(kubectl get ns -o jsonpath='{.items[*].metadata.name}' 2>/dev/null | tr ' ' '\n' | grep -vE "^(${system_ns})$" || true)
     for ns in $test_namespaces; do
         log_info "Removing namespace $ns..."
-        kubectl delete ns "$ns" --ignore-not-found --wait=true --timeout=120s 2>/dev/null || true
+        kubectl delete ns "$ns" --ignore-not-found --wait=false 2>/dev/null || true
+    done
+    sleep 2
+    for ns in $test_namespaces; do
+        if kubectl get ns "$ns" >/dev/null 2>&1; then
+            log_debug "Force-finalizing stuck namespace: $ns"
+            kubectl get ns "$ns" -o json 2>/dev/null \
+                | jq '.spec.finalizers = [] | .metadata.finalizers = []' \
+                | kubectl replace --raw "/api/v1/namespaces/${ns}/finalize" -f - >/dev/null 2>&1 \
+                || true
+        fi
     done
 
-    # Also clean up legacy aicr-kwok-test namespaces
+    # Also clean up legacy aicr-kwok-test namespaces (async + force-finalize).
     local old_namespaces
     old_namespaces=$(kubectl get ns -o name 2>/dev/null | grep "namespace/aicr-kwok-test" || true)
     if [[ -n "$old_namespaces" ]]; then
         log_info "Removing old test namespaces..."
-        echo "$old_namespaces" | xargs kubectl delete --wait=true --timeout=120s 2>/dev/null || true
+        echo "$old_namespaces" | xargs kubectl delete --wait=false 2>/dev/null || true
+        sleep 2
+        # Force-finalize any of those that linger.
+        while read -r ns_ref; do
+            local ns_name="${ns_ref#namespace/}"
+            if [[ -z "$ns_name" ]]; then continue; fi
+            if kubectl get ns "$ns_name" >/dev/null 2>&1; then
+                log_debug "Force-finalizing legacy namespace: $ns_name"
+                kubectl get ns "$ns_name" -o json 2>/dev/null \
+                    | jq '.spec.finalizers = [] | .metadata.finalizers = []' \
+                    | kubectl replace --raw "/api/v1/namespaces/${ns_name}/finalize" -f - >/dev/null 2>&1 \
+                    || true
+            fi
+        done <<< "$old_namespaces"
     fi
 
     log_info "Cleanup complete"
@@ -372,85 +661,460 @@ generate_bundle() {
     #   node the pod schedules on — and KWOK fakes can't actually back a
     #   local-path volume, leaving the pod stuck Pending with NominatedNodeName
     #   set. Disabling persistence lets the controller pod bind.
-    log_info "Generating bundle..."
+    log_info "Generating bundle (deployer=${DEPLOYER})..."
 
     local bundle_output
-    if ! bundle_output=$("$AICR_BIN" bundle \
-        --recipe "${WORK_DIR}/recipe.yaml" \
-        --output "${WORK_DIR}/bundle" \
-        --system-node-selector "aicr.nvidia.com/node-type=system" \
-        --accelerated-node-selector "aicr.nvidia.com/node-type=accelerated" \
-        --system-node-toleration "kwok.x-k8s.io/node=fake:NoSchedule" \
-        --accelerated-node-toleration "nvidia.com/gpu=present:NoSchedule" \
-        --accelerated-node-toleration "kwok.x-k8s.io/node=fake:NoSchedule" \
-        --set "certmanager:startupapicheck.enabled=false" \
-        --set "slinkyslurmoperator:webhook.enabled=false" \
-        --set "slinkyslurmoperator:certManager.enabled=false" \
-        --set "slurmcluster:controller.persistence.enabled=false" \
-        --set "kubeprometheusstack:defaultRules.create=false" \
-        --set "kubeprometheusstack:alertmanager.enabled=false" \
-        --set "nodewright-customizations:enabled=false" \
-        --set "dynamoplatform:etcd.persistence.enabled=false" \
-        --set "dynamoplatform:nats.config.jetstream.fileStore.enabled=false" 2>&1); then
-        log_error "Bundle generation failed"
-        log_error "Bundle command output:"
-        echo "$bundle_output"
-        return 1
-    fi
+    case "$DEPLOYER" in
+        helm)
+            # Original Helm path — DO NOT change this invocation. Byte-identical
+            # to pre-#843 behavior is a non-regression requirement.
+            if ! bundle_output=$("$AICR_BIN" bundle \
+                --recipe "${WORK_DIR}/recipe.yaml" \
+                --output "${WORK_DIR}/bundle" \
+                --system-node-selector "aicr.nvidia.com/node-type=system" \
+                --accelerated-node-selector "aicr.nvidia.com/node-type=accelerated" \
+                --system-node-toleration "kwok.x-k8s.io/node=fake:NoSchedule" \
+                --accelerated-node-toleration "nvidia.com/gpu=present:NoSchedule" \
+                --accelerated-node-toleration "kwok.x-k8s.io/node=fake:NoSchedule" \
+                --set "certmanager:startupapicheck.enabled=false" \
+                --set "slinkyslurmoperator:webhook.enabled=false" \
+                --set "slinkyslurmoperator:certManager.enabled=false" \
+                --set "slurmcluster:controller.persistence.enabled=false" \
+                --set "kubeprometheusstack:defaultRules.create=false" \
+                --set "kubeprometheusstack:alertmanager.enabled=false" \
+                --set "nodewright-customizations:enabled=false" \
+                --set "dynamoplatform:etcd.persistence.enabled=false" \
+                --set "dynamoplatform:nats.config.jetstream.fileStore.enabled=false" 2>&1); then
+                log_error "Bundle generation failed"
+                log_error "Bundle command output:"
+                echo "$bundle_output"
+                return 1
+            fi
 
-    if [[ ! -d "${WORK_DIR}/bundle" ]]; then
-        log_error "Bundle directory not created: ${WORK_DIR}/bundle"
-        return 1
-    fi
+            if [[ ! -d "${WORK_DIR}/bundle" ]]; then
+                log_error "Bundle directory not created: ${WORK_DIR}/bundle"
+                return 1
+            fi
 
-    log_info "Bundle generated at ${WORK_DIR}/bundle"
+            # KWOK clusters use emptyDir for Prometheus storage (no PVC /
+            # StorageClass). Cloud overlays (EKS, AKS) set emptyDir: null +
+            # volumeClaimTemplate, which the Prometheus CRD rejects in a
+            # KWOK environment. Restore emptyDir and remove the PVC.
+            #
+            # This fix-up is intentionally helm-only: the argocd-* deployers
+            # consume the bundle from OCI (the artifact was already pushed
+            # above), so local-filesystem edits would be discarded by
+            # Argo CD's repo-server when it re-pulls the artifact. See
+            # SHOULD-FIX 2 of the #843 follow-up review.
+            local prom_values
+            prom_values=$(find "${WORK_DIR}/bundle" -mindepth 2 -maxdepth 2 \
+                -path '*[0-9][0-9][0-9]-kube-prometheus-stack/values.yaml' \
+                -type f 2>/dev/null | head -1)
+            if [[ -n "$prom_values" && -f "$prom_values" ]] && \
+                    yq eval '.prometheus.prometheusSpec.storageSpec.emptyDir' "$prom_values" 2>/dev/null | grep -q 'null'; then
+                log_info "Fixing kube-prometheus-stack storageSpec for KWOK (emptyDir instead of PVC)"
+                yq eval -i '
+                    .prometheus.prometheusSpec.storageSpec.emptyDir = {"medium": "", "sizeLimit": "10Gi"} |
+                    del(.prometheus.prometheusSpec.storageSpec.volumeClaimTemplate)
+                ' "$prom_values"
+            fi
+            ;;
+        argocd-oci|argocd-helm-oci)
+            # Compute OCI ref. Tag includes the deployer so back-to-back runs
+            # with different deployer values on the same recipe don't collide
+            # in the registry.
+            #
+            # Tag-format constraint: helm v3's OCI client filters tags via
+            # `semver.StrictNewVersion` (registry/client.go::Tags()) — a tag
+            # like `fe346a05-argocd-helm-oci` is silently dropped and helm
+            # reports `unable to locate any tags in provided repository`
+            # even though the artifact is in the registry with correct
+            # helm.chart.content.v1.tar+gzip mediaType (see #961 root
+            # cause). Wrap the sha-and-deployer suffix as a semver pre-
+            # release so `0.0.0-fe346a05-argocd-helm-oci` parses cleanly.
+            # Argo CD's OCI Helm source does not have this filter, so the
+            # other two lanes (helm, argocd-oci) keep the plain
+            # sha-deployer tag.
+            local short_sha
+            short_sha=$(git -C "$REPO_ROOT" rev-parse --short HEAD 2>/dev/null || echo "local")
+            local host_port="${KWOK_REGISTRY_HOST_PORT:-5500}"
+            local tag="${short_sha}-${DEPLOYER}"
+            if [[ "$DEPLOYER" == "argocd-helm-oci" ]]; then
+                tag="0.0.0-${short_sha}-${DEPLOYER}"
+            fi
+            OCI_REF="oci://localhost:${host_port}/aicr/${recipe}:${tag}"
+
+            # The OCI ref above is the runner's view (push side) — host port
+            # exposed by Kind's extraPortMappings. Argo CD's repo-server runs
+            # IN-CLUSTER and reaches the same registry via Service DNS, on the
+            # Service's own port 5000 (not the runner's host port). The push
+            # and pull URLs MUST differ; passing --repo to `aicr bundle`
+            # overrides the default auto-derivation (which would otherwise
+            # set repoURL = the runner-view URL and Argo CD would try to dial
+            # localhost:5500 from inside its repo-server pod).
+            # Script-global so deploy_bundle's argocd-helm-oci branch can
+            # pass it through to `helm install --set repoURL=…` without
+            # duplicating the runner→service-DNS rewrite rule.
+            OCI_IN_CLUSTER_REF="oci://registry.aicr-registry.svc.cluster.local:5000/aicr/${recipe}"
+            local in_cluster_repo="$OCI_IN_CLUSTER_REF"
+
+            # Map our deployer-matrix name to aicr's --deployer value.
+            local deployer_arg="argocd"
+            [[ "$DEPLOYER" == "argocd-helm-oci" ]] && deployer_arg="argocd-helm"
+
+            log_info "Bundling for ${deployer_arg}, pushing to ${OCI_REF}"
+            log_info "Argo CD will pull from ${in_cluster_repo}:${tag}"
+            # When --output is an oci:// reference, `aicr bundle` writes the
+            # local bundle to ./bundle (relative to CWD) — there's no way to
+            # redirect it to an absolute path. cd into WORK_DIR so the local
+            # bundle lands at ${WORK_DIR}/bundle/ where downstream apply paths
+            # expect it (kubectl apply -f .../app-of-apps.yaml for argocd-oci).
+            if ! bundle_output=$(cd "${WORK_DIR}" && "$AICR_BIN" bundle \
+                --recipe "${WORK_DIR}/recipe.yaml" \
+                --deployer "$deployer_arg" \
+                --output "$OCI_REF" \
+                --repo "$in_cluster_repo" \
+                --plain-http \
+                --system-node-selector "aicr.nvidia.com/node-type=system" \
+                --accelerated-node-selector "aicr.nvidia.com/node-type=accelerated" \
+                --system-node-toleration "kwok.x-k8s.io/node=fake:NoSchedule" \
+                --accelerated-node-toleration "nvidia.com/gpu=present:NoSchedule" \
+                --accelerated-node-toleration "kwok.x-k8s.io/node=fake:NoSchedule" \
+                --set "certmanager:startupapicheck.enabled=false" \
+                --set "slinkyslurmoperator:webhook.enabled=false" \
+                --set "slinkyslurmoperator:certManager.enabled=false" \
+                --set "slurmcluster:controller.persistence.enabled=false" \
+                --set "kubeprometheusstack:defaultRules.create=false" \
+                --set "kubeprometheusstack:alertmanager.enabled=false" \
+                --set "nodewright-customizations:enabled=false" \
+                --set "dynamoplatform:etcd.persistence.enabled=false" \
+                --set "dynamoplatform:nats.config.jetstream.fileStore.enabled=false" 2>&1); then
+                log_error "Bundle generation failed"
+                log_error "Bundle command output:"
+                echo "$bundle_output"
+                return 1
+            fi
 
-    # KWOK clusters use emptyDir for Prometheus storage (no PVC/StorageClass).
-    # Cloud overlays (EKS, AKS) set emptyDir: null + volumeClaimTemplate which
-    # the Prometheus CRD rejects. Restore emptyDir and remove PVC for KWOK.
-    # Bundle layout uses NNN-prefixed folders, so glob to find the kube-prometheus-stack folder.
-    local prom_values
-    prom_values=$(ls -1 "${WORK_DIR}/bundle"/[0-9][0-9][0-9]-kube-prometheus-stack/values.yaml 2>/dev/null | head -1)
-    if [[ -n "$prom_values" && -f "$prom_values" ]] && yq eval '.prometheus.prometheusSpec.storageSpec.emptyDir' "$prom_values" 2>/dev/null | grep -q 'null'; then
-        log_info "Fixing kube-prometheus-stack storageSpec for KWOK (emptyDir instead of PVC)"
-        yq eval -i '
-            .prometheus.prometheusSpec.storageSpec.emptyDir = {"medium": "", "sizeLimit": "10Gi"} |
-            del(.prometheus.prometheusSpec.storageSpec.volumeClaimTemplate)
-        ' "$prom_values"
-    fi
+            # `--deployer argocd*` still renders the local bundle directory
+            # (app-of-apps.yaml lives there for argocd-oci). For argocd-helm-oci
+            # the bundle is also pushed as an OCI Helm chart we'll install via
+            # `helm upgrade --install`.
+            if [[ "$DEPLOYER" == "argocd-oci" ]] && [[ ! -f "${WORK_DIR}/bundle/app-of-apps.yaml" ]]; then
+                log_error "Expected app-of-apps.yaml not found in bundle: ${WORK_DIR}/bundle"
+                log_error "Bundle contents:"
+                list_bundle_entries "${WORK_DIR}/bundle"
+                return 1
+            fi
+            # NOTE: Prometheus emptyDir fix-up (see helm) branch above) is
+            # intentionally NOT applied for argocd-* deployers because the
+            # bundle is consumed from OCI, not the local filesystem. KWOK
+            # clusters without a StorageClass may see Prometheus PVCs hang
+            # Pending. Disable Prometheus persistence at bundle time via
+            # `--set kubeprometheusstack:prometheus.prometheusSpec.storageSpec=null`
+            # if running these recipes through argocd-*. See SHOULD-FIX 2 of
+            # the #843 follow-up review and docs/plans/2026-05-18-kwok-
+            # deployer-matrix.md (Unresolved Questions).
+            ;;
+        flux-oci)
+            # Flux's source-controller does NOT filter OCI tags via semver
+            # (it's an artifact pull, not a chart-version lookup), so the
+            # plain sha-deployer tag is fine here — no `0.0.0-…` wrapping.
+            local short_sha
+            short_sha=$(git -C "$REPO_ROOT" rev-parse --short HEAD 2>/dev/null || echo "local")
+            local host_port="${KWOK_REGISTRY_HOST_PORT:-5500}"
+            OCI_REF="oci://localhost:${host_port}/aicr/${recipe}:${short_sha}-${DEPLOYER}"
+            # In-cluster URL that the Flux OCIRepository will pull from.
+            # The flux deployer does NOT bake repoURL into the bundle (the
+            # bundle is a Kustomize tree of HelmRelease + HelmRepository
+            # source CRs; the OUTER OCIRepository is what we apply at
+            # deploy time, and its URL is set then). We still derive it
+            # here so deploy_bundle can reuse the rewrite rule from
+            # exactly one place.
+            OCI_IN_CLUSTER_REF="oci://registry.aicr-registry.svc.cluster.local:5000/aicr/${recipe}"
+
+            log_info "Bundling for flux, pushing to ${OCI_REF}"
+            log_info "Flux will pull from ${OCI_IN_CLUSTER_REF}:${short_sha}-${DEPLOYER}"
+            # cd into WORK_DIR for the same reason as the argocd-* branch
+            # (relative ./bundle output path under `--output oci://...`).
+            if ! bundle_output=$(cd "${WORK_DIR}" && "$AICR_BIN" bundle \
+                --recipe "${WORK_DIR}/recipe.yaml" \
+                --deployer flux \
+                --output "$OCI_REF" \
+                --plain-http \
+                --system-node-selector "aicr.nvidia.com/node-type=system" \
+                --accelerated-node-selector "aicr.nvidia.com/node-type=accelerated" \
+                --system-node-toleration "kwok.x-k8s.io/node=fake:NoSchedule" \
+                --accelerated-node-toleration "nvidia.com/gpu=present:NoSchedule" \
+                --accelerated-node-toleration "kwok.x-k8s.io/node=fake:NoSchedule" \
+                --set "certmanager:startupapicheck.enabled=false" \
+                --set "slinkyslurmoperator:webhook.enabled=false" \
+                --set "slinkyslurmoperator:certManager.enabled=false" \
+                --set "slurmcluster:controller.persistence.enabled=false" \
+                --set "kubeprometheusstack:defaultRules.create=false" \
+                --set "kubeprometheusstack:alertmanager.enabled=false" \
+                --set "nodewright-customizations:enabled=false" \
+                --set "dynamoplatform:etcd.persistence.enabled=false" \
+                --set "dynamoplatform:nats.config.jetstream.fileStore.enabled=false" 2>&1); then
+                log_error "Bundle generation failed"
+                log_error "Bundle command output:"
+                echo "$bundle_output"
+                return 1
+            fi
+
+            # Flux bundle root must contain kustomization.yaml — Flux's
+            # Kustomization CR points its `path: ./` at it. Fail fast
+            # otherwise (the OCIRepository pull would succeed but the
+            # Kustomization apply would no-op silently).
+            if [[ ! -f "${WORK_DIR}/bundle/kustomization.yaml" ]]; then
+                log_error "Expected kustomization.yaml not found in bundle: ${WORK_DIR}/bundle"
+                log_error "Bundle contents:"
+                list_bundle_entries "${WORK_DIR}/bundle"
+                return 1
+            fi
+            ;;
+        *)
+            log_error "Unknown deployer: ${DEPLOYER}"
+            return 1
+            ;;
+    esac
+
+    log_info "Bundle generated at ${WORK_DIR}/bundle"
 
     log_debug "Bundle contents:"
-    ls -1 "${WORK_DIR}/bundle" | head -10
+    list_bundle_entries "${WORK_DIR}/bundle" | head -10
 }
 
-# Deploy bundle to cluster using the generated deploy.sh
+# Print top-level entries of a bundle directory, one per line, basename only.
+# Portable replacement for `ls -1 <dir>` (SC2012) that does not depend on
+# GNU find's `-printf` extension. Bash NULLGLOB is enabled locally so an
+# empty directory prints nothing instead of the literal pattern.
+#
+# SIGPIPE handling: callers commonly pipe this to `head -N`. When the bundle
+# has more than N entries, `head` closes stdin after reading N lines. The next
+# `printf` in this subshell then writes to a closed FD; bash forwards SIGPIPE
+# (exit 141), which under `set -euo pipefail` aborts the whole script even
+# though listing succeeded. Ignoring SIGPIPE in the subshell turns the write
+# into a plain EPIPE on `printf` (returns non-zero, but does not kill the
+# shell); breaking out of the loop on that first failed write produces the
+# same "listed up to N then stopped" behavior the caller intends, without
+# leaking a 141 exit through the pipeline. Observed bite: OKE argocd-helm-oci
+# CI cancellation on commit 263f4433 — bundle generated cleanly but `… | head
+# -10` raced the producer past the 10th line and SIGPIPE killed the run.
+list_bundle_entries() {
+    local dir="$1"
+    [[ -d "$dir" ]] || return 0
+    (
+        trap '' PIPE
+        shopt -s nullglob dotglob
+        local entry
+        for entry in "$dir"/*; do
+            printf '%s\n' "${entry##*/}" || break
+        done
+    )
+}
+
+# Like list_bundle_entries but excludes the bundle's generated deploy.sh.
+# Used by the helm deploy path to log component folder names.
+list_bundle_components() {
+    local dir="$1"
+    list_bundle_entries "$dir" | { grep -v '^deploy\.sh$' || true; }
+}
+
+# Deploy bundle to cluster.
+#
+# For DEPLOYER=helm: runs the bundle's generated deploy.sh (unchanged
+# pre-#843 behavior).
+#
+# For DEPLOYER=argocd-oci: kubectl apply -f <bundle>/app-of-apps.yaml, then
+# wait for every Argo CD Application in the argocd namespace to reach
+# Synced + (Healthy|Progressing).
+#
+# For DEPLOYER=argocd-helm-oci: helm upgrade --install the OCI chart into
+# the argocd namespace, then wait the same way.
 deploy_bundle() {
-    log_info "Deploying per-component bundle..."
+    log_info "Deploying per-component bundle (deployer=${DEPLOYER})..."
 
     local bundle_dir="${WORK_DIR}/bundle"
 
-    if [[ ! -f "${bundle_dir}/deploy.sh" ]]; then
-        log_error "deploy.sh not found in bundle directory: $bundle_dir"
-        log_error "Bundle generation may have failed"
-        return 1
-    fi
+    case "$DEPLOYER" in
+        helm)
+            if [[ ! -f "${bundle_dir}/deploy.sh" ]]; then
+                log_error "deploy.sh not found in bundle directory: $bundle_dir"
+                log_error "Bundle generation may have failed"
+                return 1
+            fi
 
-    log_debug "Bundle directory: $bundle_dir"
-    log_debug "Components in bundle:"
-    ls -1 "$bundle_dir" | grep -v "deploy.sh" | head -10
+            log_debug "Bundle directory: $bundle_dir"
+            log_debug "Components in bundle:"
+            list_bundle_components "$bundle_dir" | head -10
+
+            # Run the generated deploy script without --wait since KWOK clusters
+            # only validate scheduling, not pod readiness
+            chmod +x "${bundle_dir}/deploy.sh"
+            log_info "Running deploy.sh --no-wait..."
+
+            local deploy_output
+            if ! deploy_output=$("${bundle_dir}/deploy.sh" --no-wait 2>&1); then
+                log_error "Deploy script failed"
+                log_error "Last 50 lines of deploy output:"
+                echo "$deploy_output" | tail -50
+                return 1
+            fi
+            ;;
+        argocd-oci)
+            log_info "Applying app-of-apps manifest: ${bundle_dir}/app-of-apps.yaml"
+            if ! kubectl apply -f "${bundle_dir}/app-of-apps.yaml"; then
+                log_error "kubectl apply -f app-of-apps.yaml failed"
+                return 1
+            fi
+            # Preserve wait_for_argocd_sync's exit code (50 == sync timeout)
+            # so run-all-recipes.sh can apply the 3-strike rule.
+            local sync_rc=0
+            wait_for_argocd_sync || sync_rc=$?
+            if (( sync_rc != 0 )); then
+                return "$sync_rc"
+            fi
+            ;;
+        argocd-helm-oci)
+            if [[ -z "$OCI_REF" ]]; then
+                log_error "OCI_REF unset for argocd-helm-oci path (bundle step skipped?)"
+                return 1
+            fi
+            if [[ -z "$OCI_IN_CLUSTER_REF" ]]; then
+                log_error "OCI_IN_CLUSTER_REF unset for argocd-helm-oci path (bundle step skipped?)"
+                return 1
+            fi
+            # Helm OCI syntax: the chart reference must NOT include a ':<tag>'
+            # suffix. The tag is passed via --version. OCI_REF is shaped as
+            # oci://host/path:tag; split into ref (oci://host/path) + tag.
+            local helm_ref="${OCI_REF%:*}"
+            local helm_tag="${OCI_REF##*:}"
+            # repoURL + targetRevision are REQUIRED by the argocdhelm wrapper
+            # chart at install time (pkg/bundler/deployer/argocdhelm). The
+            # chart templates path-based child Applications (e.g.
+            # gpu-operator-post for manifest-only / mixed components) whose
+            # spec.source.repoURL must resolve to a remote Argo CD's
+            # repo-server can pull from. The wrapper enforces this with
+            # `{{ required }}` on .Values.repoURL — without these flags
+            # `helm install` fails template-render with
+            # `repoURL is required: pass --set repoURL=<published bundle URL>`.
+            #
+            # repoURL is the IN-CLUSTER URL (Service DNS) because Argo CD's
+            # repo-server runs inside the cluster; targetRevision is the
+            # chart's OCI tag, which by convention also names the chart
+            # version (loadAndRewriteChartYAML in pkg/oci/helm.go rewrites
+            # Chart.yaml.version to the OCI tag at push time).
+            log_info "helm upgrade --install ${RELEASE_NAME}-stack ${helm_ref} --version ${helm_tag} -n argocd --plain-http --set repoURL=${OCI_IN_CLUSTER_REF} --set targetRevision=${helm_tag}"
+            # --plain-http: in-cluster registry is HTTP-only; helm defaults to HTTPS.
+            if ! helm upgrade --install "${RELEASE_NAME}-stack" "$helm_ref" \
+                    --version "$helm_tag" \
+                    --namespace argocd \
+                    --plain-http \
+                    --set "repoURL=${OCI_IN_CLUSTER_REF}" \
+                    --set "targetRevision=${helm_tag}"; then
+                log_error "helm upgrade --install failed for ${OCI_REF}"
+                return 1
+            fi
+            # Preserve wait_for_argocd_sync's exit code (50 == sync timeout)
+            # so run-all-recipes.sh can apply the 3-strike rule.
+            local sync_rc=0
+            wait_for_argocd_sync || sync_rc=$?
+            if (( sync_rc != 0 )); then
+                return "$sync_rc"
+            fi
+            ;;
+        flux-oci)
+            if [[ -z "$OCI_IN_CLUSTER_REF" ]]; then
+                log_error "OCI_IN_CLUSTER_REF unset for flux-oci path (bundle step skipped?)"
+                return 1
+            fi
+            if [[ -z "$OCI_REF" ]]; then
+                log_error "OCI_REF unset for flux-oci path (bundle step skipped?)"
+                return 1
+            fi
+            # OCI_REF is "oci://host:port/path:tag"; the tag we need for the
+            # OCIRepository's `ref.tag` is the rightmost ':<segment>'. Split.
+            local oci_tag="${OCI_REF##*:}"
+
+            # Render OCIRepository + Kustomization wrappers.
+            #
+            # OCIRepository:
+            #   - spec.insecure: true — in-cluster registry is HTTP-only.
+            #   - spec.layerSelector.mediaType — AICR's generic OCI push
+            #     uses application/vnd.oci.image.layer.v1.tar+gzip (see
+            #     pkg/oci/push.go::artifactType for the manifest's
+            #     artifactType; source-controller doesn't filter on that
+            #     field but DOES need a layerSelector for non-Flux-CLI
+            #     artifacts per fluxcd/source-controller docs).
+            #   - spec.layerSelector.operation: extract — unpack the
+            #     layer into the source workspace.
+            #
+            # Kustomization:
+            #   - sourceRef → the OCIRepository above
+            #   - path: ./ — the bundle's root contains kustomization.yaml
+            #   - prune: true — let Flux GC removed resources between runs
+            #   - wait: false — KWOK can't drive Pod readiness for arbitrary
+            #     workloads, so `wait: true` would block the Kustomization's
+            #     Ready condition indefinitely. We assert terminal state via
+            #     HelmRelease conditions below instead.
+            #   - timeout: 5m — matches KWOK_FLUX_SYNC_TIMEOUT budget.
+            log_info "Applying Flux OCIRepository ${FLUX_OCIREPOSITORY_NAME} (ref=${oci_tag})..."
+            if ! kubectl apply -f - <<EOF
+apiVersion: source.toolkit.fluxcd.io/v1
+kind: OCIRepository
+metadata:
+  name: ${FLUX_OCIREPOSITORY_NAME}
+  namespace: flux-system
+spec:
+  interval: 1m
+  insecure: true
+  url: ${OCI_IN_CLUSTER_REF}
+  ref:
+    tag: ${oci_tag}
+  layerSelector:
+    mediaType: application/vnd.oci.image.layer.v1.tar+gzip
+    operation: extract
+EOF
+            then
+                log_error "kubectl apply OCIRepository failed"
+                return 1
+            fi
 
-    # Run the generated deploy script without --wait since KWOK clusters
-    # only validate scheduling, not pod readiness
-    chmod +x "${bundle_dir}/deploy.sh"
-    log_info "Running deploy.sh --no-wait..."
+            log_info "Applying Flux Kustomization ${FLUX_KUSTOMIZATION_NAME}..."
+            if ! kubectl apply -f - <<EOF
+apiVersion: kustomize.toolkit.fluxcd.io/v1
+kind: Kustomization
+metadata:
+  name: ${FLUX_KUSTOMIZATION_NAME}
+  namespace: flux-system
+spec:
+  interval: 1m
+  prune: true
+  wait: false
+  timeout: 5m
+  sourceRef:
+    kind: OCIRepository
+    name: ${FLUX_OCIREPOSITORY_NAME}
+  path: ./
+EOF
+            then
+                log_error "kubectl apply Kustomization failed"
+                return 1
+            fi
 
-    local deploy_output
-    if ! deploy_output=$("${bundle_dir}/deploy.sh" --no-wait 2>&1); then
-        log_error "Deploy script failed"
-        log_error "Last 50 lines of deploy output:"
-        echo "$deploy_output" | tail -50
-        return 1
-    fi
+            # Preserve wait_for_flux_sync's exit code (50 == sync timeout)
+            # so run-all-recipes.sh can apply the 3-strike rule.
+            local sync_rc=0
+            wait_for_flux_sync || sync_rc=$?
+            if (( sync_rc != 0 )); then
+                return "$sync_rc"
+            fi
+            ;;
+        *)
+            log_error "Unknown deployer: ${DEPLOYER}"
+            return 1
+            ;;
+    esac
 
     # Brief wait for scheduler to place pods
     log_info "Waiting for pods to be scheduled..."
@@ -459,14 +1123,519 @@ deploy_bundle() {
     log_info "Bundle deployed successfully"
 }
 
+# Assert the deployer's root Argo CD Application exists within
+# KWOK_ARGOCD_ROOT_GRACE seconds (default 30). Without this gate the script
+# treats "kubectl apply / helm install silently produced no Application"
+# (webhook reject, CRD scope mismatch, RBAC denial) as a slow controller
+# and burns the full KWOK_ARGOCD_SYNC_TIMEOUT before failing with a
+# misleading "deadline hit" message.
+#
+# On failure, dumps recent events in the argocd namespace and returns 1.
+wait_for_argocd_root_app() {
+    if [[ -z "$ARGOCD_ROOT_APP" ]]; then
+        log_error "ARGOCD_ROOT_APP is empty (DEPLOYER=${DEPLOYER})"
+        return 1
+    fi
+
+    local grace="${KWOK_ARGOCD_ROOT_GRACE:-30}"
+    local start=$SECONDS
+
+    log_info "Waiting for root Application ${ARGOCD_ROOT_APP} to appear (grace ${grace}s)..."
+
+    while (( SECONDS - start < grace )); do
+        if kubectl get application "$ARGOCD_ROOT_APP" -n argocd \
+                >/dev/null 2>&1; then
+            log_info "Root Application ${ARGOCD_ROOT_APP} present"
+            return 0
+        fi
+        sleep 2
+    done
+
+    log_error "Root Application ${ARGOCD_ROOT_APP} not present after ${grace}s"
+    log_error "kubectl apply / helm install for DEPLOYER=${DEPLOYER} produced no Application resource."
+    log_error "--- argocd namespace events (last 20) ---"
+    kubectl get events -n argocd --sort-by=.lastTimestamp 2>&1 | tail -20 || true
+    return 1
+}
+
+# Wait until every Argo CD Application in the argocd namespace reaches a
+# terminal pass state. The pass set is intentionally broader than just
+# "Synced + Healthy" so KWOK simulation gaps and the real-world
+# operator-mutation-after-sync pattern do not mask a healthy deployment:
+#
+#   1. Synced + Healthy ........................ canonical pass
+#   2. Synced + Progressing .................... KWOK fake pods often sit in
+#                                                Progressing because the
+#                                                health controller wants a
+#                                                deeper readiness signal
+#                                                that stage-fast cannot
+#                                                simulate (DaemonSet pods,
+#                                                Prometheus StatefulSet
+#                                                under the operator,
+#                                                etc.); accepted per
+#                                                ADR-008.
+#   3. OutOfSync + Healthy + last op succeeded . Operator-mutation drift:
+#                                                charts like gpu-operator
+#                                                and nvidia-dra-driver-gpu
+#                                                ship CRs that their own
+#                                                controller updates post-
+#                                                sync. Argo CD initiates
+#                                                the sync, the operation
+#                                                completes ("successfully
+#                                                synced (all tasks run)"),
+#                                                resources are Healthy,
+#                                                then drift reappears as
+#                                                the operator reconciles.
+#                                                In production these are
+#                                                handled with per-App
+#                                                ignoreDifferences; for
+#                                                KWOK we accept the
+#                                                Healthy-post-success
+#                                                state as a real pass and
+#                                                rely on operationState
+#                                                .phase=Succeeded to
+#                                                distinguish it from a
+#                                                still-broken OutOfSync.
+#   4. Synced + Degraded + last op succeeded ... Chart applied but health
+#                                                controller marks at
+#                                                least one managed
+#                                                resource Degraded —
+#                                                typically a Pod whose
+#                                                Ready signal KWOK cannot
+#                                                produce (leader election
+#                                                like kai-scheduler,
+#                                                charts with strict probe
+#                                                semantics) or a Deployment
+#                                                that cannot reach
+#                                                desired replicas under
+#                                                stage-fast. The sync
+#                                                operation succeeded — the
+#                                                bundle was applied. The
+#                                                health gradient is the
+#                                                chart's runtime contract
+#                                                + KWOK fidelity, not a
+#                                                bundle defect. Same
+#                                                operationState.phase=
+#                                                Succeeded guard as #3
+#                                                prevents masking a
+#                                                still-broken sync.
+#
+# Returns 0 on success. On deadline hit OR on a `kubectl get applications`
+# hard error (CRD missing, RBAC denied, apiserver unreachable), calls
+# dump_argocd_failures and returns 1.
+#
+# Guards (issue #843 follow-up review):
+#
+#   MUST-FIX 1 — Asserts the deployer's root Application exists (call site
+#   pre-checks via wait_for_argocd_root_app). Then derives the expected
+#   set of child Applications from the root's status.resources so a
+#   root-only "Synced + Healthy" state cannot masquerade as a true pass
+#   (the chart wrapping in argocd-helm-oci can land just the root
+#   Application without children if the OCI pull silently degrades).
+#
+#   MUST-FIX 2 — Distinguishes a `kubectl get applications` non-zero exit
+#   (CRD missing, permission denied, apiserver unreachable) from a
+#   well-formed empty list. A hard error fails the loop immediately
+#   instead of spinning to the deadline.
+#
+# Deadline: KWOK_ARGOCD_SYNC_TIMEOUT env var (default 300s).
+wait_for_argocd_sync() {
+    if ! wait_for_argocd_root_app; then
+        dump_argocd_failures
+        return 1
+    fi
+
+    local deadline="${KWOK_ARGOCD_SYNC_TIMEOUT:-300}"
+    local start=$SECONDS
+
+    log_info "Waiting for Argo CD Applications to sync (deadline ${deadline}s)..."
+
+    while (( SECONDS - start < deadline )); do
+        local apps_json apps_rc total pending root_sync root_health
+        # MUST-FIX 2: distinguish a hard error from an empty list. We
+        # cannot use command substitution + `|| echo '{"items":[]}'`
+        # because that swallows the exit code.
+        #
+        # Subtle bash semantics: a bare `apps_json=$(kubectl ...)`
+        # assignment under `set -euo pipefail` (line 72) DOES trip
+        # set -e when the substituted command exits non-zero — bash
+        # exits before the next line's `apps_rc=$?` can capture the
+        # status. Wrap in an `if cmd; then ok; else capture; fi`
+        # block: command-substitution failures inside an `if`
+        # condition are exempt from set -e (per the manual), so we
+        # get the exit code into `apps_rc` safely.
+        if apps_json=$(kubectl get applications -n argocd -o json 2>/dev/null); then
+            apps_rc=0
+        else
+            apps_rc=$?
+        fi
+        if (( apps_rc != 0 )); then
+            log_error "kubectl get applications -n argocd failed (rc=${apps_rc})"
+            log_error "Likely cause: applications.argoproj.io CRD missing, RBAC denied, or apiserver unreachable."
+            log_error "--- kubectl get applications stderr ---"
+            kubectl get applications -n argocd 2>&1 | tail -20 || true
+            dump_argocd_failures
+            return 1
+        fi
+
+        total=$(echo "$apps_json" | jq '.items | length')
+
+        # MUST-FIX 1: root must be Synced+Healthy AND its expected
+        # children (status.resources[].name) must all be reconciled. We
+        # use status.resources because it self-adjusts per recipe — no
+        # static min-count env var to maintain.
+        root_sync=$(echo "$apps_json" \
+            | jq -r --arg n "$ARGOCD_ROOT_APP" \
+                '.items[] | select(.metadata.name == $n) | .status.sync.status // "Unknown"')
+        root_health=$(echo "$apps_json" \
+            | jq -r --arg n "$ARGOCD_ROOT_APP" \
+                '.items[] | select(.metadata.name == $n) | .status.health.status // "Unknown"')
+
+        if [[ "$root_sync" != "Synced" || \
+                ( "$root_health" != "Healthy" && "$root_health" != "Progressing" ) ]]; then
+            log_debug "Root ${ARGOCD_ROOT_APP} not ready yet (sync=${root_sync} health=${root_health}; total=${total})..."
+            sleep 5
+            continue
+        fi
+
+        # Children expected (per the root's status.resources). For the
+        # argocd-oci app-of-apps shape, status.resources lists the child
+        # Applications by name. For argocd-helm-oci the wrapper App
+        # references chart subresources — fall back to "any non-root
+        # Application is a child" if status.resources is empty.
+        local expected_children
+        expected_children=$(echo "$apps_json" \
+            | jq -r --arg n "$ARGOCD_ROOT_APP" '
+                .items[]
+                | select(.metadata.name == $n)
+                | (.status.resources // [])
+                | map(select(.kind == "Application"))
+                | length')
+
+        # Documented fallback: when status.resources does not list any
+        # Application children but other Applications exist in the
+        # namespace, treat the count of non-root Applications as the
+        # observed-children count. The argocd-helm-oci wrapper's
+        # status.resources can take a moment to populate while the
+        # children it created are already reconciling; without this
+        # fallback the loop sleeps to the deadline even when every
+        # child is Synced+Healthy.
+        if [[ "$expected_children" -eq 0 ]]; then
+            local observed_children
+            observed_children=$(echo "$apps_json" \
+                | jq -r --arg n "$ARGOCD_ROOT_APP" \
+                    '[.items[] | select(.metadata.name != $n)] | length')
+            if [[ "$observed_children" -gt 0 ]]; then
+                expected_children="$observed_children"
+            fi
+        fi
+
+        if [[ "$expected_children" -eq 0 ]]; then
+            # No children declared by the root yet. If we're the only
+            # Application in the namespace and the controller has been
+            # given a chance to reconcile, treat as root-only success
+            # only when status.resources has resolved (controller has
+            # observed the source). Otherwise keep waiting.
+            local resources_populated
+            resources_populated=$(echo "$apps_json" \
+                | jq --arg n "$ARGOCD_ROOT_APP" '
+                    .items[]
+                    | select(.metadata.name == $n)
+                    | (.status.resources // []) | length > 0')
+            if [[ "$total" -eq 1 && "$resources_populated" == "true" ]]; then
+                log_warn "Root Application ${ARGOCD_ROOT_APP} reconciled with zero child Applications"
+                log_warn "Bundle may have been wrapped-only (no expansion). total=${total}"
+                # Surfacing as success would mask the bug from MUST-FIX 1.
+                # Fail closed: a child-less root is never a real PASS for
+                # the deployer matrix (every recipe ships >= 1 component).
+                dump_argocd_failures
+                return 1
+            fi
+            log_debug "Root Application present but children not yet declared (total=${total})..."
+            sleep 5
+            continue
+        fi
+
+        # Pending = NOT one of the four terminal-pass states described
+        # in the function docstring. Arms 3 & 4 are the load-bearing
+        # ones for KWOK: they distinguish "bundle applied; chart-side
+        # runtime gap" (pass) from "bundle never applied" (fail) via
+        # operationState.phase=Succeeded.
+        pending=$(echo "$apps_json" \
+            | jq '[.items[]
+                  | . as $app
+                  | select(
+                      # NOT (Synced + Healthy)
+                      ($app.status.sync.status != "Synced"
+                       or $app.status.health.status != "Healthy")
+                      # AND NOT (Synced + Progressing)
+                      and ($app.status.sync.status != "Synced"
+                           or $app.status.health.status != "Progressing")
+                      # AND NOT (OutOfSync + Healthy + last op Succeeded)
+                      and ($app.status.sync.status != "OutOfSync"
+                           or $app.status.health.status != "Healthy"
+                           or ($app.status.operationState.phase // "") != "Succeeded")
+                      # AND NOT (Synced + Degraded + last op Succeeded)
+                      and ($app.status.sync.status != "Synced"
+                           or $app.status.health.status != "Degraded"
+                           or ($app.status.operationState.phase // "") != "Succeeded")
+                    )]
+                  | length')
+        if [[ "$pending" == "0" ]]; then
+            # MUST-FIX 1: log observed counts so a low total is auditable.
+            log_info "Argo CD sync PASS: ${total} Applications reached terminal pass state (root=${ARGOCD_ROOT_APP}, expected_children=${expected_children})"
+            return 0
+        fi
+        log_debug "Argo CD sync progress: ${pending}/${total} Applications still pending (expected_children=${expected_children})..."
+        sleep 5
+    done
+
+    log_error "Argo CD sync deadline (${deadline}s) hit"
+    dump_argocd_failures
+    # Distinct rc so run-all-recipes.sh can apply the 3-strike rule without
+    # parsing stderr. Other failure paths in this function still return 1.
+    return "$EXIT_ARGOCD_SYNC_TIMEOUT"
+}
+
+# Dump diagnostics on Argo CD sync failure: per-Application status fields
+# plus repo-server and application-controller log tails.
+dump_argocd_failures() {
+    log_error "--- Argo CD Applications (name / sync / health / conditions / op.message) ---"
+    kubectl get applications -n argocd -o json 2>/dev/null \
+        | jq -r '.items[] | {
+            name: .metadata.name,
+            sync: .status.sync.status,
+            health: .status.health.status,
+            conditions: .status.conditions,
+            operation: .status.operationState.message
+          }' \
+        2>&1 || true
+    log_error "--- argocd-repo-server (tail=200) ---"
+    kubectl logs -n argocd deploy/argocd-repo-server --tail=200 2>&1 || true
+    log_error "--- argocd-application-controller (tail=200) ---"
+    # Argo CD chart 9.x ships application-controller as a StatefulSet. Try
+    # StatefulSet first to avoid emitting a misleading "Deployment not
+    # found" error into the diagnostic dump on the common case; fall back
+    # to Deployment for older chart versions.
+    kubectl logs -n argocd statefulset/argocd-application-controller --tail=200 2>&1 \
+        || kubectl logs -n argocd deploy/argocd-application-controller --tail=200 2>&1 \
+        || true
+}
+
+# Assert the outer Flux Kustomization exists within KWOK_FLUX_ROOT_GRACE
+# seconds (default 30). Mirrors wait_for_argocd_root_app: a silently-missing
+# Kustomization (RBAC denial, CRD scope mismatch, webhook reject) gets
+# surfaced fast instead of burning the KWOK_FLUX_SYNC_TIMEOUT budget.
+wait_for_flux_root_app() {
+    if [[ -z "$FLUX_KUSTOMIZATION_NAME" ]]; then
+        log_error "FLUX_KUSTOMIZATION_NAME is empty (DEPLOYER=${DEPLOYER})"
+        return 1
+    fi
+
+    local grace="${KWOK_FLUX_ROOT_GRACE:-30}"
+    local start=$SECONDS
+
+    log_info "Waiting for Flux Kustomization ${FLUX_KUSTOMIZATION_NAME} to appear (grace ${grace}s)..."
+
+    while (( SECONDS - start < grace )); do
+        if kubectl get kustomization "$FLUX_KUSTOMIZATION_NAME" -n flux-system \
+                >/dev/null 2>&1; then
+            log_info "Kustomization ${FLUX_KUSTOMIZATION_NAME} present"
+            return 0
+        fi
+        sleep 2
+    done
+
+    log_error "Kustomization ${FLUX_KUSTOMIZATION_NAME} not present after ${grace}s"
+    log_error "kubectl apply for DEPLOYER=${DEPLOYER} produced no Kustomization resource."
+    log_error "--- flux-system namespace events (last 20) ---"
+    kubectl get events -n flux-system --sort-by=.lastTimestamp 2>&1 | tail -20 || true
+    return 1
+}
+
+# Validate the flux-oci bundle was successfully APPLIED. Intentionally
+# narrower than wait_for_argocd_sync: we do NOT wait for HelmReleases to
+# reach a terminal Ready/Released state, because the bundler's flux deployer
+# currently emits local-chart HelmReleases with sourceRef.kind=GitRepository
+# pointing at the placeholder URL `https://github.com/YOUR_ORG/YOUR_REPO.git`.
+# Under OCI consumption helm-controller cannot fetch these charts, so those
+# HelmReleases stall — AND any HelmRelease downstream in the Flux dependsOn
+# chain stays "dependency not ready", cascading the stall through the entire
+# bundle.
+#
+# Flux's HelmRelease v2 API blocks the obvious fix: spec.chart.spec.sourceRef
+# rejects OCIRepository ("supported values: HelmRepository, GitRepository,
+# Bucket"), and spec.chartRef points at the whole OCI artifact with no path
+# support — so a single wrapper OCIRepository cannot address inner local
+# charts. Proper fixes (render local-chart manifests at bundle time OR push
+# each local chart as its own helm OCI artifact) are out of scope here and
+# tracked in a follow-up issue referenced from the PR description.
+#
+# What this lane DOES validate (the bundle-format regression surface this
+# PR is meant to catch):
+#   1. Flux source-controller could PULL the AICR bundle artifact (the layer
+#      mediaType + insecure HTTP wiring is correct).
+#   2. Flux kustomize-controller could PARSE the bundle's root
+#      kustomization.yaml and APPLY all manifests (no template-render bugs,
+#      no missing files, no schema violations).
+#   3. The applied resources include per-component HelmRelease CRs (the
+#      flux deployer produced the expected GitOps shape).
+#
+# What this lane does NOT validate (deferred):
+#   - HelmRelease reconciliation to Ready / Released
+#   - Helm install success
+#   - Pod-Running for the workloads the bundle declares
+#
+# Returns 0 on success, EXIT_ARGOCD_SYNC_TIMEOUT (50) on deadline (reused so
+# run-all-recipes.sh's 3-strike rule fires symmetrically across argocd-* and
+# flux-oci), and 1 on hard errors (CRD missing, RBAC denial, apiserver
+# unreachable).
+wait_for_flux_sync() {
+    local deadline="${KWOK_FLUX_SYNC_TIMEOUT:-300}"
+    local start=$SECONDS
+
+    if ! wait_for_flux_root_app; then
+        return 1
+    fi
+
+    log_info "Waiting for Flux Kustomization to apply the bundle (deadline ${deadline}s)..."
+
+    local kust_json oci_json hr_json kust_ready oci_ready total
+    local applied_rev
+    local apps_rc
+
+    while (( SECONDS - start < deadline )); do
+        # 1. OCIRepository must reach Ready=True — proves source-controller
+        # could PULL the bundle artifact (mediaType, insecure-HTTP, layer
+        # selector all wired correctly). This is the artifact-shape
+        # regression surface argocd-oci catches via repo-server; flux-oci
+        # catches the same class of bug via source-controller.
+        if oci_json=$(kubectl get ocirepository "$FLUX_OCIREPOSITORY_NAME" \
+                -n flux-system -o json 2>/dev/null); then
+            apps_rc=0
+        else
+            apps_rc=$?
+        fi
+        if (( apps_rc != 0 )); then
+            log_error "kubectl get ocirepository failed (rc=${apps_rc})"
+            log_error "Likely cause: OCIRepository CRD missing, RBAC denied, or apiserver unreachable."
+            dump_flux_failures
+            return 1
+        fi
+        oci_ready=$(echo "$oci_json" \
+            | jq -r '.status.conditions[]? | select(.type == "Ready") | .status // "Unknown"')
+        if [[ "$oci_ready" != "True" ]]; then
+            log_debug "OCIRepository ${FLUX_OCIREPOSITORY_NAME} not Ready yet (status=${oci_ready:-Unknown})..."
+            sleep 5
+            continue
+        fi
+
+        # 2. Kustomization must reach Ready=True — proves kustomize-controller
+        # could PARSE the bundle's root kustomization.yaml and APPLY all
+        # manifests inside (no template-render bugs, no missing files, no
+        # schema violations).
+        if kust_json=$(kubectl get kustomization "$FLUX_KUSTOMIZATION_NAME" \
+                -n flux-system -o json 2>/dev/null); then
+            apps_rc=0
+        else
+            apps_rc=$?
+        fi
+        if (( apps_rc != 0 )); then
+            log_error "kubectl get kustomization failed (rc=${apps_rc})"
+            log_error "Likely cause: Kustomization CRD missing, RBAC denied, or apiserver unreachable."
+            dump_flux_failures
+            return 1
+        fi
+        kust_ready=$(echo "$kust_json" \
+            | jq -r '.status.conditions[]? | select(.type == "Ready") | .status // "Unknown"')
+        if [[ "$kust_ready" != "True" ]]; then
+            log_debug "Kustomization ${FLUX_KUSTOMIZATION_NAME} not Ready yet (status=${kust_ready:-Unknown})..."
+            sleep 5
+            continue
+        fi
+        applied_rev=$(echo "$kust_json" \
+            | jq -r '.status.lastAppliedRevision // ""')
+
+        # 3. Per-component HelmRelease CRs must exist (the bundle declared
+        # the expected GitOps shape). We do NOT wait for them to reach
+        # terminal state — see function docstring for the dependsOn-cascade
+        # rationale.
+        if hr_json=$(kubectl get helmrelease -A -o json 2>/dev/null); then
+            apps_rc=0
+        else
+            apps_rc=$?
+        fi
+        if (( apps_rc != 0 )); then
+            log_error "kubectl get helmrelease -A failed (rc=${apps_rc})"
+            dump_flux_failures
+            return 1
+        fi
+        total=$(echo "$hr_json" | jq '.items | length')
+        if [[ "$total" -eq 0 ]]; then
+            log_debug "Kustomization Ready but no HelmReleases yet — waiting for resources to appear..."
+            sleep 5
+            continue
+        fi
+
+        log_info "Flux bundle-apply PASS: OCIRepository pulled (rev=$(echo "$oci_json" | jq -r '.status.artifact.revision // "unknown"')), Kustomization applied (rev=${applied_rev}), ${total} HelmReleases created"
+        log_info "(flux-oci validates bundle-format application only; HelmRelease reconciliation is deferred — see follow-up issue)"
+        return 0
+    done
+
+    log_error "Flux bundle-apply deadline (${deadline}s) hit"
+    dump_flux_failures
+    # Reuse EXIT_ARGOCD_SYNC_TIMEOUT so run-all-recipes.sh's 3-strike rule
+    # treats GitOps sync deadlines symmetrically across argocd-* and flux-oci.
+    return "$EXIT_ARGOCD_SYNC_TIMEOUT"
+}
+
+# Dump diagnostics on Flux sync failure: outer Kustomization status, all
+# OCIRepository + HelmRelease statuses, plus controller log tails.
+dump_flux_failures() {
+    log_error "--- Flux Kustomization ${FLUX_KUSTOMIZATION_NAME} ---"
+    kubectl get kustomization "$FLUX_KUSTOMIZATION_NAME" -n flux-system \
+        -o json 2>/dev/null \
+        | jq -r '{
+            ready: ((.status.conditions // []) | map(select(.type == "Ready")) | .[0]),
+            lastAppliedRevision: .status.lastAppliedRevision,
+            inventory: ((.status.inventory.entries // []) | length)
+          }' 2>&1 || true
+    log_error "--- Flux OCIRepositories ---"
+    kubectl get ocirepository -A -o json 2>/dev/null \
+        | jq -r '.items[] | {
+            namespace: .metadata.namespace,
+            name: .metadata.name,
+            ready: ((.status.conditions // []) | map(select(.type == "Ready")) | .[0]),
+            artifact: .status.artifact.revision
+          }' 2>&1 || true
+    log_error "--- Flux HelmReleases (name / ready / released / stalled / hist[0]) ---"
+    kubectl get helmrelease -A -o json 2>/dev/null \
+        | jq -r '.items[] | {
+            namespace: .metadata.namespace,
+            name: .metadata.name,
+            ready: ((.status.conditions // []) | map(select(.type == "Ready")) | .[0].status),
+            released: ((.status.conditions // []) | map(select(.type == "Released")) | .[0].status),
+            stalled: ((.status.conditions // []) | map(select(.type == "Stalled")) | .[0].status),
+            history: (.status.history // [])[0]
+          }' 2>&1 || true
+    log_error "--- source-controller (tail=200) ---"
+    kubectl logs -n flux-system deploy/source-controller --tail=200 2>&1 || true
+    log_error "--- kustomize-controller (tail=200) ---"
+    kubectl logs -n flux-system deploy/kustomize-controller --tail=200 2>&1 || true
+    log_error "--- helm-controller (tail=200) ---"
+    kubectl logs -n flux-system deploy/helm-controller --tail=200 2>&1 || true
+}
+
 # Verify pod scheduling
 verify_pods() {
     log_info "Verifying pod scheduling..."
 
     # Get pod status across all component namespaces (per-component deployment)
-    # Exclude system namespaces that aren't part of our bundle
+    # Exclude system namespaces that aren't part of our bundle (control-plane
+    # infra plus GitOps controller namespaces — argocd/flux-system/aicr-
+    # registry are owned by install-infra.sh, not the bundle under test).
     local ns_filter="--all-namespaces"
-    local exclude_ns="kube-system|kube-node-lease|kube-public|local-path-storage|kwok-system"
+    local exclude_ns="kube-system|kube-node-lease|kube-public|local-path-storage|kwok-system|argocd|flux-system|aicr-registry"
 
     local total_pods pending_pods failed_pods running_pods unscheduled_pods
     total_pods=$(kubectl get pods ${ns_filter} --no-headers 2>/dev/null | { grep -vE "^(${exclude_ns})\s" || true; } | wc -l | tr -d ' ')
@@ -485,10 +1654,18 @@ verify_pods() {
 
     log_info "Pod status: $total_pods total, $running_pods running, $pending_pods pending ($unscheduled_pods unscheduled), $failed_pods failed"
 
-    # Show pod distribution across nodes
+    # Show pod distribution across nodes.
+    # grep -vE returns exit 1 when no lines match (e.g. all pods are in
+    # excluded namespaces — happens on the flux-oci lane where every pod
+    # the controller has placed so far lives in flux-system / kube-system).
+    # Under `set -euo pipefail` that exit 1 propagates as the pipeline's
+    # exit code and kills the script silently AFTER Pod-status logging,
+    # masquerading as a pass-then-fail. Wrap with `{ ... || true; }` so
+    # an empty result is a no-op, matching the total_pods / unscheduled_pods
+    # extraction above.
     log_info "Pod distribution:"
     kubectl get pods --all-namespaces -o wide --no-headers 2>/dev/null | \
-        grep -vE "^(${exclude_ns})\s" | \
+        { grep -vE "^(${exclude_ns})\s" || true; } | \
         awk '{print $8}' | sort | uniq -c | \
         while read -r count node; do
             echo "  $node: $count pods"
@@ -541,6 +1718,24 @@ verify_pods() {
     return 0
 }
 
+# Print usage to stderr and exit non-zero.
+usage() {
+    cat >&2 <<'EOF'
+Usage: validate-scheduling.sh [--keep-namespace] [--deployer <name>] <recipe-name>
+
+Flags:
+  --keep-namespace        Preserve releases/namespaces after the run.
+  --deployer <name>       One of: helm (default), argocd-oci, argocd-helm-oci.
+
+Examples:
+  validate-scheduling.sh h100-eks-ubuntu-training-kubeflow
+  validate-scheduling.sh --keep-namespace eks-training
+  validate-scheduling.sh --deployer argocd-oci eks-training
+  validate-scheduling.sh --deployer argocd-helm-oci eks-training
+EOF
+    exit 1
+}
+
 # Main
 main() {
     local recipe=""
@@ -552,12 +1747,21 @@ main() {
                 KEEP_NAMESPACE=true
                 shift
                 ;;
+            --deployer)
+                if [[ $# -lt 2 ]]; then
+                    log_error "--deployer requires a value"
+                    usage
+                fi
+                DEPLOYER="$2"
+                shift 2
+                ;;
+            --deployer=*)
+                DEPLOYER="${1#--deployer=}"
+                shift
+                ;;
             -*)
-                echo "Unknown option: $1"
-                echo "Usage: $0 [--keep-namespace] <recipe-name>"
-                echo "Example: $0 h100-eks-ubuntu-training-kubeflow"
-                echo "         $0 --keep-namespace eks-training"
-                exit 1
+                log_error "Unknown option: $1"
+                usage
                 ;;
             *)
                 recipe="$1"
@@ -567,17 +1771,39 @@ main() {
     done
 
     if [[ -z "$recipe" ]]; then
-        echo "Usage: $0 [--keep-namespace] <recipe-name>"
-        echo "Example: $0 h100-eks-ubuntu-training-kubeflow"
-        echo "         $0 --keep-namespace eks-training"
-        exit 1
+        usage
     fi
 
+    case "$DEPLOYER" in
+        helm|argocd-oci|argocd-helm-oci|flux-oci)
+            ;;
+        *)
+            log_error "Invalid --deployer value: '${DEPLOYER}'"
+            log_error "Must be one of: helm, argocd-oci, argocd-helm-oci, flux-oci"
+            exit 1
+            ;;
+    esac
+
+    # Must run before cleanup trap so the trap sees the resolved root name.
+    resolve_argocd_root_app
+    resolve_flux_root_names "$recipe"
+
+    # Safety: refuse to set up the cleanup trap (which force-finalizes
+    # namespaces under KWOK) unless the current kubectl context is a
+    # known KWOK Kind cluster with kwok-typed nodes. A misconfigured
+    # KUBECONFIG must NOT silently strip finalizers from whatever
+    # cluster the user happens to be authenticated to. apply-nodes.sh
+    # has already populated the cluster by this point (the script is
+    # invoked from run-all-recipes.sh::run_recipe_test after apply-
+    # nodes.sh), so the node-label check is safe here.
+    ensure_kwok_context
+
     # Set up cleanup trap
     trap cleanup EXIT
 
     log_info "=========================================="
     log_info "Starting validation for recipe: $recipe"
+    log_info "Deployer: $DEPLOYER"
     log_info "=========================================="
 
     log_debug "Step 1: Checking dependencies..."
@@ -605,7 +1831,7 @@ main() {
     verify_pods
 
     log_info "=========================================="
-    log_info "✓ Validation PASSED for recipe: $recipe"
+    log_info "✓ Validation PASSED for recipe: $recipe (deployer=$DEPLOYER)"
     log_info "=========================================="
 }
 
diff --git a/pkg/bundler/bundler.go b/pkg/bundler/bundler.go
index f775d3749..14ac2db11 100644
--- a/pkg/bundler/bundler.go
+++ b/pkg/bundler/bundler.go
@@ -390,6 +390,10 @@ func (b *DefaultBundler) buildDeployer(ctx context.Context, recipeResult *recipe
 			ComponentPreManifests:  componentPreManifests,
 			ComponentPostManifests: componentPostManifests,
 			VendorCharts:           b.Config.VendorCharts(),
+			// Inline values when the bundle repo is OCI: Argo CD's $values
+			// multi-source ref is Git-only (see #960), so an OCI repoURL
+			// must use single-source with helm.valuesObject embedded.
+			InlineUpstreamValues: strings.HasPrefix(b.Config.RepoURL(), "oci://"),
 		}, nil
 
 	case config.DeployerHelm:
diff --git a/pkg/bundler/deployer/argocd/argocd.go b/pkg/bundler/deployer/argocd/argocd.go
index 47531740c..b2057cf5f 100644
--- a/pkg/bundler/deployer/argocd/argocd.go
+++ b/pkg/bundler/deployer/argocd/argocd.go
@@ -54,6 +54,7 @@ import (
 	"github.com/NVIDIA/aicr/pkg/bundler/deployer/localformat"
 	"github.com/NVIDIA/aicr/pkg/errors"
 	"github.com/NVIDIA/aicr/pkg/recipe"
+	"github.com/NVIDIA/aicr/pkg/serializer"
 )
 
 //go:embed templates/application.yaml.tmpl
@@ -74,6 +75,12 @@ var readmeTemplate string
 // pure-Helm KindUpstreamHelm folders). BundleDir carries the NNN-<name>
 // directory name for both kinds: it is the chart path for KindLocalHelm and
 // the values $ref path for KindUpstreamHelm.
+//
+// InlineValues drives the Application shape for KindUpstreamHelm when the
+// bundle repo is OCI: instead of the multi-source $values pattern (Git-only
+// in Argo CD), the Application is rendered single-source with helm.valuesObject
+// inlined from ValuesYAML. See InlineUpstreamValues on Generator for the
+// reason.
 type ApplicationData struct {
 	Name           string
 	Namespace      string
@@ -85,6 +92,8 @@ type ApplicationData struct {
 	TargetRevision string // Target revision for the user's repo
 	BundleDir      string // NNN-<name> directory inside the bundle
 	IsLocalChart   bool   // true → path-based single-source; false → multi-source upstream-helm
+	InlineValues   bool   // KindUpstreamHelm + OCI → single-source with helm.valuesObject inlined
+	ValuesYAML     string // Pre-indented YAML (8 spaces) for helm.valuesObject; used when InlineValues
 }
 
 // AppOfAppsData contains data for rendering the App of Apps manifest.
@@ -171,6 +180,19 @@ type Generator struct {
 	// vendoring shape.
 	VendorCharts bool
 
+	// InlineUpstreamValues replaces the multi-source $values pattern for
+	// KindUpstreamHelm Applications with a single source whose helm.valuesObject
+	// is inlined from ComponentValues. Required when RepoURL is OCI because
+	// Argo CD's $values multi-source ref is Git-only — the repo-server
+	// attempts a `git ListRefs` against the OCI URL and fails with
+	// `unsupported scheme "oci"`. Off by default to preserve the existing
+	// multi-source shape for Git-backed deployments (operators retain
+	// per-component values.yaml edit ergonomics). Wired by the bundler
+	// when --deployer argocd targets oci://. Should remain off for
+	// argocdhelm's inner Generator — that path relies on the multi-source
+	// shape to transform it into a Helm template with dynamic merging.
+	InlineUpstreamValues bool
+
 	// vendorRecords is populated by Generate when VendorCharts is on.
 	// Captured here so VendorRecords() can expose it to callers
 	// (currently argocdhelm) that need to write provenance.yaml without
@@ -371,7 +393,15 @@ func (g *Generator) Generate(ctx context.Context, outputDir string) (*deployer.O
 				fmt.Sprintf("localformat returned folder for unknown component %q", f.Parent))
 		}
 
-		appData := buildApplicationData(*comp, f, i, repoURL, targetRevision)
+		var inlineValues map[string]any
+		if g.InlineUpstreamValues && f.Kind == localformat.KindUpstreamHelm {
+			inlineValues = g.ComponentValues[comp.Name]
+		}
+
+		appData, err := buildApplicationData(*comp, f, i, repoURL, targetRevision, inlineValues, g.InlineUpstreamValues)
+		if err != nil {
+			return nil, err
+		}
 		appDataList = append(appDataList, appData)
 
 		folderDir, joinErr := deployer.SafeJoin(outputDir, f.Dir)
@@ -507,11 +537,17 @@ func findComponentRef(refs []recipe.ComponentRef, parent string) *recipe.Compone
 }
 
 // buildApplicationData constructs ApplicationData for a single folder. The
-// FolderKind drives the Application shape — KindLocalHelm sets LocalChartPath
+// FolderKind drives the Application shape — KindLocalHelm sets IsLocalChart
 // (path-based single-source); KindUpstreamHelm leaves it empty (multi-source
 // upstream-helm). The folder's name is used as the Application name to keep
 // primary and injected -post folders distinct in Argo.
-func buildApplicationData(comp recipe.ComponentRef, f localformat.Folder, syncWave int, repoURL, targetRevision string) ApplicationData {
+//
+// When inline is true and the folder is KindUpstreamHelm, values are
+// marshaled to deterministic YAML and indented 8 spaces for embedding under
+// helm.valuesObject. The result is a single-source Application that avoids
+// Argo CD's Git-only $values multi-source ref. Errors from value marshaling
+// surface as ErrCodeInternal.
+func buildApplicationData(comp recipe.ComponentRef, f localformat.Folder, syncWave int, repoURL, targetRevision string, values map[string]any, inline bool) (ApplicationData, error) {
 	chart := comp.Chart
 	if chart == "" {
 		chart = comp.Name
@@ -548,6 +584,47 @@ func buildApplicationData(comp recipe.ComponentRef, f localformat.Folder, syncWa
 			data.Version = deployer.NormalizeVersion(comp.Version)
 		}
 		data.Chart = chart
+
+		if inline {
+			data.InlineValues = true
+			yamlStr, err := renderInlineValuesYAML(values)
+			if err != nil {
+				return ApplicationData{}, errors.PropagateOrWrap(err, errors.ErrCodeInternal,
+					fmt.Sprintf("failed to render inline values for %s", comp.Name))
+			}
+			data.ValuesYAML = yamlStr
+		}
+	}
+	return data, nil
+}
+
+// renderInlineValuesYAML marshals values to deterministic YAML and indents
+// every line by 8 spaces so the result drops cleanly under
+// `spec.source.helm.valuesObject:` (column 6) at column 8.
+//
+// Empty / nil values produce "        {}\n" so the template always emits a
+// well-formed map node — Argo CD's schema validation rejects a bare key with
+// no value. Trailing-blank lines from yaml.v3 are stripped to keep the
+// rendered Application stable across runs.
+func renderInlineValuesYAML(values map[string]any) (string, error) {
+	if len(values) == 0 {
+		return "        {}\n", nil
+	}
+	yamlBytes, err := serializer.MarshalYAMLDeterministic(values)
+	if err != nil {
+		return "", err
+	}
+	const indent = "        "
+	lines := strings.Split(strings.TrimRight(string(yamlBytes), "\n"), "\n")
+	var b strings.Builder
+	for _, line := range lines {
+		if line == "" {
+			b.WriteString("\n")
+			continue
+		}
+		b.WriteString(indent)
+		b.WriteString(line)
+		b.WriteString("\n")
 	}
-	return data
+	return b.String(), nil
 }
diff --git a/pkg/bundler/deployer/argocd/argocd_test.go b/pkg/bundler/deployer/argocd/argocd_test.go
index 2f91c2bd2..6b5f33eef 100644
--- a/pkg/bundler/deployer/argocd/argocd_test.go
+++ b/pkg/bundler/deployer/argocd/argocd_test.go
@@ -1180,7 +1180,10 @@ func TestBuildApplicationData_OCIHandling(t *testing.T) {
 				Kind:   localformat.KindUpstreamHelm,
 				Parent: "test-component",
 			}
-			data := buildApplicationData(comp, folder, 0, "https://github.com/example/repo.git", "main")
+			data, err := buildApplicationData(comp, folder, 0, "https://github.com/example/repo.git", "main", nil, false)
+			if err != nil {
+				t.Fatalf("buildApplicationData() error = %v", err)
+			}
 			if data.Repository != tt.wantRepository {
 				t.Errorf("Repository: got %q, want %q (source=%q chart=%q)",
 					data.Repository, tt.wantRepository, tt.source, tt.chart)
@@ -1192,6 +1195,159 @@ func TestBuildApplicationData_OCIHandling(t *testing.T) {
 	}
 }
 
+// TestGenerate_InlineUpstreamValues verifies that an OCI-bound bundle
+// inlines per-component values under helm.valuesObject and drops the
+// multi-source $values ref. Closes #960: Argo CD's $values multi-source
+// pattern is Git-only — over OCI, the repo-server attempts a git ListRefs
+// against the OCI URL and fails with `unsupported scheme "oci"`.
+func TestGenerate_InlineUpstreamValues(t *testing.T) {
+	ctx := context.Background()
+	outputDir := t.TempDir()
+
+	recipeResult := &recipe.RecipeResult{}
+	recipeResult.Metadata.Version = testVersion
+	recipeResult.ComponentRefs = []recipe.ComponentRef{
+		{
+			Name:      "cert-manager",
+			Namespace: "cert-manager",
+			Chart:     "cert-manager",
+			Version:   "v1.20.2",
+			Type:      recipe.ComponentTypeHelm,
+			Source:    "https://charts.jetstack.io",
+		},
+	}
+	recipeResult.DeploymentOrder = []string{"cert-manager"}
+
+	values := map[string]any{
+		"installCRDs": true,
+		"resources": map[string]any{
+			"requests": map[string]any{
+				"cpu":    "100m",
+				"memory": "128Mi",
+			},
+		},
+	}
+
+	g := &Generator{
+		RecipeResult:    recipeResult,
+		ComponentValues: map[string]map[string]any{"cert-manager": values},
+		Version:         "v0.9.0",
+		RepoURL:         "oci://registry.aicr-registry.svc.cluster.local:5000/aicr/test",
+		TargetRevision:  "v0.9.0",
+		// Mirrors the bundler wiring at pkg/bundler/bundler.go for an
+		// OCI-bound --deployer argocd target.
+		InlineUpstreamValues: true,
+	}
+
+	if _, err := g.Generate(ctx, outputDir); err != nil {
+		t.Fatalf("Generate() error = %v", err)
+	}
+
+	appPath := filepath.Join(outputDir, "001-cert-manager", "application.yaml")
+	content, err := os.ReadFile(appPath)
+	if err != nil {
+		t.Fatalf("failed to read application.yaml: %v", err)
+	}
+	assertValidYAML(t, appPath)
+
+	var doc map[string]any
+	if err := yaml.Unmarshal(content, &doc); err != nil {
+		t.Fatalf("failed to parse application.yaml: %v", err)
+	}
+	spec, ok := doc["spec"].(map[string]any)
+	if !ok {
+		t.Fatal("spec is not a map")
+	}
+
+	if _, hasSources := spec["sources"]; hasSources {
+		t.Error("spec.sources should NOT be set when InlineUpstreamValues is true (would re-introduce Git-only $values ref)")
+	}
+	source, ok := spec["source"].(map[string]any)
+	if !ok {
+		t.Fatal("spec.source is not a map")
+	}
+	if source["repoURL"] != "https://charts.jetstack.io" {
+		t.Errorf("source.repoURL = %v, want https://charts.jetstack.io", source["repoURL"])
+	}
+	if source["chart"] != "cert-manager" {
+		t.Errorf("source.chart = %v, want cert-manager", source["chart"])
+	}
+	// HTTPS Helm-chart-repo sources strip the `v` prefix (index.yaml
+	// convention); OCI sources preserve it. cert-manager here is HTTPS.
+	if source["targetRevision"] != "1.20.2" {
+		t.Errorf("source.targetRevision = %v, want 1.20.2", source["targetRevision"])
+	}
+
+	helm, ok := source["helm"].(map[string]any)
+	if !ok {
+		t.Fatal("source.helm is not a map")
+	}
+	valuesObj, ok := helm["valuesObject"].(map[string]any)
+	if !ok {
+		t.Fatalf("helm.valuesObject is not a map: %T", helm["valuesObject"])
+	}
+	if valuesObj["installCRDs"] != true {
+		t.Errorf("valuesObject.installCRDs = %v, want true", valuesObj["installCRDs"])
+	}
+
+	// $values literals must not appear anywhere — that token is the symptom
+	// of the bug being fixed; if it leaks back in, the regression goes
+	// silent until a live Argo CD parse.
+	if strings.Contains(string(content), "$values") {
+		t.Errorf("application.yaml still references $values:\n%s", string(content))
+	}
+	if strings.Contains(string(content), "ref: values") {
+		t.Errorf("application.yaml still has ref: values source:\n%s", string(content))
+	}
+}
+
+// TestRenderInlineValuesYAML covers the edge cases for the inline-values
+// helper: empty map produces a well-formed `{}` (Argo CD's schema rejects a
+// bare key with no value), and non-empty maps are 8-space indented to drop
+// cleanly under `helm.valuesObject:` at column 6.
+func TestRenderInlineValuesYAML(t *testing.T) {
+	tests := []struct {
+		name   string
+		values map[string]any
+		want   string
+	}{
+		{
+			name:   "empty map → explicit {} (Argo CD rejects bare valuesObject:)",
+			values: nil,
+			want:   "        {}\n",
+		},
+		{
+			name:   "scalar value at top level",
+			values: map[string]any{"replicaCount": 3},
+			want:   "        replicaCount: 3\n",
+		},
+		{
+			name: "nested map preserves indentation under 8-space base",
+			values: map[string]any{
+				"resources": map[string]any{
+					"requests": map[string]any{
+						"cpu": "100m",
+					},
+				},
+			},
+			// yaml.v3 uses 2-space indent for nested keys; 8-space base
+			// stacks on top: col 8 = resources, col 10 = requests, col 12 = cpu.
+			want: "        resources:\n          requests:\n            cpu: 100m\n",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := renderInlineValuesYAML(tt.values)
+			if err != nil {
+				t.Fatalf("renderInlineValuesYAML() error = %v", err)
+			}
+			if got != tt.want {
+				t.Errorf("renderInlineValuesYAML() got:\n%q\nwant:\n%q", got, tt.want)
+			}
+		})
+	}
+}
+
 // TestBundleGolden_HelmAndManifestOnly freezes the bundle output for a recipe
 // containing both component shapes that this deployer must handle:
 //
diff --git a/pkg/bundler/deployer/argocd/templates/application.yaml.tmpl b/pkg/bundler/deployer/argocd/templates/application.yaml.tmpl
index 1ecc68c7f..8215e53f8 100644
--- a/pkg/bundler/deployer/argocd/templates/application.yaml.tmpl
+++ b/pkg/bundler/deployer/argocd/templates/application.yaml.tmpl
@@ -12,6 +12,14 @@ spec:
     repoURL: '{{ .RepoURL }}'
     targetRevision: {{ .TargetRevision }}
     path: {{ .BundleDir }}
+{{- else if .InlineValues }}
+  source:
+    repoURL: {{ .Repository }}
+    chart: {{ .Chart }}
+    targetRevision: {{ .Version }}
+    helm:
+      valuesObject:
+{{ .ValuesYAML -}}
 {{- else }}
   sources:
     - repoURL: {{ .Repository }}
diff --git a/pkg/bundler/deployer/argocdhelm/argocdhelm.go b/pkg/bundler/deployer/argocdhelm/argocdhelm.go
index b021e06f0..37e7afe28 100644
--- a/pkg/bundler/deployer/argocdhelm/argocdhelm.go
+++ b/pkg/bundler/deployer/argocdhelm/argocdhelm.go
@@ -462,15 +462,27 @@ func (g *Generator) processFolders(ctx context.Context, tmpDir, outputDir, templ
 			continue
 		}
 
-		// Mixed components emit a `-post` folder for raw manifests, alongside
-		// the primary upstream-helm folder. The -post folder is not a
-		// registered component on its own — its overrides flow from the
-		// parent (e.g., 010-gpu-operator-post → gpu-operator). Resolve the
-		// parent before looking up the override key.
+		// Mixed components emit synthetic `-pre` and `-post` folders for raw
+		// manifests that bracket the primary upstream-helm folder. Neither
+		// suffixed folder is a registered component on its own — both
+		// inherit overrides from the parent (e.g., 006-gpu-operator-pre and
+		// 010-gpu-operator-post both flow from gpu-operator). Resolve the
+		// parent before looking up the override key; if neither suffix
+		// matches a registered component, keep the original name and let
+		// resolveOverrideKey surface the registry miss.
+		//
+		// Symmetry note: the previous version handled only `-post`. Recipes
+		// with PreManifestFiles (e.g. the gke-cos OS overlay's gpu-driver
+		// toolkit prereq) synthesize a `-pre` folder; without the matching
+		// strip, resolveOverrideKey was called with "gpu-operator-pre" and
+		// failed `component %q not found in registry` at bundle time.
 		parentComponent := folderComponent
-		if base, ok := strings.CutSuffix(folderComponent, "-post"); ok {
-			if findComponentByName(g.RecipeResult.ComponentRefs, base) {
-				parentComponent = base
+		for _, suffix := range []string{"-pre", "-post"} {
+			if base, ok := strings.CutSuffix(folderComponent, suffix); ok {
+				if findComponentByName(g.RecipeResult.ComponentRefs, base) {
+					parentComponent = base
+					break
+				}
 			}
 		}
 
diff --git a/pkg/bundler/deployer/argocdhelm/argocdhelm_test.go b/pkg/bundler/deployer/argocdhelm/argocdhelm_test.go
index 31cfb6593..0b7996f45 100644
--- a/pkg/bundler/deployer/argocdhelm/argocdhelm_test.go
+++ b/pkg/bundler/deployer/argocdhelm/argocdhelm_test.go
@@ -235,6 +235,125 @@ func TestGenerate(t *testing.T) {
 	}
 }
 
+// TestGenerate_PreManifestParentResolution exercises the synthetic
+// `-pre` folder path that recipes with ComponentPreManifests emit
+// (e.g. the gke-cos OS overlay, which injects a Talos-style driver
+// prerequisite Namespace ahead of gpu-operator). The folder is named
+// `NNN-gpu-operator-pre`; the bundler must strip the `-pre` suffix
+// when resolving the override key, otherwise resolveOverrideKey is
+// called with "gpu-operator-pre" and fails `component %q not found
+// in registry` at bundle time. Caught in the KWOK gke-cos-training
+// CI lane after the pre-fix; before this commit the bug only
+// surfaced for recipes that combined Helm + pre-manifest overlays.
+func TestGenerate_PreManifestParentResolution(t *testing.T) {
+	ctx := context.Background()
+	outputDir := t.TempDir()
+
+	g := &Generator{
+		RecipeResult: newRecipeResult("1.0.0", []recipe.ComponentRef{
+			{Name: "gpu-operator", Namespace: "gpu-operator", Source: "https://helm.ngc.nvidia.com/nvidia", Chart: "gpu-operator", Version: "v24.9.0"},
+		}),
+		ComponentValues: map[string]map[string]any{
+			"gpu-operator": {"driver": map[string]any{"version": "580"}},
+		},
+		Version: "test",
+		RepoURL: "https://github.com/example/repo.git",
+		ComponentPreManifests: map[string]map[string][]byte{
+			"gpu-operator": {
+				"components/gpu-operator/manifests/cos-namespace.yaml": []byte(
+					"apiVersion: v1\nkind: Namespace\nmetadata:\n  name: gpu-operator\n",
+				),
+			},
+		},
+	}
+
+	if _, err := g.Generate(ctx, outputDir); err != nil {
+		t.Fatalf("Generate() error = %v (regression: -pre suffix not stripped before resolveOverrideKey)", err)
+	}
+
+	// The -pre folder content is copied into the bundle so Argo CD's
+	// repo-server can resolve the path-based child Application's
+	// `path: NNN-<name>-pre` reference. Confirm it survived the
+	// argocdhelm wrapper transformation.
+	preDir := filepath.Join(outputDir, "001-gpu-operator-pre")
+	if _, err := os.Stat(preDir); err != nil {
+		t.Errorf("expected -pre folder copied into bundle at %s: %v", preDir, err)
+	}
+
+	// The transformed template under templates/ should be keyed by the
+	// PARENT component (gpu-operator), not the -pre folder name,
+	// because the override key comes from the parent's registry entry.
+	tmplPath := filepath.Join(outputDir, "templates", "gpu-operator-pre.yaml")
+	if _, err := os.Stat(tmplPath); err != nil {
+		t.Errorf("expected wrapper template for -pre folder at %s: %v", tmplPath, err)
+	}
+}
+
+// TestGenerate_PreAndPostManifestParentResolution covers the actual
+// gke-cos OS overlay shape: a single registered component (`gpu-operator`)
+// with BOTH preManifestFiles (cos-namespace prep) and postManifestFiles
+// (dcgm-exporter). The argocdhelm processFolders loop sees 3 entries
+// (`<NNN>-gpu-operator-pre`, `<NNN>-gpu-operator`, `<NNN>-gpu-operator-post`)
+// and runs the suffix-strip logic twice across two distinct folders.
+// Both wrapper templates must resolve back to the same parent override
+// key so the values applied to the chart and the manifest hooks stay
+// consistent. Earlier revisions handled only `-post`; this test guards
+// against re-introducing that asymmetry.
+func TestGenerate_PreAndPostManifestParentResolution(t *testing.T) {
+	ctx := context.Background()
+	outputDir := t.TempDir()
+
+	g := &Generator{
+		RecipeResult: newRecipeResult("1.0.0", []recipe.ComponentRef{
+			{Name: "gpu-operator", Namespace: "gpu-operator", Source: "https://helm.ngc.nvidia.com/nvidia", Chart: "gpu-operator", Version: "v24.9.0"},
+		}),
+		ComponentValues: map[string]map[string]any{
+			"gpu-operator": {"driver": map[string]any{"version": "580"}},
+		},
+		Version: "test",
+		RepoURL: "https://github.com/example/repo.git",
+		ComponentPreManifests: map[string]map[string][]byte{
+			"gpu-operator": {
+				"components/gpu-operator/manifests/cos-namespace.yaml": []byte(
+					"apiVersion: v1\nkind: Namespace\nmetadata:\n  name: gpu-operator\n",
+				),
+			},
+		},
+		ComponentPostManifests: map[string]map[string][]byte{
+			"gpu-operator": {
+				"components/gpu-operator/manifests/dcgm-exporter.yaml": []byte(
+					"apiVersion: v1\nkind: ConfigMap\nmetadata:\n  name: dcgm-config\n",
+				),
+			},
+		},
+	}
+
+	if _, err := g.Generate(ctx, outputDir); err != nil {
+		t.Fatalf("Generate() error = %v", err)
+	}
+
+	// Both synthetic folders must exist in the bundle so the path-based
+	// child Applications can resolve them.
+	preDir := filepath.Join(outputDir, "001-gpu-operator-pre")
+	if _, err := os.Stat(preDir); err != nil {
+		t.Errorf("expected -pre folder at %s: %v", preDir, err)
+	}
+	postDir := filepath.Join(outputDir, "003-gpu-operator-post")
+	if _, err := os.Stat(postDir); err != nil {
+		t.Errorf("expected -post folder at %s: %v", postDir, err)
+	}
+
+	// Both wrapper templates must be present under templates/, both
+	// keyed by the PARENT component name (not the -pre / -post folder
+	// names). Asymmetric handling would lose one of them.
+	for _, suffix := range []string{"-pre", "-post"} {
+		tmplPath := filepath.Join(outputDir, "templates", "gpu-operator"+suffix+".yaml")
+		if _, err := os.Stat(tmplPath); err != nil {
+			t.Errorf("expected wrapper template for %s folder at %s: %v", suffix, tmplPath, err)
+		}
+	}
+}
+
 func TestGenerate_DataFiles(t *testing.T) {
 	makeGenerator := func(dataFiles []string, includeChecksums bool) *Generator {
 		return &Generator{
diff --git a/pkg/bundler/deployer/localformat/vendor_write_test.go b/pkg/bundler/deployer/localformat/vendor_write_test.go
index adf125529..f0a0abe34 100644
--- a/pkg/bundler/deployer/localformat/vendor_write_test.go
+++ b/pkg/bundler/deployer/localformat/vendor_write_test.go
@@ -323,9 +323,29 @@ func TestWrite_VendorCharts_KustomizeFallthrough(t *testing.T) {
 	// regardless of whether the downstream kustomize build succeeds in
 	// this test environment.
 	//
-	// kustomize build may shell out to a git fetch; bound the call so a
-	// stalled subprocess can't hang the suite.
-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	// kustomize build may shell out to a git fetch when Repository is a
+	// remote URL. Two hermeticity guards:
+	//
+	//  1. GIT_TERMINAL_PROMPT=0 — on macOS with a credential helper
+	//     installed, git falls through to a `Username for 'https://
+	//     github.com':` prompt instead of failing fast on missing
+	//     creds; that prompt blocks `make qualify` until the helper
+	//     times out. Setting this env var makes git exit immediately
+	//     so the test stays deterministic regardless of developer git
+	//     config.
+	//
+	//  2. The Repository points at the RFC-2606 reserved `.invalid`
+	//     TLD so DNS fails before any HTTP round trip — no chance of
+	//     accidentally reaching a real github.com 404 (which would
+	//     trigger the credential-helper path again on some setups).
+	//
+	// Bound the overall call so a stalled subprocess can't hang the
+	// suite even if both guards fail somehow. 5s is plenty — DNS for
+	// .invalid resolves to NXDOMAIN in milliseconds and the only
+	// reason this would approach the budget is a misbehaving
+	// kustomize/git-library internal retry loop.
+	t.Setenv("GIT_TERMINAL_PROMPT", "0")
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
 	outDir := t.TempDir()
@@ -334,7 +354,7 @@ func TestWrite_VendorCharts_KustomizeFallthrough(t *testing.T) {
 		Components: []localformat.Component{{
 			Name:       "kpack",
 			Namespace:  "kpack",
-			Repository: "https://github.com/example/kpack-overlay",
+			Repository: "https://kpack-overlay.invalid",
 			Path:       "config/default",
 			Tag:        "v0.1.0",
 		}},
@@ -343,9 +363,9 @@ func TestWrite_VendorCharts_KustomizeFallthrough(t *testing.T) {
 	})
 	recs := res.VendoredCharts
 	// Unconditional: kustomize must never produce a vendor record,
-	// build success or not. (The kustomize build itself typically fails
-	// in the hermetic test environment because we can't reach the git
-	// remote, but the routing decision is what we're pinning here.)
+	// build success or not. (The kustomize build itself fails fast
+	// because the .invalid TLD doesn't resolve, but the routing
+	// decision is what we're pinning here.)
 	if len(recs) != 0 {
 		t.Errorf("kustomize component should not produce vendor records, got %d (err=%v)", len(recs), err)
 	}
diff --git a/pkg/cli/bundle.go b/pkg/cli/bundle.go
index af5c63837..9a6415b4a 100644
--- a/pkg/cli/bundle.go
+++ b/pkg/cli/bundle.go
@@ -167,10 +167,20 @@ func parseBundleCmdOptions(cmd *cli.Command, cfg *appcfg.AICRConfig) (*bundleCmd
 		opts.outputDir = absOut
 	}
 
-	// When using Argo CD deployer with OCI output and no explicit --repo,
-	// auto-populate repoURL from the OCI reference (issue #519).
+	// When using --deployer argocd with OCI output and no explicit --repo,
+	// auto-populate repoURL from the OCI reference. The oci:// scheme is
+	// required so Argo CD routes to its native OCI artifact source instead
+	// of treating the value as a Git remote.
+	//
+	// argocd-helm is intentionally excluded: that bundle is URL-portable
+	// by design — the publish location is supplied at `helm install`
+	// time via `--set repoURL=...`, and the bundler's --repo flag is a
+	// no-op for it (the helper logs `--repo is ignored with --deployer
+	// argocd-helm`). Auto-filling repoURL here would surface that
+	// "ignored" warning even when the user never passed --repo, which
+	// blurs the URL-portable contract of the deployer.
 	if opts.deployer == config.DeployerArgoCD && opts.ociRef != nil && opts.repoURL == "" {
-		opts.repoURL = opts.ociRef.Registry + "/" + opts.ociRef.Repository
+		opts.repoURL = oci.EnsureScheme(opts.ociRef.Registry + "/" + opts.ociRef.Repository)
 	}
 
 	// Derive target revision: use OCI tag when available
@@ -636,15 +646,34 @@ func selectAttester(ctx context.Context, opts *bundleCmdOptions) (attestation.At
 }
 
 // pushOCIBundle packages and pushes the bundle to an OCI registry.
+// Branches on deployer: --deployer argocd-helm uses the Helm OCI flow
+// (helm.config.v1+json config + helm.chart.content.v1.tar+gzip layer)
+// so `helm pull oci://…` discovers the chart at pull time; every other
+// deployer uses AICR's generic OCI artifactType. See #961.
 func pushOCIBundle(ctx context.Context, opts *bundleCmdOptions, out *result.Output) error {
-	pushResult, err := oci.PackageAndPush(ctx, oci.OutputConfig{
-		SourceDir:   opts.outputDir,
-		OutputDir:   opts.outputDir,
-		Reference:   opts.ociRef,
-		Version:     version,
-		PlainHTTP:   opts.plainHTTP,
-		InsecureTLS: opts.insecureTLS,
-	})
+	var (
+		pushResult *oci.PackageAndPushResult
+		err        error
+	)
+	if opts.deployer == config.DeployerArgoCDHelm {
+		pushResult, err = oci.PackageAndPushHelmChart(ctx, oci.HelmChartOptions{
+			SourceDir:   opts.outputDir,
+			OutputDir:   opts.outputDir,
+			Reference:   opts.ociRef,
+			Version:     version,
+			PlainHTTP:   opts.plainHTTP,
+			InsecureTLS: opts.insecureTLS,
+		})
+	} else {
+		pushResult, err = oci.PackageAndPush(ctx, oci.OutputConfig{
+			SourceDir:   opts.outputDir,
+			OutputDir:   opts.outputDir,
+			Reference:   opts.ociRef,
+			Version:     version,
+			PlainHTTP:   opts.plainHTTP,
+			InsecureTLS: opts.insecureTLS,
+		})
+	}
 	if err != nil {
 		return err
 	}
diff --git a/pkg/cli/bundle_test.go b/pkg/cli/bundle_test.go
index cf0f62295..75ddf9c59 100644
--- a/pkg/cli/bundle_test.go
+++ b/pkg/cli/bundle_test.go
@@ -16,6 +16,8 @@ package cli
 
 import (
 	"context"
+	"os"
+	"path/filepath"
 	"testing"
 
 	"github.com/NVIDIA/aicr/pkg/bundler/attestation"
@@ -120,3 +122,72 @@ func TestSelectAttester_WiresEnvAndFlags(t *testing.T) {
 		t.Errorf("expected *LazyKeylessAttester (deferred token resolution), got %T", att)
 	}
 }
+
+// TestParseBundleCmdOptions_OCIRepoURLDerivation verifies the auto-population
+// rules for opts.repoURL when bundling to an OCI target:
+//
+//   - --deployer argocd: derive `oci://...` from --output. Argo CD v3.1+
+//     parses a schemeless registry/repo string as a Git remote and fails
+//     on ssh-agent; the `oci://` prefix routes to its native OCI source.
+//   - --deployer argocd-helm: do NOT derive. That bundle is URL-portable
+//     by design — the publish location is supplied at `helm install`
+//     time via `--set repoURL=...`. Auto-deriving would surface the
+//     "--repo is ignored" warning even when the user never passed --repo.
+//   - --deployer helm: never derive.
+//   - explicit --repo: never overwritten.
+func TestParseBundleCmdOptions_OCIRepoURLDerivation(t *testing.T) {
+	tmp := t.TempDir()
+	recipePath := filepath.Join(tmp, "recipe.yaml")
+	if err := os.WriteFile(recipePath, []byte("kind: Recipe\n"), 0o600); err != nil {
+		t.Fatalf("write recipe: %v", err)
+	}
+
+	const ociOutput = "oci://reg.example.com:5000/aicr/foo:v1"
+
+	tests := []struct {
+		name           string
+		args           []string
+		expectedRepo   string
+		expectedTarget string
+	}{
+		{
+			name:           "argocd OCI derives oci:// scheme",
+			args:           []string{"--recipe", recipePath, "--output", ociOutput, "--deployer", "argocd"},
+			expectedRepo:   "oci://reg.example.com:5000/aicr/foo",
+			expectedTarget: "v1",
+		},
+		{
+			name:           "argocd-helm OCI does NOT derive repoURL (URL-portable contract)",
+			args:           []string{"--recipe", recipePath, "--output", ociOutput, "--deployer", "argocd-helm"},
+			expectedRepo:   "",
+			expectedTarget: "v1",
+		},
+		{
+			name:           "explicit --repo not overwritten",
+			args:           []string{"--recipe", recipePath, "--output", ociOutput, "--deployer", "argocd", "--repo", "https://github.com/x/y"},
+			expectedRepo:   "https://github.com/x/y",
+			expectedTarget: "v1",
+		},
+		{
+			name:           "helm deployer leaves repoURL empty",
+			args:           []string{"--recipe", recipePath, "--output", ociOutput, "--deployer", "helm"},
+			expectedRepo:   "",
+			expectedTarget: "v1",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			opts := captureBundleOpts(t, tt.args)
+			if opts == nil {
+				t.Fatal("captureBundleOpts returned nil")
+			}
+			if opts.repoURL != tt.expectedRepo {
+				t.Errorf("repoURL = %q, want %q", opts.repoURL, tt.expectedRepo)
+			}
+			if opts.targetRevision != tt.expectedTarget {
+				t.Errorf("targetRevision = %q, want %q", opts.targetRevision, tt.expectedTarget)
+			}
+		})
+	}
+}
diff --git a/pkg/defaults/timeouts.go b/pkg/defaults/timeouts.go
index e177f8f7d..425cb0956 100644
--- a/pkg/defaults/timeouts.go
+++ b/pkg/defaults/timeouts.go
@@ -453,6 +453,15 @@ const (
 	// or server to allocate an unbounded buffer.
 	MaxConfigBytes int64 = 1 * 1024 * 1024 // 1 MiB
 
+	// MaxChartYAMLBytes caps the size of a Chart.yaml file that
+	// pkg/oci.PackageAndPushHelmChart reads from a caller-supplied
+	// SourceDir before pushing as an OCI artifact. Real Chart.yaml
+	// files are well under 4 KiB (apiVersion + name + version +
+	// maybe dependencies); 1 MiB is generous headroom while bounding
+	// an attacker-influenced SourceDir (symlink to /proc, NFS mount,
+	// FUSE filesystem) before os.ReadFile would OOM the process.
+	MaxChartYAMLBytes int64 = 1 * 1024 * 1024 // 1 MiB
+
 	// MaxChecksumFileBytes caps the size of a bundle checksums.txt file.
 	// One entry is ~80 bytes; 1 MiB allows ~12k entries — well above any
 	// realistic bundle while bounding attacker-influenced inputs at the
diff --git a/pkg/oci/helm.go b/pkg/oci/helm.go
new file mode 100644
index 000000000..b81fc9e66
--- /dev/null
+++ b/pkg/oci/helm.go
@@ -0,0 +1,528 @@
+// Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package oci
+
+import (
+	"archive/tar"
+	"bytes"
+	"compress/gzip"
+	"context"
+	"encoding/json"
+	stderrors "errors"
+	"fmt"
+	"io"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/Masterminds/semver/v3"
+	"github.com/opencontainers/go-digest"
+	ociv1 "github.com/opencontainers/image-spec/specs-go/v1"
+	"gopkg.in/yaml.v3"
+	oras "oras.land/oras-go/v2"
+	"oras.land/oras-go/v2/content/oci"
+	"oras.land/oras-go/v2/registry/remote"
+
+	"github.com/NVIDIA/aicr/pkg/defaults"
+	apperrors "github.com/NVIDIA/aicr/pkg/errors"
+	"github.com/NVIDIA/aicr/pkg/serializer"
+)
+
+// Helm OCI registry mediaTypes — pinned per the official Helm OCI spec
+// (https://helm.sh/docs/topics/registries/). `helm pull`/`helm install`
+// read the manifest config blob's mediaType to discover whether the
+// artifact is a Helm chart. When AICR pushes with our generic AICR
+// artifactType, helm v3 rejects the artifact during pull discovery and
+// reports `unable to locate any tags in provided repository` (see
+// #961). Pushing with these constants makes the argocd-helm bundle
+// indistinguishable from a chart pushed via `helm push`.
+const (
+	helmConfigMediaType = "application/vnd.cncf.helm.config.v1+json"
+	helmLayerMediaType  = "application/vnd.cncf.helm.chart.content.v1.tar+gzip"
+)
+
+// chartYAML is intentionally the OCI **config blob** (registry-visible
+// metadata), NOT a full Chart.yaml model. Helm itself reads name +
+// version off the chart tarball's internal Chart.yaml — the full
+// Chart.yaml content ships in the tar layer, so any Helm chart key not
+// modeled here is not lost. The config blob mirrors only the fields
+// `helm pull` validates and that registries (Harbor, GCR, ghcr) render
+// in their artifact-metadata UI.
+//
+// Do NOT widen this struct to model additional Chart.yaml fields. The
+// fix for a missing chart property is to ensure it's in the in-source
+// Chart.yaml (which the bundler writes), not to plumb it through the
+// config blob.
+type chartYAML struct {
+	APIVersion  string `yaml:"apiVersion" json:"apiVersion"`
+	Name        string `yaml:"name" json:"name"`
+	Version     string `yaml:"version" json:"version"`
+	Description string `yaml:"description,omitempty" json:"description,omitempty"`
+	Type        string `yaml:"type,omitempty" json:"type,omitempty"`
+	AppVersion  string `yaml:"appVersion,omitempty" json:"appVersion,omitempty"`
+}
+
+// HelmChartOptions configures the Helm OCI push flow.
+type HelmChartOptions struct {
+	// SourceDir is the directory containing the Helm chart (Chart.yaml,
+	// values.yaml, templates/, etc.). Mirrors PackageOptions.SourceDir.
+	SourceDir string
+	// OutputDir is where the temporary OCI Image Layout is created;
+	// callers usually pass the bundle output directory so cleanup is
+	// scoped to one tree.
+	OutputDir string
+	// Reference is the OCI registry reference. The reference's Tag MUST
+	// match the chart version that helm install / helm pull will look
+	// for; the push flow rewrites the in-source Chart.yaml so this
+	// invariant holds even when the recipe metadata version differs
+	// from the user-supplied --output tag.
+	Reference *Reference
+	// PlainHTTP uses HTTP instead of HTTPS (local test registries).
+	PlainHTTP bool
+	// InsecureTLS skips TLS certificate verification.
+	InsecureTLS bool
+	// Version threads the AICR CLI version into manifest annotations
+	// alongside the chart-derived ones. Pure metadata; not consulted
+	// by Helm at install time.
+	Version string
+}
+
+// PackageAndPushHelmChart packages a Helm chart directory into a
+// Helm-OCI compatible artifact and pushes it to a registry. Closes the
+// gap described in #961: the standard AICR OCI flow uses
+// `application/vnd.nvidia.aicr.artifact` for the manifest's
+// artifactType and `application/vnd.oci.image.layer.v1.tar+gzip` for
+// the layer; helm v3 rejects those during pull discovery and the user
+// sees `unable to locate any tags in provided repository` — even
+// though the tag exists and `/v2/<name>/tags/list` returns it.
+//
+// SourceDir is NOT mutated. The Chart.yaml rewrite (so its version
+// matches Reference.Tag — see helm's chart-version invariant below)
+// happens on a copy inside OutputDir, leaving the caller's source
+// tree byte-identical. Earlier revisions of this function rewrote
+// SourceDir in place, which (a) leaked the OCI tag back into the
+// caller's working copy and (b) was non-atomic — a crash mid-write
+// left a corrupt Chart.yaml the next run would refuse to parse.
+//
+// Helm OCI tags ARE chart versions: `helm install … --version <tag>`
+// looks up `<repo>:<tag>` and verifies the chart manifest's version
+// against `<tag>`. Without the rewrite, a user who supplies `--output
+// oci://…/foo:5bc50950-helm` for a recipe whose metadata version is
+// `0.1.0` ends up with `Chart.yaml: 0.1.0` at the `:5bc50950-helm`
+// tag, and helm rejects the version mismatch.
+func PackageAndPushHelmChart(ctx context.Context, opts HelmChartOptions) (*PackageAndPushResult, error) {
+	if opts.Reference == nil || !opts.Reference.IsOCI {
+		return nil, apperrors.New(apperrors.ErrCodeInvalidRequest, "OCI reference is required for PackageAndPushHelmChart")
+	}
+	if opts.Reference.Tag == "" {
+		return nil, apperrors.New(apperrors.ErrCodeInvalidRequest, "tag is required for Helm OCI push")
+	}
+	if err := validateHelmTag(opts.Reference.Tag); err != nil {
+		return nil, err
+	}
+	if err := validateRegistryReference(opts.Reference.Registry, opts.Reference.Repository); err != nil {
+		return nil, err
+	}
+
+	absSourceDir, err := filepath.Abs(opts.SourceDir)
+	if err != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to resolve source directory", err)
+	}
+	absOutputDir, err := filepath.Abs(opts.OutputDir)
+	if err != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to resolve output directory", err)
+	}
+
+	// Stage a working copy of SourceDir so the rewrite + tar steps never
+	// mutate the caller's tree. MUST be outside SourceDir: the CLI calls
+	// this function with SourceDir == OutputDir, and putting the work
+	// dir under either would cause copyDir's recursive walk to copy the
+	// freshly-created work dir back into itself (observed: ~250 nested
+	// `helm-chart-work/helm-chart-work/...` levels until ENAMETOOLONG).
+	// Use the OS temp area instead and clean up unconditionally.
+	workDir, mkdirErr := os.MkdirTemp("", "aicr-helm-chart-")
+	if mkdirErr != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to create Helm staging dir", mkdirErr)
+	}
+	defer func() { _ = os.RemoveAll(workDir) }()
+	if copyErr := copyDir(ctx, absSourceDir, workDir); copyErr != nil {
+		return nil, apperrors.PropagateOrWrap(copyErr, apperrors.ErrCodeInternal,
+			"failed to stage Helm chart source for OCI push")
+	}
+
+	chartMeta, err := loadAndRewriteChartYAML(workDir, opts.Reference.Tag)
+	if err != nil {
+		return nil, err
+	}
+
+	slog.Info("packaging Helm chart for OCI push",
+		"registry", opts.Reference.Registry,
+		"repository", opts.Reference.Repository,
+		"tag", opts.Reference.Tag,
+		"chart_name", chartMeta.Name,
+		"chart_version", chartMeta.Version,
+	)
+
+	chartTGZ, err := buildHelmChartTGZ(ctx, workDir, chartMeta.Name)
+	if err != nil {
+		return nil, err
+	}
+
+	configBlob, err := json.Marshal(chartMeta)
+	if err != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to marshal Helm chart config", err)
+	}
+
+	storePath := filepath.Join(absOutputDir, "oci-layout-helm")
+	if mkdirErr := os.MkdirAll(storePath, 0o755); mkdirErr != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to create Helm OCI store directory", mkdirErr)
+	}
+	store, err := oci.New(storePath)
+	if err != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to create Helm OCI store", err)
+	}
+
+	manifestDesc, err := stageHelmOCIManifest(ctx, store, chartMeta, chartTGZ, configBlob, opts.Version)
+	if err != nil {
+		return nil, err
+	}
+	if tagErr := store.Tag(ctx, manifestDesc, opts.Reference.Tag); tagErr != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to tag Helm OCI manifest", tagErr)
+	}
+
+	pushResult, err := pushHelmOCIFromStore(ctx, opts, store)
+	if err != nil {
+		return nil, err
+	}
+
+	return &PackageAndPushResult{
+		Digest:    pushResult.Digest,
+		MediaType: pushResult.MediaType,
+		Size:      pushResult.Size,
+		Reference: pushResult.Reference,
+		StorePath: storePath,
+	}, nil
+}
+
+// validateHelmTag rejects OCI tags that helm v3 won't accept as chart
+// versions. Helm's registry client filters tags via
+// `semver.StrictNewVersion` (pkg/registry/client.go::Tags()) — any tag
+// that doesn't parse as semver is silently dropped, and `helm pull` /
+// `helm install --version <tag>` reports `unable to locate any tags in
+// provided repository` even when the artifact exists in the registry
+// with correct Helm OCI mediaTypes. Returning a typed error here
+// converts that silent helm-side failure into an actionable
+// bundle-time error with the constraint spelled out.
+//
+// One concession to common usage: a leading `v` (e.g. `v1.2.3`) is
+// stripped before the parse because helm itself strips it (see
+// `pkg/registry/client.go::Tags()` → `strings.TrimPrefix(tag, "v")` is
+// implicit in the version compare path). Users who reach for `v1.2.3`
+// should not be punished for a stylistic choice helm accepts.
+func validateHelmTag(tag string) error {
+	candidate := strings.TrimPrefix(tag, "v")
+	if _, err := semver.StrictNewVersion(candidate); err != nil {
+		return apperrors.New(apperrors.ErrCodeInvalidRequest,
+			fmt.Sprintf("Helm OCI requires a semver-compatible tag; got %q. "+
+				"`helm pull` / `helm install --version <tag>` filter tags via "+
+				"semver.StrictNewVersion and silently drop non-semver tags, "+
+				"surfacing as `unable to locate any tags in provided repository`. "+
+				"Wrap arbitrary identifiers as a semver pre-release, e.g. "+
+				"`0.0.0-%s`.", tag, tag))
+	}
+	return nil
+}
+
+// loadAndRewriteChartYAML reads Chart.yaml from sourceDir, sets its
+// version to tag, and writes the result back. Returns the parsed chart
+// metadata so callers can build the OCI config blob without re-parsing.
+//
+// The rewrite is intentional and idempotent — see
+// PackageAndPushHelmChart for the chart-version-equals-OCI-tag
+// invariant.
+func loadAndRewriteChartYAML(sourceDir, tag string) (*chartYAML, error) {
+	chartPath := filepath.Join(sourceDir, "Chart.yaml")
+
+	// Bounded read: SourceDir is caller-supplied, so a hostile symlink
+	// (/proc, NFS, FUSE) could trick os.ReadFile into allocating the
+	// whole file into memory before yaml.Unmarshal would notice. Open
+	// + LimitReader against the Chart.yaml-shaped cap from pkg/defaults
+	// keeps the allocation bounded even under an attacker-influenced
+	// source path. See CLAUDE.md anti-pattern table for the rule.
+	f, err := os.Open(chartPath) //nolint:gosec // path is under caller-supplied source dir
+	if err != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInvalidRequest, "failed to open Chart.yaml from source directory", err)
+	}
+	defer func() { _ = f.Close() }()
+	data, err := io.ReadAll(io.LimitReader(f, defaults.MaxChartYAMLBytes+1))
+	if err != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInvalidRequest, "failed to read Chart.yaml from source directory", err)
+	}
+	if int64(len(data)) > defaults.MaxChartYAMLBytes {
+		return nil, apperrors.New(apperrors.ErrCodeInvalidRequest,
+			fmt.Sprintf("Chart.yaml exceeds %d bytes — refusing to read unbounded source", defaults.MaxChartYAMLBytes))
+	}
+
+	var meta chartYAML
+	if unmarshalErr := yaml.Unmarshal(data, &meta); unmarshalErr != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInvalidRequest, "failed to parse Chart.yaml", unmarshalErr)
+	}
+	if meta.Name == "" {
+		return nil, apperrors.New(apperrors.ErrCodeInvalidRequest, "Chart.yaml: name is required")
+	}
+	if nameErr := validateChartName(meta.Name); nameErr != nil {
+		return nil, nameErr
+	}
+	if meta.APIVersion == "" {
+		meta.APIVersion = "v2"
+	}
+	meta.Version = tag
+
+	// The marshaled bytes get written back to Chart.yaml, which is then
+	// tarred into the chart.tgz that becomes the OCI artifact's layer
+	// blob. The layer digest is consumed by helm OCI's content-
+	// addressable cache and by any downstream attestation. Deterministic
+	// marshal is therefore mandatory — the chartYAML struct itself is
+	// stable, but `yaml.v3` makes formatting choices (block-vs-flow
+	// scalar style, line wrapping) that can vary; using the project's
+	// deterministic serializer is the lighter belt-and-suspenders
+	// matching the rest of the digest-feeding paths in this repo.
+	rewritten, err := serializer.MarshalYAMLDeterministic(meta)
+	if err != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to marshal Chart.yaml", err)
+	}
+	if err := os.WriteFile(chartPath, rewritten, 0o600); err != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to rewrite Chart.yaml with OCI tag version", err)
+	}
+	return &meta, nil
+}
+
+// validateChartName rejects chart names whose layout would let the tar
+// entry prefix (`<chartName>/<rel>`) escape the chart root on extraction.
+// Helm itself enforces the Chart.yaml name to be `[a-z0-9-]+` at install
+// time, but the OCI push path here is exposed as a public function and
+// a future caller with a name derived from less-validated input could
+// otherwise produce a tarball with absolute or parent-traversal entries.
+//
+// filepath.IsLocal catches `/`, `..`, drive letters, and parent
+// references; the extra ContainsAny adds belt-and-suspenders against
+// control characters and slashes (IsLocal accepts an embedded NUL on
+// some platforms).
+func validateChartName(name string) error {
+	if name == "" {
+		return apperrors.New(apperrors.ErrCodeInvalidRequest, "chart name is required")
+	}
+	if !filepath.IsLocal(name) || strings.ContainsAny(name, "/\\\x00") {
+		return apperrors.New(apperrors.ErrCodeInvalidRequest,
+			fmt.Sprintf("chart name %q contains unsafe characters — must be a single path segment without `/`, `..`, or control chars", name))
+	}
+	return nil
+}
+
+// buildHelmChartTGZ produces a Helm-conformant chart.tgz: a gzipped tar
+// where every entry is prefixed with `<chart-name>/` (the Helm "chart
+// root" convention). `helm package` produces exactly this layout; helm
+// v3's pull / install both depend on the prefix so the chart's
+// templates/ resolution roots correctly inside the extracted directory.
+//
+// Reproducible-build hygiene: mtimes are zeroed and uid/gid/uname/
+// gname are stripped (left at their zero values) so two AICR runs
+// against the same source tree produce the same chart.tgz digest,
+// which the OCI registry then dedups.
+//
+// ctx is checked on entry and per directory entry so a parent timeout
+// (push deadline, server shutdown) terminates the walk without
+// orphaning a partially-written buffer.
+func buildHelmChartTGZ(ctx context.Context, sourceDir, chartName string) ([]byte, error) {
+	if err := ctx.Err(); err != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeUnavailable, "chart packaging canceled", err)
+	}
+	// Defense-in-depth: loadAndRewriteChartYAML already validated the
+	// name, but this function is exported and a future caller might
+	// construct chartName from a different source. Reject here too so
+	// the tar-entry prefix is always a safe single segment.
+	if err := validateChartName(chartName); err != nil {
+		return nil, err
+	}
+
+	var buf bytes.Buffer
+	gz := gzip.NewWriter(&buf)
+	tw := tar.NewWriter(gz)
+
+	walkErr := filepath.Walk(sourceDir, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if ctxErr := ctx.Err(); ctxErr != nil {
+			return ctxErr
+		}
+		rel, relErr := filepath.Rel(sourceDir, path)
+		if relErr != nil {
+			return relErr
+		}
+		if rel == "." {
+			// Skip the synthetic chart-root directory entry; helm v3
+			// extracts by file path and tolerates its absence, and
+			// emitting it adds noise to the tar that defeats the
+			// reproducible-digest goal whenever a tool inserts/omits it.
+			return nil
+		}
+		entry := chartName + "/" + filepath.ToSlash(rel)
+
+		if info.IsDir() {
+			hdr := &tar.Header{
+				Name:     entry + "/",
+				Mode:     0o755,
+				Typeflag: tar.TypeDir,
+			}
+			return tw.WriteHeader(hdr)
+		}
+		// Skip non-regular files (symlinks, sockets, etc.) — chart
+		// bundles produced by argocdhelm are always plain files;
+		// surfacing the rest defensively avoids pushing an unsupported
+		// tar entry the registry would otherwise accept silently.
+		if !info.Mode().IsRegular() {
+			return nil
+		}
+		hdr := &tar.Header{
+			Name:     entry,
+			Mode:     int64(info.Mode().Perm()),
+			Size:     info.Size(),
+			Typeflag: tar.TypeReg,
+		}
+		if err := tw.WriteHeader(hdr); err != nil {
+			return err
+		}
+		f, openErr := os.Open(filepath.Clean(path)) //nolint:gosec // walking caller-supplied source dir
+		if openErr != nil {
+			return openErr
+		}
+		_, copyErr := io.Copy(tw, f)
+		closeErr := f.Close()
+		if copyErr != nil {
+			return copyErr
+		}
+		return closeErr
+	})
+	if walkErr != nil {
+		// filepath.Walk callbacks return ctx.Err() on cancellation
+		// (see the entry-point check at the top of the closure).
+		// Surfacing those as ErrCodeInternal would misclassify a
+		// caller-initiated cancel or a parent-timeout as a bug, and
+		// flatten the CI's distinction between "chart packaging
+		// failed" and "parent context expired". Classify by the
+		// inner error so the same path stays ErrCodeInternal for
+		// real I/O failures and ErrCodeTimeout for cancellation.
+		if stderrors.Is(walkErr, context.Canceled) || stderrors.Is(walkErr, context.DeadlineExceeded) {
+			return nil, apperrors.PropagateOrWrap(walkErr, apperrors.ErrCodeTimeout, "chart packaging canceled")
+		}
+		return nil, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to build Helm chart tarball", walkErr)
+	}
+	if err := tw.Close(); err != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to close tar writer", err)
+	}
+	if err := gz.Close(); err != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to close gzip writer", err)
+	}
+	return buf.Bytes(), nil
+}
+
+// stageHelmOCIManifest pushes the chart layer + config blob into store
+// and packs an OCI 1.0 manifest with the Helm config mediaType. v1.0
+// is intentional: helm v3's pull path reads manifest.config.mediaType
+// to discover the chart artifactType. v1.1's separate `artifactType`
+// field with an empty config works for some registries but not all
+// (notably older registry:2 builds observed in #961's repro); v1.0
+// keeps the legacy interpretation where the config blob's mediaType
+// IS the artifact type.
+func stageHelmOCIManifest(ctx context.Context, store *oci.Store, meta *chartYAML, chartTGZ, configBlob []byte, aicrVersion string) (ociv1.Descriptor, error) {
+	layerDesc := ociv1.Descriptor{
+		MediaType: helmLayerMediaType,
+		Digest:    digest.FromBytes(chartTGZ),
+		Size:      int64(len(chartTGZ)),
+	}
+	if err := store.Push(ctx, layerDesc, bytes.NewReader(chartTGZ)); err != nil {
+		return ociv1.Descriptor{}, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to add chart layer to OCI store", err)
+	}
+
+	configDesc := ociv1.Descriptor{
+		MediaType: helmConfigMediaType,
+		Digest:    digest.FromBytes(configBlob),
+		Size:      int64(len(configBlob)),
+	}
+	if err := store.Push(ctx, configDesc, bytes.NewReader(configBlob)); err != nil {
+		return ociv1.Descriptor{}, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to add chart config to OCI store", err)
+	}
+
+	annotations := map[string]string{
+		ociv1.AnnotationCreated:           reproducibleTimestamp,
+		ociv1.AnnotationTitle:             meta.Name,
+		ociv1.AnnotationVersion:           meta.Version,
+		"org.opencontainers.image.vendor": "NVIDIA",
+	}
+	if meta.Description != "" {
+		annotations[ociv1.AnnotationDescription] = meta.Description
+	}
+	if aicrVersion != "" {
+		annotations["com.nvidia.aicr.version"] = aicrVersion
+	}
+
+	packOpts := oras.PackManifestOptions{
+		Layers:              []ociv1.Descriptor{layerDesc},
+		ConfigDescriptor:    &configDesc,
+		ManifestAnnotations: annotations,
+	}
+	manifestDesc, err := oras.PackManifest(ctx, store, oras.PackManifestVersion1_0, helmConfigMediaType, packOpts)
+	if err != nil {
+		return ociv1.Descriptor{}, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to pack Helm OCI manifest", err)
+	}
+	return manifestDesc, nil
+}
+
+// pushHelmOCIFromStore uploads the staged manifest tree from an OCI
+// Image Layout to the remote registry. Wraps the same retry policy
+// as the generic AICR push path so the two flows share transient-
+// failure handling.
+func pushHelmOCIFromStore(ctx context.Context, opts HelmChartOptions, store *oci.Store) (*PushResult, error) {
+	registryHost := stripProtocol(opts.Reference.Registry)
+	refString := fmt.Sprintf("%s/%s:%s", registryHost, opts.Reference.Repository, opts.Reference.Tag)
+
+	repo, err := remote.NewRepository(fmt.Sprintf("%s/%s", registryHost, opts.Reference.Repository))
+	if err != nil {
+		return nil, apperrors.Wrap(apperrors.ErrCodeInternal, "failed to initialize remote repository", err)
+	}
+	repo.PlainHTTP = opts.PlainHTTP
+
+	authClient, err := createAuthClientForHost(registryHost, opts.PlainHTTP, opts.InsecureTLS)
+	if err != nil {
+		slog.Warn("failed to initialize Docker credential store, continuing without authentication", "error", err)
+	}
+	repo.Client = authClient
+
+	copyOpts := oras.DefaultCopyOptions
+	copyOpts.Concurrency = defaults.OCIPushConcurrency
+	desc, err := copyWithRetry(ctx, store, opts.Reference.Tag, repo, opts.Reference.Tag, copyOpts, oras.Copy)
+	if err != nil {
+		return nil, err
+	}
+
+	return &PushResult{
+		Digest:    desc.Digest.String(),
+		MediaType: desc.MediaType,
+		Size:      desc.Size,
+		Reference: refString,
+	}, nil
+}
diff --git a/pkg/oci/helm_test.go b/pkg/oci/helm_test.go
new file mode 100644
index 000000000..d5833b0b3
--- /dev/null
+++ b/pkg/oci/helm_test.go
@@ -0,0 +1,795 @@
+// Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package oci
+
+import (
+	"archive/tar"
+	"bytes"
+	"compress/gzip"
+	"context"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	ociv1 "github.com/opencontainers/image-spec/specs-go/v1"
+	"gopkg.in/yaml.v3"
+	orasoci "oras.land/oras-go/v2/content/oci"
+)
+
+// openOCILayout creates (and returns) an OCI Image Layout store rooted
+// at dir. Mirrors how PackageAndPushHelmChart provisions its staging
+// store so the test exercises the production code path.
+func openOCILayout(dir string) (*orasoci.Store, error) {
+	if err := os.MkdirAll(dir, 0o755); err != nil {
+		return nil, err
+	}
+	return orasoci.New(dir)
+}
+
+// readOCIManifest decodes the JSON manifest blob at
+// <storeDir>/blobs/sha256/<digest> back into a typed ociv1.Manifest.
+// Used by the helm tests to assert mediaTypes were set correctly
+// without standing up a registry.
+func readOCIManifest(t *testing.T, storeDir, digest string) ociv1.Manifest {
+	t.Helper()
+	blobPath := filepath.Join(storeDir, "blobs", "sha256", strings.TrimPrefix(digest, "sha256:"))
+	raw, err := os.ReadFile(blobPath)
+	if err != nil {
+		t.Fatalf("read manifest blob: %v", err)
+	}
+	var manifest ociv1.Manifest
+	if err := json.Unmarshal(raw, &manifest); err != nil {
+		t.Fatalf("unmarshal manifest: %v", err)
+	}
+	return manifest
+}
+
+// writeHelmChartFixture stages a minimal Helm chart on disk for the
+// tests below. Returns the chart's root directory. Caller supplies
+// chartName so tests can vary it where the chart-root prefix matters
+// (buildHelmChartTGZ). Version is fixed at "0.1.0" — the rewrite test
+// asserts that the on-disk version is REPLACED by the OCI tag, so the
+// starting value just needs to be something different from any tag a
+// test uses.
+func writeHelmChartFixture(t *testing.T, chartName string) string {
+	t.Helper()
+	dir := t.TempDir()
+	files := map[string]string{
+		"Chart.yaml": "apiVersion: v2\nname: " + chartName + "\nversion: 0.1.0" +
+			"\ndescription: AICR test chart\n",
+		"values.yaml":               "replicaCount: 1\n",
+		"templates/deployment.yaml": "kind: Deployment\n",
+		"templates/_helpers.tpl":    `{{ define "noop" }}{{ end }}` + "\n",
+	}
+	for rel, content := range files {
+		path := filepath.Join(dir, rel)
+		if mkdirErr := os.MkdirAll(filepath.Dir(path), 0o755); mkdirErr != nil {
+			t.Fatalf("mkdir %s: %v", filepath.Dir(path), mkdirErr)
+		}
+		if writeErr := os.WriteFile(path, []byte(content), 0o600); writeErr != nil {
+			t.Fatalf("write %s: %v", path, writeErr)
+		}
+	}
+	return dir
+}
+
+// TestLoadAndRewriteChartYAML_RewritesVersion verifies the OCI tag
+// invariant: helm OCI tags ARE chart versions, so the push flow must
+// rewrite Chart.yaml.version to match Reference.Tag. Catches
+// regressions where the rewrite is silently skipped and helm install
+// rejects the chart with a version-mismatch error.
+func TestLoadAndRewriteChartYAML_RewritesVersion(t *testing.T) {
+	dir := writeHelmChartFixture(t, "aicr-bundle")
+
+	meta, err := loadAndRewriteChartYAML(dir, "5bc50950-argocd-helm-oci")
+	if err != nil {
+		t.Fatalf("loadAndRewriteChartYAML() error = %v", err)
+	}
+	if meta.Version != "5bc50950-argocd-helm-oci" {
+		t.Errorf("meta.Version = %q, want %q", meta.Version, "5bc50950-argocd-helm-oci")
+	}
+	if meta.Name != "aicr-bundle" {
+		t.Errorf("meta.Name = %q, want aicr-bundle", meta.Name)
+	}
+
+	raw, err := os.ReadFile(filepath.Join(dir, "Chart.yaml"))
+	if err != nil {
+		t.Fatalf("read Chart.yaml: %v", err)
+	}
+	var onDisk chartYAML
+	if err := yaml.Unmarshal(raw, &onDisk); err != nil {
+		t.Fatalf("unmarshal rewritten Chart.yaml: %v", err)
+	}
+	if onDisk.Version != "5bc50950-argocd-helm-oci" {
+		t.Errorf("on-disk Chart.yaml version = %q, want rewrite to tag", onDisk.Version)
+	}
+}
+
+// TestLoadAndRewriteChartYAML_RejectsMissingName surfaces a malformed
+// Chart.yaml (missing name) as ErrCodeInvalidRequest so the CLI exits
+// non-zero before the (more expensive) tar.gz + push steps.
+func TestLoadAndRewriteChartYAML_RejectsMissingName(t *testing.T) {
+	dir := t.TempDir()
+	if err := os.WriteFile(filepath.Join(dir, "Chart.yaml"),
+		[]byte("apiVersion: v2\nversion: 0.1.0\n"), 0o600); err != nil {
+		t.Fatalf("write Chart.yaml: %v", err)
+	}
+	_, err := loadAndRewriteChartYAML(dir, "v1")
+	if err == nil {
+		t.Fatal("loadAndRewriteChartYAML() expected error for missing name")
+	}
+	if !strings.Contains(err.Error(), "name is required") {
+		t.Errorf("error = %v, want it to mention missing name", err)
+	}
+}
+
+// TestValidateHelmTag covers the helm-tag semver gate. Helm v3's
+// registry client filters tags via semver.StrictNewVersion before
+// surfacing them to `helm pull` / `helm install --version <tag>`;
+// non-semver tags silently disappear and the user sees `unable to
+// locate any tags in provided repository`. We catch the bad tag at
+// bundle time with a clear remediation hint instead.
+//
+// `v`-prefixed semver (e.g. `v1.2.3`) is accepted because helm itself
+// strips the prefix internally.
+func TestValidateHelmTag(t *testing.T) {
+	tests := []struct {
+		name    string
+		tag     string
+		wantErr bool
+	}{
+		{"plain semver", "1.2.3", false},
+		{"semver pre-release with dashes", "0.0.0-fe346a05-argocd-helm-oci", false},
+		{"semver with build metadata", "0.0.0+fe346a05.argocd-helm-oci", false},
+		{"v-prefixed semver (helm strips v)", "v1.2.3", false},
+		{"v-prefixed semver pre-release", "v0.0.0-rc1", false},
+		// Bug-reproducer: the kwok script's original tag scheme.
+		// helm v3 silently rejects it and reports "no tags found".
+		{"sha-deployer (kwok bug repro)", "fe346a05-argocd-helm-oci", true},
+		{"plain hash", "abc123", true},
+		{"empty", "", true},
+		{"latest", "latest", true},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := validateHelmTag(tt.tag)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("validateHelmTag(%q) error = %v, wantErr %v", tt.tag, err, tt.wantErr)
+			}
+			// On rejection, the message must include the remediation
+			// hint — that's how users find the way out of the
+			// "no tags found" rabbit hole.
+			if err != nil && !strings.Contains(err.Error(), "0.0.0-") {
+				t.Errorf("validateHelmTag(%q) error missing semver-wrap remediation: %v", tt.tag, err)
+			}
+		})
+	}
+}
+
+// TestBuildHelmChartTGZ_HasChartRootPrefix verifies the tarball obeys
+// helm's chart-root-prefix convention: every entry's path is
+// `<chart-name>/...`. Helm v3's pull/install rely on this to resolve
+// templates/ at the right level inside the extracted directory; an
+// unprefixed tarball would deserialize as a flat list of files that
+// Helm's loader can't locate templates/ inside.
+//
+// Uses a chart name that doesn't match the fixture's default
+// "aicr-bundle" so the test catches a regression that hardcodes the
+// prefix instead of using the chartName argument.
+func TestBuildHelmChartTGZ_HasChartRootPrefix(t *testing.T) {
+	const chartName = "my-renamed-chart"
+	dir := writeHelmChartFixture(t, chartName)
+
+	tgz, err := buildHelmChartTGZ(context.Background(), dir, chartName)
+	if err != nil {
+		t.Fatalf("buildHelmChartTGZ() error = %v", err)
+	}
+	entries := readTGZEntries(t, tgz)
+
+	wantPaths := map[string]bool{
+		chartName + "/Chart.yaml":                false,
+		chartName + "/values.yaml":               false,
+		chartName + "/templates/":                false,
+		chartName + "/templates/deployment.yaml": false,
+		chartName + "/templates/_helpers.tpl":    false,
+	}
+	for _, name := range entries {
+		if _, want := wantPaths[name]; want {
+			wantPaths[name] = true
+		}
+		if !strings.HasPrefix(name, chartName+"/") {
+			t.Errorf("entry %q missing chart-root prefix %q", name, chartName+"/")
+		}
+	}
+	for path, seen := range wantPaths {
+		if !seen {
+			t.Errorf("entry %q missing from tarball; got %v", path, entries)
+		}
+	}
+}
+
+// TestBuildHelmChartTGZ_Reproducible asserts the tar.gz output is
+// byte-identical across two builds AND that every tar header satisfies
+// the absolute invariants that reproducibility relies on. The
+// same-process byte equality is necessary but not sufficient — a
+// regression that caches `time.Now()` once at package-init would still
+// produce identical bytes across both builds in the same process, but
+// the next CI run would diverge. The header-level invariants below
+// catch that class of regression by checking concrete absolute values.
+func TestBuildHelmChartTGZ_Reproducible(t *testing.T) {
+	dir := writeHelmChartFixture(t, "aicr-bundle")
+	a, err := buildHelmChartTGZ(context.Background(), dir, "aicr-bundle")
+	if err != nil {
+		t.Fatalf("first build: %v", err)
+	}
+	b, err := buildHelmChartTGZ(context.Background(), dir, "aicr-bundle")
+	if err != nil {
+		t.Fatalf("second build: %v", err)
+	}
+	if !bytes.Equal(a, b) {
+		t.Fatalf("tarball bytes differ between builds (len %d vs %d)", len(a), len(b))
+	}
+
+	// Header-level absolute invariants. Walk the produced tar and
+	// assert every entry zeroes out all the fields a regression could
+	// pull from runtime state (mtime / accesstime / changetime,
+	// uid / gid / uname / gname). A bug that caches time.Now() at
+	// init would still pass the byte-equality above but trip these
+	// assertions.
+	gz, err := gzip.NewReader(bytes.NewReader(a))
+	if err != nil {
+		t.Fatalf("gzip.NewReader: %v", err)
+	}
+	defer gz.Close()
+	// gzip header's ModTime field must be the Go zero value — gzip's
+	// MTIME field is wall-clock time. If a regression sets it, every
+	// cross-run digest diverges even if every tar header is clean.
+	if !gz.ModTime.IsZero() {
+		t.Errorf("gzip header ModTime = %v, want zero (reproducibility)", gz.ModTime)
+	}
+
+	// noTime returns true for either Go's zero time.Time (year 1) or
+	// Unix epoch (year 1970). The tar wire format stores time as
+	// seconds since epoch, so the Go reader normalizes an unset header
+	// to time.Unix(0, 0) — which is NOT IsZero() but IS still the
+	// "no time recorded" sentinel. We accept either.
+	noTime := func(tm time.Time) bool { return tm.IsZero() || tm.Unix() == 0 }
+
+	tr := tar.NewReader(gz)
+	var entries int
+	for {
+		hdr, err := tr.Next()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			t.Fatalf("tar.Next: %v", err)
+		}
+		entries++
+		if !noTime(hdr.ModTime) {
+			t.Errorf("entry %q ModTime = %v (unix=%d), want zero or epoch", hdr.Name, hdr.ModTime, hdr.ModTime.Unix())
+		}
+		if !noTime(hdr.AccessTime) {
+			t.Errorf("entry %q AccessTime = %v (unix=%d), want zero or epoch", hdr.Name, hdr.AccessTime, hdr.AccessTime.Unix())
+		}
+		if !noTime(hdr.ChangeTime) {
+			t.Errorf("entry %q ChangeTime = %v (unix=%d), want zero or epoch", hdr.Name, hdr.ChangeTime, hdr.ChangeTime.Unix())
+		}
+		if hdr.Uid != 0 {
+			t.Errorf("entry %q Uid = %d, want 0", hdr.Name, hdr.Uid)
+		}
+		if hdr.Gid != 0 {
+			t.Errorf("entry %q Gid = %d, want 0", hdr.Name, hdr.Gid)
+		}
+		if hdr.Uname != "" {
+			t.Errorf("entry %q Uname = %q, want empty", hdr.Name, hdr.Uname)
+		}
+		if hdr.Gname != "" {
+			t.Errorf("entry %q Gname = %q, want empty", hdr.Name, hdr.Gname)
+		}
+	}
+	if entries == 0 {
+		t.Fatal("walked zero tar entries; fixture or builder produced empty archive")
+	}
+}
+
+// TestStageHelmOCIManifest_HasHelmMediaTypes stages the chart layer
+// + helm.config.v1+json into an OCI Image Layout and asserts the
+// manifest carries the Helm mediaTypes. `helm pull` reads
+// manifest.config.mediaType to detect the chart artifactType — a
+// regression that flips back to the AICR artifactType is exactly the
+// bug #961 surfaced. We exercise the staging step in isolation (no
+// network push) because the registry-side push path is covered by
+// PushFromStore's existing tests.
+func TestStageHelmOCIManifest_HasHelmMediaTypes(t *testing.T) {
+	storeDir := t.TempDir()
+	store, err := openOCILayout(storeDir)
+	if err != nil {
+		t.Fatalf("openOCILayout: %v", err)
+	}
+
+	meta := &chartYAML{
+		APIVersion: "v2",
+		Name:       "aicr-bundle",
+		Version:    "v9.9.9",
+	}
+	configBlob, err := json.Marshal(meta)
+	if err != nil {
+		t.Fatalf("marshal chartYAML: %v", err)
+	}
+
+	manifestDesc, err := stageHelmOCIManifest(context.Background(), store, meta,
+		[]byte("fake-chart-tgz-bytes"), configBlob, "v0.9.0-test")
+	if err != nil {
+		t.Fatalf("stageHelmOCIManifest() error = %v", err)
+	}
+
+	manifest := readOCIManifest(t, storeDir, manifestDesc.Digest.String())
+	if manifest.Config.MediaType != helmConfigMediaType {
+		t.Errorf("config.mediaType = %q, want %q (helm pull rejects otherwise — see #961)",
+			manifest.Config.MediaType, helmConfigMediaType)
+	}
+	if len(manifest.Layers) != 1 {
+		t.Fatalf("len(manifest.Layers) = %d, want 1", len(manifest.Layers))
+	}
+	if manifest.Layers[0].MediaType != helmLayerMediaType {
+		t.Errorf("layer.mediaType = %q, want %q",
+			manifest.Layers[0].MediaType, helmLayerMediaType)
+	}
+	// AICR's custom artifactType MUST NOT appear anywhere in the
+	// pushed manifest — its presence is the regression signal.
+	if manifest.Config.MediaType == artifactType {
+		t.Errorf("Helm OCI manifest still uses AICR artifactType %q", artifactType)
+	}
+}
+
+// TestPackageAndPushHelmChart_RewritesChartYAMLOnDisk verifies the
+// on-disk side effect required for `helm install --version <tag>` to
+// resolve to the same chart bytes that `helm pull` validated. Without
+// the rewrite, a recipe-version → OCI-tag mismatch (e.g., recipe
+// metadata "0.1.0" vs CI tag "5bc50950-argocd-helm-oci") leaves the
+// pushed config.version pointing at "0.1.0" while the tag is the CI
+// SHA — helm install would fail with a version mismatch.
+//
+// Uses a stub OCI layout (no remote push) by exercising
+// loadAndRewriteChartYAML directly, then asserting the rewrite is
+// visible on disk for the next step.
+func TestPackageAndPushHelmChart_RewritesChartYAMLOnDisk(t *testing.T) {
+	source := writeHelmChartFixture(t, "aicr-bundle")
+
+	meta, err := loadAndRewriteChartYAML(source, "5bc50950-argocd-helm-oci")
+	if err != nil {
+		t.Fatalf("loadAndRewriteChartYAML() error = %v", err)
+	}
+	if meta.Version != "5bc50950-argocd-helm-oci" {
+		t.Errorf("returned meta.Version = %q, want rewrite to tag", meta.Version)
+	}
+
+	raw, err := os.ReadFile(filepath.Join(source, "Chart.yaml"))
+	if err != nil {
+		t.Fatalf("read Chart.yaml: %v", err)
+	}
+	var diskMeta chartYAML
+	if err := yaml.Unmarshal(raw, &diskMeta); err != nil {
+		t.Fatalf("unmarshal Chart.yaml: %v", err)
+	}
+	if diskMeta.Version != "5bc50950-argocd-helm-oci" {
+		t.Errorf("on-disk Chart.yaml version = %q, want rewrite to OCI tag", diskMeta.Version)
+	}
+}
+
+// TestPackageAndPushHelmChart_RejectsInvalidOptions covers the early
+// validation paths in PackageAndPushHelmChart that surface caller errors
+// before any registry I/O. Helm OCI bundles get pushed from CI lanes,
+// so cheap fast-fail on bad inputs prevents masking real errors with
+// remote-side ones.
+func TestPackageAndPushHelmChart_RejectsInvalidOptions(t *testing.T) {
+	source := writeHelmChartFixture(t, "aicr-bundle")
+
+	tests := []struct {
+		name string
+		opts HelmChartOptions
+		want string
+	}{
+		{
+			name: "nil reference",
+			opts: HelmChartOptions{
+				SourceDir: source,
+				OutputDir: t.TempDir(),
+			},
+			want: "OCI reference is required",
+		},
+		{
+			name: "non-OCI reference",
+			opts: HelmChartOptions{
+				SourceDir: source,
+				OutputDir: t.TempDir(),
+				Reference: &Reference{
+					Registry:   "localhost:5000",
+					Repository: "test/chart",
+					Tag:        "1.0.0",
+					IsOCI:      false,
+				},
+			},
+			want: "OCI reference is required",
+		},
+		{
+			name: "empty tag",
+			opts: HelmChartOptions{
+				SourceDir: source,
+				OutputDir: t.TempDir(),
+				Reference: &Reference{
+					Registry:   "localhost:5000",
+					Repository: "test/chart",
+					IsOCI:      true,
+				},
+			},
+			want: "tag is required",
+		},
+		{
+			name: "non-semver tag",
+			opts: HelmChartOptions{
+				SourceDir: source,
+				OutputDir: t.TempDir(),
+				Reference: &Reference{
+					Registry:   "localhost:5000",
+					Repository: "test/chart",
+					Tag:        "abc123-not-semver",
+					IsOCI:      true,
+				},
+			},
+			want: "Helm OCI requires a semver-compatible tag",
+		},
+		{
+			name: "invalid registry",
+			opts: HelmChartOptions{
+				SourceDir: source,
+				OutputDir: t.TempDir(),
+				Reference: &Reference{
+					Registry:   "spaces are bad",
+					Repository: "test/chart",
+					Tag:        "1.0.0",
+					IsOCI:      true,
+				},
+			},
+			want: "invalid",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := PackageAndPushHelmChart(context.Background(), tt.opts)
+			if err == nil {
+				t.Fatal("PackageAndPushHelmChart() expected error, got nil")
+			}
+			if result != nil {
+				t.Errorf("PackageAndPushHelmChart() result = %+v, want nil on error", result)
+			}
+			if !strings.Contains(err.Error(), tt.want) {
+				t.Errorf("PackageAndPushHelmChart() error = %q, want substring %q", err.Error(), tt.want)
+			}
+		})
+	}
+}
+
+// TestPackageAndPushHelmChart_MissingChartYAML covers the source-dir
+// validation path: if Chart.yaml is absent the push must fail fast at
+// the rewrite step, not after staging an empty manifest.
+func TestPackageAndPushHelmChart_MissingChartYAML(t *testing.T) {
+	emptyDir := t.TempDir() // no Chart.yaml
+	result, err := PackageAndPushHelmChart(context.Background(), HelmChartOptions{
+		SourceDir: emptyDir,
+		OutputDir: t.TempDir(),
+		Reference: &Reference{
+			Registry:   "localhost:5000",
+			Repository: "test/chart",
+			Tag:        "1.0.0",
+			IsOCI:      true,
+		},
+	})
+	if err == nil {
+		t.Fatal("PackageAndPushHelmChart() expected error for missing Chart.yaml, got nil")
+	}
+	if result != nil {
+		t.Errorf("PackageAndPushHelmChart() result = %+v, want nil on error", result)
+	}
+	if !strings.Contains(err.Error(), "Chart.yaml") {
+		t.Errorf("PackageAndPushHelmChart() error = %q, want to mention Chart.yaml", err.Error())
+	}
+}
+
+// TestPackageAndPushHelmChart_SourceEqualsOutputDir guards against the
+// `helm-chart-work/helm-chart-work/...` recursion bug observed in
+// production: the CLI invokes PackageAndPushHelmChart with SourceDir
+// == OutputDir (both point at `./bundle`). An earlier revision placed
+// the staging dir under OutputDir, so copyDir's recursive walk picked
+// up the freshly-created staging dir and copied it into itself until
+// path length hit ENAMETOOLONG. Stage MUST be outside SourceDir.
+func TestPackageAndPushHelmChart_SourceEqualsOutputDir(t *testing.T) {
+	dir := writeHelmChartFixture(t, "aicr-bundle")
+	registry := newFakeOCIRegistry(t)
+	defer registry.Close()
+	registryHost := strings.TrimPrefix(registry.URL, "http://")
+
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	// SourceDir == OutputDir — the CLI's actual call shape.
+	result, err := PackageAndPushHelmChart(ctx, HelmChartOptions{
+		SourceDir: dir,
+		OutputDir: dir,
+		Reference: &Reference{
+			Registry:   registryHost,
+			Repository: "test/aicr-bundle",
+			Tag:        "1.0.0",
+			IsOCI:      true,
+		},
+		PlainHTTP: true,
+		Version:   "v0.9.0-test",
+	})
+	if err != nil {
+		t.Fatalf("PackageAndPushHelmChart() error = %v (SourceDir==OutputDir recursion regression)", err)
+	}
+	if result == nil {
+		t.Fatal("PackageAndPushHelmChart() result = nil")
+	}
+
+	// Caller's source tree must NOT contain a stray staging dir after
+	// the push — that's the on-disk evidence of the recursion bug.
+	if _, statErr := os.Stat(filepath.Join(dir, "helm-chart-work")); statErr == nil {
+		t.Errorf("found leftover helm-chart-work under SourceDir — staging must happen outside SourceDir")
+	}
+}
+
+// TestPackageAndPushHelmChart_PushesAgainstFakeRegistry exercises the
+// full happy path including the network roundtrip against an httptest
+// OCI registry. Beyond coverage for pushHelmOCIFromStore (the
+// rest-of-the-function dead-after-staging path that was 0%), the
+// returned PackageAndPushResult is asserted on so unparam sees the
+// result as consumed in-package — see #843's lint follow-up.
+func TestPackageAndPushHelmChart_PushesAgainstFakeRegistry(t *testing.T) {
+	source := writeHelmChartFixture(t, "aicr-bundle")
+	registry := newFakeOCIRegistry(t)
+	defer registry.Close()
+
+	registryHost := strings.TrimPrefix(registry.URL, "http://")
+
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	result, err := PackageAndPushHelmChart(ctx, HelmChartOptions{
+		SourceDir: source,
+		OutputDir: t.TempDir(),
+		Reference: &Reference{
+			Registry:   registryHost,
+			Repository: "test/aicr-bundle",
+			Tag:        "1.0.0",
+			IsOCI:      true,
+		},
+		PlainHTTP: true,
+		Version:   "v0.9.0-test",
+	})
+	if err != nil {
+		t.Fatalf("PackageAndPushHelmChart() error = %v", err)
+	}
+	if result == nil {
+		t.Fatal("PackageAndPushHelmChart() result = nil, want non-nil on success")
+	}
+	if !strings.HasPrefix(result.Digest, "sha256:") {
+		t.Errorf("result.Digest = %q, want sha256: prefix", result.Digest)
+	}
+	if result.MediaType != ociv1.MediaTypeImageManifest {
+		t.Errorf("result.MediaType = %q, want %q", result.MediaType, ociv1.MediaTypeImageManifest)
+	}
+	if !strings.Contains(result.Reference, "test/aicr-bundle:1.0.0") {
+		t.Errorf("result.Reference = %q, want to contain test/aicr-bundle:1.0.0", result.Reference)
+	}
+	if result.Size <= 0 {
+		t.Errorf("result.Size = %d, want > 0", result.Size)
+	}
+
+	// #961 regression check: the pushed manifest's config.mediaType
+	// MUST be the Helm OCI config mediaType. `helm pull` reads this
+	// field to discover the chart artifactType — if a regression
+	// flips it back to AICR's generic artifactType, helm silently
+	// drops the tag from `/v2/<name>/tags/list` and the user sees
+	// "unable to locate any tags in provided repository" even though
+	// the artifact is in the registry. The push-side fake-registry
+	// test would have passed silently before this check existed.
+	//
+	// Round-trip the manifest: GET it back from the fake registry,
+	// decode, assert config.mediaType + layer.mediaType.
+	manifestURL := registry.URL + "/v2/test/aicr-bundle/manifests/1.0.0"
+	req, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, manifestURL, nil)
+	if reqErr != nil {
+		t.Fatalf("new manifest GET request: %v", reqErr)
+	}
+	req.Header.Set("Accept", ociv1.MediaTypeImageManifest)
+	resp, getErr := http.DefaultClient.Do(req)
+	if getErr != nil {
+		t.Fatalf("manifest GET: %v", getErr)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		t.Fatalf("manifest GET status = %d, want 200", resp.StatusCode)
+	}
+	manifestBytes, readErr := io.ReadAll(resp.Body)
+	if readErr != nil {
+		t.Fatalf("manifest GET read: %v", readErr)
+	}
+	var manifest ociv1.Manifest
+	if jsonErr := json.Unmarshal(manifestBytes, &manifest); jsonErr != nil {
+		t.Fatalf("manifest JSON decode: %v\nbody: %s", jsonErr, manifestBytes)
+	}
+	if manifest.Config.MediaType != helmConfigMediaType {
+		t.Errorf("pushed manifest config.mediaType = %q, want %q (#961 regression — helm pull would drop this tag)",
+			manifest.Config.MediaType, helmConfigMediaType)
+	}
+	if len(manifest.Layers) != 1 {
+		t.Fatalf("pushed manifest has %d layers, want 1", len(manifest.Layers))
+	}
+	if manifest.Layers[0].MediaType != helmLayerMediaType {
+		t.Errorf("pushed manifest layer[0].mediaType = %q, want %q",
+			manifest.Layers[0].MediaType, helmLayerMediaType)
+	}
+}
+
+// newFakeOCIRegistry stands up an httptest server that handles just
+// enough of the OCI distribution-spec to accept a push: blob uploads
+// (POST/PATCH/PUT under /v2/<name>/blobs/uploads/), blob HEADs (to
+// satisfy oras's existence check), and manifest PUTs. Anything else
+// returns 404 — the goal isn't a conformant registry, just the
+// endpoints PackageAndPushHelmChart's push path actually touches.
+func newFakeOCIRegistry(t *testing.T) *httptest.Server {
+	t.Helper()
+	// oras's push path uploads multiple blobs concurrently
+	// (configured via copyOpts.Concurrency = OCIPushConcurrency). The
+	// handler closure runs on the httptest goroutine pool, so the maps
+	// below MUST be lock-protected — without the mutex, `go test -race`
+	// catches concurrent map writes during chart-layer + config-blob
+	// uploads.
+	var mu sync.Mutex
+	blobs := make(map[string][]byte)
+	manifests := make(map[string][]byte) // keyed by "<name>:<reference>"
+
+	mux := http.NewServeMux()
+	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		path := r.URL.Path
+		switch {
+		case path == "/v2/" || path == "/v2":
+			w.Header().Set("Docker-Distribution-API-Version", "registry/2.0")
+			w.WriteHeader(http.StatusOK)
+			return
+
+		case strings.HasSuffix(path, "/blobs/uploads/") && r.Method == http.MethodPost:
+			// Start an upload session. The session URL is monotonic;
+			// embed a counter so successive uploads don't collide.
+			mu.Lock()
+			session := strconv.Itoa(len(blobs) + 1)
+			mu.Unlock()
+			w.Header().Set("Location", path+session)
+			w.WriteHeader(http.StatusAccepted)
+			return
+
+		case strings.Contains(path, "/blobs/uploads/") && (r.Method == http.MethodPatch || r.Method == http.MethodPut):
+			// Drain the body — the digest is in the query string on PUT.
+			body, _ := io.ReadAll(r.Body)
+			if r.Method == http.MethodPut {
+				digest := r.URL.Query().Get("digest")
+				if digest != "" {
+					mu.Lock()
+					blobs[digest] = body
+					mu.Unlock()
+				}
+				w.Header().Set("Docker-Content-Digest", digest)
+				w.WriteHeader(http.StatusCreated)
+				return
+			}
+			w.Header().Set("Range", fmt.Sprintf("0-%d", len(body)-1))
+			w.WriteHeader(http.StatusAccepted)
+			return
+
+		case strings.Contains(path, "/blobs/sha256:") && (r.Method == http.MethodHead || r.Method == http.MethodGet):
+			// blob existence check / fetch.
+			_, digest, _ := strings.Cut(path, "/blobs/")
+			mu.Lock()
+			data, ok := blobs[digest]
+			mu.Unlock()
+			if ok {
+				w.Header().Set("Docker-Content-Digest", digest)
+				w.Header().Set("Content-Length", strconv.Itoa(len(data)))
+				if r.Method == http.MethodGet {
+					_, _ = w.Write(data)
+				} else {
+					w.WriteHeader(http.StatusOK)
+				}
+				return
+			}
+			w.WriteHeader(http.StatusNotFound)
+			return
+
+		case strings.Contains(path, "/manifests/") && r.Method == http.MethodPut:
+			before, reference, _ := strings.Cut(path, "/manifests/")
+			name := strings.TrimPrefix(before, "/v2/")
+			body, _ := io.ReadAll(r.Body)
+			mu.Lock()
+			manifests[name+":"+reference] = body
+			mu.Unlock()
+			digest := digestSHA256(body)
+			w.Header().Set("Docker-Content-Digest", digest)
+			w.WriteHeader(http.StatusCreated)
+			return
+
+		case strings.Contains(path, "/manifests/") && (r.Method == http.MethodHead || r.Method == http.MethodGet):
+			before, reference, _ := strings.Cut(path, "/manifests/")
+			name := strings.TrimPrefix(before, "/v2/")
+			mu.Lock()
+			data, ok := manifests[name+":"+reference]
+			mu.Unlock()
+			if ok {
+				w.Header().Set("Docker-Content-Digest", digestSHA256(data))
+				w.Header().Set("Content-Type", ociv1.MediaTypeImageManifest)
+				w.Header().Set("Content-Length", strconv.Itoa(len(data)))
+				if r.Method == http.MethodGet {
+					_, _ = w.Write(data)
+				} else {
+					w.WriteHeader(http.StatusOK)
+				}
+				return
+			}
+			w.WriteHeader(http.StatusNotFound)
+			return
+		}
+		w.WriteHeader(http.StatusNotFound)
+	})
+	return httptest.NewServer(mux)
+}
+
+func digestSHA256(data []byte) string {
+	sum := sha256.Sum256(data)
+	return "sha256:" + hex.EncodeToString(sum[:])
+}
+
+func readTGZEntries(t *testing.T, tgz []byte) []string {
+	t.Helper()
+	gz, err := gzip.NewReader(strings.NewReader(string(tgz)))
+	if err != nil {
+		t.Fatalf("gzip.NewReader: %v", err)
+	}
+	defer gz.Close()
+	tr := tar.NewReader(gz)
+	var names []string
+	for {
+		hdr, err := tr.Next()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			t.Fatalf("tar.Next: %v", err)
+		}
+		names = append(names, hdr.Name)
+	}
+	return names
+}