Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion .github/actions/pr-gate/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ outputs:
should_run:
description: "true if the workflow should proceed, false otherwise"
value: ${{ steps.gate.outputs.should_run }}
labels_json:
description: "JSON array of PR label names for push-triggered mirror runs, or [] otherwise"
value: ${{ steps.gate.outputs.labels_json }}

runs:
using: composite
Expand All @@ -35,20 +38,23 @@ runs:
REQUIRED_LABEL: ${{ inputs.required_label }}
run: |
if [ "$EVENT_NAME" != "push" ]; then
echo "labels_json=[]" >> "$GITHUB_OUTPUT"
echo "should_run=true" >> "$GITHUB_OUTPUT"
exit 0
fi

if [ "$GET_PR_INFO_OUTCOME" != "success" ]; then
echo "labels_json=[]" >> "$GITHUB_OUTPUT"
echo "should_run=false" >> "$GITHUB_OUTPUT"
exit 0
fi

head_sha="$(jq -r '.head.sha' <<< "$PR_INFO")"
labels_json="$(jq -c '[.labels[].name]' <<< "$PR_INFO")"
if [ -z "$REQUIRED_LABEL" ]; then
has_label=true
else
has_label="$(jq -r --arg L "$REQUIRED_LABEL" '[.labels[].name] | index($L) != null' <<< "$PR_INFO")"
has_label="$(jq -r --arg L "$REQUIRED_LABEL" 'index($L) != null' <<< "$labels_json")"
fi

# Only trust copied pull-request/* pushes that still match the PR head
Expand All @@ -59,4 +65,5 @@ runs:
should_run=false
fi

echo "labels_json=$labels_json" >> "$GITHUB_OUTPUT"
echo "should_run=$should_run" >> "$GITHUB_OUTPUT"
7 changes: 5 additions & 2 deletions .github/workflows/branch-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,14 @@ jobs:
- name: Cache Rust target and registry
uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2
with:
# Separate caches for clippy (check-like) vs test (full build)
# so they don't thrash each other's artifacts
# Keep branch-check caches partitioned by runner architecture; lint
# and test intentionally share the same job-local target directory.
shared-key: rust-checks-${{ matrix.runner }}
# Cache the sccache directory too
cache-directories: .cache/sccache
# Preserve compiled artifacts from failed lint/test runs so the next
# push to the same PR branch does not start from a cold cache.
cache-on-failure: "true"

- name: Format
run: mise run rust:format:check
Expand Down
122 changes: 112 additions & 10 deletions .github/workflows/branch-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ on:

permissions: {}

# Allow one active run per workflow and git ref: a newer run in the same
# group cancels the run that is still in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
pr_metadata:
name: Resolve PR metadata
Expand All @@ -17,44 +21,142 @@ jobs:
pull-requests: read
outputs:
should_run: ${{ steps.gate.outputs.should_run }}
run_core_e2e: ${{ steps.labels.outputs.run_core_e2e }}
run_gpu_e2e: ${{ steps.labels.outputs.run_gpu_e2e }}
run_any_e2e: ${{ steps.labels.outputs.run_any_e2e }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- id: gate
uses: ./.github/actions/pr-gate
with:
required_label: test:e2e
# Decide which E2E suites run. Writes run_core_e2e, run_gpu_e2e and
# run_any_e2e to the step outputs; only evaluated when the gate step reported
# should_run == 'true'.
# NOTE(review): the gate step is invoked with required_label: test:e2e. If the
# gate requires that label before reporting should_run=true, a PR labelled
# only test:e2e-gpu would never reach this step - confirm against the gate.
- id: labels
if: steps.gate.outputs.should_run == 'true'
env:
EVENT_NAME: ${{ github.event_name }}
LABELS_JSON: ${{ steps.gate.outputs.labels_json }}
shell: bash
run: |
set -euo pipefail
# Any event other than push enables both suites unconditionally.
if [ "$EVENT_NAME" != "push" ]; then
run_core_e2e=true
run_gpu_e2e=true
else
# Push events: a suite is enabled only when its label appears in
# LABELS_JSON, which is queried as a JSON array of label names.
run_core_e2e="$(jq -r 'index("test:e2e") != null' <<< "$LABELS_JSON")"
run_gpu_e2e="$(jq -r 'index("test:e2e-gpu") != null' <<< "$LABELS_JSON")"
fi
# run_any_e2e is the logical OR of the two suite flags.
if [ "$run_core_e2e" = "true" ] || [ "$run_gpu_e2e" = "true" ]; then
run_any_e2e=true
else
run_any_e2e=false
fi
{
echo "run_core_e2e=$run_core_e2e"
echo "run_gpu_e2e=$run_gpu_e2e"
echo "run_any_e2e=$run_any_e2e"
} >> "$GITHUB_OUTPUT"

build-gateway:
needs: [pr_metadata]
if: needs.pr_metadata.outputs.should_run == 'true'
if: needs.pr_metadata.outputs.should_run == 'true' && needs.pr_metadata.outputs.run_core_e2e == 'true'
permissions:
contents: read
packages: write
uses: ./.github/workflows/docker-build.yml
with:
component: gateway
platform: linux/arm64
publish-manifest: false
image-tag: ${{ github.sha }}

build-supervisor:
needs: [pr_metadata]
if: needs.pr_metadata.outputs.should_run == 'true'
if: needs.pr_metadata.outputs.should_run == 'true' && needs.pr_metadata.outputs.run_any_e2e == 'true'
permissions:
contents: read
packages: write
uses: ./.github/workflows/docker-build.yml
with:
component: supervisor
platform: linux/arm64
publish-manifest: false
image-tag: ${{ github.sha }}

e2e:
needs: [pr_metadata, build-gateway, build-supervisor]
if: needs.pr_metadata.outputs.should_run == 'true'
if: needs.pr_metadata.outputs.should_run == 'true' && needs.pr_metadata.outputs.run_core_e2e == 'true'
permissions:
contents: read
packages: read
uses: ./.github/workflows/e2e-test.yml
with:
image-tag: ${{ github.sha }}-arm64
image-tag: ${{ github.sha }}
runner: linux-arm64-cpu8

# GPU E2E suite: calls the e2e-gpu-test reusable workflow with the image tag
# set to this commit's SHA. Depends on pr_metadata and build-supervisor only,
# and runs when both should_run and run_gpu_e2e are 'true'.
gpu-e2e:
needs: [pr_metadata, build-supervisor]
if: needs.pr_metadata.outputs.should_run == 'true' && needs.pr_metadata.outputs.run_gpu_e2e == 'true'
permissions:
contents: read
packages: read
uses: ./.github/workflows/e2e-gpu-test.yaml
with:
image-tag: ${{ github.sha }}

# Kubernetes E2E suite: calls the e2e-kubernetes-test reusable workflow with
# the image tag set to this commit's SHA. Waits for both image builds and runs
# when both should_run and run_core_e2e are 'true'.
# NOTE(review): only image-tag is passed, so every other input of the called
# workflow (for example a runner label, if it declares one) falls back to its
# default - confirm the images under this tag suit that default runner.
kubernetes-e2e:
needs: [pr_metadata, build-gateway, build-supervisor]
if: needs.pr_metadata.outputs.should_run == 'true' && needs.pr_metadata.outputs.run_core_e2e == 'true'
permissions:
contents: read
packages: read
uses: ./.github/workflows/e2e-kubernetes-test.yml
with:
image-tag: ${{ github.sha }}

# Aggregate status for the core E2E path. always() lets this job be evaluated
# even when a needed job failed or was skipped; the step below fails unless
# every listed job concluded "success".
core-e2e-result:
name: Core E2E result
needs: [pr_metadata, build-gateway, build-supervisor, e2e, kubernetes-e2e]
if: always() && needs.pr_metadata.outputs.should_run == 'true' && needs.pr_metadata.outputs.run_core_e2e == 'true'
runs-on: ubuntu-latest
steps:
- name: Verify core E2E jobs
env:
BUILD_GATEWAY_RESULT: ${{ needs.build-gateway.result }}
BUILD_SUPERVISOR_RESULT: ${{ needs.build-supervisor.result }}
E2E_RESULT: ${{ needs.e2e.result }}
KUBERNETES_E2E_RESULT: ${{ needs.kubernetes-e2e.result }}
run: |
set -euo pipefail
failed=0
# Each item is "<job name>:<result>"; it is split on the first colon.
for item in \
"build-gateway:$BUILD_GATEWAY_RESULT" \
"build-supervisor:$BUILD_SUPERVISOR_RESULT" \
"e2e:$E2E_RESULT" \
"kubernetes-e2e:$KUBERNETES_E2E_RESULT"; do
name="${item%%:*}"
result="${item#*:}"
# Report every non-success job instead of stopping at the first one.
if [ "$result" != "success" ]; then
echo "::error::$name concluded $result"
failed=1
fi
done
exit "$failed"

# Aggregate status for the GPU E2E path. always() lets this job be evaluated
# even when a needed job failed or was skipped; the step below fails unless
# build-supervisor and gpu-e2e both concluded "success".
gpu-e2e-result:
name: GPU E2E result
needs: [pr_metadata, build-supervisor, gpu-e2e]
if: always() && needs.pr_metadata.outputs.should_run == 'true' && needs.pr_metadata.outputs.run_gpu_e2e == 'true'
runs-on: ubuntu-latest
steps:
- name: Verify GPU E2E jobs
env:
BUILD_SUPERVISOR_RESULT: ${{ needs.build-supervisor.result }}
GPU_E2E_RESULT: ${{ needs.gpu-e2e.result }}
run: |
set -euo pipefail
failed=0
# Each item is "<job name>:<result>"; it is split on the first colon.
for item in \
"build-supervisor:$BUILD_SUPERVISOR_RESULT" \
"gpu-e2e:$GPU_E2E_RESULT"; do
name="${item%%:*}"
result="${item#*:}"
# Report every non-success job instead of stopping at the first one.
if [ "$result" != "success" ]; then
echo "::error::$name concluded $result"
failed=1
fi
done
exit "$failed"
Original file line number Diff line number Diff line change
@@ -1,65 +1,34 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

name: Branch Kubernetes E2E
name: Kubernetes E2E Test

on:
push:
branches:
- "pull-request/[0-9]+"
workflow_dispatch: {}

permissions: {}
workflow_call:
inputs:
image-tag:
description: "Image tag to test (typically the commit SHA)"
required: true
type: string
runner:
description: "GitHub Actions runner label"
required: false
type: string
default: "linux-amd64-cpu8"
checkout-ref:
description: "Git ref to check out for test inputs (defaults to the workflow SHA)"
required: false
type: string
default: ""

permissions:
contents: read
packages: read

jobs:
pr_metadata:
name: Resolve PR metadata
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: read
outputs:
should_run: ${{ steps.gate.outputs.should_run }}
steps:
- uses: actions/checkout@v6

- id: gate
uses: ./.github/actions/pr-gate
with:
required_label: test:e2e-kubernetes

build-gateway:
needs: [pr_metadata]
if: needs.pr_metadata.outputs.should_run == 'true'
permissions:
contents: read
packages: write
uses: ./.github/workflows/docker-build.yml
with:
component: gateway
platform: linux/amd64
publish-manifest: false

build-supervisor:
needs: [pr_metadata]
if: needs.pr_metadata.outputs.should_run == 'true'
permissions:
contents: read
packages: write
uses: ./.github/workflows/docker-build.yml
with:
component: supervisor
platform: linux/amd64
publish-manifest: false

kubernetes-e2e:
e2e-kubernetes:
name: Kubernetes E2E (Rust smoke)
needs: [pr_metadata, build-gateway, build-supervisor]
if: needs.pr_metadata.outputs.should_run == 'true'
# Bare runner: running kind-in-container hits nested-Docker / kubeconfig
# complications. The runner has Docker; mise installs helm, kubectl, and
# the Rust toolchain.
runs-on: linux-amd64-cpu8
runs-on: ${{ inputs.runner }}
timeout-minutes: 60
permissions:
contents: read
Expand All @@ -68,7 +37,9 @@ jobs:
MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
KIND_CLUSTER_NAME: kube-e2e-${{ github.run_id }}
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
with:
ref: ${{ inputs['checkout-ref'] || github.sha }}

- name: Install mise
run: |
Expand Down Expand Up @@ -100,7 +71,7 @@ jobs:

# mise.toml sets KUBECONFIG="{{config_root}}/kubeconfig"; helm/kind-action
# writes to ~/.kube/config. Materialize the kind context at the mise path
# so `mise run e2e:kubernetes` (and the wrapper's `kubectl --context=`)
# so `mise run e2e:kubernetes` (and the wrapper's `kubectl --context=...`)
# finds the kind cluster.
- name: Export kind kubeconfig to mise path
run: |
Expand All @@ -112,16 +83,14 @@ jobs:
run: |
set -euo pipefail
for component in gateway supervisor; do
src="ghcr.io/nvidia/openshell/${component}:${{ github.sha }}-amd64"
bare="ghcr.io/nvidia/openshell/${component}:${{ github.sha }}"
docker pull "$src"
docker tag "$src" "$bare"
kind load docker-image "$bare" --name "$KIND_CLUSTER_NAME"
image="ghcr.io/nvidia/openshell/${component}:${{ inputs.image-tag }}"
docker pull --platform linux/amd64 "$image"
kind load docker-image "$image" --name "$KIND_CLUSTER_NAME"
done

- name: Run Kubernetes E2E (Rust smoke)
env:
OPENSHELL_E2E_KUBE_CONTEXT: kind-${{ env.KIND_CLUSTER_NAME }}
IMAGE_TAG: ${{ github.sha }}
IMAGE_TAG: ${{ inputs.image-tag }}
OPENSHELL_REGISTRY: ghcr.io/nvidia/openshell
run: mise run --no-deps --skip-deps e2e:kubernetes
19 changes: 13 additions & 6 deletions .github/workflows/e2e-label-help.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: E2E Label Help

# When a `test:e2e*` label is applied, post a PR comment
# When an E2E label is applied, post a PR comment
# telling the maintainer the next manual step. We don't dispatch the workflow
# ourselves: a workflow_dispatch-triggered run does not surface in the PR's
# Checks tab, so we'd lose in-progress visibility. Instead we point the
Expand All @@ -19,7 +19,7 @@ permissions: {}
jobs:
hint:
name: Post next-step hint for E2E label
if: github.event.label.name == 'test:e2e' || github.event.label.name == 'test:e2e-gpu' || github.event.label.name == 'test:e2e-kubernetes'
if: github.event.label.name == 'test:e2e' || github.event.label.name == 'test:e2e-gpu'
runs-on: ubuntu-latest
permissions:
pull-requests: write
Expand All @@ -37,10 +37,17 @@ jobs:
run: |
set -euo pipefail

workflow_file=branch-e2e.yml
workflow_name="Branch E2E Checks"
case "$LABEL_NAME" in
test:e2e) workflow_file=branch-e2e.yml; workflow_name="Branch E2E Checks" ;;
test:e2e-gpu) workflow_file=test-gpu.yml; workflow_name="GPU Test" ;;
test:e2e-kubernetes) workflow_file=branch-kubernetes-e2e.yml; workflow_name="Branch Kubernetes E2E" ;;
test:e2e)
suite_summary="the standard E2E suite"
build_summary="gateway and supervisor images"
;;
test:e2e-gpu)
suite_summary="GPU E2E"
build_summary="supervisor image"
;;
*) echo "Unrecognized label $LABEL_NAME"; exit 1 ;;
esac

Expand All @@ -62,7 +69,7 @@ jobs:
workflow_link="[$workflow_name](https://github.com/$GH_REPO/actions/workflows/$workflow_file)"
instructions="Open $workflow_link, find the run for commit \`$short_pr\`, and click **Re-run all jobs** to execute with the label set."
fi
body="Label \`$LABEL_NAME\` applied for \`$short_pr\`. $instructions The matching required CI gate status on this PR will flip green automatically once the run finishes."
body="Label \`$LABEL_NAME\` applied for \`$short_pr\`. $instructions The run will execute $suite_summary after building the required $build_summary once. The matching required CI gate status on this PR will flip green automatically once the run finishes."
fi

gh pr comment "$PR_NUMBER" --body "$body"
Loading
Loading