diff --git a/.github/workflows/cli-build.yml b/.github/workflows/cli-build.yml
index 5bc7a9f0..2463f8ee 100644
--- a/.github/workflows/cli-build.yml
+++ b/.github/workflows/cli-build.yml
@@ -35,6 +35,15 @@ on:
     # caller downloads the uploaded `relayburn-cli-` artifacts and
     # stages them into `packages/relayburn/npm//bin/` before
     # `npm pack` + `npm publish`.
+    inputs:
+      release_version:
+        description: >-
+          Post-bump release version (e.g. "2.1.0") to bake into the
+          binary via CARGO_PKG_VERSION. Optional — when empty (PR/push
+          runs) we skip the lockstep sed and build with the on-disk
+          workspace version unchanged.
+        required: false
+        type: string
 
 permissions:
   contents: read
@@ -94,6 +103,33 @@ jobs:
             cargo-cli-${{ runner.os }}-${{ matrix.target }}-
             cargo-cli-${{ runner.os }}-
 
+      - name: Lockstep Cargo.toml to release version
+        # When invoked via the publish workflow, sed the workspace
+        # `[workspace.package].version` (and the path-dep `version = "X.Y"`
+        # pin in `crates/relayburn-cli/Cargo.toml`) so the `burn` binary
+        # built below carries the post-bump `CARGO_PKG_VERSION`. Without
+        # this, clap's `version,` directive bakes the pre-bump version
+        # into the binary and `burn --version` reports the previous
+        # release. Mirrors the regex used in publish.yml's "Lockstep the
+        # Rust workspace" step that runs against the published commit.
+        #
+        # Skipped when `release_version` is empty (PR/push runs) — the
+        # on-disk workspace version is correct in that case.
+        #
+        # The version reaches the script through `env` instead of inline
+        # expression interpolation, so it is never parsed as shell code.
+        if: inputs.release_version != ''
+        env:
+          RELEASE_VERSION: ${{ inputs.release_version }}
+        run: |
+          set -euo pipefail
+          RUST_VER="$RELEASE_VERSION"
+          RUST_MINOR=$(echo "$RUST_VER" | awk -F. '{print $1"."$2}')
+          echo "Rust workspace lockstep: $RUST_VER (minor pin: $RUST_MINOR)"
+          sed -i.bak -E "s/^version = \"[^\"]+\"$/version = \"$RUST_VER\"/" Cargo.toml
+          sed -i.bak -E "s|(relayburn-sdk = \\{ path = \"\\.\\./relayburn-sdk\", version = \")[^\"]+(\" \\})|\\1$RUST_MINOR\\2|" crates/relayburn-cli/Cargo.toml
+          rm -f Cargo.toml.bak crates/relayburn-cli/Cargo.toml.bak
+
       - name: Build burn binary for ${{ matrix.target }}
         # The binary name is `burn` (the `[[bin]]` rename in
         # `crates/relayburn-cli/Cargo.toml`); the crate is `relayburn-cli`.
@@ -149,11 +185,36 @@ jobs:
       - name: Smoke test (`burn --help`)
         # Native legs only. The aarch64-linux leg cross-compiles on an x64
         # host so the runner's interpreter cannot execute the binary.
+        #
+        # When `release_version` is supplied (publish-workflow caller), we
+        # also assert that `burn --version` reports the expected version.
+        # This catches the regression where the binary was built with a
+        # stale `CARGO_PKG_VERSION` (the 2026-05-04 incident — 2.1.0
+        # platform package shipping a 2.0.0 binary).
         if: matrix.target != 'aarch64-unknown-linux-gnu'
+        env:
+          EXPECTED_VERSION: ${{ inputs.release_version }}
         run: |
           set -euo pipefail
+
+          assert_version() {
+            local label="$1"
+            local actual="$2"
+            if [ -z "${EXPECTED_VERSION:-}" ]; then
+              echo "$label: $actual (no expected version supplied; skipping assertion)"
+              return 0
+            fi
+            local expected="burn $EXPECTED_VERSION"
+            if [ "$actual" != "$expected" ]; then
+              echo "::error title=Version mismatch::$label reported '$actual', expected '$expected'. The binary was likely built with a stale CARGO_PKG_VERSION — the publish workflow should have lockstepped Cargo.toml before this build."
+              exit 1
+            fi
+            echo "$label: $actual (matches expected)"
+          }
+
           packages/relayburn/npm/${{ matrix.short }}/bin/burn --help
-          packages/relayburn/npm/${{ matrix.short }}/bin/burn --version
+          direct_version=$(packages/relayburn/npm/${{ matrix.short }}/bin/burn --version)
+          assert_version "direct binary" "$direct_version"
 
           smoke_dir="$(mktemp -d)"
           umbrella_dir="$(mktemp -d)"
@@ -162,10 +223,12 @@ jobs:
             --ignore-scripts --no-audit --no-fund \
             ./packages/relayburn/npm/${{ matrix.short }}
           "$smoke_dir/node_modules/.bin/burn" --help
-          "$smoke_dir/node_modules/.bin/burn" --version
+          platform_version=$("$smoke_dir/node_modules/.bin/burn" --version)
+          assert_version "platform package" "$platform_version"
 
           npm install --prefix "$umbrella_dir" --no-save --omit=optional \
             --ignore-scripts --no-audit --no-fund \
             ./packages/relayburn
           NODE_PATH="$smoke_dir/node_modules" "$umbrella_dir/node_modules/.bin/burn" --help
-          NODE_PATH="$smoke_dir/node_modules" "$umbrella_dir/node_modules/.bin/burn" --version
+          umbrella_version=$(NODE_PATH="$smoke_dir/node_modules" "$umbrella_dir/node_modules/.bin/burn" --version)
+          assert_version "umbrella package" "$umbrella_version"
diff --git a/.github/workflows/napi-build.yml b/.github/workflows/napi-build.yml
index a64fe35a..33d741c3 100644
--- a/.github/workflows/napi-build.yml
+++ b/.github/workflows/napi-build.yml
@@ -35,6 +35,15 @@ on:
     # caller downloads the uploaded `relayburn-sdk-` artifacts and
     # stages them into `packages/sdk-node/npm//` before
     # `npm pack` + `npm publish`.
+    inputs:
+      release_version:
+        description: >-
+          Post-bump release version (e.g. "2.1.0") to bake into the
+          napi-rs binding via CARGO_PKG_VERSION. Optional — when empty
+          (PR/push runs) we skip the lockstep sed and build with the
+          on-disk workspace version unchanged.
+        required: false
+        type: string
 
 permissions:
   contents: read
@@ -121,6 +130,29 @@ jobs:
         working-directory: packages/sdk-node
         run: pnpm install --ignore-workspace --no-frozen-lockfile
 
+      - name: Lockstep Cargo.toml to release version
+        # When invoked via the publish workflow, sed the workspace
+        # `[workspace.package].version` (and the path-dep `version = "X.Y"`
+        # pin in `crates/relayburn-sdk-node/Cargo.toml`) so the napi-rs
+        # binding built below carries the post-bump `CARGO_PKG_VERSION`.
+        # Mirrors the lockstep regex in publish.yml and cli-build.yml.
+        #
+        # Skipped when `release_version` is empty (PR/push runs).
+        #
+        # The version reaches the script through `env` instead of inline
+        # expression interpolation, so it is never parsed as shell code.
+        if: inputs.release_version != ''
+        env:
+          RELEASE_VERSION: ${{ inputs.release_version }}
+        run: |
+          set -euo pipefail
+          RUST_VER="$RELEASE_VERSION"
+          RUST_MINOR=$(echo "$RUST_VER" | awk -F. '{print $1"."$2}')
+          echo "Rust workspace lockstep: $RUST_VER (minor pin: $RUST_MINOR)"
+          sed -i.bak -E "s/^version = \"[^\"]+\"$/version = \"$RUST_VER\"/" Cargo.toml
+          sed -i.bak -E "s|(relayburn-sdk = \\{ path = \"\\.\\./relayburn-sdk\", version = \")[^\"]+(\" \\})|\\1$RUST_MINOR\\2|" crates/relayburn-sdk-node/Cargo.toml
+          rm -f Cargo.toml.bak crates/relayburn-sdk-node/Cargo.toml.bak
+
       - name: Build napi binding for ${{ matrix.target }}
         # `napi build` reads `packages/sdk-node/package.json`'s `napi`
         # block to learn the binary name, then dispatches to `cargo build`
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 06835232..ab5471e6 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -68,86 +68,47 @@ concurrency:
 # crates.io still ships exactly two crates (`relayburn-sdk` + `relayburn-cli`).
 
 jobs:
-  # Build the four `burn` binaries the platform packages need before publish.
-  # `cli-build.yml` is a reusable workflow that runs the same matrix it
-  # validates on PRs and uploads `relayburn-cli-` artifacts.
-  build-cli:
-    name: Build CLI binaries
-    uses: ./.github/workflows/cli-build.yml
-    secrets: inherit
-
-  # Build the four napi-rs `.node` artifacts the SDK platform packages need.
-  build-sdk:
-    name: Build SDK napi bindings
-    uses: ./.github/workflows/napi-build.yml
-    secrets: inherit
-
-  publish:
+  # Compute the release version BEFORE the build matrices kick off so the
+  # CLI and napi reusable workflows can sed Cargo.toml's workspace version
+  # to the post-bump value before `cargo build` runs. Without this, the
+  # `burn` binary inside `@relayburn/cli-*@N+1` would carry the
+  # pre-bump (`N`) `CARGO_PKG_VERSION` baked in by clap's `version,`
+  # directive, so `burn --version` would report the previous release
+  # forever (the original 2026-05-04 incident: 2.1.0 platform package
+  # shipping a 2.0.0 binary).
+  #
+  # **This job is the single source of truth for the release version.**
+  # It runs the same heal-baseline + bump logic the publish job used to
+  # run inline, and the publish job below now consumes this output
+  # rather than recomputing. That eliminates the race window where
+  # local-vs-npm version drift between the precursor's checkout and the
+  # publish job's checkout could yield different "next release" values
+  # — and with it the echo chamber where build-cli's smoke test asserted
+  # against the same stale value baked into the binary.
+  #
+  # The job mutates package.json files in its own runner's filesystem
+  # (the heal step uses `npm version --allow-same-version`); those
+  # mutations are scoped to this job and don't affect the publish job's
+  # fresh checkout.
+  resolve-release-version:
+    name: Resolve release version
     runs-on: ubuntu-latest
-    needs: [build-cli, build-sdk]
     outputs:
-      versions: ${{ steps.bump.outputs.versions }}
-      release_version: ${{ steps.bump.outputs.release_version }}
+      release_version: ${{ steps.compute.outputs.release_version }}
     steps:
       - name: Checkout
         uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-
-      - name: Setup pnpm
-        uses: pnpm/action-setup@v5
 
       - name: Setup Node
         uses: actions/setup-node@v6
         with:
           node-version: '22.14.0'
           registry-url: 'https://registry.npmjs.org'
-          cache: 'pnpm'
-
-      - name: Install deps
-        run: pnpm install --frozen-lockfile
-
-      # Packages publish in lockstep. Build + test the whole workspace so every
-      # package's dist/ is fresh before `pnpm pack` rewrites workspace:* deps
-      # to concrete versions at pack time.
-      - name: Build workspace
-        run: pnpm -r run build
-
-      - name: Run tests
-        run: pnpm run test
-
-      # Rust port build + test gates the whole publish job. If the Rust
-      # tree is red at this commit, abort before npm ships anything so the
-      # release tag points at a commit where both trees pass.
-      #
-      # `rustup toolchain install` (no args) reads rust-toolchain.toml and
-      # installs both the channel and the listed components — no explicit
-      # `rustup component add` needed.
-      - name: Setup Rust toolchain
-        run: rustup toolchain install
-
-      - name: Cargo build + test
-        run: |
-          cargo build --workspace --all-targets
-          cargo test --workspace
 
-      # The 11-package target table. Each entry is `key:dir` where:
-      #   - `key` is the suffix used for git tags and changelog identifiers
-      #     (so `mcp` → tag `mcp-v`, `relayburn` → `relayburn-v`,
-      #     `sdk` → `sdk-v`, platform packages → `cli-darwin-arm64-v`
-      #     etc.). Distinct from the npm package name (read from each
-      #     directory's `package.json`).
-      #   - `dir` is the relative path to the package directory.
-      #
-      # The order matters for two reasons:
-      #   1. The `relayburn` umbrella's optionalDependencies must be synced
-      #      after platform versions are bumped (handled in the bump step).
-      #   2. The publish loop ships platform packages BEFORE the umbrella
-      #      that depends on them, so `npm install relayburn` post-publish
-      #      can resolve all four `@relayburn/cli-` optionalDeps.
-      #
-      # `@relayburn/sdk` (napi umbrella) similarly ships after its platform
-      # packages.
+      # Mirror the target table the publish job uses so the heal step
+      # below sees the same 11 packages. Kept in sync by hand with the
+      # publish job's copy. NOTE(review): publish already lists this job
+      # in `needs`, so presumably the table could be a job output instead.
       - name: Resolve target packages
         id: targets
         run: |
@@ -167,22 +128,14 @@ jobs:
             echo 'EOF'
           } >> "$GITHUB_OUTPUT"
 
-      # Lockstep baseline heal. The 11 keepers ship at the same version, so
-      # if any package's local version lags either its own npm `latest` or
-      # another workspace package, pull it up to the highest stable version
-      # across the whole set before the bump step runs. Two failure modes:
-      #
-      #   1. A previous publish run shipped @relayburn/*@X to npm but failed
-      #      at the Tag + push step (the original 2026-04-23 incident).
-      #   2. A new package was extracted into the workspace (e.g. the 8
-      #      platform packages bootstrapped at 0.0.1, getting absorbed into
-      #      the 1.10.x→2.0.0 lockstep).
-      #
-      # The downstream "Verify new versions are not yet published" step still
-      # catches the case where the post-bump version collides with an existing
-      # npm version, so a stray manual publish at a wildly higher version will
-      # surface as a publish-time error after the heal rather than silently
-      # promoting the whole workspace into it.
+      # Heal step — moved here unchanged from the publish job's "Heal local
+      # versions to lockstep baseline" step. Pulls every keeper up to
+      # the highest stable version across (local versions ∪ npm latest)
+      # so the bump applied below operates on a coherent baseline. If
+      # this step is missing, a workspace where one keeper's local
+      # version lags its own npm `latest` (the recovery case the heal
+      # was originally added for) would cause the precursor and the
+      # publish job to disagree on the next release version.
       - name: Heal local versions to lockstep baseline
         env:
           TARGETS: ${{ steps.targets.outputs.targets }}
@@ -264,40 +217,173 @@ jobs:
 
           node /tmp/lockstep-heal.mjs
 
-      - name: Bump versions
-        id: bump
-        env:
-          TARGETS: ${{ steps.targets.outputs.targets }}
+      # After heal, every keeper is at the same baseline. Bump the
+      # umbrella to derive the next release version — the other 10
+      # packages will be set to this same value by the publish job
+      # downstream, so the umbrella is a sufficient anchor.
+      - name: Compute next release version
+        id: compute
         run: |
           set -euo pipefail
           CUSTOM='${{ github.event.inputs.custom_version }}'
           BUMP='${{ github.event.inputs.version }}'
           PREID='${{ github.event.inputs.prerelease_id }}'
 
+          pushd packages/relayburn > /dev/null
+          if [ -n "$CUSTOM" ]; then
+            npm version "$CUSTOM" --no-git-tag-version --allow-same-version
+          elif [ "$BUMP" = "none" ]; then
+            : # keep existing version (post-heal baseline = "current")
+          elif [[ "$BUMP" == pre* ]]; then
+            npm version "$BUMP" --no-git-tag-version --preid="$PREID"
+          else
+            npm version "$BUMP" --no-git-tag-version
+          fi
+          NEW=$(node -p "require('./package.json').version")
+          popd > /dev/null
+
+          echo "release_version=$NEW" >> "$GITHUB_OUTPUT"
+          echo "Resolved release_version: $NEW"
+
+  # Build the four `burn` binaries the platform packages need before publish.
+  # `cli-build.yml` is a reusable workflow that runs the same matrix it
+  # validates on PRs and uploads `relayburn-cli-` artifacts.
+  build-cli:
+    name: Build CLI binaries
+    needs: resolve-release-version
+    uses: ./.github/workflows/cli-build.yml
+    with:
+      release_version: ${{ needs.resolve-release-version.outputs.release_version }}
+    secrets: inherit
+
+  # Build the four napi-rs `.node` artifacts the SDK platform packages need.
+  build-sdk:
+    name: Build SDK napi bindings
+    needs: resolve-release-version
+    uses: ./.github/workflows/napi-build.yml
+    with:
+      release_version: ${{ needs.resolve-release-version.outputs.release_version }}
+    secrets: inherit
+
+  publish:
+    runs-on: ubuntu-latest
+    # `resolve-release-version` is already a transitive dep via
+    # build-cli/build-sdk, but listing it explicitly lets this job read
+    # `needs.resolve-release-version.outputs.release_version` directly.
+    needs: [build-cli, build-sdk, resolve-release-version]
+    outputs:
+      versions: ${{ steps.bump.outputs.versions }}
+      release_version: ${{ steps.bump.outputs.release_version }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v5
+
+      - name: Setup Node
+        uses: actions/setup-node@v6
+        with:
+          node-version: '22.14.0'
+          registry-url: 'https://registry.npmjs.org'
+          cache: 'pnpm'
+
+      - name: Install deps
+        run: pnpm install --frozen-lockfile
+
+      # Packages publish in lockstep. Build + test the whole workspace so every
+      # package's dist/ is fresh before `pnpm pack` rewrites workspace:* deps
+      # to concrete versions at pack time.
+      - name: Build workspace
+        run: pnpm -r run build
+
+      - name: Run tests
+        run: pnpm run test
+
+      # Rust port build + test gates the whole publish job. If the Rust
+      # tree is red at this commit, abort before npm ships anything so the
+      # release tag points at a commit where both trees pass.
+      #
+      # `rustup toolchain install` (no args) reads rust-toolchain.toml and
+      # installs both the channel and the listed components — no explicit
+      # `rustup component add` needed.
+      - name: Setup Rust toolchain
+        run: rustup toolchain install
+
+      - name: Cargo build + test
+        run: |
+          cargo build --workspace --all-targets
+          cargo test --workspace
+
+      # The 11-package target table. Each entry is `key:dir` where:
+      #   - `key` is the suffix used for git tags and changelog identifiers
+      #     (so `mcp` → tag `mcp-v`, `relayburn` → `relayburn-v`,
+      #     `sdk` → `sdk-v`, platform packages → `cli-darwin-arm64-v`
+      #     etc.). Distinct from the npm package name (read from each
+      #     directory's `package.json`).
+      #   - `dir` is the relative path to the package directory.
+      #
+      # The order matters for two reasons:
+      #   1. The `relayburn` umbrella's optionalDependencies must be synced
+      #      after platform versions are bumped (handled in the bump step).
+      #   2. The publish loop ships platform packages BEFORE the umbrella
+      #      that depends on them, so `npm install relayburn` post-publish
+      #      can resolve all four `@relayburn/cli-` optionalDeps.
+      #
+      # `@relayburn/sdk` (napi umbrella) similarly ships after its platform
+      # packages.
+      - name: Resolve target packages
+        id: targets
+        run: |
+          {
+            echo 'targets<> "$GITHUB_OUTPUT"
+
+      # Apply the release version computed by `resolve-release-version`
+      # to every keeper's package.json. The precursor is the single
+      # source of truth for what version we're shipping; we don't
+      # recompute heal+bump here because that's exactly the path that
+      # let the precursor and publish job disagree (and let stale
+      # `CARGO_PKG_VERSION` leak into shipped binaries while the
+      # cli-build smoke test agreed with itself on the wrong number).
+      #
+      # `--allow-same-version` lets us idempotently set every package
+      # to the target version, including ones whose on-disk version
+      # already matches it (e.g. a `none` bump). The precursor's heal
+      # ran in its own job, so the package.json files here come from a
+      # fresh checkout and may still sit at pre-heal versions.
+      - name: Apply release version to all packages
+        id: bump
+        env:
+          TARGETS: ${{ steps.targets.outputs.targets }}
+          RELEASE_VER: ${{ needs.resolve-release-version.outputs.release_version }}
+        run: |
+          set -euo pipefail
+
           VERSIONS=""
           while IFS=: read -r key dir; do
             [ -z "$key" ] && continue
             pushd "$dir" > /dev/null
-            if [ -n "$CUSTOM" ]; then
-              npm version "$CUSTOM" --no-git-tag-version --allow-same-version
-            elif [ "$BUMP" = "none" ]; then
-              : # keep existing version (useful for first publish or re-publish)
-            elif [[ "$BUMP" == pre* ]]; then
-              npm version "$BUMP" --no-git-tag-version --preid="$PREID"
-            else
-              npm version "$BUMP" --no-git-tag-version
-            fi
-            NEW=$(node -p "require('./package.json').version")
-            VERSIONS+=" $key:$NEW"
+            npm version "$RELEASE_VER" --no-git-tag-version --allow-same-version
             popd > /dev/null
+            VERSIONS+=" $key:$RELEASE_VER"
           done <<< "$TARGETS"
           echo "versions=${VERSIONS# }" >> "$GITHUB_OUTPUT"
-
-          # The release version is the umbrella `relayburn` version (every
-          # keeper bumps to the same value, but this is the canonical anchor
-          # for the GitHub Release + Cargo workspace + tag stamps).
-          RELEASE_VER=$(node -p "require('./packages/relayburn/package.json').version")
           echo "release_version=$RELEASE_VER" >> "$GITHUB_OUTPUT"
 
       # Sync the umbrella `relayburn` and `@relayburn/sdk` (napi)
@@ -349,11 +435,12 @@ jobs:
           # Refresh Cargo.lock to reflect the new workspace version.
           cargo update --workspace
 
-      # Belt-and-suspenders alongside the heal step above: even if the
-      # local→npm baseline is in sync, the computed bump might collide with
-      # an existing version (e.g. someone manually published a one-off from
-      # another branch). Catch it before we waste a build + before npm
-      # rejects with a less specific error.
+      # Belt-and-suspenders alongside the precursor's heal: even if the
+      # local→npm baseline was in sync there, the computed release
+      # version might collide with an existing version (e.g. someone
+      # manually published a one-off from another branch between when
+      # the precursor ran and now). Catch it before we waste a build +
+      # before npm rejects with a less specific error.
       - name: Verify new versions are not yet published
         env:
           TARGETS: ${{ steps.targets.outputs.targets }}
@@ -878,6 +965,56 @@ jobs:
             ls -lh "$sdk_dst"
           done
 
+      # Ship-gate: verify the staged `burn` binaries actually carry the
+      # release version we're about to publish. The cli-build smoke test
+      # already asserts this per-leg against the `release_version` input,
+      # but that check is upstream of artifact upload/download/staging.
+      # Re-checking right before `npm publish` closes the loop against
+      # any artifact mix-up between then and now.
+      #
+      # The publish runner is x64-linux, so we can only `exec` the
+      # linux-x64-gnu binary natively. For the cross-built ones we fall
+      # back to a byte-level grep for the expected version string —
+      # clap embeds `CARGO_PKG_VERSION` as a literal in the binary's
+      # rodata, so a `grep -aF` on the raw bytes is a usable cross-arch
+      # substitute. It is weaker than the native check: it matches the
+      # bare `X.Y.Z` (not `burn X.Y.Z`), so the same digits appearing
+      # in unrelated embedded data would also pass.
+      - name: Verify staged CLI binaries carry release version
+        env:
+          RELEASE_VER: ${{ needs.resolve-release-version.outputs.release_version }}
+        run: |
+          set -euo pipefail
+          EXPECTED="burn $RELEASE_VER"
+          fail=0
+          for short in darwin-arm64 darwin-x64 linux-arm64-gnu linux-x64-gnu; do
+            bin="packages/relayburn/npm/${short}/bin/burn"
+            if [ ! -x "$bin" ]; then
+              echo "::error title=Missing staged binary::$bin not found or not executable" >&2
+              fail=1
+              continue
+            fi
+            if [ "$short" = "linux-x64-gnu" ]; then
+              actual=$("$bin" --version 2>&1 | head -1)
+              if [ "$actual" != "$EXPECTED" ]; then
+                echo "::error title=Binary version mismatch::$bin reported '$actual', expected '$EXPECTED'" >&2
+                fail=1
+              else
+                echo "$bin: $actual ✓ (native exec)"
+              fi
+            else
+              if grep -aF "$RELEASE_VER" "$bin" > /dev/null; then
+                echo "$bin: contains '$RELEASE_VER' literal ✓ (byte-level check; can't exec cross-arch on Linux runner)"
+              else
+                echo "::error title=Binary version not embedded::$bin does not contain literal '$RELEASE_VER' — built with stale CARGO_PKG_VERSION?" >&2
+                fail=1
+              fi
+            fi
+          done
+          if [ "$fail" -ne 0 ]; then
+            exit 1
+          fi
+
       # npm >= 11.5.1 is required for the OIDC trusted-publisher flow.
       - name: Install latest npm
         run: npm install -g npm@latest