PSAL-POSTECH · YWHyuk · May 18, 2026 · May 18, 2026
diff --git a/.github/workflows/build-sim-image.yml b/.github/workflows/build-sim-image.yml
@@ -1,29 +1,33 @@
 name: Build simulator image
 
-# Builds an Ubuntu-based image with the simulator side fully installed
-# (ASTRA-Sim analytical backend, chakra, python deps — exactly what
-# scripts/install-sim.sh produces on a bare host) and pushes it to
-# GitHub Container Registry. PRs run the build to validate the install
-# path; only pushes to main (and manual dispatch) publish the image.
+# Builds a multi-arch (linux/amd64 + linux/arm64) Ubuntu-based image
+# with the simulator side installed (see scripts/sim.Dockerfile) and
+# pushes it to GitHub Container Registry. PRs run the build to validate
+# the install path; only pushes to main (and manual dispatch) publish.
+#
+# Multi-arch strategy: each platform builds natively on its own GHA
+# runner (ubuntu-latest for amd64, ubuntu-24.04-arm for arm64). The
+# matrix job pushes each by digest only; the merge job assembles them
+# into a single multi-arch manifest under the human-readable tags.
+# This matches https://docs.docker.com/build/ci/github-actions/multi-platform/
+#
+# Why native runners and not QEMU? ASTRA-Sim is a C++ build via cmake.
+# Under x86 + QEMU arm64 emulation that compile balloons from ~5-7 min
+# to ~30-60 min — still under the GHA 6-hour job limit, but a lot of waste.
 
 on:
   push:
     branches: [main]
-    paths:
+    paths: &paths
       - 'scripts/install-sim.sh'
       - 'scripts/compile.sh'
       - 'scripts/sim.Dockerfile'
+      - '.dockerignore'
       - 'astra-sim'
       - '.gitmodules'
       - '.github/workflows/build-sim-image.yml'
   pull_request:
-    paths:
-      - 'scripts/install-sim.sh'
-      - 'scripts/compile.sh'
-      - 'scripts/sim.Dockerfile'
-      - 'astra-sim'
-      - '.gitmodules'
-      - '.github/workflows/build-sim-image.yml'
+    paths: *paths
   workflow_dispatch:
 
 permissions:
@@ -33,25 +37,45 @@ permissions:
 env:
   REGISTRY: ghcr.io
 
-# One in-flight build per ref; queued PR builds get cancelled in favour
-# of the newest commit. Main pushes finish.
 concurrency:
   group: build-sim-image-${{ github.ref }}
   cancel-in-progress: ${{ github.event_name == 'pull_request' }}
 
 jobs:
+  # ---------------------------------------------------------------------------
+  # Per-platform build. Each row in the matrix runs on its own native
+  # runner. On push / dispatch the result is pushed-by-digest (no tag);
+  # the merge job below combines digests into the tagged manifest. On
+  # PR runs we only verify the build succeeds and skip both publish and
+  # the merge job.
+  # ---------------------------------------------------------------------------
   build:
-    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - platform: linux/amd64
+            runner: ubuntu-latest
+          - platform: linux/arm64
+            runner: ubuntu-24.04-arm
+
+    runs-on: ${{ matrix.runner }}
+
     steps:
+      # ``linux/amd64`` → ``linux-amd64``. Used as a key for the digest
+      # artifact and the buildx cache scope so different platforms
+      # don't trample each other's GHA cache.
+      - name: Sanitise platform name
+        id: plat
+        run: |
+          p="${{ matrix.platform }}"
+          echo "key=${p//\//-}" >> "$GITHUB_OUTPUT"
+
       - name: Checkout (with submodules)
         uses: actions/checkout@v4
         with:
           submodules: recursive
 
-      # GHCR rejects uppercase characters in image names, but
-      # ${{ github.repository }} preserves the original case
-      # (PSAL-POSTECH/LLMServingSimSpec). Lowercase it once here so
-      # every downstream step sees a clean reference.
       - name: Resolve image name (lowercase)
         id: img
         run: |
@@ -69,8 +93,83 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      # latest → main only; sha-<short> → every build; branch tag for
-      # non-main branches so feature-branch images are addressable.
+      # PR: verify build only (no push, no digest export).
+      - name: Build (PR — verify only)
+        if: github.event_name == 'pull_request'
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: scripts/sim.Dockerfile
+          platforms: ${{ matrix.platform }}
+          push: false
+          cache-from: type=gha,scope=${{ steps.plat.outputs.key }}
+          cache-to: type=gha,mode=max,scope=${{ steps.plat.outputs.key }}
+
+      # main / dispatch: push by digest. The image gets uploaded to the
+      # registry under its content-addressable digest; no tag is set
+      # yet — the merge job below does that once all platforms succeed.
+      - name: Build and push by digest
+        if: github.event_name != 'pull_request'
+        id: build
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: scripts/sim.Dockerfile
+          platforms: ${{ matrix.platform }}
+          outputs: type=image,name=${{ steps.img.outputs.ref }},push-by-digest=true,name-canonical=true,push=true
+          cache-from: type=gha,scope=${{ steps.plat.outputs.key }}
+          cache-to: type=gha,mode=max,scope=${{ steps.plat.outputs.key }}
+
+      - name: Stash digest for the merge job
+        if: github.event_name != 'pull_request'
+        run: |
+          mkdir -p "${{ runner.temp }}/digests"
+          digest="${{ steps.build.outputs.digest }}"
+          touch "${{ runner.temp }}/digests/${digest#sha256:}"
+
+      - name: Upload digest artifact
+        if: github.event_name != 'pull_request'
+        uses: actions/upload-artifact@v4
+        with:
+          name: digest-${{ steps.plat.outputs.key }}
+          path: ${{ runner.temp }}/digests/*
+          if-no-files-found: error
+          retention-days: 1
+
+  # ---------------------------------------------------------------------------
+  # Collect per-platform digests, build the multi-arch manifest, and tag
+  # it under the human-readable names (latest / sha-xxxx / branch-name).
+  # Skipped on PRs since no digests were pushed.
+  # ---------------------------------------------------------------------------
+  merge:
+    if: github.event_name != 'pull_request'
+    needs: build
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Resolve image name (lowercase)
+        id: img
+        run: |
+          name="${GITHUB_REPOSITORY,,}/sim"
+          echo "ref=${REGISTRY}/${name}" >> "$GITHUB_OUTPUT"
+
+      - name: Download digests
+        uses: actions/download-artifact@v4
+        with:
+          path: ${{ runner.temp }}/digests
+          pattern: digest-*
+          merge-multiple: true
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Image tags + labels
         id: meta
         uses: docker/metadata-action@v5
@@ -81,27 +180,32 @@ jobs:
             type=sha,prefix=sha-,format=short
             type=ref,event=branch
 
-      - name: Build (and push if not PR)
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: scripts/sim.Dockerfile
-          push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
+      # buildx imagetools create takes ``-t <tag> -t <tag2> ... <ref@digest> <ref@digest>``
+      # and produces an OCI manifest list that points at each platform-
+      # specific digest. Each resulting tag (e.g. ``:latest``) is one that
+      # ``docker pull`` resolves to the pulling host's arch automatically.
+      - name: Create manifest list and push
+        working-directory: ${{ runner.temp }}/digests
+        run: |
+          docker buildx imagetools create \
+            $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+            $(printf '${{ steps.img.outputs.ref }}@sha256:%s ' *)
+
+      - name: Inspect published manifest
+        run: |
+          docker buildx imagetools inspect \
+            ${{ steps.img.outputs.ref }}:${{ steps.meta.outputs.version }}
 
       - name: Summary
         if: always()
         run: |
           {
-            echo "### Simulator image"
+            echo "### Simulator image (multi-arch)"
             echo
             echo "- **image:** \`${{ steps.img.outputs.ref }}\`"
+            echo "- **platforms:** \`linux/amd64\` (x86 hosts), \`linux/arm64\` (Apple Silicon Macs, ARM servers)"
             echo "- **tags:**"
             echo '```'
             echo "${{ steps.meta.outputs.tags }}"
             echo '```'
-            echo "- **pushed:** ${{ github.event_name != 'pull_request' }}"
           } >> "$GITHUB_STEP_SUMMARY"
diff --git a/scripts/README.md b/scripts/README.md
@@ -14,7 +14,7 @@ live with their module — only setup and build helpers are here.
 | `install-vllm-cpu.sh` | Bare-metal vLLM **CPU** install. Builds vLLM 0.19.0 from source with `VLLM_TARGET_DEVICE=cpu` into `.venv-cpu` (separate from the GPU venv). Works on x86_64 and aarch64 (NVIDIA Grace). Used by `profiler/profile-cpu.sh`. |
 | `install-sim.sh`  | Bare-metal simulator install for fresh Docker containers / minimal Linux hosts (root or non-root). Installs apt build deps, inits submodules, pip-installs sim Python deps, builds ASTRA-Sim's analytical backend, and installs the Chakra converter (`--no-deps`). Skips vLLM — profiler/bench need `install-vllm.sh` and a GPU. Idempotent. |
 | `compile.sh`      | Build ASTRA-Sim's analytical backend and install the Chakra trace converter. |
-| `sim.Dockerfile`  | Multi-stage Ubuntu 24.04 image. Stage 1 compiles ASTRA-Sim with the C++ toolchain; stage 2 carries only the simulator runtime — python + the deps `serving/` actually imports + the compiled binary + chakra. No compilers, no `.git`, no `transformers / datasets / xgboost / matplotlib / scikit-learn` (those live in the vLLM image, `docker-vllm.sh`). `.dockerignore` keeps the build context lean (no `perf/`, `results/`, venv, CMake outputs). Built and published to GHCR by `.github/workflows/build-sim-image.yml` on every push to `main` touching the install / build paths. Pull with `docker pull ghcr.io/psal-postech/llmservingsimspec/sim:latest`. |
+| `sim.Dockerfile`  | Multi-stage Ubuntu 24.04 image. Stage 1 compiles ASTRA-Sim with the C++ toolchain; stage 2 carries only the simulator runtime — python + the deps `serving/` actually imports + the compiled binary + chakra. No compilers, no `.git`, no `transformers / datasets / xgboost / matplotlib / scikit-learn` (those live in the vLLM image, `docker-vllm.sh`). `.dockerignore` keeps the build context lean (no `perf/`, `results/`, venv, CMake outputs). Built and published to GHCR by `.github/workflows/build-sim-image.yml` as a multi-arch manifest (`linux/amd64` + `linux/arm64`) on every push to `main` touching the install / build paths. Apple Silicon Macs pull the arm64 variant automatically. Pull with `docker pull ghcr.io/psal-postech/llmservingsimspec/sim:latest`. |
 
 ## Typical first-time setup