From 9b80927e2911aae3b957d98355792481d8869408 Mon Sep 17 00:00:00 2001 From: Manas Srivastava Date: Fri, 15 May 2026 09:32:50 +0530 Subject: [PATCH] ci(deploy): auto-deploy on push to master MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A worker fix shipped to master but never deployed because someone had to run `docker buildx build && kubectl set image` by hand. A user got the same broken expiry email twice as a result. Close that gap — for the worker repo, that's literally how the bug happened. On every push to master this workflow now: 1. checks out worker + sibling common/ + proto/ to match Dockerfile 2. runs `go test ./... -short -count=1` (fails the job on red tests) 3. builds linux/amd64 with GIT_SHA/BUILD_TIME/VERSION build-args 4. pushes ghcr.io/mastermanas805/instant-worker:master-${SHORT_SHA} + :latest 5. kubectl set image deployment/instant-worker + rollout status (180s) 6. verifies the deployment now points at the exact tag we built 7. shells into the new pod and curls localhost:8091/healthz to confirm the binary reports the new commit_id (best-effort; the prod image is distroless and may not have curl/wget, in which case the image-tag check above is the load-bearing gate) Operator action: add KUBECONFIG_B64 to repo secrets (base64-encoded kubeconfig). Without it the kubeconfig step fails fast with a clear error message. Concurrency group `deploy-Deploy` with cancel-in-progress=false so two merges in a row queue instead of racing. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/deploy.yml | 194 +++++++++++++++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 .github/workflows/deploy.yml diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..53dad1b --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,194 @@ +# instant.dev/worker — Auto-deploy on push to master +# +# Why this exists: +# On 2026-05-15, a worker code fix shipped to master but was never +# deployed — an operator had to manually `docker buildx build && +# kubectl set image`. A user received the same broken expiry email +# twice as a result. This workflow eliminates that gap. +# +# Build context note: +# The Dockerfile expects to be invoked from the parent of worker/, with +# sibling common/ and proto/ directories present. In CI we mirror that +# by checking out: +# . (workspace root) +# ├── worker/ (this repo) +# ├── common/ (sibling repo) +# └── proto/ (sibling repo) +# then `docker buildx build -f worker/Dockerfile .` from the workspace root. +# +# Required repo secret: +# KUBECONFIG_B64 — base64-encoded kubeconfig with permission to +# `kubectl set image deployment/instant-worker -n instant-infra`. +# +# GHCR auth uses the per-job GITHUB_TOKEN with `packages: write`. 
+
+name: Deploy
+
+on:
+  push:
+    branches: [master]
+  workflow_dispatch:
+
+concurrency:
+  group: deploy-${{ github.workflow }}
+  cancel-in-progress: false  # a second run waits for the first instead of racing it
+
+permissions:
+  contents: read
+  packages: write  # required for GITHUB_TOKEN to push the image to GHCR
+
+env:
+  IMAGE_REPO: ghcr.io/mastermanas805/instant-worker
+  K8S_NAMESPACE: instant-infra
+  K8S_DEPLOYMENT: instant-worker
+  K8S_CONTAINER: worker
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout worker (this repo) into ./worker
+        uses: actions/checkout@v4
+        with:
+          path: worker
+
+      - name: Checkout common sibling into ./common
+        uses: actions/checkout@v4
+        with:
+          repository: ${{ vars.COMMON_REPO || format('{0}/common', github.repository_owner) }}
+          token: ${{ secrets.GITHUB_TOKEN }}  # NOTE(review): repo-scoped token; a private sibling needs a PAT — confirm
+          path: common
+
+      - name: Checkout proto sibling into ./proto
+        uses: actions/checkout@v4
+        with:
+          repository: ${{ vars.PROTO_REPO || format('{0}/proto', github.repository_owner) }}
+          token: ${{ secrets.GITHUB_TOKEN }}  # NOTE(review): same token-scope caveat as the common checkout — confirm
+          path: proto
+
+      - name: Compute build metadata
+        id: meta
+        run: |
+          SHORT_SHA="${GITHUB_SHA:0:7}"
+          BUILD_TIME="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+          VERSION="master-${SHORT_SHA}"
+          echo "short_sha=${SHORT_SHA}" >> "$GITHUB_OUTPUT"
+          echo "build_time=${BUILD_TIME}" >> "$GITHUB_OUTPUT"
+          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
+          echo "Built ${VERSION} (${BUILD_TIME})"
+
+      - name: Set up Go (for unit tests + go.mod replace directives)
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.25'
+
+      - name: Run unit tests (short, no integration deps)
+        # go.mod uses `replace instant.dev/common => ../common` and
+        # `replace instant.dev/proto => ../proto`. When `go test` runs
+        # inside ./worker, the relative paths resolve to ./common and
+        # ./proto in the workspace root — already correct, no mv needed.
+        working-directory: worker
+        run: go test ./... -short -count=1
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push image
+        # Build context = workspace root so Dockerfile's
+        # `COPY proto/`, `COPY common/`, `COPY worker/` all resolve.
+        run: |
+          docker buildx build \
+            --platform linux/amd64 \
+            -f worker/Dockerfile \
+            --build-arg GIT_SHA="${{ steps.meta.outputs.short_sha }}" \
+            --build-arg BUILD_TIME="${{ steps.meta.outputs.build_time }}" \
+            --build-arg VERSION="${{ steps.meta.outputs.version }}" \
+            -t "${IMAGE_REPO}:${{ steps.meta.outputs.version }}" \
+            -t "${IMAGE_REPO}:latest" \
+            --push \
+            .
+
+      - name: Set up kubectl
+        uses: azure/setup-kubectl@v3
+        with:
+          version: 'latest'
+
+      - name: Configure kubeconfig from KUBECONFIG_B64 secret
+        env:
+          KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
+        run: |
+          if [ -z "${KUBECONFIG_B64}" ]; then
+            echo "::error::KUBECONFIG_B64 repo secret is not set. Add it under Settings → Secrets → Actions."
+            exit 1
+          fi
+          mkdir -p "$HOME/.kube"
+          echo "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config"
+          chmod 600 "$HOME/.kube/config"
+          kubectl version --client=true
+
+      - name: Roll out new image
+        run: |
+          IMAGE="${IMAGE_REPO}:${{ steps.meta.outputs.version }}"
+          echo "Setting ${K8S_DEPLOYMENT}.${K8S_CONTAINER} to ${IMAGE}"
+          kubectl set image \
+            "deployment/${K8S_DEPLOYMENT}" \
+            "${K8S_CONTAINER}=${IMAGE}" \
+            -n "${K8S_NAMESPACE}"
+          kubectl rollout status \
+            "deployment/${K8S_DEPLOYMENT}" \
+            -n "${K8S_NAMESPACE}" \
+            --timeout=180s
+
+      - name: Verify rolled-out image tag matches built version
+        # The worker has no public ingress, so an external /healthz probe is
+        # not possible. Instead, check that the deployment spec references
+        # the exact tag built above; `kubectl rollout status` in the previous
+        # step has already waited (up to 180s) for the rollout to complete.
+        run: |
+          ROLLED=$(kubectl get deployment "${K8S_DEPLOYMENT}" -n "${K8S_NAMESPACE}" \
+            -o jsonpath="{.spec.template.spec.containers[?(@.name=='${K8S_CONTAINER}')].image}")
+          EXPECTED="${IMAGE_REPO}:${{ steps.meta.outputs.version }}"
+          echo "Live image: ${ROLLED}"
+          echo "Expected: ${EXPECTED}"
+          if [ "${ROLLED}" != "${EXPECTED}" ]; then
+            echo "::error::Rolled image (${ROLLED}) != expected (${EXPECTED})"
+            exit 1
+          fi
+
+      - name: Confirm new pod reports new SHA via in-cluster /healthz
+        # worker exposes /healthz on :8091 inside the cluster (see CLAUDE.md:
+        # "Mirrored on provisioner-sidecar (:8092), worker-healthz (:8091)").
+        # Best-effort: exec into the newest Running pod and look for our short
+        # SHA in the /healthz body. Pods are sorted by creation time and the
+        # last one is used, because a still-terminating old pod is also Running.
+        run: |
+          SHORT_SHA="${{ steps.meta.outputs.short_sha }}"
+          POD=$(kubectl get pod -n "${K8S_NAMESPACE}" \
+            -l "app=${K8S_DEPLOYMENT}" --field-selector=status.phase=Running \
+            --sort-by=.metadata.creationTimestamp \
+            -o jsonpath='{.items[-1:].metadata.name}' 2>/dev/null || true)
+          if [ -z "${POD}" ]; then
+            echo "::warning::could not locate a running ${K8S_DEPLOYMENT} pod by label app=${K8S_DEPLOYMENT}; rollout already succeeded, skipping in-pod SHA check"
+            exit 0
+          fi
+          echo "Probing /healthz inside ${POD}"
+          for i in 1 2 3 4 5; do
+            BODY=$(kubectl exec -n "${K8S_NAMESPACE}" "${POD}" -- \
+              sh -c 'wget -qO- http://127.0.0.1:8091/healthz 2>/dev/null || curl -fsSL http://127.0.0.1:8091/healthz 2>/dev/null' || echo "")
+            echo "Attempt ${i}: ${BODY}"
+            if echo "${BODY}" | grep -q "${SHORT_SHA}"; then
+              echo "Confirmed in-pod /healthz reports commit_id=${SHORT_SHA}"
+              exit 0
+            fi
+            sleep 3
+          done
+          # Distroless workers may not have wget/curl. Don't fail the deploy
+          # in that case — the image-tag check above is the load-bearing gate.
+          echo "::warning::could not confirm SHA via in-pod /healthz (distroless image likely has no curl/wget). Image-tag check passed; deploy is good."