From 3e5693f5973a23cffa27c4f484bff3bf2db9255d Mon Sep 17 00:00:00 2001 From: DjDeveloperr Date: Wed, 20 May 2026 18:48:05 -0400 Subject: [PATCH 1/2] Harden comment sessions against daemon restarts --- .../run-android-comment-session/action.yml | 198 ++++++++++++++++-- actions/run-ios-comment-session/action.yml | 192 +++++++++++++++-- scripts/github-actions.test.mjs | 157 ++++++++++++++ 3 files changed, 508 insertions(+), 39 deletions(-) diff --git a/actions/run-android-comment-session/action.yml b/actions/run-android-comment-session/action.yml index 25d774c6..0b5489e1 100644 --- a/actions/run-android-comment-session/action.yml +++ b/actions/run-android-comment-session/action.yml @@ -324,6 +324,36 @@ runs: exit 1 fi + cat > simdeck-daemon-supervisor.sh <<'EOF' + #!/usr/bin/env bash + set +e + terminating=0 + child="" + trap 'terminating=1; if [[ -n "${child}" ]]; then kill "${child}" 2>/dev/null; wait "${child}" 2>/dev/null; fi; exit 0' TERM INT HUP + + while true; do + "$@" >> simdeck-daemon.log 2>&1 & + child="$!" + echo "${child}" > simdeck-child.pid + wait "${child}" + status="$?" + child="" + + if [[ "${terminating}" -eq 1 ]]; then + exit 0 + fi + if [[ "${status}" -eq 75 || "${status}" -ge 128 ]]; then + echo "[simdeck-action-supervisor] daemon exited with status ${status}; restarting" >> simdeck-daemon.log + sleep 1 + continue + fi + + echo "[simdeck-action-supervisor] daemon exited with status ${status}; not restarting" >> simdeck-daemon.log + exit "${status}" + done + EOF + chmod +x simdeck-daemon-supervisor.sh + SIMDECK_VIDEO_CODEC=software \ SIMDECK_ANDROID_VIDEO_CODEC=software \ SIMDECK_ALLOWED_ORIGINS='*' \ @@ -334,6 +364,7 @@ runs: SIMDECK_REALTIME_MIN_BITRATE="${stream_min_bitrate}" \ SIMDECK_REALTIME_BITS_PER_PIXEL="${stream_bits_per_pixel}" \ SIMDECK_LOCAL_STREAM_FPS="${stream_fps}" \ + ./simdeck-daemon-supervisor.sh \ simdeck daemon run \ --project-root "${GITHUB_WORKSPACE}" \ --metadata-path "${metadata_path}" \ @@ -343,8 +374,7 @@ runs: --stream-quality "${SIMDECK_STREAM_PROFILE}" \ --local-stream-fps "${stream_fps}" \ --access-token "${access_token}" \ - --pairing-code 000000 \ - > simdeck-daemon.log 2>&1 & + --pairing-code 000000 & echo "$!" > simdeck.pid cloudflared tunnel --url "http://127.0.0.1:${SIMDECK_PORT}" --protocol http2 --no-autoupdate > cloudflared.log 2>&1 & @@ -444,40 +474,106 @@ runs: sha="${{ steps.pr.outputs.sha }}" if [[ -n "${ARTIFACT_NAME_INPUT}" ]]; then artifact_name="${ARTIFACT_NAME_INPUT}" + artifact_candidates="${artifact_name}" else artifact_name="${ARTIFACT_PREFIX}-${sha}" + artifact_candidates="${artifact_name}" + if [[ "${ARTIFACT_PREFIX}" != "${artifact_name}" ]]; then + artifact_candidates+=$'\n'"${ARTIFACT_PREFIX}" + fi fi + printf '%s\n' "${artifact_candidates}" > /tmp/simdeck-artifact-candidates.txt + artifact_candidates_summary="$(python3 - <<'PY' + with open("/tmp/simdeck-artifact-candidates.txt", "r", encoding="utf-8") as handle: + names = [line.strip() for line in handle if line.strip()] + print(", ".join(f"`{name}`" for name in names)) + PY + )" + echo "SIMDECK_ARTIFACT_NAME=${artifact_name}" >> "${GITHUB_ENV}" + echo "SIMDECK_ARTIFACT_CANDIDATES=${artifact_candidates_summary}" >> "${GITHUB_ENV}" mkdir -p downloaded-app rm -f /tmp/simdeck-artifact-download.status app-download.log ( set +e { - run_id="" - for attempt in {1..30}; do - run_id="$(gh api -X GET "repos/${REPO}/actions/artifacts?name=${artifact_name}&per_page=100" \ - --jq '.artifacts[] | select(.expired == false) | .workflow_run.id' \ - | head -n 1 || true)" - - if [[ -z "${run_id}" && -n "${BUILD_WORKFLOW}" ]]; then - run_id="$(gh api --paginate "repos/${REPO}/actions/workflows/${BUILD_WORKFLOW}/runs?per_page=100" \ - --jq ".workflow_runs[] | select(.head_sha == \"${sha}\" and .conclusion == \"success\") | .id" \ - | head -n 1 || true)" + find_artifact_by_run() { + local candidate_run_id="$1" + gh api -X GET "repos/${REPO}/actions/runs/${candidate_run_id}/artifacts?per_page=100" | + python3 -c ' + import json + import sys + + with open("/tmp/simdeck-artifact-candidates.txt", "r", encoding="utf-8") as handle: + names = [line.strip() for line in handle if line.strip()] + + data = json.load(sys.stdin) + for name in names: + for artifact in data.get("artifacts", []): + if artifact.get("expired") is False and artifact.get("name") == name: + print(name) + raise SystemExit(0) + raise SystemExit(1) + ' || true + } + + find_artifact_record() { + local artifact_record artifact_name_for_run candidate candidate_run_id + + while IFS= read -r candidate; do + [[ -z "${candidate}" ]] && continue + artifact_record="$(gh api -X GET "repos/${REPO}/actions/artifacts?name=${candidate}&per_page=100" | + SIMDECK_ARTIFACT_SHA="${sha}" python3 -c ' + import json + import os + import sys + + data = json.load(sys.stdin) + sha = os.environ["SIMDECK_ARTIFACT_SHA"] + for artifact in data.get("artifacts", []): + run = artifact.get("workflow_run") or {} + if artifact.get("expired") is False and run.get("head_sha") == sha: + print("{}\t{}".format(run.get("id"), artifact.get("name"))) + break + ' || true)" + if [[ -n "${artifact_record}" ]]; then + printf '%s\n' "${artifact_record}" + return 0 + fi + done < /tmp/simdeck-artifact-candidates.txt + + if [[ -n "${BUILD_WORKFLOW}" ]]; then + while IFS= read -r candidate_run_id; do + [[ -z "${candidate_run_id}" ]] && continue + artifact_name_for_run="$(find_artifact_by_run "${candidate_run_id}")" + if [[ -n "${artifact_name_for_run}" ]]; then + printf '%s\t%s\n' "${candidate_run_id}" "${artifact_name_for_run}" + return 0 + fi + done < <(gh api --paginate "repos/${REPO}/actions/workflows/${BUILD_WORKFLOW}/runs?per_page=100" \ + --jq ".workflow_runs[] | select(.head_sha == \"${sha}\" and .conclusion == \"success\") | .id" || true) fi - if [[ -n "${run_id}" ]]; then - echo "Using build workflow run ${run_id} for ${sha}" - gh run download "${run_id}" --repo "${REPO}" --name "${artifact_name}" --dir downloaded-app + return 1 + } + + for attempt in {1..30}; do + artifact_record="$(find_artifact_record || true)" + + if [[ -n "${artifact_record}" ]]; then + IFS=$'\t' read -r run_id download_artifact_name <<< "${artifact_record}" + echo "Using build workflow run ${run_id} artifact '${download_artifact_name}' for ${sha}" + gh run download "${run_id}" --repo "${REPO}" --name "${download_artifact_name}" --dir downloaded-app exit_code="$?" echo "${exit_code}" > /tmp/simdeck-artifact-download.status exit "${exit_code}" fi - echo "Waiting for artifact '${artifact_name}' for PR head ${sha} (${attempt}/30)" + echo "Waiting for artifact (${artifact_candidates_summary}) for PR head ${sha} (${attempt}/30)" sleep 20 done - echo "No successful '${artifact_name}' artifact was found for PR head ${sha}." >&2 + echo "No successful unexpired artifact (${artifact_candidates_summary}) was found for PR head ${sha}." >&2 echo "1" > /tmp/simdeck-artifact-download.status exit 1 } > app-download.log 2>&1 @@ -543,6 +639,37 @@ runs: cat app-download.log status="$(cat /tmp/simdeck-artifact-download.status 2>/dev/null || echo 1)" if [[ "${status}" -ne 0 ]]; then + echo "SIMDECK_SESSION_START_FAILED=1" >> "${GITHUB_ENV}" + commit_sha="${{ steps.pr.outputs.sha }}" + artifact_candidates="${SIMDECK_ARTIFACT_CANDIDATES:-${SIMDECK_ARTIFACT_NAME:-${ARTIFACT_PREFIX}-${commit_sha}}}" + mention="" + if [[ -n "${COMMAND_COMMENT_AUTHOR:-}" ]]; then + mention="@${COMMAND_COMMENT_AUTHOR} " + fi + + cat > comment.md <<'EOF' + __MENTION__SimDeck Android session could not start for commit `__COMMIT_SHA__`. + + No unexpired APK artifact was available for this PR head. Expected one of: __ARTIFACT_CANDIDATES__. + + Re-run the build workflow or push a new commit, then comment `simdeck run android` again. + EOF + + body="$(cat comment.md)" + body="${body/__MENTION__/${mention}}" + body="${body/__COMMIT_SHA__/${commit_sha}}" + body="${body/__ARTIFACT_CANDIDATES__/${artifact_candidates}}" + for attempt in {1..5}; do + if [[ -n "${SIMDECK_STATUS_COMMENT_ID:-}" ]]; then + if gh api -X PATCH "repos/${REPO}/issues/comments/${SIMDECK_STATUS_COMMENT_ID}" -f body="${body}"; then + break + fi + elif comment_id="$(gh api "repos/${REPO}/issues/${PR_NUMBER}/comments" -f body="${body}" --jq '.id')"; then + echo "SIMDECK_STATUS_COMMENT_ID=${comment_id}" >> "${GITHUB_ENV}" + break + fi + sleep $((attempt * 5)) + done exit "${status}" fi @@ -592,6 +719,7 @@ runs: if [[ -f /tmp/sim-boot-start && -f /tmp/sim-boot-end ]]; then echo "Android emulator boot took $(( $(cat /tmp/sim-boot-end) - $(cat /tmp/sim-boot-start) )) seconds." fi + echo "SIMDECK_SESSION_OPEN=1" >> "${GITHUB_ENV}" - name: Update status comment after app launch shell: bash @@ -621,14 +749,39 @@ runs: set -euo pipefail udid="${{ steps.android.outputs.udid }}" end=$((SECONDS + KEEPALIVE_SECONDS)) + SIMDECK_DAEMON_HEALTH_GRACE_SECONDS="${SIMDECK_DAEMON_HEALTH_GRACE_SECONDS:-90}" + health_failure_started="" + + note_daemon_health_failure() { + local reason="$1" + if [[ -z "${health_failure_started}" ]]; then + health_failure_started="${SECONDS}" + fi + + if (( SECONDS - health_failure_started >= SIMDECK_DAEMON_HEALTH_GRACE_SECONDS )); then + echo "${reason} for ${SIMDECK_DAEMON_HEALTH_GRACE_SECONDS}s; stopping session." >&2 + cat simdeck-list-error.log >&2 || true + cat simdeck-health.log >&2 || true + cat simdeck-daemon.log >&2 || true + return 1 + fi + + echo "${reason}; waiting for SimDeck daemon supervisor to recover." + sleep 5 + return 0 + } while (( SECONDS < end )); do if [[ -f simdeck.pid ]] && ! kill -0 "$(cat simdeck.pid)" 2>/dev/null; then echo "SimDeck daemon process exited; stopping session." + cat simdeck-daemon.log >&2 || true exit 1 fi - list_json="$(simdeck --server-url "http://127.0.0.1:${SIMDECK_PORT}" list --format json)" + if ! list_json="$(simdeck --server-url "http://127.0.0.1:${SIMDECK_PORT}" list --format json 2>simdeck-list-error.log)"; then + note_daemon_health_failure "SimDeck daemon list check failed" || exit 1 + continue + fi if ! SIMDECK_LIST_JSON="${list_json}" python3 - "${udid}" <<'PY' import json import os @@ -647,7 +800,12 @@ runs: exit 0 fi - curl -fsS "http://127.0.0.1:${SIMDECK_PORT}/api/health?simdeckToken=${{ steps.stream.outputs.access_token }}" >/dev/null + if ! curl -fsS "http://127.0.0.1:${SIMDECK_PORT}/api/health?simdeckToken=${{ steps.stream.outputs.access_token }}" >/dev/null 2>simdeck-health.log; then + note_daemon_health_failure "SimDeck daemon health check failed" || exit 1 + continue + fi + + health_failure_started="" sleep 15 done @@ -668,7 +826,7 @@ runs: simdeck daemon stop - name: Update status comment at end - if: always() + if: always() && env.SIMDECK_SESSION_OPEN == '1' shell: bash run: | set -euo pipefail diff --git a/actions/run-ios-comment-session/action.yml b/actions/run-ios-comment-session/action.yml index 10c02629..1b2e3dd0 100644 --- a/actions/run-ios-comment-session/action.yml +++ b/actions/run-ios-comment-session/action.yml @@ -289,6 +289,36 @@ runs: exit 1 fi + cat > simdeck-daemon-supervisor.sh <<'EOF' + #!/usr/bin/env bash + set +e + terminating=0 + child="" + trap 'terminating=1; if [[ -n "${child}" ]]; then kill "${child}" 2>/dev/null; wait "${child}" 2>/dev/null; fi; exit 0' TERM INT HUP + + while true; do + "$@" >> simdeck-daemon.log 2>&1 & + child="$!" + echo "${child}" > simdeck-child.pid + wait "${child}" + status="$?" + child="" + + if [[ "${terminating}" -eq 1 ]]; then + exit 0 + fi + if [[ "${status}" -eq 75 || "${status}" -ge 128 ]]; then + echo "[simdeck-action-supervisor] daemon exited with status ${status}; restarting" >> simdeck-daemon.log + sleep 1 + continue + fi + + echo "[simdeck-action-supervisor] daemon exited with status ${status}; not restarting" >> simdeck-daemon.log + exit "${status}" + done + EOF + chmod +x simdeck-daemon-supervisor.sh + SIMDECK_VIDEO_CODEC=software \ SIMDECK_ALLOWED_ORIGINS='*' \ SIMDECK_REALTIME_STREAM=1 \ @@ -298,6 +328,7 @@ runs: SIMDECK_REALTIME_MIN_BITRATE="${stream_min_bitrate}" \ SIMDECK_REALTIME_BITS_PER_PIXEL="${stream_bits_per_pixel}" \ SIMDECK_LOCAL_STREAM_FPS="${stream_fps}" \ + ./simdeck-daemon-supervisor.sh \ simdeck daemon run \ --project-root "${GITHUB_WORKSPACE}" \ --metadata-path "${metadata_path}" \ @@ -307,8 +338,7 @@ runs: --stream-quality "${SIMDECK_STREAM_PROFILE}" \ --local-stream-fps "${stream_fps}" \ --access-token "${access_token}" \ - --pairing-code 000000 \ - > simdeck-daemon.log 2>&1 & + --pairing-code 000000 & echo "$!" > simdeck.pid cloudflared tunnel --url "http://127.0.0.1:${SIMDECK_PORT}" --protocol http2 --no-autoupdate > cloudflared.log 2>&1 & @@ -408,40 +438,106 @@ runs: sha="${{ steps.pr.outputs.sha }}" if [[ -n "${ARTIFACT_NAME_INPUT}" ]]; then artifact_name="${ARTIFACT_NAME_INPUT}" + artifact_candidates="${artifact_name}" else artifact_name="${ARTIFACT_PREFIX}-${sha}" + artifact_candidates="${artifact_name}" + if [[ "${ARTIFACT_PREFIX}" != "${artifact_name}" ]]; then + artifact_candidates+=$'\n'"${ARTIFACT_PREFIX}" + fi fi + printf '%s\n' "${artifact_candidates}" > /tmp/simdeck-artifact-candidates.txt + artifact_candidates_summary="$(python3 - <<'PY' + with open("/tmp/simdeck-artifact-candidates.txt", "r", encoding="utf-8") as handle: + names = [line.strip() for line in handle if line.strip()] + print(", ".join(f"`{name}`" for name in names)) + PY + )" + echo "SIMDECK_ARTIFACT_NAME=${artifact_name}" >> "${GITHUB_ENV}" + echo "SIMDECK_ARTIFACT_CANDIDATES=${artifact_candidates_summary}" >> "${GITHUB_ENV}" mkdir -p downloaded-app rm -f /tmp/simdeck-artifact-download.status app-download.log ( set +e { - run_id="" - for attempt in {1..30}; do - run_id="$(gh api -X GET "repos/${REPO}/actions/artifacts?name=${artifact_name}&per_page=100" \ - --jq '.artifacts[] | select(.expired == false) | .workflow_run.id' \ - | head -n 1 || true)" - - if [[ -z "${run_id}" && -n "${BUILD_WORKFLOW}" ]]; then - run_id="$(gh api --paginate "repos/${REPO}/actions/workflows/${BUILD_WORKFLOW}/runs?per_page=100" \ - --jq ".workflow_runs[] | select(.head_sha == \"${sha}\" and .conclusion == \"success\") | .id" \ - | head -n 1 || true)" + find_artifact_by_run() { + local candidate_run_id="$1" + gh api -X GET "repos/${REPO}/actions/runs/${candidate_run_id}/artifacts?per_page=100" | + python3 -c ' + import json + import sys + + with open("/tmp/simdeck-artifact-candidates.txt", "r", encoding="utf-8") as handle: + names = [line.strip() for line in handle if line.strip()] + + data = json.load(sys.stdin) + for name in names: + for artifact in data.get("artifacts", []): + if artifact.get("expired") is False and artifact.get("name") == name: + print(name) + raise SystemExit(0) + raise SystemExit(1) + ' || true + } + + find_artifact_record() { + local artifact_record artifact_name_for_run candidate candidate_run_id + + while IFS= read -r candidate; do + [[ -z "${candidate}" ]] && continue + artifact_record="$(gh api -X GET "repos/${REPO}/actions/artifacts?name=${candidate}&per_page=100" | + SIMDECK_ARTIFACT_SHA="${sha}" python3 -c ' + import json + import os + import sys + + data = json.load(sys.stdin) + sha = os.environ["SIMDECK_ARTIFACT_SHA"] + for artifact in data.get("artifacts", []): + run = artifact.get("workflow_run") or {} + if artifact.get("expired") is False and run.get("head_sha") == sha: + print("{}\t{}".format(run.get("id"), artifact.get("name"))) + break + ' || true)" + if [[ -n "${artifact_record}" ]]; then + printf '%s\n' "${artifact_record}" + return 0 + fi + done < /tmp/simdeck-artifact-candidates.txt + + if [[ -n "${BUILD_WORKFLOW}" ]]; then + while IFS= read -r candidate_run_id; do + [[ -z "${candidate_run_id}" ]] && continue + artifact_name_for_run="$(find_artifact_by_run "${candidate_run_id}")" + if [[ -n "${artifact_name_for_run}" ]]; then + printf '%s\t%s\n' "${candidate_run_id}" "${artifact_name_for_run}" + return 0 + fi + done < <(gh api --paginate "repos/${REPO}/actions/workflows/${BUILD_WORKFLOW}/runs?per_page=100" \ + --jq ".workflow_runs[] | select(.head_sha == \"${sha}\" and .conclusion == \"success\") | .id" || true) fi - if [[ -n "${run_id}" ]]; then - echo "Using build workflow run ${run_id} for ${sha}" - gh run download "${run_id}" --repo "${REPO}" --name "${artifact_name}" --dir downloaded-app + return 1 + } + + for attempt in {1..30}; do + artifact_record="$(find_artifact_record || true)" + + if [[ -n "${artifact_record}" ]]; then + IFS=$'\t' read -r run_id download_artifact_name <<< "${artifact_record}" + echo "Using build workflow run ${run_id} artifact '${download_artifact_name}' for ${sha}" + gh run download "${run_id}" --repo "${REPO}" --name "${download_artifact_name}" --dir downloaded-app exit_code="$?" echo "${exit_code}" > /tmp/simdeck-artifact-download.status exit "${exit_code}" fi - echo "Waiting for artifact '${artifact_name}' for PR head ${sha} (${attempt}/30)" + echo "Waiting for artifact (${artifact_candidates_summary}) for PR head ${sha} (${attempt}/30)" sleep 20 done - echo "No successful '${artifact_name}' artifact was found for PR head ${sha}." >&2 + echo "No successful unexpired artifact (${artifact_candidates_summary}) was found for PR head ${sha}." >&2 echo "1" > /tmp/simdeck-artifact-download.status exit 1 } > app-download.log 2>&1 @@ -643,6 +739,37 @@ runs: cat app-download.log status="$(cat /tmp/simdeck-artifact-download.status 2>/dev/null || echo 1)" if [[ "${status}" -ne 0 ]]; then + echo "SIMDECK_SESSION_START_FAILED=1" >> "${GITHUB_ENV}" + commit_sha="${{ steps.pr.outputs.sha }}" + artifact_candidates="${SIMDECK_ARTIFACT_CANDIDATES:-${SIMDECK_ARTIFACT_NAME:-${ARTIFACT_PREFIX}-${commit_sha}}}" + mention="" + if [[ -n "${COMMAND_COMMENT_AUTHOR:-}" ]]; then + mention="@${COMMAND_COMMENT_AUTHOR} " + fi + + cat > comment.md <<'EOF' + __MENTION__SimDeck iOS session could not start for commit `__COMMIT_SHA__`. + + No unexpired simulator app artifact was available for this PR head. Expected one of: __ARTIFACT_CANDIDATES__. + + Re-run the build workflow or push a new commit, then comment `simdeck run ios` again. + EOF + + body="$(cat comment.md)" + body="${body/__MENTION__/${mention}}" + body="${body/__COMMIT_SHA__/${commit_sha}}" + body="${body/__ARTIFACT_CANDIDATES__/${artifact_candidates}}" + for attempt in {1..5}; do + if [[ -n "${SIMDECK_STATUS_COMMENT_ID:-}" ]]; then + if gh api -X PATCH "repos/${REPO}/issues/comments/${SIMDECK_STATUS_COMMENT_ID}" -f body="${body}"; then + break + fi + elif comment_id="$(gh api "repos/${REPO}/issues/${PR_NUMBER}/comments" -f body="${body}" --jq '.id')"; then + echo "SIMDECK_STATUS_COMMENT_ID=${comment_id}" >> "${GITHUB_ENV}" + break + fi + sleep $((attempt * 5)) + done exit "${status}" fi @@ -724,6 +851,7 @@ runs: if [[ -f /tmp/sim-boot-start && -f /tmp/sim-boot-end ]]; then echo "Simulator boot took $(( $(cat /tmp/sim-boot-end) - $(cat /tmp/sim-boot-start) )) seconds." fi + echo "SIMDECK_SESSION_OPEN=1" >> "${GITHUB_ENV}" - name: Save CoreSimulator device cache if: env.SIMDECK_SIMULATOR_CACHE == '1' && steps.coresim-cache.outputs.cache-hit != 'true' @@ -760,10 +888,31 @@ runs: set -euo pipefail udid="${{ steps.simulator.outputs.udid }}" end=$((SECONDS + KEEPALIVE_SECONDS)) + SIMDECK_DAEMON_HEALTH_GRACE_SECONDS="${SIMDECK_DAEMON_HEALTH_GRACE_SECONDS:-90}" + health_failure_started="" + + note_daemon_health_failure() { + local reason="$1" + if [[ -z "${health_failure_started}" ]]; then + health_failure_started="${SECONDS}" + fi + + if (( SECONDS - health_failure_started >= SIMDECK_DAEMON_HEALTH_GRACE_SECONDS )); then + echo "${reason} for ${SIMDECK_DAEMON_HEALTH_GRACE_SECONDS}s; stopping session." >&2 + cat simdeck-health.log >&2 || true + cat simdeck-daemon.log >&2 || true + return 1 + fi + + echo "${reason}; waiting for SimDeck daemon supervisor to recover." + sleep 5 + return 0 + } while (( SECONDS < end )); do if [[ -f simdeck.pid ]] && ! kill -0 "$(cat simdeck.pid)" 2>/dev/null; then echo "SimDeck daemon process exited; stopping session." + cat simdeck-daemon.log >&2 || true exit 1 fi @@ -773,7 +922,12 @@ runs: exit 0 fi - curl -fsS "http://127.0.0.1:${SIMDECK_PORT}/api/health?simdeckToken=${{ steps.stream.outputs.access_token }}" >/dev/null + if ! curl -fsS "http://127.0.0.1:${SIMDECK_PORT}/api/health?simdeckToken=${{ steps.stream.outputs.access_token }}" >/dev/null 2>simdeck-health.log; then + note_daemon_health_failure "SimDeck daemon health check failed" || exit 1 + continue + fi + + health_failure_started="" sleep 15 done @@ -794,7 +948,7 @@ runs: fi - name: Update status comment at end - if: always() + if: always() && env.SIMDECK_SESSION_OPEN == '1' shell: bash run: | set -euo pipefail diff --git a/scripts/github-actions.test.mjs b/scripts/github-actions.test.mjs index 8a974e08..22ee69f2 100644 --- a/scripts/github-actions.test.mjs +++ b/scripts/github-actions.test.mjs @@ -6,6 +6,24 @@ const iosAction = readFileSync( new URL("../actions/run-ios-comment-session/action.yml", import.meta.url), "utf8", ); +const androidAction = readFileSync( + new URL("../actions/run-android-comment-session/action.yml", import.meta.url), + "utf8", +); + +function indexOfStep(action, name) { + const index = action.indexOf(`- name: ${name}`); + assert.notEqual(index, -1, `${name} step should exist`); + return index; +} + +function stepSlice(action, name, nextName) { + const startIndex = indexOfStep(action, name); + const endIndex = + nextName === undefined ? action.length : indexOfStep(action, nextName); + assert(endIndex > startIndex, `${nextName} should run after ${name}`); + return action.slice(startIndex, endIndex); +} test("iOS PR comment waits for public simulator list access", () => { const prebootIndex = iosAction.indexOf( @@ -50,3 +68,142 @@ test("iOS PR comment waits for public simulator list access", () => { "readiness check should require the selected simulator to be booted", ); }); + +for (const [platform, action, startStep, waitStep] of [ + [ + "iOS", + iosAction, + "Start simulator artifact download", + "Wait for simulator artifact download", + ], + [ + "Android", + androidAction, + "Start APK artifact download", + "Wait for APK artifact download", + ], +]) { + test(`${platform} PR comment resolves an actual matching artifact before download`, () => { + const artifactStep = stepSlice(action, startStep, waitStep); + + assert.match( + artifactStep, + /artifact_candidates\+=\$'\\n'"\$\{ARTIFACT_PREFIX\}"/, + "default artifact lookup should include legacy prefix-only artifacts", + ); + assert.match( + artifactStep, + /run\.get\("head_sha"\) == sha/, + "repository artifact lookup should match the PR head SHA", + ); + assert.match( + artifactStep, + /find_artifact_by_run/, + "workflow-run fallback should inspect the run's artifacts", + ); + assert.match( + artifactStep, + /--name "\$\{download_artifact_name\}"/, + "download should use the artifact name that was actually found", + ); + assert.doesNotMatch( + artifactStep, + /gh run download "\$\{run_id\}" --repo "\$\{REPO\}" --name "\$\{artifact_name\}"/, + "workflow-run fallback must not assume the generated artifact name exists", + ); + }); + + test(`${platform} PR comment reports artifact startup failure explicitly`, () => { + const waitStepBody = stepSlice(action, waitStep, "Install and launch"); + + assert.match( + waitStepBody, + /SIMDECK_SESSION_START_FAILED=1/, + "artifact failure should mark startup failure", + ); + assert.match( + waitStepBody, + /session could not start for commit/, + "artifact failure comment should not read like a completed session", + ); + assert.match( + waitStepBody, + /No unexpired .* artifact was available/, + "artifact failure comment should explain the missing or expired artifact", + ); + }); + + test(`${platform} PR comment only posts ended status after app launch`, () => { + const launchIndex = indexOfStep(action, "Install and launch"); + const sessionOpenIndex = action.indexOf("SIMDECK_SESSION_OPEN=1"); + const finalStep = stepSlice(action, "Update status comment at end"); + + assert( + sessionOpenIndex > launchIndex, + "session should only be marked open after the app is launched", + ); + assert.match( + finalStep, + /if: always\(\) && env\.SIMDECK_SESSION_OPEN == '1'/, + "ended status should only run for sessions that opened", + ); + }); + + test(`${platform} PR comment supervises recoverable daemon exits`, () => { + const startStepBody = stepSlice( + action, + "Install tools, start SimDeck and tunnel", + "Resolve PR head", + ); + + assert.match( + startStepBody, + /simdeck-daemon-supervisor\.sh/, + "action should run SimDeck through a local supervisor", + ); + assert.match( + startStepBody, + /"\$\{status\}" -eq 75/, + "supervisor should restart recoverable SimDeck exits", + ); + assert.match( + startStepBody, + /"\$\{status\}" -ge 128/, + "supervisor should restart signal-terminated daemon children", + ); + assert.match( + startStepBody, + /simdeck-child\.pid/, + "supervisor should expose the active child pid for cleanup diagnostics", + ); + }); + + test(`${platform} PR comment keepalive tolerates transient daemon restarts`, () => { + const keepaliveStepBody = stepSlice( + action, + "Keep session alive", + "Stop session", + ); + + assert.match( + keepaliveStepBody, + /SIMDECK_DAEMON_HEALTH_GRACE_SECONDS/, + "keepalive should have a grace window for daemon restarts", + ); + assert.match( + keepaliveStepBody, + /health_failure_started/, + "keepalive should track continuous daemon health failures", + ); + assert.match( + keepaliveStepBody, + /cat simdeck-daemon\.log/, + "keepalive should print daemon logs when the grace window expires", + ); + assert.match( + keepaliveStepBody, + /continue/, + "keepalive should continue polling after transient daemon failures", + ); + }); +} From c91c8aacc884e98499c5831b9bf3216b5eb1a182 Mon Sep 17 00:00:00 2001 From: DjDeveloperr Date: Wed, 20 May 2026 19:48:48 -0400 Subject: [PATCH 2/2] fix: stabilize PR comment sessions --- .../run-android-comment-session/action.yml | 34 +---- actions/run-ios-comment-session/action.yml | 34 +---- bin/simdeck.mjs | 63 ++++++--- scripts/github-actions.test.mjs | 132 +++++++++++++++--- 4 files changed, 163 insertions(+), 100 deletions(-) diff --git a/actions/run-android-comment-session/action.yml b/actions/run-android-comment-session/action.yml index 0b5489e1..cc13fceb 100644 --- a/actions/run-android-comment-session/action.yml +++ b/actions/run-android-comment-session/action.yml @@ -324,36 +324,6 @@ runs: exit 1 fi - cat > simdeck-daemon-supervisor.sh <<'EOF' - #!/usr/bin/env bash - set +e - terminating=0 - child="" - trap 'terminating=1; if [[ -n "${child}" ]]; then kill "${child}" 2>/dev/null; wait "${child}" 2>/dev/null; fi; exit 0' TERM INT HUP - - while true; do - "$@" >> simdeck-daemon.log 2>&1 & - child="$!" - echo "${child}" > simdeck-child.pid - wait "${child}" - status="$?" - child="" - - if [[ "${terminating}" -eq 1 ]]; then - exit 0 - fi - if [[ "${status}" -eq 75 || "${status}" -ge 128 ]]; then - echo "[simdeck-action-supervisor] daemon exited with status ${status}; restarting" >> simdeck-daemon.log - sleep 1 - continue - fi - - echo "[simdeck-action-supervisor] daemon exited with status ${status}; not restarting" >> simdeck-daemon.log - exit "${status}" - done - EOF - chmod +x simdeck-daemon-supervisor.sh - SIMDECK_VIDEO_CODEC=software \ SIMDECK_ANDROID_VIDEO_CODEC=software \ SIMDECK_ALLOWED_ORIGINS='*' \ @@ -364,7 +334,6 @@ runs: SIMDECK_REALTIME_MIN_BITRATE="${stream_min_bitrate}" \ SIMDECK_REALTIME_BITS_PER_PIXEL="${stream_bits_per_pixel}" \ SIMDECK_LOCAL_STREAM_FPS="${stream_fps}" \ - ./simdeck-daemon-supervisor.sh \ simdeck daemon run \ --project-root "${GITHUB_WORKSPACE}" \ --metadata-path "${metadata_path}" \ @@ -374,7 +343,8 @@ runs: --stream-quality "${SIMDECK_STREAM_PROFILE}" \ --local-stream-fps "${stream_fps}" \ --access-token "${access_token}" \ - --pairing-code 000000 & + --pairing-code 000000 \ + > simdeck-daemon.log 2>&1 & echo "$!" > simdeck.pid cloudflared tunnel --url "http://127.0.0.1:${SIMDECK_PORT}" --protocol http2 --no-autoupdate > cloudflared.log 2>&1 & diff --git a/actions/run-ios-comment-session/action.yml b/actions/run-ios-comment-session/action.yml index 1b2e3dd0..2e52a00f 100644 --- a/actions/run-ios-comment-session/action.yml +++ b/actions/run-ios-comment-session/action.yml @@ -289,36 +289,6 @@ runs: exit 1 fi - cat > simdeck-daemon-supervisor.sh <<'EOF' - #!/usr/bin/env bash - set +e - terminating=0 - child="" - trap 'terminating=1; if [[ -n "${child}" ]]; then kill "${child}" 2>/dev/null; wait "${child}" 2>/dev/null; fi; exit 0' TERM INT HUP - - while true; do - "$@" >> simdeck-daemon.log 2>&1 & - child="$!" - echo "${child}" > simdeck-child.pid - wait "${child}" - status="$?" - child="" - - if [[ "${terminating}" -eq 1 ]]; then - exit 0 - fi - if [[ "${status}" -eq 75 || "${status}" -ge 128 ]]; then - echo "[simdeck-action-supervisor] daemon exited with status ${status}; restarting" >> simdeck-daemon.log - sleep 1 - continue - fi - - echo "[simdeck-action-supervisor] daemon exited with status ${status}; not restarting" >> simdeck-daemon.log - exit "${status}" - done - EOF - chmod +x simdeck-daemon-supervisor.sh - SIMDECK_VIDEO_CODEC=software \ SIMDECK_ALLOWED_ORIGINS='*' \ SIMDECK_REALTIME_STREAM=1 \ @@ -328,7 +298,6 @@ runs: SIMDECK_REALTIME_MIN_BITRATE="${stream_min_bitrate}" \ SIMDECK_REALTIME_BITS_PER_PIXEL="${stream_bits_per_pixel}" \ SIMDECK_LOCAL_STREAM_FPS="${stream_fps}" \ - ./simdeck-daemon-supervisor.sh \ simdeck daemon run \ --project-root "${GITHUB_WORKSPACE}" \ --metadata-path "${metadata_path}" \ @@ -338,7 +307,8 @@ runs: --stream-quality "${SIMDECK_STREAM_PROFILE}" \ --local-stream-fps "${stream_fps}" \ --access-token "${access_token}" \ - --pairing-code 000000 & + --pairing-code 000000 \ + > simdeck-daemon.log 2>&1 & echo "$!" > simdeck.pid cloudflared tunnel --url "http://127.0.0.1:${SIMDECK_PORT}" --protocol http2 --no-autoupdate > cloudflared.log 2>&1 & diff --git a/bin/simdeck.mjs b/bin/simdeck.mjs index 0415375b..71c99b0c 100755 --- a/bin/simdeck.mjs +++ b/bin/simdeck.mjs @@ -5,11 +5,15 @@ import { existsSync } from "node:fs"; import path from "node:path"; import { fileURLToPath } from "node:url"; +const RECOVERABLE_RESTART_EXIT_CODE = 75; + const packageRoot = path.resolve( path.dirname(fileURLToPath(import.meta.url)), "..", ); const binaryPath = path.join(packageRoot, "build", "simdeck-bin"); +const childArgs = process.argv.slice(2); +const isDaemonRun = childArgs[0] === "daemon" && childArgs[1] === "run"; if (process.platform !== "darwin") { console.error("simdeck only supports macOS."); @@ -23,28 +27,53 @@ if (!existsSync(binaryPath)) { process.exit(1); } -const child = spawn(binaryPath, process.argv.slice(2), { - cwd: process.cwd(), - stdio: "inherit", -}); - -child.on("error", (error) => { - console.error(error.message); - process.exit(1); -}); +let child; +let terminating = false; for (const signal of ["SIGINT", "SIGTERM", "SIGHUP"]) { process.once(signal, () => { - if (!child.killed) { + terminating = true; + if (child && !child.killed) { child.kill(signal); } }); } -child.on("exit", (code, signal) => { - if (signal) { - process.kill(process.pid, signal); - return; - } - process.exit(code ?? 1); -}); +function spawnChild() { + const env = isDaemonRun + ? { + ...process.env, + SIMDECK_DAEMON_METADATA_PID: String(process.pid), + } + : process.env; + + child = spawn(binaryPath, childArgs, { + cwd: process.cwd(), + env, + stdio: "inherit", + }); + + child.on("error", (error) => { + console.error(error.message); + process.exit(1); + }); + + child.on("exit", (code, signal) => { + if ( + isDaemonRun && + !terminating && + (code === RECOVERABLE_RESTART_EXIT_CODE || signal) + ) { + setTimeout(spawnChild, 500); + return; + } + + if (signal) { + process.kill(process.pid, signal); + return; + } + process.exit(code ?? 1); + }); +} + +spawnChild(); diff --git a/scripts/github-actions.test.mjs b/scripts/github-actions.test.mjs index 22ee69f2..a7193690 100644 --- a/scripts/github-actions.test.mjs +++ b/scripts/github-actions.test.mjs @@ -1,5 +1,16 @@ import assert from "node:assert/strict"; -import { readFileSync } from "node:fs"; +import { + chmodSync, + copyFileSync, + mkdirSync, + mkdtempSync, + readFileSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { spawnSync } from "node:child_process"; import { test } from "node:test"; const iosAction = readFileSync( @@ -25,6 +36,8 @@ function stepSlice(action, name, nextName) { return action.slice(startIndex, endIndex); } +const darwinTest = process.platform === "darwin" ? test : test.skip; + test("iOS PR comment waits for public simulator list access", () => { const prebootIndex = iosAction.indexOf( "- name: Select and preboot simulator", @@ -149,33 +162,20 @@ for (const [platform, action, startStep, waitStep] of [ ); }); - test(`${platform} PR comment supervises recoverable daemon exits`, () => { + test(`${platform} PR comment relies on the packaged CLI daemon supervisor`, () => { const startStepBody = stepSlice( action, "Install tools, start SimDeck and tunnel", "Resolve PR head", ); - assert.match( + assert.doesNotMatch( startStepBody, /simdeck-daemon-supervisor\.sh/, - "action should run SimDeck through a local supervisor", - ); - assert.match( - startStepBody, - /"\$\{status\}" -eq 75/, - "supervisor should restart recoverable SimDeck exits", - ); - assert.match( - startStepBody, - /"\$\{status\}" -ge 128/, - "supervisor should restart signal-terminated daemon children", - ); - assert.match( - startStepBody, - /simdeck-child\.pid/, - "supervisor should expose the active child pid for cleanup diagnostics", + "action should not carry a second workflow-local daemon supervisor", ); + assert.match(startStepBody, /simdeck daemon run/); + assert.match(startStepBody, /echo "\$!" > simdeck\.pid/); }); test(`${platform} PR comment keepalive tolerates transient daemon restarts`, () => { @@ -207,3 +207,97 @@ for (const [platform, action, startStep, waitStep] of [ ); }); } + +darwinTest( + "npm CLI wrapper restarts daemon run after recoverable native exit", + () => { + const root = mkdtempSync(join(tmpdir(), "simdeck-wrapper-test-")); + try { + mkdirSync(join(root, "bin"), { recursive: true }); + mkdirSync(join(root, "build"), { recursive: true }); + const wrapperPath = join(root, "bin", "simdeck.mjs"); + const nativePath = join(root, "build", "simdeck-bin"); + const logPath = join(root, "native.log"); + const countPath = join(root, "count"); + + copyFileSync(new URL("../bin/simdeck.mjs", import.meta.url), wrapperPath); + chmodSync(wrapperPath, 0o755); + writeFileSync( + nativePath, + `#!/usr/bin/env bash +set -euo pipefail +count="$(cat "${countPath}" 2>/dev/null || echo 0)" +count="$((count + 1))" +echo "$count" > "${countPath}" +echo "$$:\${SIMDECK_DAEMON_METADATA_PID:-}:\$*" >> "${logPath}" +if [[ "$count" == "1" ]]; then + exit 75 +fi +exit 0 +`, + ); + chmodSync(nativePath, 0o755); + + const result = spawnSync( + process.execPath, + [wrapperPath, "daemon", "run", "--port", "4310"], + { + encoding: "utf8", + }, + ); + + assert.equal(result.status, 0, result.stderr); + const logLines = readFileSync(logPath, "utf8").trim().split("\n"); + assert.equal(logLines.length, 2, "daemon run should be retried once"); + + const entries = logLines.map((line) => { + const [pid, metadataPid, args] = line.split(":"); + return { pid, metadataPid, args }; + }); + assert.notEqual(entries[0].pid, entries[1].pid); + assert.match(entries[0].metadataPid, /^\d+$/); + assert.equal(entries[0].metadataPid, entries[1].metadataPid); + assert.notEqual(entries[0].pid, entries[0].metadataPid); + assert.equal(entries[0].args, "daemon run --port 4310"); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }, +); + +darwinTest( + "npm CLI wrapper does not restart non-daemon commands on exit 75", + () => { + const root = mkdtempSync(join(tmpdir(), "simdeck-wrapper-test-")); + try { + mkdirSync(join(root, "bin"), { recursive: true }); + mkdirSync(join(root, "build"), { recursive: true }); + const wrapperPath = join(root, "bin", "simdeck.mjs"); + const nativePath = join(root, "build", "simdeck-bin"); + const logPath = join(root, "native.log"); + + copyFileSync(new URL("../bin/simdeck.mjs", import.meta.url), wrapperPath); + chmodSync(wrapperPath, 0o755); + writeFileSync( + nativePath, + `#!/usr/bin/env bash +set -euo pipefail +echo "$$:\${SIMDECK_DAEMON_METADATA_PID:-}:\$*" >> "${logPath}" +exit 75 +`, + ); + chmodSync(nativePath, 0o755); + + const result = spawnSync(process.execPath, [wrapperPath, "list"], { + encoding: "utf8", + }); + + assert.equal(result.status, 75); + const logLines = readFileSync(logPath, "utf8").trim().split("\n"); + assert.equal(logLines.length, 1); + assert.equal(logLines[0].split(":")[1], ""); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }, +);