diff --git a/.msd/autofix-findings-summary.txt b/.msd/autofix-findings-summary.txt
new file mode 100644
index 000000000..bbcc95c4e
--- /dev/null
+++ b/.msd/autofix-findings-summary.txt
@@ -0,0 +1,10 @@
+1. [MEDIUM] packages/sdk/src/runner.ts — packages/sdk/src/runner.ts
+2. [MEDIUM] packages/sdk/src/runner.ts — packages/sdk/src/runner.ts
+3. [LOW] packages/sdk/src/runner.ts — packages/sdk/src/runner.ts
+4. [LOW] packages/sdk/src/runner.ts — packages/sdk/src/runner.ts
+5. [LOW] packages/sdk/src/runner.ts — packages/sdk/src/runner.ts
+6. [LOW] packages/sdk/src/runner.ts — packages/sdk/src/runner.ts
+7. [LOW] packages/sdk/src/cli.ts — packages/sdk/src/cli.ts
+8. [LOW] packages/sdk/src/cli.ts — packages/sdk/src/cli.ts
+9. [LOW] packages/sdk/src/file-db.ts — packages/sdk/src/file-db.ts
+10. [LOW] packages/sdk/src/file-db.ts — packages/sdk/src/file-db.ts
diff --git a/.msd/autofix-plan.json b/.msd/autofix-plan.json
new file mode 100644
index 000000000..7a78bf66d
--- /dev/null
+++ b/.msd/autofix-plan.json
@@ -0,0 +1,49 @@
+{
+  "groups": [
+    {
+      "id": "group-1",
+      "label": "runner.ts reliability and readability fixes",
+      "domain": "reliability",
+      "findings": [
+        "packages/sdk/src/runner.ts-Silent failure: optional config param on resume()-historian-review, developer-cross-review, security-cross-review-medium",
+        "packages/sdk/src/runner.ts-Synthetic timestamps in reconstructed runs-historian-review, developer-cross-review-medium",
+        "packages/sdk/src/runner.ts-Path traversal via unsanitized runId-security-review, all cross-reviewers-low",
+        "packages/sdk/src/runner.ts-failedStepName heuristic asymmetry with startFrom-historian-review, developer-cross-review-low",
+        "packages/sdk/src/runner.ts-.report.json files invisible to cache reconstruction-historian-review, developer-cross-review-low",
+        "packages/sdk/src/runner.ts-Nested ternary workflow selection readability-developer-review, all cross-reviewers-low"
+      ],
+      "files": [
+        "packages/sdk/src/runner.ts"
+      ],
+      "rationale": "All 6 findings target runner.ts — grouping avoids file conflicts and lets one worker handle related resume/reconstruction logic"
+    },
+    {
+      "id": "group-2",
+      "label": "cli.ts flag parsing and config version fixes",
+      "domain": "code-quality",
+      "findings": [
+        "packages/sdk/src/cli.ts-String matching brittleness for flag parsing-developer-review, historian-cross-review-low",
+        "packages/sdk/src/cli.ts-Config version mismatch during resume-historian-cross-review-low"
+      ],
+      "files": [
+        "packages/sdk/src/cli.ts"
+      ],
+      "rationale": "Both findings target cli.ts — same file, related CLI parsing concerns"
+    },
+    {
+      "id": "group-3",
+      "label": "file-db.ts output validation and error handling",
+      "domain": "reliability",
+      "findings": [
+        "packages/sdk/src/file-db.ts-hasStepOutputs false positive potential-historian-review-low",
+        "packages/sdk/src/file-db.ts-File path disclosure in append() catch — DISPUTED NON-ISSUE-security-review (disputed by security-cross-review, developer-cross-review)-low"
+      ],
+      "files": [
+        "packages/sdk/src/file-db.ts"
+      ],
+      "rationale": "Both findings target file-db.ts — same file, related data integrity concerns"
+    }
+  ],
+  "totalGroups": 3,
+  "conflictCheck": "no file appears in multiple groups"
+}
\ No newline at end of file
diff --git a/.trajectories/active/traj_1774617159310_87c5f71c.json b/.trajectories/active/traj_1774617159310_87c5f71c.json
new file mode 100644
index 000000000..01569663d
--- /dev/null
+++ b/.trajectories/active/traj_1774617159310_87c5f71c.json
@@ -0,0 +1,406 @@
+{
+  "id": "traj_1774617159310_87c5f71c",
+  "version": 1,
+  "task": {
+    "title": "autofix-swarm-Agentworkforce-relay-workflow",
+    "source": {
+      "system": "workflow-runner",
+      "id": "8f29a399f4d82a7d88bf39bb"
+    }
+  },
+  "status": "active",
+  "startedAt": "2026-03-27T13:12:39.310Z",
+  "agents": [
+    {
+      "name": "orchestrator",
+      "role": "workflow-runner",
+      "joinedAt": "2026-03-27T13:12:39.310Z"
+    },
+    {
+      "name": "lead",
+      "role": "specialist",
+      "joinedAt": "2026-03-27T13:12:43.433Z"
+    },
+    {
+      "name": "fix-worker-1",
+      "role": "specialist",
+      "joinedAt": "2026-03-27T13:14:23.398Z"
+    },
+    {
+      "name": "fix-worker-2",
+      "role": "specialist",
+      "joinedAt": "2026-03-27T13:14:25.399Z"
+    },
+    {
+      "name": "fix-worker-3",
+      "role": "specialist",
+      "joinedAt": "2026-03-27T13:14:27.399Z"
+    },
+    {
+      "name": "fix-worker-4",
+      "role": "specialist",
+      "joinedAt": "2026-03-27T13:14:29.399Z"
+    },
+    {
+      "name": "verifier",
+      "role": "specialist",
+      "joinedAt": "2026-03-27T13:18:31.200Z"
+    }
+  ],
+  "chapters": [
+    {
+      "id": "ch_47b43958",
+      "title": "Planning",
+      "agentName": "orchestrator",
+      "startedAt": "2026-03-27T13:12:39.310Z",
+      "events": [
+        {
+          "ts": 1774617159310,
+          "type": "note",
+          "content": "Purpose: Swarm autofix: 10 findings for Agentworkforce/relay (source: pr_review)"
+        },
+        {
+          "ts": 1774617159310,
+          "type": "note",
+          "content": "Approach: 14-step dag workflow — Parsed 14 steps, 2 parallel tracks, 12 dependent steps, DAG validated, no cycles"
+        }
+      ],
+      "endedAt": "2026-03-27T13:12:43.285Z"
+    },
+    {
+      "id": "ch_0ffd568d",
+      "title": "Execution: init-msd-dir, read-context",
+      "agentName": "orchestrator",
+      "startedAt": "2026-03-27T13:12:43.285Z",
+      "events": [],
+      "endedAt": "2026-03-27T13:12:43.414Z"
+    },
+    {
+      "id": "ch_181743b4",
+      "title": "Convergence: init-msd-dir + read-context",
+      "agentName": "orchestrator",
+      "startedAt": "2026-03-27T13:12:43.414Z",
+      "events": [
+        {
+          "ts": 1774617163415,
+          "type": "reflection",
+          "content": "init-msd-dir + read-context resolved. 2/2 steps completed. All steps completed on first attempt. Unblocking: write-findings, plan.",
+          "significance": "high",
+          "raw": {
+            "confidence": 0.75,
+            "focalPoints": [
+              "init-msd-dir: completed",
+              "read-context: completed"
+            ]
+          }
+        }
+      ],
+      "endedAt": "2026-03-27T13:12:43.433Z"
+    },
+    {
+      "id": "ch_df21ba53",
+      "title": "Execution: plan",
+      "agentName": "lead",
+      "startedAt": "2026-03-27T13:12:43.433Z",
+      "events": [
+        {
+          "ts": 1774617163433,
+          "type": "note",
+          "content": "\"plan\": You are the LEAD ARCHITECT for an autofix session on Agentworkforce/relay",
+          "raw": {
+            "agent": "lead"
+          }
+        },
+        {
+          "ts": 1774617263325,
+          "type": "completion-marker",
+          "content": "\"plan\" marker-based completion — Legacy STEP_COMPLETE marker observed (6 signal(s), 1 relevant channel post(s), 3 file change(s); signals=plan, COMPLETE, >0q>4m<u▗ ▗   ▖ ▖  Claude Code v2.1.85, Verification passed, plan, COMPLETE; channel=**[plan] Output:**\n```\nnate immediately after \n finishing. Do NOT spawn sub-agents unless the task explicitly requires it.     \n     rr ng\n            (thinking; files=modified:.claude/settings.json, created:.claude/settings.local.json, created:.msd/autofix-plan.json)",
+          "significance": "medium",
+          "raw": {
+            "stepName": "plan",
+            "completionMode": "marker",
+            "reason": "Legacy STEP_COMPLETE marker observed",
+            "evidence": {
+              "summary": "6 signal(s), 1 relevant channel post(s), 3 file change(s)",
+              "signals": [
+                "plan",
+                "COMPLETE",
+                ">0q>4m<u▗ ▗   ▖ ▖  Claude Code v2.1.85",
+                "Verification passed",
+                "plan",
+                "COMPLETE"
+              ],
+              "channelPosts": [
+                "**[plan] Output:**\n```\nnate immediately after \n finishing. Do NOT spawn sub-agents unless the task explicitly requires it.     \n     rr ng\n            (thinking"
+              ],
+              "files": [
+                "modified:.claude/settings.json",
+                "created:.claude/settings.local.json",
+                "created:.msd/autofix-plan.json"
+              ]
+            }
+          }
+        },
+        {
+          "ts": 1774617263325,
+          "type": "finding",
+          "content": "\"plan\" completed → >0q>4m<u▗ ▗   ▖ ▖  Claude Code v2.1.85\r\r\n           Opus 4.6 (1M context) · Claude Max\r\r\n  ▘▘ ▝▝    ~/Projects/AgentWork",
+          "significance": "medium"
+        }
+      ],
+      "endedAt": "2026-03-27T13:14:23.398Z"
+    },
+    {
+      "id": "ch_9a4d3319",
+      "title": "Execution: fix-worker-1-step, fix-worker-2-step, fix-worker-3-step, fix-worker-4-step",
+      "agentName": "orchestrator",
+      "startedAt": "2026-03-27T13:14:23.398Z",
+      "events": [],
+      "endedAt": "2026-03-27T13:14:23.399Z"
+    },
+    {
+      "id": "ch_cbb52163",
+      "title": "Execution: fix-worker-1-step",
+      "agentName": "fix-worker-1",
+      "startedAt": "2026-03-27T13:14:23.399Z",
+      "events": [
+        {
+          "ts": 1774617263399,
+          "type": "note",
+          "content": "\"fix-worker-1-step\": You are fix-worker-1",
+          "raw": {
+            "agent": "fix-worker-1"
+          }
+        }
+      ],
+      "endedAt": "2026-03-27T13:14:25.400Z"
+    },
+    {
+      "id": "ch_b1834273",
+      "title": "Execution: fix-worker-2-step",
+      "agentName": "fix-worker-2",
+      "startedAt": "2026-03-27T13:14:25.400Z",
+      "events": [
+        {
+          "ts": 1774617265400,
+          "type": "note",
+          "content": "\"fix-worker-2-step\": You are fix-worker-2",
+          "raw": {
+            "agent": "fix-worker-2"
+          }
+        }
+      ],
+      "endedAt": "2026-03-27T13:14:27.399Z"
+    },
+    {
+      "id": "ch_98e907e4",
+      "title": "Execution: fix-worker-3-step",
+      "agentName": "fix-worker-3",
+      "startedAt": "2026-03-27T13:14:27.399Z",
+      "events": [
+        {
+          "ts": 1774617267400,
+          "type": "note",
+          "content": "\"fix-worker-3-step\": You are fix-worker-3",
+          "raw": {
+            "agent": "fix-worker-3"
+          }
+        }
+      ],
+      "endedAt": "2026-03-27T13:14:29.400Z"
+    },
+    {
+      "id": "ch_e66952b2",
+      "title": "Execution: fix-worker-4-step",
+      "agentName": "fix-worker-4",
+      "startedAt": "2026-03-27T13:14:29.400Z",
+      "events": [
+        {
+          "ts": 1774617269400,
+          "type": "note",
+          "content": "\"fix-worker-4-step\": You are fix-worker-4",
+          "raw": {
+            "agent": "fix-worker-4"
+          }
+        },
+        {
+          "ts": 1774617278478,
+          "type": "completion-evidence",
+          "content": "\"fix-worker-4-step\" verification-based completion — Verification passed (2 signal(s), 1 file change(s), exit=0; signals=0, Verification passed; files=modified:.claude/settings.json; exit=0)",
+          "significance": "medium",
+          "raw": {
+            "stepName": "fix-worker-4-step",
+            "completionMode": "verification",
+            "reason": "Verification passed",
+            "evidence": {
+              "summary": "2 signal(s), 1 file change(s), exit=0",
+              "signals": [
+                "0",
+                "Verification passed"
+              ],
+              "files": [
+                "modified:.claude/settings.json"
+              ],
+              "exitCode": 0
+            }
+          }
+        },
+        {
+          "ts": 1774617278478,
+          "type": "finding",
+          "content": "\"fix-worker-4-step\" completed → No group exists at index 3. The plan contains only 3 groups (indices 0–2). Nothing to do — exiting.",
+          "significance": "medium"
+        },
+        {
+          "ts": 1774617355450,
+          "type": "completion-evidence",
+          "content": "\"fix-worker-3-step\" verification-based completion — Verification passed (4 signal(s), 1 relevant channel post(s), 3 file change(s), exit=0; signals=0, The fix is clean and correct. Here's my summary:, Verification passed, **[fix-worker-3-step] Output:**; channel=**[fix-worker-3-step] Output:**\n```\nThe fix is clean and correct. Here's my summary:\n## fix-worker-3: group-3 (`file-db.ts`) — Completed\n### File modified\n- `pa; files=modified:.claude/settings.json, modified:packages/sdk/src/workflows/cli.ts, modified:packages/sdk/src/workflows/file-db.ts; exit=0)",
+          "significance": "medium",
+          "raw": {
+            "stepName": "fix-worker-3-step",
+            "completionMode": "verification",
+            "reason": "Verification passed",
+            "evidence": {
+              "summary": "4 signal(s), 1 relevant channel post(s), 3 file change(s), exit=0",
+              "signals": [
+                "0",
+                "The fix is clean and correct. Here's my summary:",
+                "Verification passed",
+                "**[fix-worker-3-step] Output:**"
+              ],
+              "channelPosts": [
+                "**[fix-worker-3-step] Output:**\n```\nThe fix is clean and correct. Here's my summary:\n## fix-worker-3: group-3 (`file-db.ts`) — Completed\n### File modified\n- `pa"
+              ],
+              "files": [
+                "modified:.claude/settings.json",
+                "modified:packages/sdk/src/workflows/cli.ts",
+                "modified:packages/sdk/src/workflows/file-db.ts"
+              ],
+              "exitCode": 0
+            }
+          }
+        },
+        {
+          "ts": 1774617355450,
+          "type": "finding",
+          "content": "\"fix-worker-3-step\" completed → The fix is clean and correct. Here's my summary:\n\n---\n\n## fix-worker-3: group-3 (`file-db.ts`) — Completed\n\n### File mod",
+          "significance": "medium"
+        },
+        {
+          "ts": 1774617411093,
+          "type": "completion-evidence",
+          "content": "\"fix-worker-2-step\" verification-based completion — Verification passed (4 signal(s), 1 relevant channel post(s), 4 file change(s), exit=0; signals=0, File parses cleanly. Here's the summary:, Verification passed, **[fix-worker-2-step] Output:**; channel=**[fix-worker-2-step] Output:**\n```\nFile parses cleanly. Here's the summary:\n## Fix Summary: group-2 — cli.ts flag parsing and config version fixes\n### File mod; files=modified:.claude/settings.json, modified:packages/sdk/src/workflows/cli.ts, modified:packages/sdk/src/workflows/file-db.ts, modified:packages/sdk/src/workflows/runner.ts; exit=0)",
+          "significance": "medium",
+          "raw": {
+            "stepName": "fix-worker-2-step",
+            "completionMode": "verification",
+            "reason": "Verification passed",
+            "evidence": {
+              "summary": "4 signal(s), 1 relevant channel post(s), 4 file change(s), exit=0",
+              "signals": [
+                "0",
+                "File parses cleanly. Here's the summary:",
+                "Verification passed",
+                "**[fix-worker-2-step] Output:**"
+              ],
+              "channelPosts": [
+                "**[fix-worker-2-step] Output:**\n```\nFile parses cleanly. Here's the summary:\n## Fix Summary: group-2 — cli.ts flag parsing and config version fixes\n### File mod"
+              ],
+              "files": [
+                "modified:.claude/settings.json",
+                "modified:packages/sdk/src/workflows/cli.ts",
+                "modified:packages/sdk/src/workflows/file-db.ts",
+                "modified:packages/sdk/src/workflows/runner.ts"
+              ],
+              "exitCode": 0
+            }
+          }
+        },
+        {
+          "ts": 1774617411093,
+          "type": "finding",
+          "content": "\"fix-worker-2-step\" completed → - Only `packages/sdk/src/workflows/cli.ts` was modified (within group-2 scope)",
+          "significance": "medium"
+        },
+        {
+          "ts": 1774617510623,
+          "type": "completion-evidence",
+          "content": "\"fix-worker-1-step\" verification-based completion — Verification passed (4 signal(s), 1 relevant channel post(s), 5 file change(s), exit=0; signals=0, All changes compile cleanly., Verification passed, **[fix-worker-1-step] Output:**; channel=**[fix-worker-1-step] Output:**\n```\nAll changes compile cleanly.\n## Summary of fixes applied to `packages/sdk/src/workflows/runner.ts`\n### Finding 1 (MEDIUM): S; files=modified:.claude/settings.json, modified:package-lock.json, modified:packages/sdk/src/workflows/cli.ts, modified:packages/sdk/src/workflows/file-db.ts, modified:packages/sdk/src/workflows/runner.ts; exit=0)",
+          "significance": "medium",
+          "raw": {
+            "stepName": "fix-worker-1-step",
+            "completionMode": "verification",
+            "reason": "Verification passed",
+            "evidence": {
+              "summary": "4 signal(s), 1 relevant channel post(s), 5 file change(s), exit=0",
+              "signals": [
+                "0",
+                "All changes compile cleanly.",
+                "Verification passed",
+                "**[fix-worker-1-step] Output:**"
+              ],
+              "channelPosts": [
+                "**[fix-worker-1-step] Output:**\n```\nAll changes compile cleanly.\n## Summary of fixes applied to `packages/sdk/src/workflows/runner.ts`\n### Finding 1 (MEDIUM): S"
+              ],
+              "files": [
+                "modified:.claude/settings.json",
+                "modified:package-lock.json",
+                "modified:packages/sdk/src/workflows/cli.ts",
+                "modified:packages/sdk/src/workflows/file-db.ts",
+                "modified:packages/sdk/src/workflows/runner.ts"
+              ],
+              "exitCode": 0
+            }
+          }
+        },
+        {
+          "ts": 1774617510623,
+          "type": "finding",
+          "content": "\"fix-worker-1-step\" completed → All changes compile cleanly.\n\n---\n\n## Summary of fixes applied to `packages/sdk/src/workflows/runner.ts`\n\n### Finding 1 ",
+          "significance": "medium"
+        }
+      ],
+      "endedAt": "2026-03-27T13:18:30.626Z"
+    },
+    {
+      "id": "ch_ad0f036b",
+      "title": "Convergence: fix-worker-1-step + fix-worker-2-step + fix-worker-3-step + fix-worker-4-step",
+      "agentName": "orchestrator",
+      "startedAt": "2026-03-27T13:18:30.626Z",
+      "events": [
+        {
+          "ts": 1774617510627,
+          "type": "reflection",
+          "content": "fix-worker-1-step + fix-worker-2-step + fix-worker-3-step + fix-worker-4-step resolved. 4/4 steps completed. All steps completed on first attempt. Unblocking: check-files.",
+          "significance": "high",
+          "raw": {
+            "confidence": 1,
+            "focalPoints": [
+              "fix-worker-1-step: completed",
+              "fix-worker-2-step: completed",
+              "fix-worker-3-step: completed",
+              "fix-worker-4-step: completed"
+            ]
+          }
+        }
+      ],
+      "endedAt": "2026-03-27T13:18:31.201Z"
+    },
+    {
+      "id": "ch_183321ae",
+      "title": "Execution: verify-and-finalize",
+      "agentName": "verifier",
+      "startedAt": "2026-03-27T13:18:31.201Z",
+      "events": [
+        {
+          "ts": 1774617511201,
+          "type": "note",
+          "content": "\"verify-and-finalize\": You are the VERIFIER for an autofix session",
+          "raw": {
+            "agent": "verifier"
+          }
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/.trajectories/completed/2026-03/traj_4ghb800vy5ti.json b/.trajectories/completed/2026-03/traj_4ghb800vy5ti.json
new file mode 100644
index 000000000..dd722a325
--- /dev/null
+++ b/.trajectories/completed/2026-03/traj_4ghb800vy5ti.json
@@ -0,0 +1,53 @@
+{
+  "id": "traj_4ghb800vy5ti",
+  "version": 1,
+  "task": {
+    "title": "Write tests for resume fallback behavior"
+  },
+  "status": "completed",
+  "startedAt": "2026-03-26T22:14:28.964Z",
+  "agents": [
+    {
+      "name": "default",
+      "role": "lead",
+      "joinedAt": "2026-03-26T22:17:39.363Z"
+    }
+  ],
+  "chapters": [
+    {
+      "id": "chap_8962n005gjuj",
+      "title": "Work",
+      "agentName": "default",
+      "startedAt": "2026-03-26T22:17:39.363Z",
+      "events": [
+        {
+          "ts": 1774563459364,
+          "type": "decision",
+          "content": "Added targeted resume-fallback and file-db diagnostics tests before implementation: Added targeted resume-fallback and file-db diagnostics tests before implementation",
+          "raw": {
+            "question": "Added targeted resume-fallback and file-db diagnostics tests before implementation",
+            "chosen": "Added targeted resume-fallback and file-db diagnostics tests before implementation",
+            "alternatives": [],
+            "reasoning": "These tests codify the new resume reconstruction contract and preserve the existing DB happy path while exposing the current missing fallback behavior."
+          },
+          "significance": "high"
+        }
+      ],
+      "endedAt": "2026-03-26T22:17:47.783Z"
+    }
+  ],
+  "commits": [],
+  "filesChanged": [],
+  "projectId": "/Users/khaliqgant/Projects/AgentWorkforce/relay",
+  "tags": [],
+  "_trace": {
+    "startRef": "59e77b2ae829a39ada8ff9165fcd8d2b374d5daf",
+    "endRef": "59e77b2ae829a39ada8ff9165fcd8d2b374d5daf"
+  },
+  "completedAt": "2026-03-26T22:17:47.783Z",
+  "retrospective": {
+    "summary": "Added resume fallback tests covering cache reconstruction cases and file-db append diagnostics.",
+    "approach": "Standard approach",
+    "confidence": 0.83
+  }
+}
\ No newline at end of file
diff --git a/.trajectories/completed/2026-03/traj_4ghb800vy5ti.md b/.trajectories/completed/2026-03/traj_4ghb800vy5ti.md
new file mode 100644
index 000000000..a9f18423c
--- /dev/null
+++ b/.trajectories/completed/2026-03/traj_4ghb800vy5ti.md
@@ -0,0 +1,31 @@
+# Trajectory: Write tests for resume fallback behavior
+
+> **Status:** ✅ Completed
+> **Confidence:** 83%
+> **Started:** March 26, 2026 at 11:14 PM
+> **Completed:** March 26, 2026 at 11:17 PM
+
+---
+
+## Summary
+
+Added resume fallback tests covering cache reconstruction cases and file-db append diagnostics.
+
+**Approach:** Standard approach
+
+---
+
+## Key Decisions
+
+### Added targeted resume-fallback and file-db diagnostics tests before implementation
+- **Chose:** Added targeted resume-fallback and file-db diagnostics tests before implementation
+- **Reasoning:** These tests codify the new resume reconstruction contract and preserve the existing DB happy path while exposing the current missing fallback behavior.
+
+---
+
+## Chapters
+
+### 1. Work
+*Agent: default*
+
+- Added targeted resume-fallback and file-db diagnostics tests before implementation: Added targeted resume-fallback and file-db diagnostics tests before implementation
diff --git a/.trajectories/completed/traj_1774563046213_6359d1ff.json b/.trajectories/completed/traj_1774563046213_6359d1ff.json
new file mode 100644
index 000000000..de34079ae
--- /dev/null
+++ b/.trajectories/completed/traj_1774563046213_6359d1ff.json
@@ -0,0 +1,463 @@
+{
+  "id": "traj_1774563046213_6359d1ff",
+  "version": 1,
+  "task": {
+    "title": "fix-resume-fallback-workflow",
+    "source": {
+      "system": "workflow-runner",
+      "id": "beaff872dcafd78fa86bc1f7"
+    }
+  },
+  "status": "completed",
+  "startedAt": "2026-03-26T22:10:46.213Z",
+  "agents": [
+    {
+      "name": "orchestrator",
+      "role": "workflow-runner",
+      "joinedAt": "2026-03-26T22:10:46.213Z"
+    },
+    {
+      "name": "architect",
+      "role": "specialist",
+      "joinedAt": "2026-03-26T22:11:06.721Z"
+    },
+    {
+      "name": "runner-worker",
+      "role": "specialist",
+      "joinedAt": "2026-03-26T22:14:19.775Z"
+    },
+    {
+      "name": "db-worker",
+      "role": "specialist",
+      "joinedAt": "2026-03-26T22:14:19.775Z"
+    },
+    {
+      "name": "test-worker",
+      "role": "specialist",
+      "joinedAt": "2026-03-26T22:14:19.775Z"
+    },
+    {
+      "name": "reviewer",
+      "role": "specialist",
+      "joinedAt": "2026-03-26T22:20:07.221Z"
+    }
+  ],
+  "chapters": [
+    {
+      "id": "ch_ebc1566f",
+      "title": "Planning",
+      "agentName": "orchestrator",
+      "startedAt": "2026-03-26T22:10:46.213Z",
+      "events": [
+        {
+          "ts": 1774563046213,
+          "type": "note",
+          "content": "Purpose: Fix --resume to fall back to step-output cache when workflow-runs.jsonl is missing"
+        },
+        {
+          "ts": 1774563046213,
+          "type": "note",
+          "content": "Approach: 16-step dag workflow — Parsed 16 steps, 8 parallel tracks, 8 dependent steps, DAG validated, no cycles"
+        }
+      ],
+      "endedAt": "2026-03-26T22:10:51.529Z"
+    },
+    {
+      "id": "ch_0c0edcfe",
+      "title": "Execution: read-runner-resume, read-runner-execute, read-runner-fs-helpers, read-file-db, read-types, read-cli, read-existing-tests, read-runner-constructor",
+      "agentName": "orchestrator",
+      "startedAt": "2026-03-26T22:10:51.529Z",
+      "events": [],
+      "endedAt": "2026-03-26T22:11:06.718Z"
+    },
+    {
+      "id": "ch_5e690eeb",
+      "title": "Convergence: read-runner-resume + read-runner-execute + read-runner-fs-helpers + read-file-db + read-types + read-cli + read-existing-tests + read-runner-constructor",
+      "agentName": "orchestrator",
+      "startedAt": "2026-03-26T22:11:06.718Z",
+      "events": [
+        {
+          "ts": 1774563066720,
+          "type": "reflection",
+          "content": "read-runner-resume + read-runner-execute + read-runner-fs-helpers + read-file-db + read-types + read-cli + read-existing-tests + read-runner-constructor resolved. 8/8 steps completed. All steps completed on first attempt. Unblocking: design.",
+          "significance": "high",
+          "raw": {
+            "confidence": 0.75,
+            "focalPoints": [
+              "read-runner-resume: completed",
+              "read-runner-execute: completed",
+              "read-runner-fs-helpers: completed",
+              "read-file-db: completed",
+              "read-types: completed",
+              "read-cli: completed",
+              "read-existing-tests: completed",
+              "read-runner-constructor: completed"
+            ]
+          }
+        }
+      ],
+      "endedAt": "2026-03-26T22:11:06.721Z"
+    },
+    {
+      "id": "ch_3eefedd2",
+      "title": "Execution: design",
+      "agentName": "architect",
+      "startedAt": "2026-03-26T22:11:06.721Z",
+      "events": [
+        {
+          "ts": 1774563066721,
+          "type": "note",
+          "content": "\"design\": Design the fix for --resume falling back to the step-output filesystem cache",
+          "raw": {
+            "agent": "architect"
+          }
+        },
+        {
+          "ts": 1774563259772,
+          "type": "completion-marker",
+          "content": "\"design\" marker-based completion — Legacy STEP_COMPLETE marker observed (5 signal(s), 1 relevant channel post(s), 2 file change(s); signals=**Design complete.** Written to `docs/resume-fallback-design.md`., design, COMPLETE, >1u>4;2m>0q>4m<u>1u>4;2m▗ ▗   ▖ ▖  Claude Code v2.1.84, DESIGN_COMPLETE; channel=**Design complete.** Written to `docs/resume-fallback-design.md`.\n\n**Summary of approach:**\n\n1. **New method** `reconstructRunFromCache(runId, config, workflowN; files=modified:.claude/settings.json, created:docs/resume-fallback-design.md)",
+          "significance": "medium",
+          "raw": {
+            "stepName": "design",
+            "completionMode": "marker",
+            "reason": "Legacy STEP_COMPLETE marker observed",
+            "evidence": {
+              "summary": "5 signal(s), 1 relevant channel post(s), 2 file change(s)",
+              "signals": [
+                "**Design complete.** Written to `docs/resume-fallback-design.md`.",
+                "design",
+                "COMPLETE",
+                ">1u>4;2m>0q>4m<u>1u>4;2m▗ ▗   ▖ ▖  Claude Code v2.1.84",
+                "DESIGN_COMPLETE"
+              ],
+              "channelPosts": [
+                "**Design complete.** Written to `docs/resume-fallback-design.md`.\n\n**Summary of approach:**\n\n1. **New method** `reconstructRunFromCache(runId, config, workflowN"
+              ],
+              "files": [
+                "modified:.claude/settings.json",
+                "created:docs/resume-fallback-design.md"
+              ]
+            }
+          }
+        },
+        {
+          "ts": 1774563259773,
+          "type": "finding",
+          "content": "\"design\" completed → ✢",
+          "significance": "medium"
+        }
+      ],
+      "endedAt": "2026-03-26T22:14:19.775Z"
+    },
+    {
+      "id": "ch_fa296109",
+      "title": "Execution: impl-resume-fallback, impl-db-diagnostics, impl-tests",
+      "agentName": "orchestrator",
+      "startedAt": "2026-03-26T22:14:19.775Z",
+      "events": [],
+      "endedAt": "2026-03-26T22:14:19.776Z"
+    },
+    {
+      "id": "ch_b06d79ad",
+      "title": "Execution: impl-resume-fallback",
+      "agentName": "runner-worker",
+      "startedAt": "2026-03-26T22:14:19.776Z",
+      "events": [
+        {
+          "ts": 1774563259776,
+          "type": "note",
+          "content": "\"impl-resume-fallback\": Implement the resume fallback in runner.ts",
+          "raw": {
+            "agent": "runner-worker"
+          }
+        }
+      ],
+      "endedAt": "2026-03-26T22:14:19.776Z"
+    },
+    {
+      "id": "ch_a9803898",
+      "title": "Execution: impl-db-diagnostics",
+      "agentName": "db-worker",
+      "startedAt": "2026-03-26T22:14:19.776Z",
+      "events": [
+        {
+          "ts": 1774563259776,
+          "type": "note",
+          "content": "\"impl-db-diagnostics\": Improve file-db.ts diagnostics and add CLI warning improvements",
+          "raw": {
+            "agent": "db-worker"
+          }
+        }
+      ],
+      "endedAt": "2026-03-26T22:14:19.776Z"
+    },
+    {
+      "id": "ch_a5d7528c",
+      "title": "Execution: impl-tests",
+      "agentName": "test-worker",
+      "startedAt": "2026-03-26T22:14:19.776Z",
+      "events": [
+        {
+          "ts": 1774563259776,
+          "type": "note",
+          "content": "\"impl-tests\": Write tests for the resume fallback behavior",
+          "raw": {
+            "agent": "test-worker"
+          }
+        },
+        {
+          "ts": 1774563332201,
+          "type": "completion-evidence",
+          "content": "\"impl-db-diagnostics\" verification-based completion — Verification passed (6 signal(s), 2 file change(s), exit=0; signals=0, design, COMPLETE, 2026-03-26T22:14:22.209873Z ERROR codex_core::codex: failed to load skill /Users/khaliqgant/Projects/AgentWorkforce/relay/.agents/skills/creating-agent-skills-skill/SKILL.md: missing YAML frontmatter delimited by ---, 2026-03-26T22:14:22.209873Z ERROR codex_core::codex: failed to load skill /Users/khaliqgant/Projects/AgentWorkforce/relay/.agents/skills/creating-agent-skills-skill/SKILL.md: missing YAML frontmatter delimited by ---, Verification passed; files=modified:packages/sdk/src/workflows/cli.ts, modified:packages/sdk/src/workflows/file-db.ts; exit=0)",
+          "significance": "medium",
+          "raw": {
+            "stepName": "impl-db-diagnostics",
+            "completionMode": "verification",
+            "reason": "Verification passed",
+            "evidence": {
+              "summary": "6 signal(s), 2 file change(s), exit=0",
+              "signals": [
+                "0",
+                "design",
+                "COMPLETE",
+                "2026-03-26T22:14:22.209873Z ERROR codex_core::codex: failed to load skill /Users/khaliqgant/Projects/AgentWorkforce/relay/.agents/skills/creating-agent-skills-skill/SKILL.md: missing YAML frontmatter delimited by ---",
+                "2026-03-26T22:14:22.209873Z ERROR codex_core::codex: failed to load skill /Users/khaliqgant/Projects/AgentWorkforce/relay/.agents/skills/creating-agent-skills-skill/SKILL.md: missing YAML frontmatter delimited by ---",
+                "Verification passed"
+              ],
+              "files": [
+                "modified:packages/sdk/src/workflows/cli.ts",
+                "modified:packages/sdk/src/workflows/file-db.ts"
+              ],
+              "exitCode": 0
+            }
+          }
+        },
+        {
+          "ts": 1774563332201,
+          "type": "finding",
+          "content": "\"impl-db-diagnostics\" completed → Implemented the requested changes.\n\nChanges written to disk:\n- [file-db.ts](/Users/khaliqgant/Projects/AgentWorkforce/re",
+          "significance": "medium"
+        },
+        {
+          "ts": 1774563483555,
+          "type": "completion-evidence",
+          "content": "\"impl-tests\" verification-based completion — Verification passed (6 signal(s), 3 file change(s), exit=0; signals=design, unknown, COMPLETE, APPROVE, 2026-03-26T22:14:22.190950Z ERROR codex_core::codex: failed to load skill /Users/khaliqgant/Projects/AgentWorkforce/relay/.agents/skills/creating-agent-skills-skill/SKILL.md: missing YAML frontmatter delimited by ---, Verification passed; files=created:packages/sdk/src/__tests__/resume-fallback.test.ts, modified:packages/sdk/src/workflows/cli.ts, modified:packages/sdk/src/workflows/file-db.ts; exit=0)",
+          "significance": "medium",
+          "raw": {
+            "stepName": "impl-tests",
+            "completionMode": "verification",
+            "reason": "Verification passed",
+            "evidence": {
+              "summary": "6 signal(s), 3 file change(s), exit=0",
+              "signals": [
+                "design",
+                "unknown",
+                "COMPLETE",
+                "APPROVE",
+                "2026-03-26T22:14:22.190950Z ERROR codex_core::codex: failed to load skill /Users/khaliqgant/Projects/AgentWorkforce/relay/.agents/skills/creating-agent-skills-skill/SKILL.md: missing YAML frontmatter delimited by ---",
+                "Verification passed"
+              ],
+              "files": [
+                "created:packages/sdk/src/__tests__/resume-fallback.test.ts",
+                "modified:packages/sdk/src/workflows/cli.ts",
+                "modified:packages/sdk/src/workflows/file-db.ts"
+              ],
+              "exitCode": 0
+            }
+          }
+        },
+        {
+          "ts": 1774563483555,
+          "type": "finding",
+          "content": "\"impl-tests\" completed → Added [resume-fallback.test.ts](/Users/khaliqgant/Projects/AgentWorkforce/relay/packages/sdk/src/__tests__/resume-fallba",
+          "significance": "medium"
+        },
+        {
+          "ts": 1774563600439,
+          "type": "completion-evidence",
+          "content": "\"impl-resume-fallback\" verification-based completion — Verification passed (6 signal(s), 4 file change(s), exit=0; signals=0, design, COMPLETE, 2026-03-26T22:14:22.130729Z ERROR codex_core::codex: failed to load skill /Users/khaliqgant/Projects/AgentWorkforce/relay/.agents/skills/creating-agent-skills-skill/SKILL.md: missing YAML frontmatter delimited by ---, 2026-03-26T22:14:22.130729Z ERROR codex_core::codex: failed to load skill /Users/khaliqgant/Projects/AgentWorkforce/relay/.agents/skills/creating-agent-skills-skill/SKILL.md: missing YAML frontmatter delimited by ---, Verification passed; files=created:packages/sdk/src/__tests__/resume-fallback.test.ts, modified:packages/sdk/src/workflows/cli.ts, modified:packages/sdk/src/workflows/file-db.ts, modified:packages/sdk/src/workflows/runner.ts; exit=0)",
+          "significance": "medium",
+          "raw": {
+            "stepName": "impl-resume-fallback",
+            "completionMode": "verification",
+            "reason": "Verification passed",
+            "evidence": {
+              "summary": "6 signal(s), 4 file change(s), exit=0",
+              "signals": [
+                "0",
+                "design",
+                "COMPLETE",
+                "2026-03-26T22:14:22.130729Z ERROR codex_core::codex: failed to load skill /Users/khaliqgant/Projects/AgentWorkforce/relay/.agents/skills/creating-agent-skills-skill/SKILL.md: missing YAML frontmatter delimited by ---",
+                "2026-03-26T22:14:22.130729Z ERROR codex_core::codex: failed to load skill /Users/khaliqgant/Projects/AgentWorkforce/relay/.agents/skills/creating-agent-skills-skill/SKILL.md: missing YAML frontmatter delimited by ---",
+                "Verification passed"
+              ],
+              "files": [
+                "created:packages/sdk/src/__tests__/resume-fallback.test.ts",
+                "modified:packages/sdk/src/workflows/cli.ts",
+                "modified:packages/sdk/src/workflows/file-db.ts",
+                "modified:packages/sdk/src/workflows/runner.ts"
+              ],
+              "exitCode": 0
+            }
+          }
+        },
+        {
+          "ts": 1774563600439,
+          "type": "finding",
+          "content": "\"impl-resume-fallback\" completed → Implemented the resume fallback in [`packages/sdk/src/workflows/runner.ts`](/Users/khaliqgant/Projects/AgentWorkforce/re",
+          "significance": "medium"
+        }
+      ],
+      "endedAt": "2026-03-26T22:20:00.441Z"
+    },
+    {
+      "id": "ch_6069dd38",
+      "title": "Convergence: impl-resume-fallback + impl-db-diagnostics + impl-tests",
+      "agentName": "orchestrator",
+      "startedAt": "2026-03-26T22:20:00.441Z",
+      "events": [
+        {
+          "ts": 1774563600442,
+          "type": "reflection",
+          "content": "impl-resume-fallback + impl-db-diagnostics + impl-tests resolved. 3/3 steps completed. All steps completed on first attempt. Unblocking: verify-build.",
+          "significance": "high",
+          "raw": {
+            "confidence": 1,
+            "focalPoints": [
+              "impl-resume-fallback: completed",
+              "impl-db-diagnostics: completed",
+              "impl-tests: completed"
+            ]
+          }
+        }
+      ],
+      "endedAt": "2026-03-26T22:20:07.222Z"
+    },
+    {
+      "id": "ch_c4a26591",
+      "title": "Execution: review",
+      "agentName": "reviewer",
+      "startedAt": "2026-03-26T22:20:07.222Z",
+      "events": [
+        {
+          "ts": 1774563607222,
+          "type": "note",
+          "content": "\"review\": Review the resume fallback implementation",
+          "raw": {
+            "agent": "reviewer"
+          }
+        },
+        {
+          "ts": 1774563699435,
+          "type": "completion-evidence",
+          "content": "\"review\" verification-based completion — Verification passed (2 signal(s), 2 file change(s), exit=0; signals=0, Verification passed; files=modified:.claude/settings.json, created:docs/resume-fallback-review.md; exit=0)",
+          "significance": "medium",
+          "raw": {
+            "stepName": "review",
+            "completionMode": "verification",
+            "reason": "Verification passed",
+            "evidence": {
+              "summary": "2 signal(s), 2 file change(s), exit=0",
+              "signals": [
+                "0",
+                "Verification passed"
+              ],
+              "files": [
+                "modified:.claude/settings.json",
+                "created:docs/resume-fallback-review.md"
+              ],
+              "exitCode": 0
+            }
+          }
+        },
+        {
+          "ts": 1774563699435,
+          "type": "finding",
+          "content": "\"review\" completed → Review written to `docs/resume-fallback-review.md`.",
+          "significance": "medium"
+        }
+      ],
+      "endedAt": "2026-03-26T22:21:39.437Z"
+    },
+    {
+      "id": "ch_85a4f8b8",
+      "title": "Execution: fix-issues",
+      "agentName": "architect",
+      "startedAt": "2026-03-26T22:21:39.437Z",
+      "events": [
+        {
+          "ts": 1774563699437,
+          "type": "note",
+          "content": "\"fix-issues\": Address any review feedback or build/test failures",
+          "raw": {
+            "agent": "architect"
+          }
+        },
+        {
+          "ts": 1774563807587,
+          "type": "completion-marker",
+          "content": "\"fix-issues\" marker-based completion — Legacy STEP_COMPLETE marker observed (6 signal(s), 2 relevant channel post(s), 6 file change(s); signals=COMPLETE, **FIX_COMPLETE**, fix-issues, >1u>4;2m>0q>4m<u>1u>4;2m▗ ▗   ▖ ▖  Claude Code v2.1.84, FIX_COMPLETE, **[fix-issues] Output:**; channel=**FIX_COMPLETE**\n\n**Summary:** Review's blocking issue resolved. Tests executed and all passing.\n\n**What was fixed:**\n- `runner.ts` `reconstructRunFromCache` —  | **[fix-issues] Output:**\n```\npackages/sdk && npx\n      vitest --run src/__tests__/resume-fallback.test.ts 2>&1)\n      RUN  v3.2.4 /Users/khaliqgant/Projects/Age; files=modified:.claude/settings.json, created:.turbo/cache/4d0e6994e2101327-manifest.json, created:.turbo/cache/4d0e6994e2101327-meta.json, created:.turbo/cache/4d0e6994e2101327.tar.zst, created:.turbo/cache/b92de645f3f74692-manifest.json, created:.turbo/cache/b92de645f3f74692-meta.json)",
+          "significance": "medium",
+          "raw": {
+            "stepName": "fix-issues",
+            "completionMode": "marker",
+            "reason": "Legacy STEP_COMPLETE marker observed",
+            "evidence": {
+              "summary": "6 signal(s), 2 relevant channel post(s), 6 file change(s)",
+              "signals": [
+                "COMPLETE",
+                "**FIX_COMPLETE**",
+                "fix-issues",
+                ">1u>4;2m>0q>4m<u>1u>4;2m▗ ▗   ▖ ▖  Claude Code v2.1.84",
+                "FIX_COMPLETE",
+                "**[fix-issues] Output:**"
+              ],
+              "channelPosts": [
+                "**FIX_COMPLETE**\n\n**Summary:** Review's blocking issue resolved. Tests executed and all passing.\n\n**What was fixed:**\n- `runner.ts` `reconstructRunFromCache` — ",
+                "**[fix-issues] Output:**\n```\npackages/sdk && npx\n      vitest --run src/__tests__/resume-fallback.test.ts 2>&1)\n      RUN  v3.2.4 /Users/khaliqgant/Projects/Age"
+              ],
+              "files": [
+                "modified:.claude/settings.json",
+                "created:.turbo/cache/4d0e6994e2101327-manifest.json",
+                "created:.turbo/cache/4d0e6994e2101327-meta.json",
+                "created:.turbo/cache/4d0e6994e2101327.tar.zst",
+                "created:.turbo/cache/b92de645f3f74692-manifest.json",
+                "created:.turbo/cache/b92de645f3f74692-meta.json"
+              ]
+            }
+          }
+        },
+        {
+          "ts": 1774563807588,
+          "type": "finding",
+          "content": "\"fix-issues\" completed → ✢",
+          "significance": "medium"
+        }
+      ],
+      "endedAt": "2026-03-26T22:23:27.597Z"
+    },
+    {
+      "id": "ch_dce801dd",
+      "title": "Retrospective",
+      "agentName": "orchestrator",
+      "startedAt": "2026-03-26T22:23:27.597Z",
+      "events": [
+        {
+          "ts": 1774563807597,
+          "type": "reflection",
+          "content": "All 16 steps completed in 13min. (completed in 13 minutes)",
+          "significance": "high"
+        }
+      ],
+      "endedAt": "2026-03-26T22:23:27.597Z"
+    }
+  ],
+  "completedAt": "2026-03-26T22:23:27.597Z",
+  "retrospective": {
+    "summary": "All 16 steps completed in 13min.",
+    "approach": "dag workflow (5 agents)",
+    "confidence": 0.84375,
+    "learnings": [],
+    "challenges": []
+  }
+}
\ No newline at end of file
diff --git a/.trajectories/index.json b/.trajectories/index.json
index b7ad69bc3..c6b013436 100644
--- a/.trajectories/index.json
+++ b/.trajectories/index.json
@@ -1,6 +1,6 @@
 {
   "version": 1,
-  "lastUpdated": "2026-03-26T20:11:23.431Z",
+  "lastUpdated": "2026-03-26T22:17:47.879Z",
   "trajectories": {
     "traj_1b1dj40sl6jl": {
       "title": "Revert aggressive retry logic in relay-pty-orchestrator",
@@ -919,6 +919,13 @@
       "startedAt": "2026-03-26T19:53:02.196Z",
       "completedAt": "2026-03-26T20:11:23.315Z",
       "path": "/Users/khaliqgant/Projects/Agent Workforce/relay/.trajectories/completed/2026-03/traj_jhnp7malmci4.json"
+    },
+    "traj_4ghb800vy5ti": {
+      "title": "Write tests for resume fallback behavior",
+      "status": "completed",
+      "startedAt": "2026-03-26T22:14:28.964Z",
+      "completedAt": "2026-03-26T22:17:47.783Z",
+      "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay/.trajectories/completed/2026-03/traj_4ghb800vy5ti.json"
     }
   }
-}
\ No newline at end of file
+}
diff --git a/package-lock.json b/package-lock.json
index f00a8f87a..d145f3708 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "agent-relay",
-  "version": "3.2.18",
+  "version": "3.2.21",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "agent-relay",
-      "version": "3.2.18",
+      "version": "3.2.21",
       "bundleDependencies": [
         "@agent-relay/cloud",
         "@agent-relay/config",
@@ -24,14 +24,14 @@
         "web"
       ],
       "dependencies": {
-        "@agent-relay/cloud": "3.2.18",
-        "@agent-relay/config": "3.2.18",
-        "@agent-relay/hooks": "3.2.18",
-        "@agent-relay/sdk": "3.2.18",
-        "@agent-relay/telemetry": "3.2.18",
-        "@agent-relay/trajectory": "3.2.18",
-        "@agent-relay/user-directory": "3.2.18",
-        "@agent-relay/utils": "3.2.18",
+        "@agent-relay/cloud": "3.2.21",
+        "@agent-relay/config": "3.2.21",
+        "@agent-relay/hooks": "3.2.21",
+        "@agent-relay/sdk": "3.2.21",
+        "@agent-relay/telemetry": "3.2.21",
+        "@agent-relay/trajectory": "3.2.21",
+        "@agent-relay/user-directory": "3.2.21",
+        "@agent-relay/utils": "3.2.21",
         "@aws-sdk/client-s3": "^3.1004.0",
         "@modelcontextprotocol/sdk": "^1.0.0",
         "@relaycast/mcp": "1.0.0",
@@ -1308,7 +1308,7 @@
     },
     "node_modules/@clack/prompts/node_modules/is-unicode-supported": {
       "version": "1.3.0",
-      "extraneous": true,
+      "dev": true,
       "inBundle": true,
       "license": "MIT",
       "engines": {
@@ -14961,10 +14961,10 @@
     },
     "packages/acp-bridge": {
       "name": "@agent-relay/acp-bridge",
-      "version": "3.2.18",
+      "version": "3.2.21",
       "license": "Apache-2.0",
       "dependencies": {
-        "@agent-relay/sdk": "3.2.18",
+        "@agent-relay/sdk": "3.2.21",
         "@agentclientprotocol/sdk": "^0.12.0"
       },
       "bin": {
@@ -14981,13 +14981,13 @@
     },
     "packages/brand": {
       "name": "@agent-relay/brand",
-      "version": "1.1.0"
+      "version": "3.2.21"
     },
     "packages/cloud": {
       "name": "@agent-relay/cloud",
-      "version": "3.2.18",
+      "version": "3.2.21",
       "dependencies": {
-        "@agent-relay/config": "3.2.18",
+        "@agent-relay/config": "3.2.21",
         "@aws-sdk/client-s3": "^3.1004.0",
         "ignore": "^7.0.5",
         "tar": "^7.5.10"
@@ -15000,7 +15000,7 @@
     },
     "packages/config": {
       "name": "@agent-relay/config",
-      "version": "3.2.18",
+      "version": "3.2.21",
       "dependencies": {
         "zod": "^3.23.8",
         "zod-to-json-schema": "^3.23.1"
@@ -15013,11 +15013,11 @@
     },
     "packages/hooks": {
       "name": "@agent-relay/hooks",
-      "version": "3.2.18",
+      "version": "3.2.21",
       "dependencies": {
-        "@agent-relay/config": "3.2.18",
-        "@agent-relay/sdk": "3.2.18",
-        "@agent-relay/trajectory": "3.2.18"
+        "@agent-relay/config": "3.2.21",
+        "@agent-relay/sdk": "3.2.21",
+        "@agent-relay/trajectory": "3.2.21"
       },
       "devDependencies": {
         "@types/node": "^22.19.3",
@@ -15027,9 +15027,9 @@
     },
     "packages/memory": {
       "name": "@agent-relay/memory",
-      "version": "3.2.18",
+      "version": "3.2.21",
       "dependencies": {
-        "@agent-relay/hooks": "3.2.18"
+        "@agent-relay/hooks": "3.2.21"
       },
       "devDependencies": {
         "@types/node": "^22.19.3",
@@ -15039,11 +15039,11 @@
     },
     "packages/openclaw": {
       "name": "@agent-relay/openclaw",
-      "version": "3.2.18",
+      "version": "3.2.21",
       "hasInstallScript": true,
       "license": "Apache-2.0",
       "dependencies": {
-        "@agent-relay/sdk": "3.2.18",
+        "@agent-relay/sdk": "3.2.21",
         "@relaycast/sdk": "^1.0.0",
         "ws": "^8.0.0"
       },
@@ -15867,9 +15867,9 @@
     },
     "packages/policy": {
       "name": "@agent-relay/policy",
-      "version": "3.2.18",
+      "version": "3.2.21",
       "dependencies": {
-        "@agent-relay/config": "3.2.18"
+        "@agent-relay/config": "3.2.21"
       },
       "devDependencies": {
         "@types/node": "^22.19.3",
@@ -15879,9 +15879,9 @@
     },
     "packages/sdk": {
       "name": "@agent-relay/sdk",
-      "version": "3.2.18",
+      "version": "3.2.21",
       "dependencies": {
-        "@agent-relay/config": "3.2.18",
+        "@agent-relay/config": "3.2.21",
         "@relaycast/sdk": "^1.1.0",
         "@sinclair/typebox": "^0.34.48",
         "chalk": "^4.1.2",
@@ -15965,7 +15965,7 @@
     },
     "packages/telemetry": {
       "name": "@agent-relay/telemetry",
-      "version": "3.2.18",
+      "version": "3.2.21",
       "dependencies": {
         "posthog-node": "^4.0.1"
       },
@@ -15977,9 +15977,9 @@
     },
     "packages/trajectory": {
       "name": "@agent-relay/trajectory",
-      "version": "3.2.18",
+      "version": "3.2.21",
       "dependencies": {
-        "@agent-relay/config": "3.2.18"
+        "@agent-relay/config": "3.2.21"
       },
       "devDependencies": {
         "@types/node": "^22.19.3",
@@ -15989,9 +15989,9 @@
     },
     "packages/user-directory": {
       "name": "@agent-relay/user-directory",
-      "version": "3.2.18",
+      "version": "3.2.21",
       "dependencies": {
-        "@agent-relay/utils": "3.2.18"
+        "@agent-relay/utils": "3.2.21"
       },
       "devDependencies": {
         "@types/node": "^22.19.3",
@@ -16001,9 +16001,9 @@
     },
     "packages/utils": {
       "name": "@agent-relay/utils",
-      "version": "3.2.18",
+      "version": "3.2.21",
       "dependencies": {
-        "@agent-relay/config": "3.2.18",
+        "@agent-relay/config": "3.2.21",
         "compare-versions": "^6.1.1"
       },
       "devDependencies": {
diff --git a/packages/sdk/src/__tests__/resume-fallback.test.ts b/packages/sdk/src/__tests__/resume-fallback.test.ts
new file mode 100644
index 000000000..5f4e4f147
--- /dev/null
+++ b/packages/sdk/src/__tests__/resume-fallback.test.ts
@@ -0,0 +1,415 @@
+/**
+ * Tests for resuming workflow execution from cached step outputs when the JSONL
+ * run database is missing or unavailable.
+ */
+
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import {
+  chmodSync,
+  mkdirSync,
+  mkdtempSync,
+  rmSync,
+  writeFileSync,
+} from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import type { WorkflowDb } from '../workflows/runner.js';
+import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../workflows/types.js';
+
+// ── Mock fetch ───────────────────────────────────────────────────────────────
+
+const mockFetch = vi.fn().mockResolvedValue({
+  ok: true,
+  json: () => Promise.resolve({ data: { api_key: 'rk_live_test', workspace_id: 'ws-test' } }),
+  text: () => Promise.resolve(''),
+});
+vi.stubGlobal('fetch', mockFetch);
+
+// ── Mock RelayCast SDK ───────────────────────────────────────────────────────
+
+const mockRelaycastAgent = {
+  send: vi.fn().mockResolvedValue(undefined),
+  heartbeat: vi.fn().mockResolvedValue(undefined),
+  channels: {
+    create: vi.fn().mockResolvedValue(undefined),
+    join: vi.fn().mockResolvedValue(undefined),
+    invite: vi.fn().mockResolvedValue(undefined),
+  },
+};
+
+const mockRelaycast = {
+  agents: {
+    register: vi.fn().mockResolvedValue({ token: 'token-1' }),
+  },
+  as: vi.fn().mockReturnValue(mockRelaycastAgent),
+};
+
+class MockRelayError extends Error {
+  code: string;
+  constructor(code: string, message: string, status = 400) {
+    super(message);
+    this.code = code;
+    this.name = 'RelayError';
+    (this as any).status = status;
+  }
+}
+
+vi.mock('@relaycast/sdk', () => ({
+  RelayCast: vi.fn().mockImplementation(() => mockRelaycast),
+  RelayError: MockRelayError,
+}));
+
+// ── Mock AgentRelay ──────────────────────────────────────────────────────────
+
+let waitForExitFn: (ms?: number) => Promise<'exited' | 'timeout' | 'released'>;
+
+const mockAgent = {
+  name: 'test-agent-abc',
+  get waitForExit() { return waitForExitFn; },
+  get waitForIdle() { return vi.fn().mockImplementation(() => new Promise(() => {})); },
+  release: vi.fn().mockResolvedValue(undefined),
+};
+
+const mockHuman = {
+  name: 'WorkflowRunner',
+  sendMessage: vi.fn().mockResolvedValue(undefined),
+};
+
+const mockRelayInstance = {
+  spawnPty: vi.fn().mockImplementation(async ({ name, task }: { name: string; task?: string }) => {
+    const stepComplete = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim();
+    const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT');
+    const output = isReview
+      ? 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n'
+      : stepComplete
+        ? `STEP_COMPLETE:${stepComplete}\n`
+        : 'STEP_COMPLETE:unknown\n';
+
+    queueMicrotask(() => {
+      if (typeof mockRelayInstance.onWorkerOutput === 'function') {
+        mockRelayInstance.onWorkerOutput({ name, chunk: output });
+      }
+    });
+
+    return { ...mockAgent, name };
+  }),
+  human: vi.fn().mockReturnValue(mockHuman),
+  shutdown: vi.fn().mockResolvedValue(undefined),
+  onBrokerStderr: vi.fn().mockReturnValue(() => {}),
+  onWorkerOutput: null as ((frame: { name: string; chunk: string }) => void) | null,
+  onMessageReceived: null as any,
+  onAgentSpawned: null as any,
+  onAgentReleased: null as any,
+  onAgentExited: null as any,
+  onAgentIdle: null as any,
+  onDeliveryUpdate: null as any,
+  listAgentsRaw: vi.fn().mockResolvedValue([]),
+};
+
+vi.mock('../relay.js', () => ({
+  AgentRelay: vi.fn().mockImplementation(() => mockRelayInstance),
+}));
+
+// Import after mocking
+const { WorkflowRunner } = await import('../workflows/runner.js');
+const { JsonFileWorkflowDb } = await import('../workflows/file-db.js');
+
+// ── Helpers ──────────────────────────────────────────────────────────────────
+
+function makeDb(): WorkflowDb {
+  const runs = new Map<string, WorkflowRunRow>();
+  const steps = new Map<string, WorkflowStepRow>();
+
+  return {
+    insertRun: vi.fn(async (run: WorkflowRunRow) => {
+      runs.set(run.id, { ...run });
+    }),
+    updateRun: vi.fn(async (id: string, patch: Partial<WorkflowRunRow>) => {
+      const existing = runs.get(id);
+      if (existing) runs.set(id, { ...existing, ...patch });
+    }),
+    getRun: vi.fn(async (id: string) => {
+      const run = runs.get(id);
+      return run ? { ...run } : null;
+    }),
+    insertStep: vi.fn(async (step: WorkflowStepRow) => {
+      steps.set(step.id, { ...step });
+    }),
+    updateStep: vi.fn(async (id: string, patch: Partial<WorkflowStepRow>) => {
+      const existing = steps.get(id);
+      if (existing) steps.set(id, { ...existing, ...patch });
+    }),
+    getStepsByRunId: vi.fn(async (runId: string) => {
+      return [...steps.values()].filter((s) => s.runId === runId);
+    }),
+  };
+}
+
+function makeResumeConfig(): RelayYamlConfig {
+  return {
+    version: '1',
+    name: 'test-resume-fallback',
+    swarm: { pattern: 'dag' },
+    agents: [
+      { name: 'agent-a', cli: 'claude' },
+    ],
+    workflows: [
+      {
+        name: 'default',
+        steps: [
+          { name: 'step-a', agent: 'agent-a', task: 'Do step A' },
+          { name: 'step-b', agent: 'agent-a', task: 'Do step B', dependsOn: ['step-a'] },
+          { name: 'step-c', agent: 'agent-a', task: 'Do step C', dependsOn: ['step-b'] },
+        ],
+      },
+    ],
+    trajectories: false,
+  };
+}
+
+function makeTemplateConfig(): RelayYamlConfig {
+  return {
+    version: '1',
+    name: 'test-resume-template',
+    swarm: { pattern: 'dag' },
+    agents: [
+      { name: 'agent-a', cli: 'claude' },
+    ],
+    workflows: [
+      {
+        name: 'default',
+        steps: [
+          { name: 'step-a', agent: 'agent-a', task: 'Generate input' },
+          {
+            name: 'step-b',
+            agent: 'agent-a',
+            task: 'Use cached value: {{steps.step-a.output}}',
+            dependsOn: ['step-a'],
+          },
+        ],
+      },
+    ],
+    trajectories: false,
+  };
+}
+
+function makeRunRow(runId: string, config: RelayYamlConfig, status: WorkflowRunRow['status'] = 'failed'): WorkflowRunRow {
+  const now = new Date().toISOString();
+  return {
+    id: runId,
+    workspaceId: 'ws-test',
+    workflowName: 'default',
+    pattern: config.swarm.pattern,
+    status,
+    config,
+    startedAt: now,
+    createdAt: now,
+    updatedAt: now,
+  };
+}
+
+function makeStepRow(
+  runId: string,
+  stepName: string,
+  task: string,
+  dependsOn: string[] = [],
+  status: WorkflowStepRow['status'] = 'pending',
+  output?: string
+): WorkflowStepRow {
+  const now = new Date().toISOString();
+  return {
+    id: `${runId}-${stepName}`,
+    runId,
+    stepName,
+    agentName: 'agent-a',
+    stepType: 'agent',
+    status,
+    task,
+    dependsOn,
+    output,
+    retryCount: 0,
+    createdAt: now,
+    updatedAt: now,
+    startedAt: status !== 'pending' ? now : undefined,
+    completedAt: status === 'completed' ? now : undefined,
+  };
+}
+
+function writeCachedOutput(tmpDir: string, runId: string, stepName: string, output: string): void {
+  const outputDir = path.join(tmpDir, '.agent-relay', 'step-outputs', runId);
+  mkdirSync(outputDir, { recursive: true });
+  writeFileSync(path.join(outputDir, `${stepName}.md`), output);
+}
+
+// ── Tests ────────────────────────────────────────────────────────────────────
+
+describe('resume fallback to step-output cache', () => {
+  let db: WorkflowDb;
+  let runner: InstanceType<typeof WorkflowRunner>;
+  let tmpDir: string;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    waitForExitFn = vi.fn().mockResolvedValue('exited');
+    mockRelayInstance.onWorkerOutput = null;
+    tmpDir = mkdtempSync(path.join(os.tmpdir(), 'resume-fallback-'));
+    db = makeDb();
+    runner = new WorkflowRunner({ db, workspaceId: 'ws-test', cwd: tmpDir });
+  });
+
+  afterEach(() => {
+    try { rmSync(tmpDir, { recursive: true, force: true }); } catch {}
+  });
+
+  it('should reconstruct run from step-output cache when JSONL missing', async () => {
+    const runId = 'resume-cache-run';
+    const config = makeResumeConfig();
+    writeCachedOutput(tmpDir, runId, 'step-a', 'cached-a');
+    writeCachedOutput(tmpDir, runId, 'step-b', 'cached-b');
+
+    const events: Array<{ type: string; stepName?: string }> = [];
+    runner.on((event) => {
+      if ('stepName' in event) {
+        events.push({ type: event.type, stepName: event.stepName });
+      }
+    });
+
+    const run = await (runner as any).resume(runId, undefined, config);
+    expect(run.status, run.error).toBe('completed');
+
+    const startedSteps = events.filter((e) => e.type === 'step:started').map((e) => e.stepName);
+    expect(startedSteps).not.toContain('step-a');
+    expect(startedSteps).not.toContain('step-b');
+    expect(startedSteps).toContain('step-c');
+  });
+
+  it('should throw "not found" when neither JSONL nor cache exists', async () => {
+    const config = makeResumeConfig();
+
+    await expect((runner as any).resume('nonexistent-id', undefined, config)).rejects.toThrow('not found');
+  });
+
+  it('should prefer JSONL database over step-output cache', async () => {
+    const runId = 'resume-db-run';
+    const config = makeResumeConfig();
+    const dbPath = path.join(tmpDir, '.agent-relay', 'workflow-runs.jsonl');
+    const fileDb = new JsonFileWorkflowDb(dbPath);
+    const dbRunner = new WorkflowRunner({ db: fileDb, workspaceId: 'ws-test', cwd: tmpDir });
+
+    await fileDb.insertRun(makeRunRow(runId, config));
+    await fileDb.insertStep(makeStepRow(runId, 'step-a', 'Do step A', [], 'failed'));
+    await fileDb.insertStep(makeStepRow(runId, 'step-b', 'Do step B', ['step-a'], 'pending'));
+    await fileDb.insertStep(makeStepRow(runId, 'step-c', 'Do step C', ['step-b'], 'pending'));
+
+    writeCachedOutput(tmpDir, runId, 'step-a', 'cached-a-from-fallback');
+
+    const events: Array<{ type: string; stepName?: string }> = [];
+    dbRunner.on((event) => {
+      if ('stepName' in event) {
+        events.push({ type: event.type, stepName: event.stepName });
+      }
+    });
+
+    const run = await dbRunner.resume(runId);
+    expect(run.status, run.error).toBe('completed');
+
+    const startedSteps = events.filter((e) => e.type === 'step:started').map((e) => e.stepName);
+    expect(startedSteps).toContain('step-a');
+    expect(startedSteps).toContain('step-b');
+    expect(startedSteps).toContain('step-c');
+  });
+
+  it('should handle empty step-output directory gracefully', async () => {
+    const runId = 'resume-empty-cache';
+    const config = makeResumeConfig();
+    mkdirSync(path.join(tmpDir, '.agent-relay', 'step-outputs', runId), { recursive: true });
+
+    const events: Array<{ type: string; stepName?: string }> = [];
+    runner.on((event) => {
+      if ('stepName' in event) {
+        events.push({ type: event.type, stepName: event.stepName });
+      }
+    });
+
+    const run = await (runner as any).resume(runId, undefined, config);
+    expect(run.status, run.error).toBe('completed');
+
+    const startedSteps = events.filter((e) => e.type === 'step:started').map((e) => e.stepName);
+    expect(startedSteps).toContain('step-a');
+    expect(startedSteps).toContain('step-b');
+    expect(startedSteps).toContain('step-c');
+  });
+
+  it('should load cached output into step template variables', async () => {
+    const runId = 'resume-template-cache';
+    const config = makeTemplateConfig();
+    writeCachedOutput(tmpDir, runId, 'step-a', 'hello world');
+
+    const run = await (runner as any).resume(runId, undefined, config);
+    expect(run.status, run.error).toBe('completed');
+
+    const spawnedTasks = mockRelayInstance.spawnPty.mock.calls.map(
+      ([args]) => (args as { task?: string }).task ?? ''
+    );
+    expect(spawnedTasks.some((task) => task.includes('Use cached value: hello world'))).toBe(true);
+  });
+
+  it('should skip .report.json files when scanning step outputs', async () => {
+    const runId = 'resume-report-cache';
+    const config = makeResumeConfig();
+    const outputDir = path.join(tmpDir, '.agent-relay', 'step-outputs', runId);
+    mkdirSync(outputDir, { recursive: true });
+    writeFileSync(path.join(outputDir, 'step-a.md'), 'cached-a');
+    writeFileSync(path.join(outputDir, 'step-a.report.json'), '{"summary":"done"}');
+    writeFileSync(path.join(outputDir, 'step-b.report.json'), '{"summary":"metadata only"}');
+
+    const events: Array<{ type: string; stepName?: string }> = [];
+    runner.on((event) => {
+      if ('stepName' in event) {
+        events.push({ type: event.type, stepName: event.stepName });
+      }
+    });
+
+    const run = await (runner as any).resume(runId, undefined, config);
+    expect(run.status, run.error).toBe('completed');
+
+    const startedSteps = events.filter((e) => e.type === 'step:started').map((e) => e.stepName);
+    expect(startedSteps).not.toContain('step-a');
+    expect(startedSteps).toContain('step-b');
+    expect(startedSteps).toContain('step-c');
+  });
+});
+
+describe('file-db append diagnostics', () => {
+  let tmpDir: string;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    tmpDir = mkdtempSync(path.join(os.tmpdir(), 'file-db-warn-'));
+  });
+
+  afterEach(() => {
+    try {
+      chmodSync(path.join(tmpDir, 'readonly'), 0o755);
+    } catch {}
+    try { rmSync(tmpDir, { recursive: true, force: true }); } catch {}
+  });
+
+  it('should warn once when append fails', async () => {
+    const readonlyDir = path.join(tmpDir, 'readonly');
+    mkdirSync(readonlyDir, { recursive: true });
+    chmodSync(readonlyDir, 0o555);
+
+    const dbPath = path.join(readonlyDir, 'workflow-runs.jsonl');
+    const fileDb = new JsonFileWorkflowDb(dbPath);
+    const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
+    const config = makeResumeConfig();
+
+    await fileDb.insertRun(makeRunRow('warn-run-1', config));
+    await fileDb.insertRun(makeRunRow('warn-run-2', config));
+
+    expect(warnSpy).toHaveBeenCalledTimes(1);
+
+    warnSpy.mockRestore();
+  });
+});
diff --git a/packages/sdk/src/workflows/cli.ts b/packages/sdk/src/workflows/cli.ts
index 7a33e77ef..5d0077a51 100644
--- a/packages/sdk/src/workflows/cli.ts
+++ b/packages/sdk/src/workflows/cli.ts
@@ -52,6 +52,21 @@ type ExecuteOptions = {
   previousRunId?: string;
 };
 
+/** CLI flags whose value is the argument that immediately follows them. */
+const FLAGS_WITH_VALUES = new Set(['--resume', '--workflow', '--start-from', '--previous-run-id']);
+
+/** Return the first positional (non `--`) argument, skipping values owned by FLAGS_WITH_VALUES. */
+function getYamlPathArg(args: string[]): string | undefined {
+  let index = 0;
+  while (index < args.length) {
+    const token = args[index];
+    if (!token.startsWith('--')) return token;
+    // A value-taking flag also consumes the argument that follows it.
+    index += FLAGS_WITH_VALUES.has(token) ? 2 : 1;
+  }
+  return undefined;
+}
+
 interface RenderableTask {
   output?: string;
   title: string;
@@ -302,6 +317,7 @@ async function runWithListr(
 
 async function main(): Promise<void> {
   const args = process.argv.slice(2);
+  const yamlPath = getYamlPathArg(args);
 
   if (args.length === 0 || args.includes('--help')) {
     printUsage();
@@ -358,7 +374,37 @@ async function main(): Promise<void> {
           break;
       }
     });
-    const result = await runner.resume(runId);
+    let result: RunnerResult;
+    try {
+      const resumeConfig = yamlPath ? await runner.parseYamlFile(yamlPath) : undefined;
+      if (resumeConfig) {
+        console.warn(
+          chalk.yellow(
+            '[workflow] warning: resuming with current config from disk — ' +
+              'if the workflow YAML changed since the original run, behaviour may differ'
+          )
+        );
+      }
+      result = await runner.resume(runId, undefined, resumeConfig);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      const isRunNotFound = message.startsWith(`Run "${runId}" not found`);
+      if (isRunNotFound) {
+        if (fileDb.hasStepOutputs(runId)) {
+          console.error(
+            chalk.red(
+              `Error: ${message}. Step outputs exist for this run, but persisted run state is missing from ${dbPath}. ` +
+                `Use --start-from with --previous-run-id ${runId} to recover from the cached step outputs instead.`
+            )
+          );
+        } else {
+          console.error(chalk.red(`Error: ${message}`));
+        }
+      } else {
+        console.error(chalk.red(`Error: ${message}`));
+      }
+      process.exit(1);
+    }
 
     if (result.status === 'completed') {
       console.log(chalk.green('\nWorkflow completed successfully.'));
@@ -371,7 +417,6 @@ async function main(): Promise<void> {
   }
 
   // ── Normal / validate / dry-run mode ──────────────────────────────────────
-  const yamlPath = args[0];
   let workflowName: string | undefined;
 
   const workflowIdx = args.indexOf('--workflow');
@@ -391,6 +436,12 @@ async function main(): Promise<void> {
     previousRunId = args[prevRunIdx + 1];
   }
 
+  if (!yamlPath) {
+    console.error(chalk.red('Error: workflow YAML path is required'));
+    printUsage();
+    process.exit(1);
+  }
+
   const isValidate = args.includes('--validate');
   const isDryRun = !!process.env.DRY_RUN;
 
diff --git a/packages/sdk/src/workflows/file-db.ts b/packages/sdk/src/workflows/file-db.ts
index dc59bc630..35f2a3bdd 100644
--- a/packages/sdk/src/workflows/file-db.ts
+++ b/packages/sdk/src/workflows/file-db.ts
@@ -1,4 +1,4 @@
-import { appendFileSync, mkdirSync, readFileSync } from 'node:fs';
+import { appendFileSync, existsSync, mkdirSync, readdirSync, readFileSync } from 'node:fs';
 import path from 'node:path';
 
 import type { WorkflowRunRow, WorkflowStepRow } from './types.js';
@@ -24,6 +24,7 @@ export class JsonFileWorkflowDb implements WorkflowDb {
 
   /** Whether the storage directory is writable. False = silent no-op mode. */
   private readonly writable: boolean;
+  private appendFailedOnce = false;
 
   constructor(filePath: string) {
     this.filePath = filePath;
@@ -43,14 +44,32 @@ export class JsonFileWorkflowDb implements WorkflowDb {
     return this.writable;
   }
 
+  /** True when step-outputs/<runId> beside the DB file is a non-empty dir; ids escaping that root yield false. */
+  hasStepOutputs(runId: string): boolean {
+    try {
+      const root = path.resolve(path.dirname(this.filePath), 'step-outputs');
+      const dir = path.resolve(root, runId);
+      return dir.startsWith(root + path.sep) && existsSync(dir) && readdirSync(dir).length > 0;
+    } catch { return false; }
+  }
+
   // ── Private helpers ─────────────────────────────────────────────────────
 
   private append(entry: DbEntry): void {
     if (!this.writable) return;
     try {
       appendFileSync(this.filePath, JSON.stringify(entry) + '\n', 'utf8');
-    } catch {
-      // Non-critical — workflow execution continues; resume won't be available.
+    } catch (err) {
+      // Persisting run state is best-effort: only the first failure is reported, later ones stay silent.
+      if (this.appendFailedOnce) return;
+      this.appendFailedOnce = true;
+      const reason = err instanceof Error ? err.message : String(err);
+      console.warn(
+        `[workflow] warning: failed to write run state to ${this.filePath}` +
+          ' — --resume will not be available for this run. Use --start-from instead. ' +
+          `Error: ${reason}`
+      );
     }
   }
 
diff --git a/packages/sdk/src/workflows/runner.ts b/packages/sdk/src/workflows/runner.ts
index 8d96c102b..4b2846ef0 100644
--- a/packages/sdk/src/workflows/runner.ts
+++ b/packages/sdk/src/workflows/runner.ts
@@ -1952,14 +1952,25 @@ export class WorkflowRunner {
   }
 
   /** Resume a previously paused or partially completed run. */
-  async resume(runId: string, vars?: VariableContext): Promise<WorkflowRunRow> {
+  async resume(runId: string, vars?: VariableContext, config?: RelayYamlConfig): Promise<WorkflowRunRow> {
     // Set up abort controller early so callers can abort() even during setup
     this.abortController = new AbortController();
     this.paused = false;
 
-    const run = await this.db.getRun(runId);
+    let run = await this.db.getRun(runId);
+    let stepStates = new Map<string, StepState>();
     if (!run) {
-      throw new Error(`Run "${runId}" not found`);
+      const reconstructed = this.reconstructRunFromCache(runId, config);
+      if (!reconstructed) {
+        throw new Error(`Run "${runId}" not found (no database entry or cached step outputs)`);
+      }
+      this.log('[resume] Reconstructing run from cached step outputs (workflow-runs.jsonl missing)');
+      run = reconstructed.run;
+      stepStates = reconstructed.stepStates;
+      await this.db.insertRun(run);
+      for (const [, state] of stepStates) {
+        await this.db.insertStep(state.row);
+      }
     }
     this.persistRunIdHint(runId);
 
@@ -1967,25 +1978,26 @@ export class WorkflowRunner {
       throw new Error(`Run "${runId}" is in status "${run.status}" and cannot be resumed`);
     }
 
-    const config = vars ? this.resolveVariables(run.config, vars) : run.config;
+    const resolvedConfig = vars ? this.resolveVariables(run.config, vars) : run.config;
 
     // Resolve path definitions (same as execute()) so workdir lookups work on resume
-    const pathResult = this.resolvePathDefinitions(config.paths, this.cwd);
+    const pathResult = this.resolvePathDefinitions(resolvedConfig.paths, this.cwd);
     if (pathResult.errors.length > 0) {
       throw new Error(`Path validation failed:\n  ${pathResult.errors.join('\n  ')}`);
     }
     this.resolvedPaths = pathResult.resolved;
 
-    const workflows = config.workflows ?? [];
+    const workflows = resolvedConfig.workflows ?? [];
     const workflow = workflows.find((w) => w.name === run.workflowName);
     if (!workflow) {
       throw new Error(`Workflow "${run.workflowName}" not found in stored config`);
     }
 
-    const existingSteps = await this.db.getStepsByRunId(runId);
-    const stepStates = new Map<string, StepState>();
-    for (const stepRow of existingSteps) {
-      stepStates.set(stepRow.stepName, { row: stepRow });
+    if (stepStates.size === 0) {
+      const existingSteps = await this.db.getStepsByRunId(runId);
+      for (const stepRow of existingSteps) {
+        stepStates.set(stepRow.stepName, { row: stepRow });
+      }
     }
 
     // Reset failed steps to pending for retry
@@ -2006,7 +2018,7 @@ export class WorkflowRunner {
     return this.runWorkflowCore({
       run,
       workflow,
-      config,
+      config: resolvedConfig,
       stepStates,
       isResume: true,
     });
@@ -6547,8 +6559,16 @@ export class WorkflowRunner {
       .slice(0, 32);
   }
 
+  /** Throws unless runId is one safe path segment: non-empty, not ".", and free of "..", separators and NUL. */
+  private validateRunId(runId: string): void {
+    if (!runId || runId === '.' || runId.includes('..') || /[/\\]/.test(runId) || runId.includes('\0')) {
+      throw new Error(`Invalid runId: "${runId}" contains path traversal characters`);
+    }
+  }
+
   /** Directory for persisted step outputs: .agent-relay/step-outputs/{runId}/ */
   private getStepOutputDir(runId: string): string {
+    this.validateRunId(runId); // throws before the id is joined into a filesystem path
     return path.join(this.cwd, '.agent-relay', 'step-outputs', runId);
   }
 
@@ -6638,6 +6658,153 @@ export class WorkflowRunner {
     }
   }
 
+  /** Pick the workflow that defines every cached step name; null when none does or nothing can be inferred. */
+  private matchWorkflowFromCache(
+    workflows: WorkflowDefinition[],
+    cachedStepNames: Set<string>
+  ): WorkflowDefinition | null {
+    // A single workflow needs no disambiguation (an empty list yields null).
+    if (workflows.length <= 1) return workflows[0] ?? null;
+
+    if (cachedStepNames.size === 0) {
+      // No cached steps to disambiguate — ambiguous when multiple workflows exist
+      this.log('[resume] Multiple workflows in config with empty cache — cannot disambiguate');
+      return null;
+    }
+
+    // A candidate qualifies only if it defines every cached step. Qualifying candidates all match the
+    // same cached steps, so a match count cannot rank them: keep config order and report the tie.
+    const candidates = workflows.filter((candidate) => {
+      const stepNames = new Set(candidate.steps.map((step) => step.name));
+      return [...cachedStepNames].every((name) => stepNames.has(name));
+    });
+    if (candidates.length > 1) {
+      const names = candidates.map((candidate) => candidate.name).join(', ');
+      this.log(`[resume] Cached steps match several workflows (${names}); using "${candidates[0]?.name}"`);
+    }
+
+    return candidates[0] ?? null;
+  }
+
+  /** Rebuild a failed run and its step rows from cached `.md` step outputs; logs why and returns null when it cannot. */
+  private reconstructRunFromCache(
+    runId: string,
+    config?: RelayYamlConfig
+  ): { run: WorkflowRunRow; stepStates: Map<string, StepState> } | null {
+    const stepOutputDir = this.getStepOutputDir(runId);
+    if (!existsSync(stepOutputDir)) return null;
+
+    let resumeConfig = config ?? this.currentConfig;
+    if (!resumeConfig) {
+      // Attempt to load config from relay.yaml on disk (resume() may call before runWorkflowCore sets currentConfig)
+      const yamlPath = path.join(this.cwd, 'relay.yaml');
+      if (!existsSync(yamlPath)) {
+        this.log(`[resume] Cannot reconstruct run "${runId}": no config supplied and ${yamlPath} does not exist`);
+        return null;
+      }
+      try {
+        resumeConfig = this.parseYamlString(readFileSync(yamlPath, 'utf-8'), yamlPath);
+      } catch (err) {
+        this.log(`[resume] Cannot load ${yamlPath}: ${err instanceof Error ? err.message : String(err)}`);
+        return null;
+      }
+    }
+
+    let entries: Dirent[];
+    try {
+      entries = readdirSync(stepOutputDir, { withFileTypes: true });
+    } catch (err) {
+      this.log(`[resume] Cannot read ${stepOutputDir}: ${err instanceof Error ? err.message : String(err)}`);
+      return null;
+    }
+
+    const cachedStepNames = new Set(
+      entries
+        .filter((entry) => entry.isFile() && entry.name.endsWith('.md'))
+        .map((entry) => entry.name.slice(0, -3))
+        .filter(Boolean)
+    );
+    const workflows = resumeConfig.workflows ?? [];
+    // Empty cache directory is valid — all steps will be re-run
+    const workflow = workflows.length > 0 ? this.matchWorkflowFromCache(workflows, cachedStepNames) : null;
+    if (!workflow) {
+      this.log(`[resume] Cannot reconstruct run "${runId}": no workflow in the config matches the cached step outputs`);
+      return null;
+    }
+
+    // Stamp each cached step with the newest mtime of its .md / .report.json (files that cannot be stat'ed count as 0)
+    const stepMtimes = new Map<string, string>();
+    let earliestMtime = Date.now();
+    for (const stepName of cachedStepNames) {
+      const mtime = Math.max(
+        ...[`${stepName}.md`, `${stepName}.report.json`].map((fileName) => {
+          try { return statSync(path.join(stepOutputDir, fileName)).mtimeMs; } catch { return 0; }
+        })
+      );
+      if (mtime > 0) {
+        stepMtimes.set(stepName, new Date(mtime).toISOString());
+        if (mtime < earliestMtime) earliestMtime = mtime;
+      }
+    }
+    const fallbackTime = new Date().toISOString();
+
+    const completedSteps = new Set(
+      workflow.steps.filter((step) => cachedStepNames.has(step.name)).map((step) => step.name)
+    );
+    // Heuristic: mark the first eligible non-completed step as failed (the likely failure point)
+    const failedStepName = workflow.steps.find(
+      (step) => !completedSteps.has(step.name) && (step.dependsOn ?? []).every((dep) => completedSteps.has(dep))
+    )?.name;
+
+    const runStartedAt = new Date(earliestMtime).toISOString();
+    const run: WorkflowRunRow = {
+      id: runId,
+      workspaceId: this.workspaceId,
+      workflowName: workflow.name,
+      pattern: resumeConfig.swarm.pattern,
+      status: 'failed',
+      config: resumeConfig,
+      startedAt: runStartedAt,
+      createdAt: runStartedAt,
+      updatedAt: fallbackTime,
+    };
+
+    const stepStates = new Map<string, StepState>();
+    for (const step of workflow.steps) {
+      const isNonAgent = step.type === 'deterministic' || step.type === 'worktree' || step.type === 'integration';
+      const cachedOutput = completedSteps.has(step.name) ? this.loadStepOutput(runId, step.name) : undefined;
+      const status: WorkflowStepStatus =
+        completedSteps.has(step.name) ? 'completed' : step.name === failedStepName ? 'failed' : 'pending';
+
+      const stepRow: WorkflowStepRow = {
+        id: this.generateId(),
+        runId,
+        stepName: step.name,
+        agentName: isNonAgent ? null : (step.agent ?? null),
+        stepType: isNonAgent ? (step.type as 'deterministic' | 'worktree' | 'integration') : 'agent',
+        status,
+        task:
+          step.type === 'deterministic'
+            ? (step.command ?? '')
+            : step.type === 'worktree'
+              ? (step.branch ?? '')
+              : step.type === 'integration'
+                ? (`${step.integration}.${step.action}`)
+                : (step.task ?? ''),
+        dependsOn: step.dependsOn ?? [],
+        output: cachedOutput,
+        error: status === 'failed' ? 'Recovered from cached step outputs' : undefined,
+        completedAt: status === 'completed' ? (stepMtimes.get(step.name) ?? fallbackTime) : undefined,
+        retryCount: 0,
+        createdAt: stepMtimes.get(step.name) ?? fallbackTime,
+        updatedAt: stepMtimes.get(step.name) ?? fallbackTime,
+      };
+      stepStates.set(step.name, { row: stepRow });
+    }
+
+    return { run, stepStates };
+  }
+
   /** Get or create the worker logs directory (.agent-relay/team/worker-logs) */
   private getWorkerLogsDir(): string {
     const logsDir = path.join(this.cwd, '.agent-relay', 'team', 'worker-logs');