diff --git a/scripts/test-modal-tools.mjs b/scripts/test-modal-tools.mjs
new file mode 100644
index 0000000..0ff4b3b
--- /dev/null
+++ b/scripts/test-modal-tools.mjs
@@ -0,0 +1,143 @@
+#!/usr/bin/env node
+/**
+ * Modal sandbox tools — offline self-tests.
+ *
+ * Runs without making any paid x402 calls. Covers:
+ *   1. Tool registration: all 4 ModalXxx capabilities exist in `allCapabilities`.
+ *   2. Hidden by default: none of them appear in CORE_TOOL_NAMES.
+ *   3. Schema sanity: each spec has the expected required/optional fields.
+ *   4. normalizeCommand behavior (string ↔ array, invalid).
+ *   5. SessionSandboxTracker add / remove / drain semantics.
+ *   6. Gateway contract: re-runs the probe curls (free 400/402 responses)
+ *      to verify the endpoint surface still matches the implementation
+ *      (catches gateway-side breaking changes early).
+ *
+ * Run:    node scripts/test-modal-tools.mjs
+ * Exit 0 = pass, exit 1 = fail.
+ */
+
+import { allCapabilities } from '../dist/tools/index.js';
+import { CORE_TOOL_NAMES } from '../dist/tools/tool-categories.js';
+import { sessionSandboxTracker } from '../dist/tools/modal.js';
+
+let failures = 0;
+function check(name, ok, detail = '') {
+  const mark = ok ? '\x1b[32m✓\x1b[0m' : '\x1b[31m✗\x1b[0m';
+  console.log(`  ${mark} ${name}${detail ? ' — ' + detail : ''}`);
+  if (!ok) failures++;
+}
+
+console.log('\n[1] Tool registration');
+const expected = ['ModalCreate', 'ModalExec', 'ModalStatus', 'ModalTerminate'];
+const registered = new Set(allCapabilities.map(c => c.spec.name));
+for (const name of expected) {
+  check(`${name} registered`, registered.has(name));
+}
+
+console.log('\n[2] Hidden by default (must require ActivateTool)');
+for (const name of expected) {
+  check(`${name} NOT in CORE_TOOL_NAMES`, !CORE_TOOL_NAMES.has(name));
+}
+
+console.log('\n[3] Schema sanity');
+const create = allCapabilities.find(c => c.spec.name === 'ModalCreate');
+const exec = allCapabilities.find(c => c.spec.name === 'ModalExec');
+const status = allCapabilities.find(c => c.spec.name === 'ModalStatus');
+const terminate = allCapabilities.find(c => c.spec.name === 'ModalTerminate');
+
+check('ModalCreate has gpu/timeout/cpu/memory props',
+  create && ['gpu', 'timeout', 'cpu', 'memory'].every(k => k in create.spec.input_schema.properties));
+check('ModalCreate has NO required fields',
+  create && (!create.spec.input_schema.required || create.spec.input_schema.required.length === 0));
+check('ModalExec requires sandbox_id + command',
+  exec && exec.spec.input_schema.required?.includes('sandbox_id') &&
+  exec.spec.input_schema.required?.includes('command'));
+check('ModalStatus requires sandbox_id only',
+  status && JSON.stringify(status.spec.input_schema.required) === '["sandbox_id"]');
+check('ModalTerminate requires sandbox_id only',
+  terminate && JSON.stringify(terminate.spec.input_schema.required) === '["sandbox_id"]');
+check('ModalCreate concurrent=false (high-cost, must be serial)', create && create.concurrent === false);
+check('ModalExec concurrent=false (writes shared sandbox state)', exec && exec.concurrent === false);
+
+console.log('\n[4] normalizeCommand — internal, exercised via ModalExec.execute');
+// We can't easily import the un-exported helper, but ModalExec returns
+// a clear error message on bad command, so probe via its public surface.
+const stubScope = {
+  workingDir: '/tmp',
+  abortSignal: new AbortController().signal,
+  onAskUser: undefined,
+};
+const badCases = [
+  { input: { sandbox_id: 'x', command: '' }, label: 'empty string command rejected' },
+  { input: { sandbox_id: 'x', command: [] }, label: 'empty array command rejected' },
+  { input: { sandbox_id: 'x', command: [1, 2] }, label: 'non-string array command rejected' },
+  { input: { sandbox_id: 'x', command: null }, label: 'null command rejected' },
+  { input: { sandbox_id: 'x' }, label: 'missing command rejected' },
+];
+for (const c of badCases) {
+  const r = await exec.execute(c.input, stubScope);
+  check(c.label, r.isError === true && /invalid command|command is required|expected/i.test(r.output));
+}
+
+console.log('\n[5] SessionSandboxTracker semantics');
+sessionSandboxTracker.drainIds(); // start clean
+sessionSandboxTracker.add({ id: 'sbx_a', gpu: 'cpu', createdAt: Date.now() });
+sessionSandboxTracker.add({ id: 'sbx_b', gpu: 'T4', createdAt: Date.now() });
+const list1 = sessionSandboxTracker.list();
+check('add registers 2 sandboxes', list1.length === 2);
+sessionSandboxTracker.remove('sbx_a');
+check('remove drops one', sessionSandboxTracker.list().length === 1);
+const drained = sessionSandboxTracker.drainIds();
+check('drainIds returns remaining ids', drained.length === 1 && drained[0] === 'sbx_b');
+check('drainIds clears the tracker', sessionSandboxTracker.list().length === 0);
+
+console.log('\n[6] Gateway contract probe (live, free — relies on 400/402 responses)');
+const BASE = 'https://blockrun.ai/api';
+async function probe(path, body, expectStatus, validator) {
+  const ctrl = new AbortController();
+  const t = setTimeout(() => ctrl.abort(), 8000);
+  try {
+    const r = await fetch(`${BASE}${path}`, {
+      method: 'POST',
+      signal: ctrl.signal,
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(body),
+    });
+    const text = await r.text().catch(() => '');
+    let json = {};
+    try { json = JSON.parse(text); } catch { /* ignore */ }
+    const ok = r.status === expectStatus && (validator ? validator(json) : true);
+    return { ok, status: r.status, json };
+  } catch (err) {
+    return { ok: false, status: 0, error: err.message };
+  } finally {
+    clearTimeout(t);
+  }
+}
+
+const c1 = await probe('/v1/modal/sandbox/create', {}, 402, j => j.price?.amount === '0.0100');
+check('create endpoint reachable, CPU price still $0.01', c1.ok, `got status ${c1.status}`);
+
+const c2 = await probe('/v1/modal/sandbox/create', { gpu: 'H100' }, 402, j => j.price?.amount === '0.4000');
+check('H100 price still $0.40', c2.ok, `got status ${c2.status}`);
+
+const c3 = await probe('/v1/modal/sandbox/create', { gpu: 'invalid_xxx' }, 400);
+check('invalid gpu rejected with 400', c3.ok, `got status ${c3.status}`);
+
+const c4 = await probe('/v1/modal/sandbox/exec', {}, 400, j =>
+  j.details?.some(d => d.path?.[0] === 'sandbox_id') &&
+  j.details?.some(d => d.path?.[0] === 'command' && d.expected === 'array'));
+check('exec still requires sandbox_id + command-as-array', c4.ok, `got status ${c4.status}`);
+
+const c5 = await probe('/v1/modal/sandbox/status', {}, 400);
+check('status requires sandbox_id', c5.ok, `got status ${c5.status}`);
+
+const c6 = await probe('/v1/modal/sandbox/terminate', {}, 400);
+check('terminate requires sandbox_id', c6.ok, `got status ${c6.status}`);
+
+const c7 = await probe('/v1/modal/sandbox/create', { image: 'python:3.12' }, 400, j =>
+  j.details?.some(d => /python:3\.11/i.test(String(d.message ?? ''))));
+check('image still locked to python:3.11', c7.ok, `got status ${c7.status}`);
+
+console.log(`\n${failures === 0 ? '\x1b[32mAll checks passed.\x1b[0m' : `\x1b[31m${failures} check(s) failed.\x1b[0m`}\n`);
+process.exit(failures === 0 ? 0 : 1);
diff --git a/src/agent/tool-guard.ts b/src/agent/tool-guard.ts
index 6f00eef..33626dc 100644
--- a/src/agent/tool-guard.ts
+++ b/src/agent/tool-guard.ts
@@ -234,9 +234,23 @@ export class SessionToolGuard {
     invocation: CapabilityInvocation,
     scope: ExecutionScope
   ): Promise<CapabilityResult | null> {
-    // Hard-block tools that have failed too many times this session
+    // Hard-block tools that have failed too many times this session.
+    // Modal lifecycle tools are exempt: orphan sandboxes keep billing
+    // GPU time, and ModalTerminate is the only way to recover from
+    // agent-side. Auto-disabling it after 3 transient errors would
+    // strand a $0.40/hr H100 until the session ends. Same logic for
+    // media-gen tools: failures are usually transient (gateway hiccup,
+    // prompt rejection) and the user often wants to retry.
+    const FAILURE_EXEMPT = new Set([
+      'ImageGen',
+      'VideoGen',
+      'ModalCreate',
+      'ModalExec',
+      'ModalStatus',
+      'ModalTerminate',
+    ]);
     const errorCount = this.toolErrorCounts.get(invocation.name) ?? 0;
-    if (errorCount >= 3) {
+    if (errorCount >= 3 && !FAILURE_EXEMPT.has(invocation.name)) {
       return {
         output: `${invocation.name} has failed ${errorCount} times this session and is now disabled. ` +
           'Tell the user what went wrong and suggest alternatives.',
diff --git a/src/stats/insights.ts b/src/stats/insights.ts
index b3c0ca9..1fec5f1 100644
--- a/src/stats/insights.ts
+++ b/src/stats/insights.ts
@@ -57,6 +57,25 @@ export interface InsightsReport {
   avgRequestCostUsd: number;
   /** Efficiency: cost per 1K tokens */
   costPer1KTokens: number;
+  /**
+   * Cost breakdown by capability category. Lets the UI show a clean
+   * "where did your USDC go" split alongside the per-model bar list.
+   *   - chat:    LLM token-billed calls (anything with non-zero tokens)
+   *   - media:   ImageGen / VideoGen / MusicGen (per_image / per_second / per_track)
+   *   - sandbox: Modal GPU sandbox lifecycle (create / exec / status / terminate)
+   *
+   * Categorization is by `model` name prefix:
+   *   - `modal/*`              → sandbox
+   *   - rows with 0 input + 0 output tokens → media (image/video/music are
+   *     stored with 0 tokens by recordUsage; modal/* matches first)
+   *   - everything else        → chat
+   */
+  byCategory: {
+    chatCostUsd: number;
+    mediaCostUsd: number;
+    sandboxCostUsd: number;
+    sandboxRequests: number;
+  };
 }
 
 // ─── Generate Report ──────────────────────────────────────────────────────
@@ -72,6 +91,11 @@ export function generateInsights(days = 30): InsightsReport {
   let totalCost = 0;
   let totalInput = 0;
   let totalOutput = 0;
+  // Category totals — see InsightsReport.byCategory doc.
+  let chatCost = 0;
+  let mediaCost = 0;
+  let sandboxCost = 0;
+  let sandboxRequests = 0;
   const modelAgg = new Map<string, {
     requests: number;
     costUsd: number;
@@ -85,6 +109,17 @@ export function generateInsights(days = 30): InsightsReport {
     totalInput += r.inputTokens;
     totalOutput += r.outputTokens;
 
+    // Categorize: modal/* always goes to sandbox; zero-token entries are
+    // media (image/video/music recordUsage stores 0/0 tokens); rest = chat.
+    if (r.model.startsWith('modal/')) {
+      sandboxCost += r.costUsd;
+      sandboxRequests++;
+    } else if ((r.inputTokens + r.outputTokens) === 0) {
+      mediaCost += r.costUsd;
+    } else {
+      chatCost += r.costUsd;
+    }
+
     const existing = modelAgg.get(r.model) ?? {
       requests: 0,
       costUsd: 0,
@@ -164,6 +199,12 @@ export function generateInsights(days = 30): InsightsReport {
     projections,
     avgRequestCostUsd,
     costPer1KTokens,
+    byCategory: {
+      chatCostUsd: chatCost,
+      mediaCostUsd: mediaCost,
+      sandboxCostUsd: sandboxCost,
+      sandboxRequests,
+    },
   };
 }
 
diff --git a/src/tools/index.ts b/src/tools/index.ts
index e65cc5d..8b2a1a7 100644
--- a/src/tools/index.ts
+++ b/src/tools/index.ts
@@ -39,6 +39,7 @@ import {
   defiLlamaYieldsCapability,
   defiLlamaPriceCapability,
 } from './defillama.js';
+import { modalCapabilities } from './modal.js';
 import { createTradingCapabilities } from './trading-execute.js';
 import { Portfolio } from '../trading/portfolio.js';
 import { RiskEngine } from '../trading/risk.js';
@@ -180,6 +181,11 @@ export const allCapabilities: CapabilityHandler[] = [
   defiLlamaChainsCapability,
   defiLlamaYieldsCapability,
   defiLlamaPriceCapability,
+  // Modal GPU sandbox tools — registered but hidden by default (not in
+  // CORE_TOOL_NAMES). Agent must `ActivateTool({names:["ModalCreate",...]})`
+  // before they appear in its tool inventory. High-cost ($0.40/H100 create)
+  // operations should not be in the default surface.
+  ...modalCapabilities, // ModalCreate, ModalExec, ModalStatus, ModalTerminate
 ];
 
 export {
diff --git a/src/tools/modal.ts b/src/tools/modal.ts
new file mode 100644
index 0000000..cc98105
--- /dev/null
+++ b/src/tools/modal.ts
@@ -0,0 +1,766 @@
+/**
+ * Modal Sandbox capabilities — spin up GPU/CPU compute on Modal Labs via the
+ * BlockRun gateway's x402-paid passthrough at /v1/modal/sandbox/{create, exec,
+ * status, terminate}. See https://modal.com/docs/guide/sandboxes for the
+ * underlying primitives.
+ *
+ * Pricing (per-call, USDC):
+ *   create: $0.01 (CPU) / $0.05 (T4) / $0.08 (L4) / $0.10 (A10G) / $0.20 (A100) / $0.40 (H100)
+ *   exec: $0.001
+ *   status: $0.001
+ *   terminate: $0.001
+ *
+ * Gateway constraints (probed 2026-05-02):
+ *   - image is fixed at python:3.11 — no custom containers yet.
+ *   - command is execve-style (string[]), not a shell string. We accept a
+ *     plain string from the LLM and auto-wrap to ["sh","-c", string].
+ *   - No stdin / env / workdir / streaming on exec — keep commands self-
+ *     contained and idempotent.
+ *   - No upload/download endpoints — files in/out via exec heredoc / curl.
+ *
+ * Lifecycle:
+ *   ModalCreate → returns sandbox_id, charged at GPU tier
+ *   ModalExec   → sync, returns { stdout, stderr, exit_code }
+ *   ModalStatus → check running/terminated
+ *   ModalTerminate → release; called automatically at session end via
+ *                    the SessionSandboxTracker registry.
+ */
+
+import {
+  getOrCreateWallet,
+  getOrCreateSolanaWallet,
+  createPaymentPayload,
+  createSolanaPaymentPayload,
+  parsePaymentRequired,
+  extractPaymentDetails,
+  solanaKeyToBytes,
+  SOLANA_NETWORK,
+} from '@blockrun/llm';
+import type { CapabilityHandler, CapabilityResult, ExecutionScope } from '../agent/types.js';
+import { loadChain, API_URLS, VERSION } from '../config.js';
+import { walletReservation, type ReservationToken } from '../wallet/reservation.js';
+import { recordUsage } from '../stats/tracker.js';
+
+// ─── Pricing table (probed from /.well-known/x402 + 402 responses) ─────────
+const CREATE_PRICE_USD: Record<string, number> = {
+  cpu: 0.01,
+  T4: 0.05,
+  L4: 0.08,
+  A10G: 0.10,
+  A100: 0.20,
+  H100: 0.40,
+};
+const EXEC_PRICE_USD = 0.001;
+const STATUS_PRICE_USD = 0.001;
+const TERMINATE_PRICE_USD = 0.001;
+
+const VALID_GPUS = new Set(Object.keys(CREATE_PRICE_USD).filter(g => g !== 'cpu'));
+
+// ─── Session sandbox tracker ───────────────────────────────────────────────
+// In-memory registry of sandboxes created in the current session. Used by
+// (1) the cleanup hook in vscode-session.ts to terminate orphans on session
+// end, (2) the extension UI's "active sandboxes" badge.
+//
+// NOT persisted — sandboxes outlive the Franklin process via Modal's own
+// timeout, so a missed cleanup is bounded by the user-set `timeout`.
+
+export interface SandboxRecord {
+  id: string;
+  gpu: string; // 'cpu' | 'T4' | ... — denormalized for cost display
+  createdAt: number;
+  timeoutSeconds?: number;
+}
+
+class SessionSandboxTracker {
+  private sandboxes = new Map<string, SandboxRecord>();
+
+  add(rec: SandboxRecord): void {
+    this.sandboxes.set(rec.id, rec);
+  }
+
+  remove(id: string): void {
+    this.sandboxes.delete(id);
+  }
+
+  list(): SandboxRecord[] {
+    return [...this.sandboxes.values()].sort((a, b) => b.createdAt - a.createdAt);
+  }
+
+  /** Snapshot then clear — used by the session cleanup hook. */
+  drainIds(): string[] {
+    const ids = [...this.sandboxes.keys()];
+    this.sandboxes.clear();
+    return ids;
+  }
+}
+
+export const sessionSandboxTracker = new SessionSandboxTracker();
+
+// ─── x402 payment signing — same shape as imagegen's helper ───────────────
+
+async function signPayment(
+  response: Response,
+  chain: 'base' | 'solana',
+  endpoint: string,
+  resourceDescription: string,
+): Promise<Record<string, string> | null> {
+  try {
+    const paymentHeader = await extractPaymentReq(response);
+    if (!paymentHeader) return null;
+
+    if (chain === 'solana') {
+      const wallet = await getOrCreateSolanaWallet();
+      const paymentRequired = parsePaymentRequired(paymentHeader);
+      const details = extractPaymentDetails(paymentRequired, SOLANA_NETWORK);
+      const secretBytes = await solanaKeyToBytes(wallet.privateKey);
+      const feePayer = details.extra?.feePayer || details.recipient;
+
+      const payload = await createSolanaPaymentPayload(
+        secretBytes,
+        wallet.address,
+        details.recipient,
+        details.amount,
+        feePayer as string,
+        {
+          resourceUrl: details.resource?.url || endpoint,
+          resourceDescription: details.resource?.description || resourceDescription,
+          maxTimeoutSeconds: details.maxTimeoutSeconds || 300,
+          extra: details.extra as Record<string, unknown> | undefined,
+        }
+      );
+      return { 'PAYMENT-SIGNATURE': payload };
+    } else {
+      const wallet = getOrCreateWallet();
+      const paymentRequired = parsePaymentRequired(paymentHeader);
+      const details = extractPaymentDetails(paymentRequired);
+
+      const payload = await createPaymentPayload(
+        wallet.privateKey as `0x${string}`,
+        wallet.address,
+        details.recipient,
+        details.amount,
+        details.network || 'eip155:8453',
+        {
+          resourceUrl: details.resource?.url || endpoint,
+          resourceDescription: details.resource?.description || resourceDescription,
+          maxTimeoutSeconds: details.maxTimeoutSeconds || 300,
+          extra: details.extra as Record<string, unknown> | undefined,
+        }
+      );
+      return { 'PAYMENT-SIGNATURE': payload };
+    }
+  } catch (err) {
+    console.error(`[franklin] Modal payment error: ${(err as Error).message}`);
+    return null;
+  }
+}
+
+async function extractPaymentReq(response: Response): Promise<string | null> {
+  let header = response.headers.get('payment-required');
+  if (!header) {
+    try {
+      const body = (await response.json()) as Record<string, unknown>;
+      if (body.x402 || body.accepts) {
+        header = btoa(JSON.stringify(body));
+      }
+    } catch { /* ignore */ }
+  }
+  return header;
+}
+
+/**
+ * Generic POST-with-x402-retry helper used by all four Modal endpoints. The
+ * first POST gets a 402 with payment requirements; we sign and retry once
+ * with the X-PAYMENT header. Returns the parsed JSON body and the raw
+ * Response (callers may need status code).
+ */
+async function postWithPayment(
+  endpoint: string,
+  body: Record<string, unknown>,
+  resourceDescription: string,
+  abortSignal: AbortSignal,
+  timeoutMs: number,
+): Promise<{ ok: boolean; status: number; body: Record<string, unknown>; raw: string }> {
+  const chain = loadChain();
+  const headers: Record<string, string> = {
+    'Content-Type': 'application/json',
+    'User-Agent': `franklin/${VERSION}`,
+  };
+  const ctrl = new AbortController();
+  const onParentAbort = () => ctrl.abort();
+  abortSignal.addEventListener('abort', onParentAbort, { once: true });
+  const timer = setTimeout(() => ctrl.abort(), timeoutMs);
+
+  try {
+    const payload = JSON.stringify(body);
+    let response = await fetch(endpoint, { method: 'POST', signal: ctrl.signal, headers, body: payload });
+
+    if (response.status === 402) {
+      const paymentHeaders = await signPayment(response, chain, endpoint, resourceDescription);
+      if (!paymentHeaders) {
+        return { ok: false, status: 402, body: { error: 'payment signing failed' }, raw: '' };
+      }
+      response = await fetch(endpoint, {
+        method: 'POST',
+        signal: ctrl.signal,
+        headers: { ...headers, ...paymentHeaders },
+        body: payload,
+      });
+    }
+
+    const raw = await response.text().catch(() => '');
+    let parsed: Record<string, unknown> = {};
+    try { parsed = raw ? JSON.parse(raw) : {}; } catch { /* leave as {} */ }
+    return { ok: response.ok, status: response.status, body: parsed, raw };
+  } finally {
+    clearTimeout(timer);
+    abortSignal.removeEventListener('abort', onParentAbort);
+  }
+}
+
+// ─── Helpers ─────────────────────────────────────────────────────────────
+
+function modalEndpoint(path: string): string {
+  const chain = loadChain();
+  return `${API_URLS[chain]}/v1/modal/sandbox/${path}`;
+}
+
+/**
+ * Normalize the agent's `command` input into the execve-style array Modal
+ * expects. LLMs frequently pass a shell string ("pip install torch && python
+ * train.py"); auto-wrap that into ["sh","-c", string] so the agent doesn't
+ * have to know the difference. Arrays are passed through verbatim.
+ */
+function normalizeCommand(input: unknown): string[] | null {
+  if (Array.isArray(input)) {
+    if (input.every(x => typeof x === 'string') && input.length > 0) {
+      return input as string[];
+    }
+    return null;
+  }
+  if (typeof input === 'string' && input.trim().length > 0) {
+    return ['sh', '-c', input];
+  }
+  return null;
+}
+
+function fmtUsd(n: number): string {
+  if (n < 0.01) return `$${n.toFixed(4)}`;
+  return `$${n.toFixed(2)}`;
+}
+
+// ─── ModalCreate ─────────────────────────────────────────────────────────
+
+interface ModalCreateInput {
+  gpu?: string;
+  timeout?: number;
+  cpu?: number;
+  memory?: number;
+}
+
+export const modalCreateCapability: CapabilityHandler = {
+  spec: {
+    name: 'ModalCreate',
+    description:
+      'Create a Modal Python 3.11 sandbox (CPU or GPU) via the BlockRun gateway. ' +
+      'Returns a sandbox_id you pass to ModalExec. Charged once per create at the ' +
+      'GPU tier price: CPU $0.01, T4 $0.05, L4 $0.08, A10G $0.10, A100 $0.20, H100 $0.40. ' +
+      'IMPORTANT — current limitations (BlockRun gateway is in early-access for sandboxes):\n' +
+      '  - sandbox lifetime: 5 minutes MAX (gateway hard-cap, regardless of GPU tier)\n' +
+      '  - per ModalExec call: 60 seconds MAX wall-clock\n' +
+      '  - Python 3.11 only, no custom images yet\n' +
+      '  - 1 vCPU, 1 GiB RAM defaults\n' +
+      '  - GPU access is preview-tier (officially "coming later" in docs)\n' +
+      '  - No setup-time provisioning — every sandbox starts empty\n' +
+      'These limits make this tool suitable for: GPU benchmarks (nvidia-smi, matmul), ' +
+      'small model inference (≤3B params if weights pre-cached), CUDA kernel validation, ' +
+      'short ad-hoc Python tasks. NOT suitable for: full LoRA / fine-tuning runs, ' +
+      'pip install + model download + training (pip alone burns 1-2 min of the 5-min budget). ' +
+      'Custom images + longer lifetime + GPU production tier are documented as "coming later" ' +
+      'by BlockRun — for serious ML workloads tell the user to use Modal directly until then. ' +
+      'Always call ModalTerminate when done. ' +
+      'Long-running command pattern: each ModalExec call is itself capped at 60s wall-clock. ' +
+      'For work that takes >60s (pip install, model download, training), use the ' +
+      'fire-and-poll pattern: ModalExec(["sh","-c","nohup <cmd> > /workspace/log 2>&1 &"]) ' +
+      'returns in <1s, then poll with subsequent ModalExec(["cat","/workspace/log"]) calls.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        gpu: { type: 'string', description: 'GPU tier. One of T4, L4, A10G, A100, H100. Omit for CPU-only ($0.01).' },
+        timeout: { type: 'number', description: 'Lifetime cap in seconds. Default + Max = 300 (5 min). Gateway rejects values > 300 with HTTP 400.' },
+        cpu: { type: 'number', description: 'Number of CPU cores. Default 0.125, max 8.' },
+        memory: { type: 'number', description: 'Memory MB. Default 128, max 32768.' },
+      },
+    },
+  },
+  concurrent: false,
+  async execute(input: Record<string, unknown>, ctx: ExecutionScope): Promise<CapabilityResult> {
+    const raw = input as ModalCreateInput;
+    // ── Client-side coercion ────────────────────────────────────────────
+    // LLMs routinely pass numeric fields as strings ("timeout":"300") and
+    // GPU tier in lowercase ("t4"). The gateway's schema is strict and
+    // 400s on either, leaving the agent confused (it sees "Invalid
+    // request body" with no actionable hint). Fix the obvious mistakes
+    // before they leave the client.
+    let gpu = raw.gpu;
+    if (typeof gpu === 'string') {
+      const matched = [...VALID_GPUS].find(g => g.toLowerCase() === gpu!.toLowerCase());
+      if (matched) gpu = matched;
+    }
+    if (gpu && !VALID_GPUS.has(gpu)) {
+      return {
+        output: `Error: invalid gpu "${gpu}". Allowed: ${[...VALID_GPUS].join(', ')} (or omit for CPU).`,
+        isError: true,
+      };
+    }
+    const tier = gpu ?? 'cpu';
+    const price = CREATE_PRICE_USD[tier];
+
+    // Coerce numeric fields. Reject NaN explicitly so we don't ship
+    // garbage to the gateway.
+    const coerceNum = (v: unknown, name: string): number | { error: string } => {
+      if (v === undefined || v === null || v === '') return undefined as unknown as number;
+      const n = typeof v === 'string' ? Number(v) : (v as number);
+      if (typeof n !== 'number' || !Number.isFinite(n)) {
+        return { error: `${name} must be a number, got ${typeof v}: ${JSON.stringify(v)}` };
+      }
+      return n;
+    };
+    const timeoutCoerced = coerceNum(raw.timeout, 'timeout');
+    const cpuCoerced = coerceNum(raw.cpu, 'cpu');
+    const memoryCoerced = coerceNum(raw.memory, 'memory');
+    for (const c of [timeoutCoerced, cpuCoerced, memoryCoerced]) {
+      if (c && typeof c === 'object' && 'error' in c) {
+        return { output: `Error: ${c.error}`, isError: true };
+      }
+    }
+    // Gateway hard-caps sandbox lifetime at 300s. Cap client-side so we
+    // surface a clear error instead of letting the user pay $0.20 for a
+    // create that 400s on the wire.
+    const CREATE_TIMEOUT_MAX = 300;
+    if (typeof timeoutCoerced === 'number' && timeoutCoerced > CREATE_TIMEOUT_MAX) {
+      return {
+        output:
+          `Error: timeout ${timeoutCoerced}s exceeds gateway max of ${CREATE_TIMEOUT_MAX}s. ` +
+          `BlockRun caps Modal sandbox lifetime at 5 minutes regardless of GPU tier. ` +
+          `For longer workloads, the work must be split across multiple sandboxes ` +
+          `(checkpoint + reload) or you need to ask BlockRun to lift this cap.`,
+        isError: true,
+      };
+    }
+
+    // ── AskUser cost preview (skipped if env auto-approve or non-UI mode) ──
+    const autoApprove = process.env.FRANKLIN_MEDIA_AUTO_APPROVE_ALL === '1';
+    if (ctx.onAskUser && !autoApprove) {
+      const timeoutSec = raw.timeout ?? 300;
+      const lines = [
+        `Create Modal sandbox?`,
+        ``,
+        `  Tier:        ${tier === 'cpu' ? 'CPU only' : `GPU ${tier}`}`,
+        `  Image:       python:3.11`,
+        `  Timeout:     ${timeoutSec}s (${(timeoutSec / 60).toFixed(1)} min)`,
+        ...(raw.cpu ? [`  CPU cores:   ${raw.cpu}`] : []),
+        ...(raw.memory ? [`  Memory:      ${raw.memory} MB`] : []),
+        ``,
+        `Create cost:   ${fmtUsd(price)} (one-time)`,
+        `Each exec:     ${fmtUsd(EXEC_PRICE_USD)}`,
+        `Terminate:     ${fmtUsd(TERMINATE_PRICE_USD)}`,
+      ];
+      try {
+        const answer = await ctx.onAskUser(lines.join('\n'), ['Approve', 'Cancel']);
+        if (answer !== 'Approve') {
+          return { output: '## Sandbox creation cancelled\n\nNo USDC was spent.' };
+        }
+      } catch {
+        // askUser failed (UI gone) — fall through and create. Better than
+        // silently aborting in headless contexts.
+      }
+    }
+
+    // Wallet reservation — block over-spend if other in-flight calls hold balance.
+    let reservation: ReservationToken | null = null;
+    try {
+      reservation = await walletReservation.hold(price);
+      if (!reservation) {
+        return {
+          output:
+            `Insufficient USDC for ModalCreate (${tier}, ~${fmtUsd(price)}). ` +
+            `Other in-flight paid calls may be holding your balance — wait or fund the wallet.`,
+          isError: true,
+        };
+      }
+    } catch { /* fall through, x402 will surface real error */ }
+
+    try {
+      const body: Record<string, unknown> = {};
+      if (gpu) body.gpu = gpu;
+      if (typeof timeoutCoerced === 'number') body.timeout = timeoutCoerced;
+      if (typeof cpuCoerced === 'number') body.cpu = cpuCoerced;
+      if (typeof memoryCoerced === 'number') body.memory = memoryCoerced;
+
+      const res = await postWithPayment(
+        modalEndpoint('create'),
+        body,
+        'Franklin Modal sandbox create',
+        ctx.abortSignal,
+        90_000, // 90s — sandbox cold-start can be slow on fresh GPU pulls
+      );
+
+      if (!res.ok) {
+        const err = res.body.error ? String(res.body.error) : res.raw.slice(0, 300);
+        // Surface the per-field validation issues — usually the
+        // actionable bit ("expected number, received string at path
+        // ['timeout']").
+        const details = Array.isArray(res.body.details)
+          ? '\nDetails: ' + res.body.details.map((d: Record<string, unknown>) =>
+              `${(d.path as string[] | undefined)?.join('.') ?? '?'}: ${d.message ?? JSON.stringify(d)}`
+            ).join('; ')
+          : '';
+        return {
+          output: `ModalCreate failed (${res.status}): ${err}${details}`,
+          isError: true,
+        };
+      }
+
+      const sandboxId =
+        (typeof res.body.sandbox_id === 'string' && res.body.sandbox_id) ||
+        (typeof res.body.id === 'string' && res.body.id) ||
+        '';
+      if (!sandboxId) {
+        return {
+          output: `ModalCreate returned no sandbox_id. Raw: ${res.raw.slice(0, 300)}`,
+          isError: true,
+        };
+      }
+
+      sessionSandboxTracker.add({
+        id: sandboxId,
+        gpu: tier,
+        createdAt: Date.now(),
+        timeoutSeconds: raw.timeout ?? 300,
+      });
+
+      // Stats — surface Modal usage in `franklin insights` like other paid tools.
+      try {
+        recordUsage(`modal/${tier}`, 0, 0, price, 0);
+      } catch { /* ignore */ }
+
+      return {
+        output:
+          `Sandbox created\n` +
+          `- id: \`${sandboxId}\`\n` +
+          `- tier: ${tier === 'cpu' ? 'CPU only' : `GPU ${tier}`}\n` +
+          `- timeout: ${raw.timeout ?? 300}s\n` +
+          `- charged: ${fmtUsd(price)}\n\n` +
+          `Next: ModalExec({ sandbox_id: "${sandboxId}", command: ["python","-c","print(1)"] })`,
+      };
+    } finally {
+      walletReservation.release(reservation);
+    }
+  },
+};
+
+// ─── ModalExec ───────────────────────────────────────────────────────────
+
+interface ModalExecInput {
+  sandbox_id?: string;
+  command?: unknown; // string OR string[] — we normalize
+  timeout?: number;
+}
+
+export const modalExecCapability: CapabilityHandler = {
+  spec: {
+    name: 'ModalExec',
+    description:
+      'Run a command inside a Modal sandbox (must already exist via ModalCreate). ' +
+      '`command` accepts either an execve-style array (e.g. ["python","-c","print(1)"]) ' +
+      'or a shell string (e.g. "pip install torch && python train.py") which is auto-wrapped ' +
+      'as ["sh","-c", <string>]. Returns stdout, stderr, exit_code synchronously. ' +
+      'Each call charges $0.001. The sandbox keeps state across exec calls (filesystem, ' +
+      'installed pip packages, etc) until ModalTerminate. ' +
+      'CRITICAL: timeout is HARD-CAPPED at 60 seconds by the gateway — anything longer ' +
+      'returns HTTP 400. For long-running commands (pip install large packages, model ' +
+      'downloads, training loops), use the fire-and-poll pattern: ' +
+      '  exec1: ["sh","-c","nohup <slow-cmd> > /workspace/log 2>&1 & echo $! > /workspace/pid"] (<1s) ' +
+      '  exec2: ["sh","-c","tail -50 /workspace/log"] (poll progress, <1s) ' +
+      '  exec3: ["sh","-c","kill -0 $(cat /workspace/pid) 2>/dev/null && echo RUN || echo DONE"] (check live) ' +
+      'This decouples actual work duration from the per-exec 60s ceiling, but the sandbox ' +
+      'itself still dies at 300s wall-clock — total useful work fits in ~5 minutes.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        sandbox_id: { type: 'string', description: 'Sandbox id from ModalCreate.' },
+        command: {
+          description: 'Execve-style array OR shell string. Strings are wrapped as ["sh","-c", string].',
+        },
+        timeout: { type: 'number', description: 'Per-exec timeout in seconds. Default 60, MAX 60 (gateway hard cap). Use fire-and-poll for longer work.' },
+      },
+      required: ['sandbox_id', 'command'],
+    },
+  },
+  concurrent: false,
+  async execute(input: Record<string, unknown>, ctx: ExecutionScope): Promise<CapabilityResult> {
+    const raw = input as ModalExecInput;
+    if (!raw.sandbox_id) return { output: 'Error: sandbox_id is required', isError: true };
+
+    const command = normalizeCommand(raw.command);
+    if (!command) {
+      // JSON.stringify(undefined) returns undefined — guard the slice call.
+      const got = raw.command === undefined
+        ? 'undefined (missing)'
+        : JSON.stringify(raw.command);
+      return {
+        output:
+          `Error: invalid command. Expected a non-empty string or string[] of length >= 1. ` +
+          `Got: ${(got ?? 'undefined').slice(0, 100)}`,
+        isError: true,
+      };
+    }
+
+    let reservation: ReservationToken | null = null;
+    try {
+      reservation = await walletReservation.hold(EXEC_PRICE_USD);
+      // For micro-cost calls don't hard-block on insufficient — just proceed.
+    } catch { /* ignore */ }
+
+    try {
+      // Same string-as-number guard as ModalCreate. LLMs love
+      // "timeout":"300".
+      let coercedTimeout: number | undefined;
+      if (raw.timeout !== undefined && raw.timeout !== null && (raw.timeout as unknown) !== '') {
+        const n = typeof raw.timeout === 'string' ? Number(raw.timeout) : raw.timeout;
+        if (typeof n === 'number' && Number.isFinite(n)) coercedTimeout = n;
+      }
+      // Gateway hard-caps exec timeout at 60s. Cap client-side so we
+      // never burn an x402 round-trip on a 400. Default to 60s if
+      // unset since "I want it to actually run" is a more sensible
+      // default than the lib's smaller value.
+      const EXEC_TIMEOUT_MAX = 60;
+      if (coercedTimeout === undefined || coercedTimeout > EXEC_TIMEOUT_MAX) {
+        coercedTimeout = EXEC_TIMEOUT_MAX;
+      }
+      const body: Record<string, unknown> = {
+        sandbox_id: raw.sandbox_id,
+        command,
+      };
+      if (coercedTimeout !== undefined) body.timeout = coercedTimeout;
+
+      const res = await postWithPayment(
+        modalEndpoint('exec'),
+        body,
+        'Franklin Modal sandbox exec',
+        ctx.abortSignal,
+        Math.max(30_000, ((coercedTimeout ?? 300) + 30) * 1000),
+      );
+
+      if (!res.ok) {
+        // 400 here usually means the agent built the wrong shape (bad
+        // sandbox_id, malformed command). Dump the full raw body so the
+        // agent can see exactly what the gateway complained about and
+        // self-correct on the next turn instead of looping blind.
+        const err = res.body.error ? String(res.body.error) : '(no error field)';
+        const details = res.body.details ? `\nDetails: ${JSON.stringify(res.body.details)}` : '';
+        const raw = res.raw.length > 500 ? res.raw.slice(0, 500) + '…' : res.raw;
+        return {
+          output:
+            `ModalExec failed (${res.status}): ${err}${details}\n` +
+            `Raw response: ${raw}\n` +
+            `Sent: command=${JSON.stringify(command).slice(0, 200)}`,
+          isError: true,
+        };
+      }
+
+      const stdout = typeof res.body.stdout === 'string' ? res.body.stdout : '';
+      const stderr = typeof res.body.stderr === 'string' ? res.body.stderr : '';
+      // Gateway field shape isn't 100% pinned — accept exit_code, exitCode,
+      // returncode, code (in priority order). If NONE of them are present
+      // but stdout/stderr came back, treat as success (exit 0) rather than
+      // poisoning the failure counter on a healthy run with an unfamiliar
+      // response shape.
+      const rawExit =
+        typeof res.body.exit_code === 'number' ? res.body.exit_code :
+        typeof res.body.exitCode === 'number' ? res.body.exitCode :
+        typeof res.body.returncode === 'number' ? res.body.returncode :
+        typeof res.body.code === 'number' ? res.body.code :
+        null;
+      const hasAnyOutput = stdout.length > 0 || stderr.length > 0;
+      const exitCode = rawExit !== null ? rawExit : (hasAnyOutput ? 0 : -1);
+
+      try { recordUsage('modal/exec', 0, 0, EXEC_PRICE_USD, 0); } catch { /* ignore */ }
+
+      const summary = `exit ${exitCode}` + (rawExit === null ? ' (inferred — no exit_code field in response)' : '');
+      const sections = [
+        `\`${command.join(' ')}\` → ${summary}`,
+      ];
+      if (stdout) sections.push(`--- stdout ---\n${stdout}`);
+      if (stderr) sections.push(`--- stderr ---\n${stderr}`);
+      // Only mark as error when we have a real non-zero exit code OR
+      // we have nothing at all (no stdout / stderr / exit_code) which
+      // suggests an actual problem rather than a parsing edge case.
+      const isError = rawExit !== null ? rawExit !== 0 : !hasAnyOutput;
+      return { output: sections.join('\n\n'), isError };
+    } finally {
+      walletReservation.release(reservation);
+    }
+  },
+};
+
+// ─── ModalStatus ─────────────────────────────────────────────────────────
+
+export const modalStatusCapability: CapabilityHandler = {
+  spec: {
+    name: 'ModalStatus',
+    description:
+      'Check the status of a Modal sandbox (running / terminated). Charges $0.001. ' +
+      'Useful when you suspect a sandbox died or you want to confirm a previous ' +
+      'ModalTerminate succeeded.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        sandbox_id: { type: 'string' },
+      },
+      required: ['sandbox_id'],
+    },
+  },
+  concurrent: false,
+  async execute(input: Record<string, unknown>, ctx: ExecutionScope): Promise<CapabilityResult> {
+    const sandbox_id = (input as { sandbox_id?: string }).sandbox_id;
+    if (!sandbox_id) return { output: 'Error: sandbox_id is required', isError: true };
+
+    let reservation: ReservationToken | null = null;
+    try { reservation = await walletReservation.hold(STATUS_PRICE_USD); } catch { /* ignore */ }
+
+    try {
+      const res = await postWithPayment(
+        modalEndpoint('status'),
+        { sandbox_id },
+        'Franklin Modal sandbox status',
+        ctx.abortSignal,
+        30_000,
+      );
+
+      if (!res.ok) {
+        const err = res.body.error ? String(res.body.error) : res.raw.slice(0, 300);
+        return { output: `ModalStatus failed (${res.status}): ${err}`, isError: true };
+      }
+
+      try { recordUsage('modal/status', 0, 0, STATUS_PRICE_USD, 0); } catch { /* ignore */ }
+
+      const status = (res.body.status as string) || 'unknown';
+      const extra = JSON.stringify(res.body, null, 2);
+      return { output: `Sandbox \`${sandbox_id}\` status: **${status}**\n\n${extra}` };
+    } finally {
+      walletReservation.release(reservation);
+    }
+  },
+};
+
+// ─── ModalTerminate ──────────────────────────────────────────────────────
+
+export const modalTerminateCapability: CapabilityHandler = {
+  spec: {
+    name: 'ModalTerminate',
+    description:
+      'Terminate a Modal sandbox and release its resources. Charges $0.001. ' +
+      'Strongly recommended after every successful ModalExec sequence — ' +
+      'Modal bills wall-clock GPU time until the sandbox terminates or hits ' +
+      'its `timeout`. Session-end auto-cleanup also calls this for any sandboxes ' +
+      'the agent forgot, but explicit is better.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        sandbox_id: { type: 'string' },
+      },
+      required: ['sandbox_id'],
+    },
+  },
+  concurrent: false,
+  async execute(input: Record<string, unknown>, ctx: ExecutionScope): Promise<CapabilityResult> {
+    const sandbox_id = (input as { sandbox_id?: string }).sandbox_id;
+    if (!sandbox_id) return { output: 'Error: sandbox_id is required', isError: true };
+
+    let reservation: ReservationToken | null = null;
+    try { reservation = await walletReservation.hold(TERMINATE_PRICE_USD); } catch { /* ignore */ }
+
+    try {
+      const res = await postWithPayment(
+        modalEndpoint('terminate'),
+        { sandbox_id },
+        'Franklin Modal sandbox terminate',
+        ctx.abortSignal,
+        30_000,
+      );
+
+      // Always remove from tracker — even on failure, retrying is wasteful.
+      sessionSandboxTracker.remove(sandbox_id);
+
+      if (!res.ok) {
+        const err = res.body.error ? String(res.body.error) : res.raw.slice(0, 300);
+        return {
+          output:
+            `ModalTerminate returned ${res.status}: ${err}\n\n` +
+            `(Removed from local tracker regardless. Modal-side cleanup will happen at the timeout.)`,
+          isError: res.status >= 500, // 4xx (e.g. already-terminated) is benign
+        };
+      }
+
+      try { recordUsage('modal/terminate', 0, 0, TERMINATE_PRICE_USD, 0); } catch { /* ignore */ }
+
+      return { output: `Sandbox \`${sandbox_id}\` terminated.` };
+    } finally {
+      walletReservation.release(reservation);
+    }
+  },
+};
+
+// ─── Bulk session cleanup ────────────────────────────────────────────────
+
+/**
+ * Terminate every sandbox the current session has created. Called from
+ * vscode-session.ts at session end (and the SessionToolGuard cleanup path)
+ * so a missed agent ModalTerminate doesn't leave Modal billing the user
+ * up to the per-sandbox timeout. Best-effort: failures are logged but
+ * don't block session shutdown.
+ */
+export async function terminateAllSessionSandboxes(opts: { abortSignal?: AbortSignal } = {}): Promise<{
+  attempted: number;
+  succeeded: number;
+  failed: Array<{ id: string; error: string }>;
+}> {
+  const ids = sessionSandboxTracker.drainIds();
+  const failed: Array<{ id: string; error: string }> = [];
+  let succeeded = 0;
+  const ctrl = new AbortController();
+  if (opts.abortSignal) {
+    if (opts.abortSignal.aborted) ctrl.abort();
+    else opts.abortSignal.addEventListener('abort', () => ctrl.abort(), { once: true });
+  }
+  // Sequential — terminating a few sandboxes in parallel offers no real
+  // win over serial, and serial keeps the wallet-reservation accounting
+  // simple.
+  for (const id of ids) {
+    try {
+      const res = await postWithPayment(
+        modalEndpoint('terminate'),
+        { sandbox_id: id },
+        'Franklin Modal sandbox cleanup',
+        ctrl.signal,
+        20_000,
+      );
+      if (res.ok) succeeded++;
+      else failed.push({ id, error: String(res.body.error ?? res.raw.slice(0, 200)) });
+    } catch (err) {
+      failed.push({ id, error: (err as Error).message });
+    }
+  }
+  return { attempted: ids.length, succeeded, failed };
+}
+
+// ─── All-in-one export for index.ts registration ─────────────────────────
+
+export const modalCapabilities: CapabilityHandler[] = [
+  modalCreateCapability,
+  modalExecCapability,
+  modalStatusCapability,
+  modalTerminateCapability,
+];
diff --git a/src/wallet/reservation.ts b/src/wallet/reservation.ts
new file mode 100644
index 0000000..e8dee36
--- /dev/null
+++ b/src/wallet/reservation.ts
@@ -0,0 +1,118 @@
+/**
+ * WalletReservation — local accounting layer for concurrent paid tool calls.
+ *
+ * Problem this solves: when N batch tools (ImageGen / VideoGen) run in
+ * parallel, each independently checks balance and dispatches its x402
+ * payment. With balance $0.20 and 6 calls × $0.04 each, all 6 see "$0.20
+ * available, $0.04 fits" and start; only 5 can actually settle on-chain,
+ * the rest fail mid-flight with insufficient-funds and the user sees
+ * partial completion with no preflight warning.
+ *
+ * The fix is *not* on-chain — x402 is fire-and-forget per-request, there's
+ * no real "hold" capability. Instead this is a per-process bookkeeping
+ * layer:
+ *   1. Tool calls hold(amount) before paying.
+ *   2. hold() refuses if (balance - sum(active reservations)) < amount.
+ *   3. After payment succeeds OR fails, tool calls release(token).
+ *
+ * Single-process JS guarantees the check-and-set is atomic (no real race),
+ * and balance is cached briefly so we don't hit the RPC for every hold.
+ */
+
+import { setupAgentWallet, setupAgentSolanaWallet } from '@blockrun/llm';
+import { loadChain } from '../config.js';
+
+export interface ReservationToken {
+  id: string;
+  amountUsd: number;
+}
+
+const BALANCE_CACHE_MS = 5_000;
+
+class WalletReservationManager {
+  private reserved = new Map<string, number>();
+  private cachedBalance: { value: number; fetchedAt: number } | null = null;
+  private balanceFetchInflight: Promise<number> | null = null;
+
+  private async fetchBalance(): Promise<number> {
+    if (this.cachedBalance && Date.now() - this.cachedBalance.fetchedAt < BALANCE_CACHE_MS) {
+      return this.cachedBalance.value;
+    }
+    if (this.balanceFetchInflight) return this.balanceFetchInflight;
+
+    const chain = loadChain();
+    this.balanceFetchInflight = (async () => {
+      try {
+        if (chain === 'solana') {
+          const client = await setupAgentSolanaWallet({ silent: true });
+          return await client.getBalance();
+        }
+        const client = setupAgentWallet({ silent: true });
+        return await client.getBalance();
+      } catch {
+        // If balance fetch fails, return Infinity so reservations don't
+        // block — the actual payment will surface the real error. We'd
+        // rather under-protect than block all paid tools on RPC flakiness.
+        return Number.POSITIVE_INFINITY;
+      }
+    })()
+      .then((v) => {
+        this.cachedBalance = { value: v, fetchedAt: Date.now() };
+        this.balanceFetchInflight = null;
+        return v;
+      });
+
+    return this.balanceFetchInflight;
+  }
+
+  private totalReserved(): number {
+    let sum = 0;
+    for (const v of this.reserved.values()) sum += v;
+    return sum;
+  }
+
+  /**
+   * Try to reserve `amountUsd`. Returns a token on success, or null if
+   * insufficient (balance - already-reserved < amountUsd). Caller MUST
+   * release the token after the actual payment resolves, success or fail.
+   */
+  async hold(amountUsd: number): Promise<ReservationToken | null> {
+    if (amountUsd <= 0) {
+      // Free / zero-cost calls don't need accounting.
+      return { id: `free-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`, amountUsd: 0 };
+    }
+    const balance = await this.fetchBalance();
+    const available = balance - this.totalReserved();
+    if (available < amountUsd) return null;
+
+    const id = `res-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+    this.reserved.set(id, amountUsd);
+    return { id, amountUsd };
+  }
+
+  /**
+   * Release a hold. Idempotent — releasing the same token twice is a no-op.
+   * Invalidate the balance cache so the next hold sees up-to-date state.
+   */
+  release(token: ReservationToken | string | null | undefined): void {
+    if (!token) return;
+    const id = typeof token === 'string' ? token : token.id;
+    if (this.reserved.delete(id)) {
+      // A real payment may have just settled on-chain; force re-fetch
+      // next time so subsequent holds see the post-payment balance.
+      this.cachedBalance = null;
+    }
+  }
+
+  /** Force the next hold() to refetch balance from chain. */
+  invalidateBalance(): void {
+    this.cachedBalance = null;
+  }
+
+  /** Snapshot of current reservation state — diagnostic / testing only. */
+  snapshot(): { count: number; totalUsd: number } {
+    return { count: this.reserved.size, totalUsd: this.totalReserved() };
+  }
+}
+
+export const walletReservation = new WalletReservationManager();