diff --git a/.github/workflows/generate-toolkit-docs.yml b/.github/workflows/generate-toolkit-docs.yml index 20a00e134..c501cbb21 100644 --- a/.github/workflows/generate-toolkit-docs.yml +++ b/.github/workflows/generate-toolkit-docs.yml @@ -21,6 +21,10 @@ permissions: jobs: generate: runs-on: ubuntu-latest + # Opt in to Node 24 for JavaScript actions before GitHub forces the + # switch on 2026-06-02. Harmless today; unblocks the cutover. + env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" steps: - name: Checkout repository @@ -57,6 +61,9 @@ jobs: --llm-provider openai \ --llm-model "$OPENAI_MODEL" \ --llm-api-key "$OPENAI_API_KEY" \ + --llm-editor-provider anthropic \ + --llm-editor-model "$ANTHROPIC_EDITOR_MODEL" \ + --llm-editor-api-key "$ANTHROPIC_API_KEY" \ --toolkit-concurrency 8 \ --llm-concurrency 15 \ --exclude-file ./excluded-toolkits.txt \ @@ -68,6 +75,11 @@ jobs: ENGINE_API_KEY: ${{ secrets.ENGINE_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_MODEL: ${{ secrets.OPENAI_MODEL || 'gpt-4o-mini' }} + # Stronger model for the secret-coherence editor. Keeps + # stale-secret cleanup precise instead of re-summarizing the whole + # artifact (which gpt-4o-mini tends to do). + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + ANTHROPIC_EDITOR_MODEL: ${{ secrets.ANTHROPIC_EDITOR_MODEL || 'claude-sonnet-4-6' }} - name: Sync toolkit sidebar navigation run: pnpm dlx tsx toolkit-docs-generator/scripts/sync-toolkit-sidebar.ts --remove-empty-sections=false --verbose diff --git a/toolkit-docs-generator/README.md b/toolkit-docs-generator/README.md index 9a5743f13..7b3a65c0e 100644 --- a/toolkit-docs-generator/README.md +++ b/toolkit-docs-generator/README.md @@ -42,6 +42,8 @@ Required secrets: Optional secrets: - `OPENAI_MODEL` (defaults in the workflow) +- `ANTHROPIC_API_KEY` enables the secret-coherence editor (see below). Without it the workflow still runs; the scanners emit warnings but no LLM edits are applied. 
+- `ANTHROPIC_EDITOR_MODEL` (defaults to `claude-sonnet-4-6` in the workflow) ## Rendering pipeline (docs site) @@ -66,6 +68,57 @@ The docs site consumes the generated JSON directly: This step does not change JSON output. It only updates navigation files. +## Secret coherence (stale-reference cleanup + coverage check) + +When a toolkit loses a secret upstream (typically because the tool that required it was removed), the rendered docs can keep mentioning it in the summary and in hand-authored documentation chunks. Symmetrically, a toolkit can end up with current secrets the summary never names, or name them without any link to the Arcade config docs. + +The generator runs two checks after summary generation, in [`src/merger/secret-coherence.ts`](src/merger/secret-coherence.ts) and [`src/llm/secret-edit-generator.ts`](src/llm/secret-edit-generator.ts): + +1. **Stale-reference scan** (deterministic): diffs current vs previous toolkit secret sets and searches the summary, every toolkit-level `documentationChunks` entry, and every per-tool chunk for any removed secret name. Exact substring match — secret names are distinctive ALLCAPS_WITH_UNDER. +2. **Coverage-gap scan** (deterministic): flags any current secret that is not mentioned in the summary and any summary that lacks a link to the Arcade secret config docs. + +If an LLM editor is configured (`--llm-editor-provider` / `--llm-editor-model` / `--llm-editor-api-key`), both classes of issue are auto-fixed: + +- Stale references are removed with a **minimum-necessary edit** prompt — whole sentences, bullets, or table rows that exist only to describe the removed secret are deleted; sentences that mention the removed secret alongside other content are minimally rewritten; nothing else is touched. This is intentionally different from the summary generator, which rewrites from scratch and tends to oversimplify. 
+- Missing secrets get appended to the summary's `**Secrets**` section with as much detail as the secret actually needs — a short URL override may be one line; a scoped API key typically needs several sentences describing the provider dashboard page, required scopes or permissions, and account-tier constraints, plus an inline link to the provider's own documentation for how to create it. The prompt explicitly forbids inventing docs URLs. +- Missing Arcade-config links are added at the end of the `**Secrets**` section. +- The editor is instructed to preserve surrounding content verbatim (no re-summarization, no reorder). + +When the editor is not configured, the scanners still run and their findings land as non-fatal warnings in the run log. Editor exceptions are caught individually so a single LLM failure does not break the run. + +The workflow's default editor model is **Claude Sonnet 4.6** — chosen to avoid the oversimplification observed when bulk summaries were regenerated by `gpt-4o-mini`. The CLI itself has no built-in default: the editor only runs when both a provider and a model are set. Set the model with `--llm-editor-model` or the `LLM_EDITOR_MODEL` / `ANTHROPIC_EDITOR_MODEL` env var. + +### OAuth section in summaries + +The summary generator is configured to **never list OAuth scopes** in the generated overview. Each per-provider Arcade auth docs page (under `/en/references/auth-providers/`) is the source of truth for scopes and configuration; the summary links to it instead of duplicating. This keeps the overview scannable and prevents drift when provider pages update their scope lists. + +### CLI flags + +- `--llm-editor-provider ` — editor provider. Falls back to `LLM_EDITOR_PROVIDER`. +- `--llm-editor-model ` — editor model. Falls back to `LLM_EDITOR_MODEL` / `ANTHROPIC_EDITOR_MODEL`. +- `--llm-editor-api-key ` — editor API key. Falls back to `LLM_EDITOR_API_KEY`, then `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` per provider. +- `--llm-editor-base-url ` — override editor base URL. +- `--llm-editor-temperature ` — editor temperature. 
+- `--llm-editor-max-tokens ` — editor max output tokens (default `8192`). +- `--llm-editor-max-retries ` — retry attempts on transient errors (default `3`). +- `--skip-secret-coherence` — disable both the scan and the edit step entirely. + +### Local example (editor on) + +```bash +pnpm dlx tsx src/cli/index.ts generate \ + --providers "Github" \ + --tool-metadata-url "$ENGINE_API_URL" \ + --tool-metadata-key "$ENGINE_API_KEY" \ + --llm-provider openai \ + --llm-model gpt-4.1-mini \ + --llm-api-key "$OPENAI_API_KEY" \ + --llm-editor-provider anthropic \ + --llm-editor-model claude-sonnet-4-6 \ + --llm-editor-api-key "$ANTHROPIC_API_KEY" \ + --output data/toolkits +``` + ## Architecture at a glance - **CLI**: `toolkit-docs-generator/src/cli/index.ts` @@ -182,6 +235,8 @@ deletes it and rebuilds `index.json`. - `--previous-output` compare against a previous output directory - `--custom-sections` load curated docs sections - `--skip-examples`, `--skip-summary` disable LLM steps +- `--skip-secret-coherence` disable the stale-reference scan + coverage fill (see the Secret coherence section) +- `--llm-editor-provider`, `--llm-editor-model`, `--llm-editor-api-key` configure the secret-coherence editor (Sonnet 4.6 by default) - `--no-verify-output` skip output verification ## Troubleshooting diff --git a/toolkit-docs-generator/src/cli/index.ts b/toolkit-docs-generator/src/cli/index.ts index b6a1ca5af..1137c3703 100644 --- a/toolkit-docs-generator/src/cli/index.ts +++ b/toolkit-docs-generator/src/cli/index.ts @@ -33,6 +33,7 @@ import { createLlmClient, type LlmClient, type LlmProvider, + LlmSecretEditGenerator, LlmToolExampleGenerator, LlmToolkitSummaryGenerator, } from "../llm/index.js"; @@ -363,6 +364,124 @@ const resolveLlmConfig = ( }; }; +interface SecretEditorCliOptions { + llmEditorProvider?: string; + llmEditorModel?: string; + llmEditorApiKey?: string; + llmEditorBaseUrl?: string; + llmEditorTemperature?: number; + llmEditorMaxTokens?: number; + 
// Headroom is calibrated against the largest single artifact the editor
// might receive: a long documentation chunk that must be reproduced
// verbatim minus a removed secret (worst-case output size ≈ input size).
// Largest chunk in current data is ~6K chars (~1.5K tokens); a summary
// with no word cap for a 40+ tool toolkit with several secrets can land
// in the 2–3K output-token range. 8K keeps a safe margin without any
// meaningful cost or latency penalty on Sonnet 4.6.
const DEFAULT_EDITOR_MAX_TOKENS = 8192;

// Retry budget used when --llm-editor-max-retries is absent or unparsable.
const DEFAULT_EDITOR_MAX_RETRIES = 3;

/**
 * Resolve the editor API key.
 *
 * Precedence: explicit CLI value, then `LLM_EDITOR_API_KEY`, then the
 * provider-specific env var (`ANTHROPIC_API_KEY` for anthropic,
 * `OPENAI_API_KEY` otherwise). Empty strings are treated as "not set" for
 * the first two sources so an empty CI secret falls through.
 */
const resolveEditorApiKey = (
  provider: LlmProvider,
  explicit: string | undefined
): string | undefined => {
  if (explicit) return explicit;
  if (process.env.LLM_EDITOR_API_KEY) return process.env.LLM_EDITOR_API_KEY;
  if (provider === "anthropic") {
    return process.env.ANTHROPIC_API_KEY;
  }
  return process.env.OPENAI_API_KEY;
};

/**
 * Resolve the editor model from CLI options, then from env vars in
 * documented precedence order. Kept as a shared helper so the verbose
 * log in each `generate` action displays the same model that
 * resolveSecretEditGenerator will actually use.
 */
const resolveEditorModel = (options: {
  llmEditorModel?: string;
}): string | undefined =>
  options.llmEditorModel ??
  process.env.LLM_EDITOR_MODEL ??
  process.env.ANTHROPIC_EDITOR_MODEL;

/**
 * Return `value` when it is a finite number no smaller than `min`,
 * otherwise `fallback`. The CLI parses numeric flags with
 * `Number.parseInt` / `Number.parseFloat`, which yield NaN for
 * non-numeric input; NaN is not nullish, so `??` alone lets it through.
 */
const finiteOrFallback = (
  value: number | undefined,
  fallback: number,
  min: number
): number =>
  value !== undefined && Number.isFinite(value) && value >= min
    ? value
    : fallback;

/**
 * Build an LLM secret-edit generator from CLI options + env. Returns
 * undefined when the editor is disabled or unconfigured; callers fall back
 * to scanner-only warnings in that case.
 *
 * @param options - Editor-related CLI options.
 * @param verbose - When true, logs why the editor was skipped and each retry.
 * @returns A configured generator, or undefined when the editor is off.
 */
const resolveSecretEditGenerator = (
  options: SecretEditorCliOptions,
  verbose: boolean
): LlmSecretEditGenerator | undefined => {
  if (options.skipSecretCoherence) {
    return;
  }

  const providerRaw =
    options.llmEditorProvider ?? process.env.LLM_EDITOR_PROVIDER;
  const model = resolveEditorModel(options);

  // Editor stays opt-in: both provider and model must be explicitly set.
  if (!(providerRaw && model)) {
    return;
  }

  const provider = resolveLlmProvider(providerRaw);
  const apiKey = resolveEditorApiKey(provider, options.llmEditorApiKey);
  if (!apiKey) {
    // Fail open: unconfigured editor degrades to scanner-only warnings
    // instead of crashing the whole generation run. CI and local scripts
    // often point at the same flag set and shouldn't break when the
    // editor's API key is simply absent.
    if (verbose) {
      console.log(
        chalk.yellow(
          `⚠ Secret-coherence editor skipped: no API key found for provider ${provider}.`
        )
      );
    }
    return;
  }

  const onRetry = verbose
    ? (attempt: number, error: Error, delayMs: number) => {
        console.log(
          chalk.yellow(
            `  ⚠️ Secret editor call failed (attempt ${attempt}), retrying in ${delayMs}ms: ${error.message}`
          )
        );
      }
    : undefined;

  // Zero retries is a legitimate setting; negative or NaN is not.
  const maxRetries = finiteOrFallback(
    options.llmEditorMaxRetries,
    DEFAULT_EDITOR_MAX_RETRIES,
    0
  );
  // A token budget below 1 cannot produce any output.
  const maxTokens = finiteOrFallback(
    options.llmEditorMaxTokens,
    DEFAULT_EDITOR_MAX_TOKENS,
    1
  );
  const temperature = options.llmEditorTemperature;
  const hasTemperature =
    temperature !== undefined && Number.isFinite(temperature);

  const client = createLlmClient({
    provider,
    config: {
      apiKey,
      ...(options.llmEditorBaseUrl
        ? { baseUrl: options.llmEditorBaseUrl }
        : {}),
      retry: {
        maxRetries,
        onRetry,
      },
    },
  });

  return new LlmSecretEditGenerator({
    client,
    model,
    // An unparsable temperature is dropped so the client default applies.
    ...(hasTemperature ? { temperature } : {}),
    maxTokens,
  });
};
Defaults to LLM_EDITOR_MODEL or ANTHROPIC_EDITOR_MODEL env." + ) + .option( + "--llm-editor-api-key ", + "Secret-coherence editor API key. Falls back to LLM_EDITOR_API_KEY or the provider-specific env var." + ) + .option("--llm-editor-base-url ", "Secret-coherence editor LLM base URL") + .option( + "--llm-editor-temperature ", + "Secret-coherence editor temperature", + (value) => Number.parseFloat(value) + ) + .option( + "--llm-editor-max-tokens ", + "Secret-coherence editor max tokens (default: 8192)", + (value) => Number.parseInt(value, 10) + ) + .option( + "--llm-editor-max-retries ", + "Secret-coherence editor max retry attempts (default: 3)", + (value) => Number.parseInt(value, 10) + ) + .option( + "--skip-secret-coherence", + "Disable the secret-coherence scan and edit step", + false + ) .action( async (options: { providers?: string; @@ -897,6 +1049,14 @@ program excludeFile?: string; ignoreFile?: string; verbose: boolean; + llmEditorProvider?: string; + llmEditorModel?: string; + llmEditorApiKey?: string; + llmEditorBaseUrl?: string; + llmEditorTemperature?: number; + llmEditorMaxTokens?: number; + llmEditorMaxRetries?: number; + skipSecretCoherence?: boolean; // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: legacy CLI flow }) => { const spinner = ora("Parsing input...").start(); @@ -1078,6 +1238,27 @@ program } toolkitSummaryGenerator = new LlmToolkitSummaryGenerator(llmConfig); } + + const secretEditGenerator = resolveSecretEditGenerator( + options, + options.verbose + ); + if (options.verbose) { + if (secretEditGenerator) { + console.log( + chalk.dim( + `Secret-coherence editor enabled (model: ${resolveEditorModel(options)})` + ) + ); + } else if (!options.skipSecretCoherence) { + console.log( + chalk.dim( + "Secret-coherence editor not configured; scanners will still emit warnings." + ) + ); + } + } + const previousOutputDir = options.forceRegenerate ? undefined : (options.previousOutput ?? 
@@ -1340,6 +1521,8 @@ program customSectionsSource, ...(toolExampleGenerator ? { toolExampleGenerator } : {}), ...(toolkitSummaryGenerator ? { toolkitSummaryGenerator } : {}), + ...(secretEditGenerator ? { secretEditGenerator } : {}), + ...(options.skipSecretCoherence ? { skipSecretCoherence: true } : {}), ...(previousToolkits ? { previousToolkits } : {}), ...(options.llmConcurrency ? { llmConcurrency: options.llmConcurrency } @@ -1468,6 +1651,10 @@ program customSectionsSource, ...(toolExampleGenerator ? { toolExampleGenerator } : {}), ...(toolkitSummaryGenerator ? { toolkitSummaryGenerator } : {}), + ...(secretEditGenerator ? { secretEditGenerator } : {}), + ...(options.skipSecretCoherence + ? { skipSecretCoherence: true } + : {}), ...(previousToolkits ? { previousToolkits } : {}), ...(options.llmConcurrency ? { llmConcurrency: options.llmConcurrency } @@ -1489,6 +1676,22 @@ program spinner.succeed( `Processed ${summary.completed} toolkit(s) with ${summary.totalTools} tools in ${summary.elapsed}` ); + + // Surface per-toolkit warnings to stdout so CI logs show what + // the merger saw. Without this, stale-secret / coverage / + // summary-generation warnings only land in the run log file + // on disk — which isn't visible in GitHub Actions output. + for (const mergeResult of allResults) { + if (mergeResult.warnings.length === 0) continue; + console.log( + chalk.yellow( + `⚠ ${mergeResult.toolkit.id}: ${mergeResult.warnings.length} warning(s)` + ) + ); + for (const warning of mergeResult.warnings) { + console.log(chalk.dim(` - ${warning}`)); + } + } } } else { const { providersToProcess } = filterProvidersBySkipIds( @@ -1812,6 +2015,39 @@ program "Path to a .txt file with toolkit IDs to skip during generation (one per line)" ) .option("--verbose", "Enable verbose logging", false) + .option( + "--llm-editor-provider ", + "Secret-coherence editor LLM provider (openai|anthropic). Defaults to LLM_EDITOR_PROVIDER env." 
+ ) + .option( + "--llm-editor-model ", + "Secret-coherence editor LLM model (e.g. claude-sonnet-4-6). Defaults to LLM_EDITOR_MODEL or ANTHROPIC_EDITOR_MODEL env." + ) + .option( + "--llm-editor-api-key ", + "Secret-coherence editor API key. Falls back to LLM_EDITOR_API_KEY or the provider-specific env var." + ) + .option("--llm-editor-base-url ", "Secret-coherence editor LLM base URL") + .option( + "--llm-editor-temperature ", + "Secret-coherence editor temperature", + (value) => Number.parseFloat(value) + ) + .option( + "--llm-editor-max-tokens ", + "Secret-coherence editor max tokens (default: 8192)", + (value) => Number.parseInt(value, 10) + ) + .option( + "--llm-editor-max-retries ", + "Secret-coherence editor max retry attempts (default: 3)", + (value) => Number.parseInt(value, 10) + ) + .option( + "--skip-secret-coherence", + "Disable the secret-coherence scan and edit step", + false + ) .action( async (options: { output: string; @@ -1848,6 +2084,14 @@ program ignoreFile?: string; requireComplete: boolean; verbose: boolean; + llmEditorProvider?: string; + llmEditorModel?: string; + llmEditorApiKey?: string; + llmEditorBaseUrl?: string; + llmEditorTemperature?: number; + llmEditorMaxTokens?: number; + llmEditorMaxRetries?: number; + skipSecretCoherence?: boolean; // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: legacy CLI flow }) => { const spinner = ora("Initializing...").start(); @@ -1929,6 +2173,27 @@ program } toolkitSummaryGenerator = new LlmToolkitSummaryGenerator(llmConfig); } + + const secretEditGenerator = resolveSecretEditGenerator( + options, + options.verbose + ); + if (options.verbose) { + if (secretEditGenerator) { + console.log( + chalk.dim( + `Secret-coherence editor enabled (model: ${resolveEditorModel(options)})` + ) + ); + } else if (!options.skipSecretCoherence) { + console.log( + chalk.dim( + "Secret-coherence editor not configured; scanners will still emit warnings." 
+ ) + ); + } + } + const previousOutputDir = options.forceRegenerate ? undefined : (options.previousOutput ?? @@ -2127,6 +2392,10 @@ program customSectionsSource, ...(toolExampleGenerator ? { toolExampleGenerator } : {}), ...(toolkitSummaryGenerator ? { toolkitSummaryGenerator } : {}), + ...(secretEditGenerator ? { secretEditGenerator } : {}), + ...(options.skipSecretCoherence + ? { skipSecretCoherence: true } + : {}), ...(previousToolkits ? { previousToolkits } : {}), ...(options.llmConcurrency ? { llmConcurrency: options.llmConcurrency } @@ -2148,6 +2417,22 @@ program `Processed ${summary.completed} toolkit(s) with ${summary.totalTools} tools in ${summary.elapsed}` ); + // Surface per-toolkit warnings to stdout (stale-secret scan, + // coverage gaps, summary-gen failures) so CI logs show what + // the merger saw — otherwise they only land in the run log + // file on disk. + for (const mergeResult of results) { + if (mergeResult.warnings.length === 0) continue; + console.log( + chalk.yellow( + `⚠ ${mergeResult.toolkit.id}: ${mergeResult.warnings.length} warning(s)` + ) + ); + for (const warning of mergeResult.warnings) { + console.log(chalk.dim(` - ${warning}`)); + } + } + // Generate output (batch mode if not incremental) if (!useIncremental && results.length > 0) { spinner.start("Writing output files..."); diff --git a/toolkit-docs-generator/src/llm/index.ts b/toolkit-docs-generator/src/llm/index.ts index 162d4b879..ef58af37d 100644 --- a/toolkit-docs-generator/src/llm/index.ts +++ b/toolkit-docs-generator/src/llm/index.ts @@ -1,3 +1,4 @@ export * from "./client.js"; +export * from "./secret-edit-generator.js"; export * from "./tool-example-generator.js"; export * from "./toolkit-summary-generator.js"; diff --git a/toolkit-docs-generator/src/llm/secret-edit-generator.ts b/toolkit-docs-generator/src/llm/secret-edit-generator.ts new file mode 100644 index 000000000..ce8bc162e --- /dev/null +++ b/toolkit-docs-generator/src/llm/secret-edit-generator.ts @@ -0,0 
+1,196 @@ +/** + * LLM editor for secret-coherence fixes. + * + * Unlike toolkit-summary-generator (which rewrites a summary from scratch), + * this editor is asked to preserve the source text as-is and only change the + * passages that mention a removed secret — or, for coverage-gap fixes, to + * minimally weave in missing secret information without re-styling the rest. + * + * Keeping edits local prevents the "oversimplification on rerun" behavior + * users observed when the regenerator reprocessed richer hand-refined text. + */ +import { + ARCADE_SECRETS_DASHBOARD_URL, + ARCADE_SECRETS_DOC_URL, +} from "../merger/secret-coherence.js"; +import type { LlmClient } from "./client.js"; + +export interface SecretEditGeneratorConfig { + readonly client: LlmClient; + readonly model: string; + readonly temperature?: number; + readonly maxTokens?: number; + readonly systemPrompt?: string; +} + +export interface SecretCleanupEditInput { + readonly kind: "summary" | "documentation_chunk"; + readonly content: string; + readonly removedSecrets: readonly string[]; + readonly currentSecrets: readonly string[]; + readonly toolkitLabel: string; +} + +export interface SecretCoverageEditInput { + readonly content: string; + readonly missingSecretNames: readonly string[]; + readonly currentSecrets: readonly string[]; + readonly toolkitLabel: string; + readonly requireConfigLink: boolean; +} + +export interface ISecretEditGenerator { + /** + * Edit the provided content to remove all references to `removedSecrets` + * while preserving every other sentence, bullet, table row, heading, and + * example unchanged. Returns the edited content. + */ + cleanupStaleReferences: (input: SecretCleanupEditInput) => Promise; + + /** + * Edit the provided summary to add any missing secret mentions (one + * short, factual line per missing secret) and, if required, a link to + * the Arcade config doc. 
// Default system prompt for the editor; callers may override it via
// SecretEditGeneratorConfig.systemPrompt.
const DEFAULT_SYSTEM_PROMPT =
  "You are a careful documentation editor for the Arcade MCP toolkit docs. " +
  "You make the smallest possible change that satisfies the request. " +
  "Never re-summarize, shorten unrelated content, rewrite headings, or " +
  "reorder existing sections. Preserve markdown syntax, backticks, tables, " +
  "and code exactly.";

// Anchored to the start/end of the full string, with a required newline
// between the opening fence (optionally followed by `markdown`/`md`/`text`
// plus horizontal whitespace) and the captured content. A bare
// ```python / ```bash / ```json at the start means the LLM returned a
// code block that *is* the content — not a wrapper — so the pattern must
// not match and stripOptionalFence will return the text unchanged.
// Greedy capture extends to the last closing fence so inner fenced
// blocks survive.
const FENCE_PATTERN =
  /^\s*```(?:markdown|md|text)?[ \t]*\r?\n([\s\S]*)\r?\n```\s*$/;

/**
 * Remove a single wrapping markdown fence from an LLM response, if present.
 *
 * @param text - Raw response text.
 * @returns The fenced payload (trimmed) when the whole response is wrapped
 *   in a plain / `markdown` / `md` / `text` fence; otherwise the trimmed
 *   input. An empty fenced payload yields `""` so the caller's
 *   empty-response guard can reject it.
 */
const stripOptionalFence = (text: string): string => {
  const match = FENCE_PATTERN.exec(text);
  // Branch on the match itself, not on the capture's truthiness: an empty
  // capture is still a wrapper and must not be returned as literal fences.
  if (match) {
    return (match[1] ?? "").trim();
  }
  return text.trim();
};

/** Render a list of names as a comma-separated string, or "None" when empty. */
const formatList = (values: readonly string[]): string =>
  values.length > 0 ? values.join(", ") : "None";
/**
 * Build the user prompt for the coverage-gap edit: add the missing secret
 * names to the summary and, when `requireConfigLink` is set, a link to the
 * Arcade secrets config docs.
 *
 * @param input - Summary text plus the missing/current secret names.
 * @returns The prompt, with the summary delimited by `<<<` / `>>>`.
 */
const buildCoveragePrompt = (input: SecretCoverageEditInput): string => {
  const missingList = formatList(input.missingSecretNames);
  const currentList = formatList(input.currentSecrets);
  // When no config link is required, only Arcade links are ruled out.
  // A blanket "no new links" rule would contradict the provider-docs
  // link rule a few lines earlier in the same prompt.
  const linkInstruction = input.requireConfigLink
    ? `- The **Secrets** section must include a link to the Arcade config docs. Use this exact URL: ${ARCADE_SECRETS_DOC_URL}. If a short mention of the Arcade Dashboard secrets page is useful, ${ARCADE_SECRETS_DASHBOARD_URL} is acceptable as an additional reference.`
    : "- Do not add links to Arcade pages. The only new links allowed are provider documentation links for the missing secrets, as described above.";

  return [
    `You are editing an Arcade MCP toolkit summary for ${input.toolkitLabel}.`,
    "",
    `Secrets currently required by the toolkit: ${currentList}.`,
    `Secrets missing from the summary that MUST be added: ${missingList}.`,
    "",
    "Rules:",
    "- Ensure every current secret is mentioned by its exact name (inside backticks).",
    "- For each missing secret, add a factual explanation of what it is and how a developer obtains it from the provider. Use as much detail as the secret actually needs — a short URL override may be a single line; a scoped API key may need several sentences naming the provider dashboard page, the required scopes or permissions, and any account tier constraints.",
    "- When possible include an inline markdown link to the provider's own documentation page that tells the reader how to create or retrieve that specific secret. If you do not know the provider's docs URL, omit the link rather than inventing one.",
    "- Prefer appending to or lightly extending an existing `**Secrets**` section. Only create a `**Secrets**` section if none exists.",
    "- Do not rewrite unrelated content. Do not change headings, ordering, tone, or other sections.",
    linkInstruction,
    "- Keep phrasing factual, developer-focused, and free of marketing copy.",
    "",
    "Return ONLY the edited summary, with no commentary, no explanation, and no code fences around the whole document.",
    "",
    "Summary:",
    "<<<",
    input.content,
    ">>>",
  ].join("\n");
};
{ maxTokens: this.maxTokens } : {}), + }); + const trimmed = stripOptionalFence(response); + if (trimmed.length === 0) { + throw new Error("Secret edit LLM response was empty"); + } + return trimmed; + } + + cleanupStaleReferences(input: SecretCleanupEditInput): Promise { + if (input.removedSecrets.length === 0) { + return Promise.resolve(input.content); + } + return this.generate(buildCleanupPrompt(input)); + } + + fillCoverageGaps(input: SecretCoverageEditInput): Promise { + const hasMissing = input.missingSecretNames.length > 0; + const needsLink = input.requireConfigLink; + if (!(hasMissing || needsLink)) { + return Promise.resolve(input.content); + } + return this.generate(buildCoveragePrompt(input)); + } +} diff --git a/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts b/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts index d2739a137..892fc7367 100644 --- a/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts +++ b/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts @@ -1,4 +1,9 @@ import type { ToolkitSummaryGenerator } from "../merger/data-merger.js"; +import { + ARCADE_AUTH_PROVIDERS_BASE_URL, + ARCADE_SECRETS_DASHBOARD_URL, + ARCADE_SECRETS_DOC_URL, +} from "../merger/secret-coherence.js"; import type { MergedTool, MergedToolkit, SecretType } from "../types/index.js"; import type { LlmClient } from "./client.js"; @@ -31,13 +36,12 @@ const formatAuth = (toolkit: MergedToolkit): string => { return "none"; } - const scopes = - toolkit.auth.allScopes.length > 0 - ? toolkit.auth.allScopes.join(", ") - : "None"; const provider = toolkit.auth.providerId ?? "unknown"; - return `${toolkit.auth.type}; provider: ${provider}; scopes: ${scopes}`; + // Scopes are intentionally omitted from the prompt: the summary should + // not re-list them — it points readers at the per-provider Arcade docs + // page where scopes live and stay in sync with the source of truth. 
+ return `${toolkit.auth.type}; provider: ${provider}`; }; const collectSecrets = (tools: MergedTool[]) => { @@ -61,26 +65,29 @@ const collectSecrets = (tools: MergedTool[]) => { const buildPrompt = (toolkit: MergedToolkit): string => { const secrets = collectSecrets(toolkit.tools); + const hasSecrets = secrets.names.length > 0; return [ - "Write a concise summary for Arcade toolkit docs.", + "Write a summary for Arcade toolkit docs.", 'Return JSON: {"summary": ""}', "", + "Goals: compact but complete. No fixed word limit — use as many words as needed to cover every current capability and every current secret, and no more. Prefer scannable structure over prose padding.", + "", "Requirements:", - "- 60 to 140 words.", "- Start with 1 to 2 sentences that explain the provider and what the toolkit enables.", - "- Add a **Capabilities** section with 3 to 5 bullet points.", - "- Do not list tools one by one. Summarize shared capabilities.", - "- If auth type is oauth2 or mixed, add an **OAuth** section with provider and scopes.", - "- If auth type is api_key or mixed, mention API key usage in **OAuth**.", - "- If any secrets exist, add a **Secrets** section describing secret types and examples.", - "- Use Markdown. Keep it concise and developer-focused.", + "- Add a **Capabilities** section with 3 to 6 bullets summarizing shared capabilities (group tools by theme; do not list tools one by one).", + `- If auth type is oauth2 or mixed, add an **OAuth** section that names the provider and links to the Arcade provider docs at ${ARCADE_AUTH_PROVIDERS_BASE_URL}/ (use the OAuth provider ID supplied in the Auth line below as the slug). Do NOT list scopes — the provider page already documents them and repeating scopes here drifts.`, + "- If auth type is api_key or mixed, mention API key usage under **OAuth** or a dedicated heading.", + `- If any secrets exist, add a **Secrets** section. List every secret by its exact name in backticks. 
For each secret, give a factual explanation of what it is and how a developer obtains it from the provider — use as much detail as the secret actually needs (a short URL override may be one line; a scoped API key may need several sentences naming the provider dashboard page, required scopes/permissions, and any account tier). When possible include an inline markdown link to the provider's own documentation page that tells the reader how to create/retrieve that specific secret. If you do not know the provider's docs URL, omit the link rather than inventing one. End the section with the Arcade config docs link: ${ARCADE_SECRETS_DOC_URL} (and optionally mention ${ARCADE_SECRETS_DASHBOARD_URL}).`, + "- Use Markdown. Developer-focused. Say 'Arcade' (never 'Arcade AI').", + "- Do not add marketing copy, repetition, or filler.", "", `Toolkit: ${toolkit.label} (${toolkit.id})`, `Description: ${toolkit.description ?? "No description"}`, `Auth: ${formatAuth(toolkit)}`, + `Secrets required: ${hasSecrets ? "Yes" : "None"}`, `Secret types: ${secrets.types.length > 0 ? secrets.types.join(", ") : "None"}`, - `Secret names: ${secrets.names.length > 0 ? secrets.names.join(", ") : "None"}`, + `Secret names: ${hasSecrets ? secrets.names.join(", ") : "None"}`, `Tools (${toolkit.tools.length}):`, formatToolLines(toolkit.tools), ].join("\n"); diff --git a/toolkit-docs-generator/src/merger/data-merger.ts b/toolkit-docs-generator/src/merger/data-merger.ts index d3cf02ed5..cefc08dde 100644 --- a/toolkit-docs-generator/src/merger/data-merger.ts +++ b/toolkit-docs-generator/src/merger/data-merger.ts @@ -5,6 +5,7 @@ * into the final MergedToolkit format. 
/**
 * Collect every secret name declared by the toolkit's tools, from both the
 * plain `secrets` list and the optional `secretsInfo` entries.
 *
 * @param toolkit - Merged toolkit whose tools are inspected.
 * @returns De-duplicated names in first-seen order (per tool: `secrets`
 *   first, then `secretsInfo`).
 */
const collectCurrentSecretNames = (toolkit: MergedToolkit): Set<string> => {
  const declared = toolkit.tools.flatMap((tool) => [
    ...tool.secrets,
    ...(tool.secretsInfo ?? []).map((info) => info.name),
  ]);
  return new Set<string>(declared);
};

/**
 * Human-readable label for where a stale secret reference was found; used
 * in warning messages.
 *
 * @param location - Summary, toolkit-level chunk, or per-tool chunk.
 * @returns A short description of that location.
 */
const describeLocation = (
  location:
    | { kind: "summary" }
    | { kind: "toolkit_chunk"; chunkIndex: number }
    | {
        kind: "tool_chunk";
        toolQualifiedName: string;
        chunkIndex: number;
      }
): string => {
  if (location.kind === "summary") {
    return "summary";
  }
  if (location.kind === "toolkit_chunk") {
    return `toolkit documentation chunk #${location.chunkIndex}`;
  }
  if (location.kind === "tool_chunk") {
    return `tool chunk #${location.chunkIndex} of ${location.toolQualifiedName}`;
  }
  // Defensive fallback for a kind not covered above.
  return "unknown location";
};

/**
 * Replace the `content` of the chunk at `index`, keeping its other fields.
 * Does nothing when there is no chunk at that index.
 */
const overwriteChunkContent = <T extends { content: string }>(
  chunks: T[],
  index: number,
  content: string
): void => {
  const existing = chunks[index];
  if (existing) {
    chunks[index] = { ...existing, content };
  }
};

/**
 * Write edited text back into the artifact an edit target points at.
 * Mutates `toolkit` in place. A target whose tool or chunk no longer exists
 * is ignored.
 *
 * @param toolkit - Toolkit to update.
 * @param target - Which artifact (summary / toolkit chunk / tool chunk) to replace.
 * @param edited - Replacement text.
 */
const applyEditedContent = (
  toolkit: MergedToolkit,
  target: StaleSecretEditTarget,
  edited: string
): void => {
  if (target.kind === "summary") {
    toolkit.summary = edited;
    return;
  }
  if (target.kind === "toolkit_chunk") {
    overwriteChunkContent(
      toolkit.documentationChunks,
      target.chunkIndex,
      edited
    );
    return;
  }
  if (target.kind === "tool_chunk") {
    const owner = toolkit.tools.find(
      (candidate) => candidate.qualifiedName === target.toolQualifiedName
    );
    if (owner) {
      overwriteChunkContent(
        owner.documentationChunks,
        target.chunkIndex,
        edited
      );
    }
  }
};
// DataMerger class members: secret-coherence step.

  /**
   * Run the secret-coherence step for one merged toolkit: scan for stale
   * secret references and coverage gaps, record warnings, and, when an
   * editor is configured, let it repair the affected content in place.
   *
   * @param result - Merge result whose toolkit and warnings are updated.
   * @param previousToolkit - Previous snapshot used to detect removed secrets.
   */
  private async enforceSecretCoherence(
    result: MergeResult,
    previousToolkit?: MergedToolkit
  ): Promise<void> {
    // --skip-secret-coherence disables the entire step: no scan, no
    // warnings, no edits. Callers who want warnings without edits should
    // leave the flag off and simply not configure a secretEditGenerator.
    if (this.skipSecretCoherence) {
      return;
    }

    const detected = detectSecretCoherenceIssues(
      result.toolkit,
      previousToolkit
    );
    if (!hasCoherenceIssues(detected)) {
      return;
    }
    this.appendCoherenceWarnings(result, detected);

    if (!this.secretEditGenerator) {
      return;
    }

    // Order matters: stale cleanup runs first, then issues are re-detected
    // against the edited toolkit so the coverage fill works on the
    // post-cleanup summary. If cleanup dropped a passage that mentioned a
    // current secret, the fresh scan reports it as a gap.
    await this.applyStaleRefCleanup(result, detected);
    const afterCleanup = detectSecretCoherenceIssues(
      result.toolkit,
      previousToolkit
    );
    await this.applyCoverageFill(result, afterCleanup);
  }

  /**
   * Append one warning per stale reference, then one per coverage gap, to
   * `result.warnings`.
   */
  private appendCoherenceWarnings(
    result: MergeResult,
    issues: SecretCoherenceIssues
  ): void {
    const toolkitId = result.toolkit.id;
    const staleWarnings = issues.staleReferences.map(
      (stale) =>
        `Stale secret reference in ${describeLocation(stale.location)}: ${stale.removedSecret} (removed from toolkit ${toolkitId})`
    );
    const gapWarnings = issues.coverageGaps.map((gap) =>
      gap.kind === "missing_secret_in_summary"
        ? `Summary does not mention current secret: ${gap.secretName} (toolkit ${toolkitId})`
        : `Summary is missing a link to the Arcade secret config docs (toolkit ${toolkitId})`
    );
    result.warnings.push(...staleWarnings, ...gapWarnings);
  }

  /**
   * Ask the editor to remove stale secret references, one request per
   * affected artifact, and write each edit back into the toolkit. A failed
   * edit becomes a warning and does not stop the remaining edits.
   */
  private async applyStaleRefCleanup(
    result: MergeResult,
    issues: SecretCoherenceIssues
  ): Promise<void> {
    const editor = this.secretEditGenerator;
    if (!editor) {
      return;
    }
    const targets = groupStaleRefsByTarget(issues.staleReferences);
    if (targets.length === 0) {
      return;
    }

    const { toolkit } = result;
    const currentSecrets = Array.from(
      collectCurrentSecretNames(toolkit)
    ).sort();

    // Edits run one at a time, in target order.
    for (const target of targets) {
      const kind =
        target.kind === "summary" ? "summary" : "documentation_chunk";
      try {
        const edited = await editor.cleanupStaleReferences({
          kind,
          content: target.content,
          removedSecrets: target.removedSecrets,
          currentSecrets,
          toolkitLabel: toolkit.label,
        });
        applyEditedContent(toolkit, target, edited);
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        result.warnings.push(
          `Secret cleanup edit failed for ${toolkit.id} (${target.kind}): ${message}`
        );
      }
    }
  }

  /**
   * Ask the editor to add missing current secrets and/or the Arcade secret
   * config link to the summary. Does nothing without an editor, without a
   * summary, or when no coverage gap is present. A failed edit becomes a
   * warning and leaves the summary unchanged.
   */
  private async applyCoverageFill(
    result: MergeResult,
    issues: SecretCoherenceIssues
  ): Promise<void> {
    const editor = this.secretEditGenerator;
    if (!editor) {
      return;
    }
    const { toolkit } = result;
    const summary = toolkit.summary;
    if (!summary) {
      return;
    }

    // Split the gaps by kind; narrowing on `kind` exposes `secretName`.
    const missing: string[] = [];
    let needsLink = false;
    for (const gap of issues.coverageGaps) {
      if (gap.kind === "missing_secret_in_summary") {
        missing.push(gap.secretName);
      } else if (gap.kind === "missing_secret_config_link") {
        needsLink = true;
      }
    }
    if (missing.length === 0 && !needsLink) {
      return;
    }

    const currentSecrets = Array.from(
      collectCurrentSecretNames(toolkit)
    ).sort();
    try {
      toolkit.summary = await editor.fillCoverageGaps({
        content: summary,
        missingSecretNames: missing,
        currentSecrets,
        toolkitLabel: toolkit.label,
        requireConfigLink: needsLink,
      });
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      result.warnings.push(
        `Secret coverage edit failed for ${toolkit.id}: ${message}`
      );
    }
  }
/**
 * Secret coherence scanners
 *
 * Deterministic checks that keep the rendered toolkit docs consistent with
 * the toolkit's current secret set. Two classes of problem:
 *
 * - Stale reference: a secret name appears in summary or documentation
 *   chunks but is no longer declared by any tool in the toolkit (typically
 *   because the tool that required it was removed upstream).
 * - Coverage gap: a current secret is not documented in the summary, or
 *   secrets are mentioned without a link to the Arcade config docs.
 *
 * These scanners return structured issues. Remediation (LLM-driven edits or
 * warnings) is performed by callers in the merger pipeline.
 */
import type { DocumentationChunk, MergedToolkit } from "../types/index.js";

export const ARCADE_SECRETS_DOC_URL =
  "https://docs.arcade.dev/en/guides/create-tools/tool-basics/create-tool-secrets";
export const ARCADE_SECRETS_DASHBOARD_URL =
  "https://api.arcade.dev/dashboard/auth/secrets";

/**
 * Base URL for Arcade's per-provider OAuth docs. Specific provider pages
 * live one path segment below it, e.g. `/github`, `/google`, `/atlassian`.
 * Used by the summary prompt so OAuth sections can link out instead of
 * repeating scope lists that would drift from the provider page.
 */
export const ARCADE_AUTH_PROVIDERS_BASE_URL =
  "https://docs.arcade.dev/en/references/auth-providers";

/**
 * Substrings that count as "the summary links to the secret config docs".
 * The last two entries are scheme-less / host-relaxed forms of the first
 * two, so links written without `https://` (or with a different subdomain
 * for the dashboard) are still accepted.
 */
const SECRET_REFERENCE_URLS: readonly string[] = [
  ARCADE_SECRETS_DOC_URL,
  ARCADE_SECRETS_DASHBOARD_URL,
  "docs.arcade.dev/en/guides/create-tools/tool-basics/create-tool-secrets",
  "arcade.dev/dashboard/auth/secrets",
];

export type StaleSecretLocation =
  | { kind: "summary" }
  | { kind: "toolkit_chunk"; chunkIndex: number }
  | { kind: "tool_chunk"; toolQualifiedName: string; chunkIndex: number };

export type StaleSecretReference = {
  removedSecret: string;
  location: StaleSecretLocation;
  content: string;
};

export type SecretCoverageGap =
  | { kind: "missing_secret_in_summary"; secretName: string }
  | { kind: "missing_secret_config_link" };

export type SecretCoherenceIssues = {
  staleReferences: StaleSecretReference[];
  coverageGaps: SecretCoverageGap[];
};

/**
 * Every secret name declared by the toolkit's tools (`secrets` plus
 * `secretsInfo`). Blank names are ignored: they cannot be documented or
 * searched for.
 */
const collectToolkitSecrets = (toolkit: MergedToolkit): Set<string> => {
  const names = new Set<string>();
  for (const tool of toolkit.tools) {
    for (const secret of tool.secrets) {
      if (secret) names.add(secret);
    }
    for (const info of tool.secretsInfo ?? []) {
      if (info.name) names.add(info.name);
    }
  }
  return names;
};

const toolkitChunks = (toolkit: MergedToolkit): readonly DocumentationChunk[] =>
  toolkit.documentationChunks ?? [];

/** Escape regex metacharacters so `text` matches literally. */
const escapeRegExp = (text: string): string =>
  text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

/**
 * Whole-identifier test: `secret` must appear in `content` without an
 * identifier character (letter, digit, underscore) directly before or
 * after it.
 *
 * A plain substring test is not enough because secret names routinely
 * contain one another (`API_KEY` inside `OPENAI_API_KEY`, `GITHUB_TOKEN`
 * inside `GITHUB_TOKEN_V2`). With substring matching, removing the shorter
 * secret flags every mention of the longer, still-current one as stale, and
 * a current short secret looks "covered" when only the longer one is
 * mentioned. Punctuation, backticks, table pipes, and quotes are not
 * identifier characters, so those contexts still match.
 */
const contentMentionsSecret = (content: string, secret: string): boolean => {
  if (secret.length === 0) {
    return false;
  }
  const wholeName = new RegExp(
    `(?<![A-Za-z0-9_])${escapeRegExp(secret)}(?![A-Za-z0-9_])`
  );
  return wholeName.test(content);
};

/** One stale reference per removed secret mentioned in `content`. */
const staleRefsInContent = (
  content: string,
  removedSecrets: readonly string[],
  location: StaleSecretLocation
): StaleSecretReference[] =>
  removedSecrets
    .filter((removedSecret) => contentMentionsSecret(content, removedSecret))
    .map((removedSecret) => ({
      removedSecret,
      location: { ...location },
      content,
    }));

const findSummaryStaleRefs = (
  toolkit: MergedToolkit,
  removedSecrets: readonly string[]
): StaleSecretReference[] => {
  const summary = toolkit.summary;
  if (!summary) {
    return [];
  }
  return staleRefsInContent(summary, removedSecrets, { kind: "summary" });
};

const findToolkitChunkStaleRefs = (
  toolkit: MergedToolkit,
  removedSecrets: readonly string[]
): StaleSecretReference[] => {
  const refs: StaleSecretReference[] = [];
  const chunks = toolkitChunks(toolkit);
  for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex += 1) {
    const chunk = chunks[chunkIndex];
    if (!chunk) continue;
    refs.push(
      ...staleRefsInContent(chunk.content, removedSecrets, {
        kind: "toolkit_chunk",
        chunkIndex,
      })
    );
  }
  return refs;
};

const findToolChunkStaleRefs = (
  toolkit: MergedToolkit,
  removedSecrets: readonly string[]
): StaleSecretReference[] => {
  const refs: StaleSecretReference[] = [];
  for (const tool of toolkit.tools) {
    const chunks = tool.documentationChunks ?? [];
    for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex += 1) {
      const chunk = chunks[chunkIndex];
      if (!chunk) continue;
      refs.push(
        ...staleRefsInContent(chunk.content, removedSecrets, {
          kind: "tool_chunk",
          toolQualifiedName: tool.qualifiedName,
          chunkIndex,
        })
      );
    }
  }
  return refs;
};

/**
 * Compare current toolkit secrets against the previous snapshot to identify
 * secrets that were removed, then scan summary and documentation chunks for
 * any lingering references. Returns at most one entry per
 * (location, removedSecret) pair so callers can drive an LLM edit for each.
 *
 * @param toolkit - Current merged toolkit.
 * @param previousToolkit - Previous snapshot; without it nothing can be
 *   classified as removed and the result is empty.
 * @returns Stale references ordered summary, toolkit chunks, tool chunks.
 */
export const detectStaleSecretReferences = (
  toolkit: MergedToolkit,
  previousToolkit?: MergedToolkit
): StaleSecretReference[] => {
  if (!previousToolkit) {
    return [];
  }

  const currentSecrets = collectToolkitSecrets(toolkit);
  const removedSecrets = Array.from(
    collectToolkitSecrets(previousToolkit)
  ).filter((name) => !currentSecrets.has(name));

  if (removedSecrets.length === 0) {
    return [];
  }

  return [
    ...findSummaryStaleRefs(toolkit, removedSecrets),
    ...findToolkitChunkStaleRefs(toolkit, removedSecrets),
    ...findToolChunkStaleRefs(toolkit, removedSecrets),
  ];
};

const summaryHasConfigLink = (summary: string): boolean =>
  SECRET_REFERENCE_URLS.some((url) => summary.includes(url));

/**
 * Gaps in the summary's coverage of the toolkit's current secrets:
 * - Any current secret name that is not mentioned as a whole identifier.
 * - If at least one secret exists, a missing link to the Arcade config docs.
 *
 * Only runs when a summary is present; toolkits without a summary are a
 * separate concern handled by the summary generator itself.
 */
export const detectSecretCoverageGaps = (
  toolkit: MergedToolkit
): SecretCoverageGap[] => {
  const summary = toolkit.summary;
  if (!summary) {
    return [];
  }

  const currentSecrets = collectToolkitSecrets(toolkit);
  if (currentSecrets.size === 0) {
    return [];
  }

  const gaps: SecretCoverageGap[] = [];
  for (const secretName of currentSecrets) {
    // Same matcher as the stale scan, so both checks agree on what counts
    // as "mentioned".
    if (!contentMentionsSecret(summary, secretName)) {
      gaps.push({ kind: "missing_secret_in_summary", secretName });
    }
  }
  if (!summaryHasConfigLink(summary)) {
    gaps.push({ kind: "missing_secret_config_link" });
  }
  return gaps;
};

/** Run both scanners and return their findings together. */
export const detectSecretCoherenceIssues = (
  toolkit: MergedToolkit,
  previousToolkit?: MergedToolkit
): SecretCoherenceIssues => ({
  staleReferences: detectStaleSecretReferences(toolkit, previousToolkit),
  coverageGaps: detectSecretCoverageGaps(toolkit),
});

/** True when either scanner reported at least one issue. */
export const hasCoherenceIssues = (issues: SecretCoherenceIssues): boolean =>
  issues.staleReferences.length > 0 || issues.coverageGaps.length > 0;

/**
 * Group stale references by the artifact they live in so an LLM editor can
 * be asked to edit each artifact exactly once, even when multiple removed
 * secrets appear in the same chunk or summary.
 */
export type StaleSecretEditTarget =
  | { kind: "summary"; removedSecrets: string[]; content: string }
  | {
      kind: "toolkit_chunk";
      chunkIndex: number;
      removedSecrets: string[];
      content: string;
    }
  | {
      kind: "tool_chunk";
      toolQualifiedName: string;
      chunkIndex: number;
      removedSecrets: string[];
      content: string;
    };

/** Stable grouping key: one key per distinct summary / chunk. */
const locationKey = (ref: StaleSecretReference): string => {
  const location = ref.location;
  switch (location.kind) {
    case "summary":
      return "summary";
    case "toolkit_chunk":
      return `toolkit_chunk:${location.chunkIndex}`;
    case "tool_chunk":
      return `tool_chunk:${location.toolQualifiedName}:${location.chunkIndex}`;
    default:
      return "";
  }
};

/**
 * Collapse per-secret references into one edit target per artifact.
 *
 * @param refs - References as produced by `detectStaleSecretReferences`.
 * @returns Targets in first-seen order; each carries the artifact content
 *   and the sorted, de-duplicated removed secrets found in it.
 * @throws Error if a reference carries an unknown location kind.
 */
export const groupStaleRefsByTarget = (
  refs: readonly StaleSecretReference[]
): StaleSecretEditTarget[] => {
  const byKey = new Map<
    string,
    { ref: StaleSecretReference; removedSecrets: Set<string> }
  >();

  for (const ref of refs) {
    const key = locationKey(ref);
    const existing = byKey.get(key);
    if (existing) {
      existing.removedSecrets.add(ref.removedSecret);
    } else {
      byKey.set(key, {
        ref,
        removedSecrets: new Set<string>([ref.removedSecret]),
      });
    }
  }

  return Array.from(
    byKey.values(),
    ({ ref, removedSecrets }): StaleSecretEditTarget => {
      const location = ref.location;
      const sortedRemovedSecrets = Array.from(removedSecrets).sort();
      switch (location.kind) {
        case "summary":
          return {
            kind: "summary",
            removedSecrets: sortedRemovedSecrets,
            content: ref.content,
          };
        case "toolkit_chunk":
          return {
            kind: "toolkit_chunk",
            chunkIndex: location.chunkIndex,
            removedSecrets: sortedRemovedSecrets,
            content: ref.content,
          };
        case "tool_chunk":
          return {
            kind: "tool_chunk",
            toolQualifiedName: location.toolQualifiedName,
            chunkIndex: location.chunkIndex,
            removedSecrets: sortedRemovedSecrets,
            content: ref.content,
          };
        default:
          throw new Error("Unknown stale secret location kind");
      }
    }
  );
};
import { describe, expect, it, vi } from "vitest";
import type { LlmClient } from "../../src/llm/client.js";
import { LlmSecretEditGenerator } from "../../src/llm/secret-edit-generator.js";

/** LlmClient double whose `generateText` always resolves to `response`. */
const fakeClient = (response: string): LlmClient => ({
  provider: "anthropic",
  generateText: vi.fn(async () => response),
});

/** Editor wired to a fake client that answers every call with `response`. */
const editorReturning = (response: string) => {
  const client = fakeClient(response);
  const editor = new LlmSecretEditGenerator({ client, model: "test" });
  return { client, editor };
};

/** Fresh cleanup request for a documentation chunk that mentions OLD. */
const staleChunkRequest = () => ({
  kind: "documentation_chunk" as const,
  content: "mentions OLD",
  removedSecrets: ["OLD"],
  currentSecrets: ["KEEP"],
  toolkitLabel: "GitHub",
});

describe("LlmSecretEditGenerator.cleanupStaleReferences", () => {
  it("skips the LLM call when no secrets were removed", async () => {
    const { client, editor } = editorReturning("unused");

    const out = await editor.cleanupStaleReferences({
      kind: "summary",
      content: "original",
      removedSecrets: [],
      currentSecrets: ["KEEP"],
      toolkitLabel: "GitHub",
    });

    expect(out).toBe("original");
    expect(client.generateText).not.toHaveBeenCalled();
  });

  it("strips an optional code fence from the response", async () => {
    const { editor } = editorReturning("```markdown\nedited content\n```");

    const out = await editor.cleanupStaleReferences({
      kind: "summary",
      content: "mentions OLD",
      removedSecrets: ["OLD"],
      currentSecrets: ["KEEP"],
      toolkitLabel: "GitHub",
    });

    expect(out).toBe("edited content");
  });

  it("preserves inner fenced code blocks when unwrapping the outer fence", async () => {
    // The model may wrap an edited chunk, which itself contains a fenced
    // code block, in an outer markdown fence. A non-greedy fence regex
    // would stop at the first inner ``` and silently drop the rest.
    const wrappedEdit = [
      "```markdown",
      "Setup steps:",
      "",
      "```python",
      "arcade.run(tool='Github.CreateIssue')",
      "```",
      "",
      "Further notes follow.",
      "```",
    ].join("\n");
    const { editor } = editorReturning(wrappedEdit);

    const out = await editor.cleanupStaleReferences(staleChunkRequest());

    expect(out).toContain("```python");
    expect(out).toContain("Further notes follow.");
  });

  it("leaves non-wrapped content untouched (no outer fence)", async () => {
    // Inner fenced blocks but no outer fence: the response must pass
    // through verbatim rather than being partially matched.
    const response = "No wrapper.\n\n```js\nconsole.log(1);\n```\nTail text.";
    const { editor } = editorReturning(response);

    const out = await editor.cleanupStaleReferences(staleChunkRequest());

    expect(out).toBe(response);
  });

  it("does not strip a non-markdown language fence (e.g. ```python)", async () => {
    // A chunk that IS a code block must survive the fence strip. Only a
    // plain ``` or ```markdown/md/text fence qualifies as a wrapper; a
    // ```python block is the content itself.
    const response = "```python\nimport arcade\narcade.run()\n```";
    const { editor } = editorReturning(response);

    const out = await editor.cleanupStaleReferences(staleChunkRequest());

    expect(out).toBe(response);
  });

  it("does not strip a bash fence either", async () => {
    const response = "```bash\narcade deploy --env prod\n```";
    const { editor } = editorReturning(response);

    const out = await editor.cleanupStaleReferences(staleChunkRequest());

    expect(out).toBe(response);
  });

  it("passes the removed and current secrets into the prompt", async () => {
    const { client, editor } = editorReturning("ok");

    await editor.cleanupStaleReferences({
      kind: "documentation_chunk",
      content: "Mentions OLD_TOKEN in a row.",
      removedSecrets: ["OLD_TOKEN"],
      currentSecrets: ["KEEP_URL"],
      toolkitLabel: "GitHub",
    });

    expect(client.generateText).toHaveBeenCalledTimes(1);
    const generateText = client.generateText as ReturnType<typeof vi.fn>;
    const call = generateText.mock.calls[0]?.[0] as { prompt: string };
    expect(call.prompt).toContain("OLD_TOKEN");
    expect(call.prompt).toContain("KEEP_URL");
    expect(call.prompt).toContain("GitHub");
  });

  it("throws when the LLM response is empty", async () => {
    const { editor } = editorReturning("");

    await expect(
      editor.cleanupStaleReferences({
        kind: "summary",
        content: "mentions OLD",
        removedSecrets: ["OLD"],
        currentSecrets: [],
        toolkitLabel: "GitHub",
      })
    ).rejects.toThrow(/empty/i);
  });
});

describe("LlmSecretEditGenerator.fillCoverageGaps", () => {
  it("skips the LLM call when nothing is missing and no link is required", async () => {
    const { client, editor } = editorReturning("unused");

    const out = await editor.fillCoverageGaps({
      content: "original",
      missingSecretNames: [],
      currentSecrets: ["KEEP"],
      toolkitLabel: "GitHub",
      requireConfigLink: false,
    });

    expect(out).toBe("original");
    expect(client.generateText).not.toHaveBeenCalled();
  });

  it("calls the LLM when a config link is required", async () => {
    const { client, editor } = editorReturning("edited");

    const out = await editor.fillCoverageGaps({
      content: "Summary without a link.",
      missingSecretNames: [],
      currentSecrets: ["KEEP"],
      toolkitLabel: "GitHub",
      requireConfigLink: true,
    });

    expect(out).toBe("edited");
    expect(client.generateText).toHaveBeenCalledTimes(1);
  });
});
*/ -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; +import type { ISecretEditGenerator } from "../../src/llm/secret-edit-generator.js"; import { computeAllScopes, DataMerger, @@ -1185,219 +1186,287 @@ describe("DataMerger", () => { expect(result.toolkit.summary).toBeUndefined(); }); - it("preserves previous summary when no LLM generator is available and the signature changed", async () => { + it("runs the secret-coherence editor when a removed secret still appears in a toolkit documentation chunk", async () => { + const toolWithSecret = createTool({ + name: "CreateIssue", + qualifiedName: "Github.CreateIssue", + fullyQualifiedName: "Github.CreateIssue@1.0.0", + auth: { providerId: "github", providerType: "oauth2", scopes: [] }, + secrets: ["GITHUB_SERVER_URL"], + }); const toolkitDataSource = createCombinedToolkitDataSource({ - toolSource: new InMemoryToolDataSource([githubTool1, githubTool2]), + toolSource: new InMemoryToolDataSource([toolWithSecret]), metadataSource: new InMemoryMetadataSource([githubMetadata]), }); - const previousResult = await mergeToolkit( + + const previousWithOldSecret = await mergeToolkit( "Github", - [githubTool1], + [ + createTool({ + ...toolWithSecret, + secrets: [ + "GITHUB_SERVER_URL", + "GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN", + ], + }), + ], githubMetadata, null, createStubGenerator() ); - previousResult.toolkit.summary = "Hand-authored summary"; - - const merger = new DataMerger({ - toolkitDataSource, - customSectionsSource: new EmptyCustomSectionsSource(), - toolExampleGenerator: createStubGenerator(), - previousToolkits: new Map([["github", previousResult.toolkit]]), - }); - - const result = await merger.mergeToolkit("Github"); + // Put the stale reference inside a toolkit-level doc chunk — chunks + // persist verbatim across runs, unlike the summary which gets + // regenerated when the signature changes. 
+ previousWithOldSecret.toolkit.documentationChunks = [ + { + type: "section", + location: "before_available_tools", + position: "after", + content: + "| Secret | Required For |\n| `GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN` | Notifications |", + }, + ]; - expect(result.toolkit.tools).toHaveLength(2); - expect(result.toolkit.summary).toBe("Hand-authored summary"); - expect(result.toolkit.summaryStale).toBe(true); - expect(result.toolkit.summaryStaleReason).toBe( - "llm_generator_unavailable" + const cleanupSpy = vi.fn( + async () => + "| Secret | Required For |\n| `GITHUB_SERVER_URL` | All tools |" ); - expect( - result.warnings.some((warning) => - warning.includes("Summary is stale for Github") - ) - ).toBe(true); - }); - - it("preserves previous summary when the LLM generator throws", async () => { - const toolkitDataSource = createCombinedToolkitDataSource({ - toolSource: new InMemoryToolDataSource([githubTool1, githubTool2]), - metadataSource: new InMemoryMetadataSource([githubMetadata]), - }); - const previousResult = await mergeToolkit( - "Github", - [githubTool1], - githubMetadata, - null, - createStubGenerator() + const coverageSpy = vi.fn( + async (input: { content: string }) => + `${input.content}\n\n[config link]` ); - previousResult.toolkit.summary = "Hand-authored summary"; - - const failingSummary: ToolkitSummaryGenerator = { - generate: async () => { - throw new Error("rate limited"); - }, + const secretEditGenerator: ISecretEditGenerator = { + cleanupStaleReferences: cleanupSpy, + fillCoverageGaps: coverageSpy, }; const merger = new DataMerger({ toolkitDataSource, customSectionsSource: new EmptyCustomSectionsSource(), toolExampleGenerator: createStubGenerator(), - toolkitSummaryGenerator: failingSummary, - previousToolkits: new Map([["github", previousResult.toolkit]]), + toolkitSummaryGenerator: createStubSummaryGenerator("Stub summary"), + secretEditGenerator, + previousToolkits: new Map([["github", previousWithOldSecret.toolkit]]), }); const result = 
await merger.mergeToolkit("Github"); - expect(result.toolkit.summary).toBe("Hand-authored summary"); + expect(cleanupSpy).toHaveBeenCalledTimes(1); + const cleanupCall = cleanupSpy.mock.calls[0]?.[0] as { + removedSecrets: string[]; + kind: string; + }; + expect(cleanupCall.removedSecrets).toEqual([ + "GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN", + ]); + expect(cleanupCall.kind).toBe("documentation_chunk"); + // The chunk content in the result reflects the editor output. + expect( + result.toolkit.documentationChunks[0]?.content.includes( + "GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN" + ) + ).toBe(false); + expect(result.toolkit.documentationChunks[0]?.content).toContain( + "GITHUB_SERVER_URL" + ); expect( result.warnings.some((warning) => - warning.includes("Summary generation failed for Github") + warning.includes("Stale secret reference") ) ).toBe(true); - expect(result.toolkit.summaryStale).toBe(true); - expect(result.toolkit.summaryStaleReason).toBe("llm_generation_failed"); }); - it("clears the stale flag when the generator succeeds on the next run", async () => { - // A toolkit whose summary was stale on a prior run should come back - // clean once the generator actually produces a new summary. This - // proves the CI gate will stop flagging the toolkit once fixed. 
- const toolkitDataSource = createCombinedToolkitDataSource({ - toolSource: new InMemoryToolDataSource([githubTool1, githubTool2]), - metadataSource: new InMemoryMetadataSource([githubMetadata]), - }); - const previousResult = await mergeToolkit( - "Github", - [githubTool1], - githubMetadata, - null, - createStubGenerator() - ); - previousResult.toolkit.summary = "Older summary"; - previousResult.toolkit.summaryStale = true; - previousResult.toolkit.summaryStaleReason = "llm_generation_failed"; - - const merger = new DataMerger({ - toolkitDataSource, - customSectionsSource: new EmptyCustomSectionsSource(), - toolExampleGenerator: createStubGenerator(), - toolkitSummaryGenerator: createStubSummaryGenerator("Fresh summary"), - previousToolkits: new Map([["github", previousResult.toolkit]]), + it("passes the post-cleanup summary to the coverage editor, not the original", async () => { + // Ordering guarantee: applyStaleRefCleanup runs before the coverage + // scan is re-computed. We prove this by making cleanup mutate a + // chunk (unrelated to summary), then verifying the coverage editor + // receives the current-summary content rather than a pre-cleanup + // snapshot. This also demonstrates that re-detection uses the + // updated toolkit state. 
+ const toolWithSecrets = createTool({ + name: "CreateIssue", + qualifiedName: "Github.CreateIssue", + fullyQualifiedName: "Github.CreateIssue@1.0.0", + auth: { providerId: "github", providerType: "oauth2", scopes: [] }, + secrets: ["GITHUB_SERVER_URL"], }); - - const result = await merger.mergeToolkit("Github"); - - expect(result.toolkit.summary).toBe("Fresh summary (Github)"); - expect(result.toolkit.summaryStale).toBeUndefined(); - expect(result.toolkit.summaryStaleReason).toBeUndefined(); - }); - - it("does not flag stale when the signature matches a fresh previous summary", async () => { - // Baseline: if previous.summaryStale is falsy, signature match is a - // valid proof of freshness and the reuse path should keep the - // summary and stay clean. const toolkitDataSource = createCombinedToolkitDataSource({ - toolSource: new InMemoryToolDataSource([githubTool1]), + toolSource: new InMemoryToolDataSource([toolWithSecrets]), metadataSource: new InMemoryMetadataSource([githubMetadata]), }); - const previousResult = await mergeToolkit( + const previous = await mergeToolkit( "Github", - [githubTool1], + [ + createTool({ + ...toolWithSecrets, + secrets: ["GITHUB_SERVER_URL", "OLD_SECRET"], + }), + ], githubMetadata, null, createStubGenerator() ); - previousResult.toolkit.summary = "Cached summary"; + previous.toolkit.documentationChunks = [ + { + type: "markdown", + location: "header", + position: "after", + content: "Legacy note about OLD_SECRET.", + }, + ]; - const countingSummary = createCountingSummaryGenerator(); + const cleanupSpy = vi.fn(async () => "Edited chunk."); + const coverageSpy = vi.fn( + async (input: { content: string }) => `${input.content} [link]` + ); + const secretEditGenerator: ISecretEditGenerator = { + cleanupStaleReferences: cleanupSpy, + fillCoverageGaps: coverageSpy, + }; const merger = new DataMerger({ toolkitDataSource, customSectionsSource: new EmptyCustomSectionsSource(), toolExampleGenerator: createStubGenerator(), - 
toolkitSummaryGenerator: countingSummary.generator, - previousToolkits: new Map([["github", previousResult.toolkit]]), + toolkitSummaryGenerator: createStubSummaryGenerator("Stub summary"), + secretEditGenerator, + previousToolkits: new Map([["github", previous.toolkit]]), }); const result = await merger.mergeToolkit("Github"); - expect(result.toolkit.summary).toBe("Cached summary"); - expect(result.toolkit.summaryStale).toBeUndefined(); - expect(result.toolkit.summaryStaleReason).toBeUndefined(); - expect(countingSummary.getCalls()).toBe(0); + expect(cleanupSpy).toHaveBeenCalledTimes(1); + // Coverage editor runs because the stub summary does not mention + // GITHUB_SERVER_URL. It must see the current summary state — post- + // cleanup — not any snapshot taken before cleanup. + expect(coverageSpy).toHaveBeenCalledTimes(1); + const coverageCall = coverageSpy.mock.calls[0]?.[0] as { + content: string; + missingSecretNames: string[]; + }; + // The content passed in was the post-cleanup summary (unchanged by + // cleanup in this scenario, since the stale ref was in a chunk). + expect(coverageCall.content).toBe("Stub summary (Github)"); + expect(coverageCall.missingSecretNames).toContain("GITHUB_SERVER_URL"); + // After the coverage edit, the summary should reflect the editor's + // output built on top of the post-cleanup content. + expect(result.toolkit.summary).toBe("Stub summary (Github) [link]"); }); - it("regenerates when signature matches but the previous summary was already stale", async () => { - // If the previous summary was already flagged stale, a matching - // signature does NOT prove freshness — the stale summary was - // carried forward from an earlier toolset. The reuse fast path - // must skip, and the LLM must actually regenerate. 
+ it("emits warnings but no LLM calls when no editor is configured", async () => { + const toolWithSecret = createTool({ + name: "CreateIssue", + qualifiedName: "Github.CreateIssue", + fullyQualifiedName: "Github.CreateIssue@1.0.0", + auth: { providerId: "github", providerType: "oauth2", scopes: [] }, + secrets: ["GITHUB_SERVER_URL"], + }); const toolkitDataSource = createCombinedToolkitDataSource({ - toolSource: new InMemoryToolDataSource([githubTool1]), + toolSource: new InMemoryToolDataSource([toolWithSecret]), metadataSource: new InMemoryMetadataSource([githubMetadata]), }); - const previousResult = await mergeToolkit( + const previousWithOldSecret = await mergeToolkit( "Github", - [githubTool1], + [ + createTool({ + ...toolWithSecret, + secrets: ["GITHUB_SERVER_URL", "OLD_SECRET"], + }), + ], githubMetadata, null, createStubGenerator() ); - previousResult.toolkit.summary = "Stale carried-forward summary"; - previousResult.toolkit.summaryStale = true; - previousResult.toolkit.summaryStaleReason = "llm_generation_failed"; + previousWithOldSecret.toolkit.documentationChunks = [ + { + type: "markdown", + location: "header", + position: "after", + content: "Still references OLD_SECRET for legacy flows.", + }, + ]; const merger = new DataMerger({ toolkitDataSource, customSectionsSource: new EmptyCustomSectionsSource(), toolExampleGenerator: createStubGenerator(), - toolkitSummaryGenerator: createStubSummaryGenerator("Fresh"), - previousToolkits: new Map([["github", previousResult.toolkit]]), + toolkitSummaryGenerator: createStubSummaryGenerator("Stub summary"), + previousToolkits: new Map([["github", previousWithOldSecret.toolkit]]), }); const result = await merger.mergeToolkit("Github"); - expect(result.toolkit.summary).toBe("Fresh (Github)"); - expect(result.toolkit.summaryStale).toBeUndefined(); - expect(result.toolkit.summaryStaleReason).toBeUndefined(); + expect( + result.warnings.some( + (warning) => + warning.includes("Stale secret reference") && + 
warning.includes("OLD_SECRET") + ) + ).toBe(true); }); - it("keeps the stale flag when previous was stale, signature matches, and no generator is available", async () => { - // Same as above, but the regen attempt is not possible. The carried- - // forward summary remains, and the stale flag must persist so CI - // keeps flagging the toolkit until a working LLM run produces a - // fresh one. + it("skipSecretCoherence suppresses both the edit step and the scan warnings", async () => { + // --skip-secret-coherence is documented to disable the entire + // step. That means no LLM edits AND no coherence warnings in the + // run log. A stale secret reference in a chunk must pass through + // without any signal to the run log. + const toolWithSecret = createTool({ + name: "CreateIssue", + qualifiedName: "Github.CreateIssue", + fullyQualifiedName: "Github.CreateIssue@1.0.0", + auth: { providerId: "github", providerType: "oauth2", scopes: [] }, + secrets: ["GITHUB_SERVER_URL"], + }); const toolkitDataSource = createCombinedToolkitDataSource({ - toolSource: new InMemoryToolDataSource([githubTool1]), + toolSource: new InMemoryToolDataSource([toolWithSecret]), metadataSource: new InMemoryMetadataSource([githubMetadata]), }); - const previousResult = await mergeToolkit( + const previous = await mergeToolkit( "Github", - [githubTool1], + [ + createTool({ + ...toolWithSecret, + secrets: ["GITHUB_SERVER_URL", "OLD_SECRET"], + }), + ], githubMetadata, null, createStubGenerator() ); - previousResult.toolkit.summary = "Stale carried-forward summary"; - previousResult.toolkit.summaryStale = true; - previousResult.toolkit.summaryStaleReason = "llm_generation_failed"; + previous.toolkit.documentationChunks = [ + { + type: "markdown", + location: "header", + position: "after", + content: "Still references OLD_SECRET for legacy flows.", + }, + ]; + const cleanupSpy = vi.fn(async () => "unreached"); + const coverageSpy = vi.fn( + async (input: { content: string }) => input.content + ); 
const merger = new DataMerger({ toolkitDataSource, customSectionsSource: new EmptyCustomSectionsSource(), toolExampleGenerator: createStubGenerator(), - previousToolkits: new Map([["github", previousResult.toolkit]]), + toolkitSummaryGenerator: createStubSummaryGenerator("Stub"), + secretEditGenerator: { + cleanupStaleReferences: cleanupSpy, + fillCoverageGaps: coverageSpy, + }, + skipSecretCoherence: true, + previousToolkits: new Map([["github", previous.toolkit]]), }); const result = await merger.mergeToolkit("Github"); - expect(result.toolkit.summary).toBe("Stale carried-forward summary"); - expect(result.toolkit.summaryStale).toBe(true); - expect(result.toolkit.summaryStaleReason).toBe( - "llm_generator_unavailable" - ); + expect(cleanupSpy).not.toHaveBeenCalled(); + expect(coverageSpy).not.toHaveBeenCalled(); + expect( + result.warnings.some((warning) => + warning.includes("Stale secret reference") + ) + ).toBe(false); }); it("reuses previous examples when the tool is unchanged", async () => { diff --git a/toolkit-docs-generator/tests/merger/secret-coherence.test.ts b/toolkit-docs-generator/tests/merger/secret-coherence.test.ts new file mode 100644 index 000000000..efe5a686f --- /dev/null +++ b/toolkit-docs-generator/tests/merger/secret-coherence.test.ts @@ -0,0 +1,264 @@ +import { describe, expect, it } from "vitest"; +import { + ARCADE_SECRETS_DOC_URL, + detectSecretCoherenceIssues, + detectSecretCoverageGaps, + detectStaleSecretReferences, + groupStaleRefsByTarget, + hasCoherenceIssues, +} from "../../src/merger/secret-coherence.js"; +import type { + DocumentationChunk, + MergedTool, + MergedToolkit, +} from "../../src/types/index.js"; + +const chunk = ( + overrides: Partial<DocumentationChunk> = {} +): DocumentationChunk => ({ + type: "markdown", + location: "header", + position: "before", + content: "", + ...overrides, +}); + +const tool = (overrides: Partial<MergedTool> = {}): MergedTool => ({ + name: "Example", + qualifiedName: "Github.Example", + fullyQualifiedName:
"Github.Example@1.0.0", + description: "desc", + parameters: [], + auth: null, + secrets: [], + secretsInfo: [], + output: null, + documentationChunks: [], + ...overrides, +}); + +const toolkit = (overrides: Partial<MergedToolkit> = {}): MergedToolkit => ({ + id: "github", + label: "GitHub", + version: "1.0.0", + description: null, + metadata: { + category: "development", + iconUrl: "", + isBYOC: false, + isPro: false, + type: "arcade", + docsLink: "", + isComingSoon: false, + isHidden: false, + }, + auth: null, + tools: [], + documentationChunks: [], + customImports: [], + subPages: [], + ...overrides, +}); + +describe("detectStaleSecretReferences", () => { + it("returns nothing when there is no previous toolkit", () => { + const result = detectStaleSecretReferences(toolkit()); + expect(result).toEqual([]); + }); + + it("returns nothing when no secrets were removed", () => { + const previous = toolkit({ + tools: [tool({ secrets: ["GITHUB_SERVER_URL"] })], + }); + const current = toolkit({ + tools: [tool({ secrets: ["GITHUB_SERVER_URL"] })], + }); + expect(detectStaleSecretReferences(current, previous)).toEqual([]); + }); + + it("finds a removed secret still mentioned in the summary", () => { + const previous = toolkit({ + tools: [ + tool({ + secrets: [ + "GITHUB_SERVER_URL", + "GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN", + ], + }), + ], + }); + const current = toolkit({ + summary: + "GitHub toolkit. 
Set `GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN` to use notifications.", + tools: [tool({ secrets: ["GITHUB_SERVER_URL"] })], + }); + const result = detectStaleSecretReferences(current, previous); + expect(result).toHaveLength(1); + expect(result[0]?.removedSecret).toBe( + "GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN" + ); + expect(result[0]?.location).toEqual({ kind: "summary" }); + }); + + it("finds a removed secret still mentioned in a toolkit documentation chunk", () => { + const previous = toolkit({ + tools: [ + tool({ + secrets: [ + "GITHUB_SERVER_URL", + "GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN", + ], + }), + ], + }); + const current = toolkit({ + tools: [tool({ secrets: ["GITHUB_SERVER_URL"] })], + documentationChunks: [ + chunk({ + location: "before_available_tools", + content: + "| Secret | Required |\n| `GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN` | Notifications |", + }), + ], + }); + const result = detectStaleSecretReferences(current, previous); + expect(result).toHaveLength(1); + expect(result[0]?.location).toEqual({ + kind: "toolkit_chunk", + chunkIndex: 0, + }); + }); + + it("finds a removed secret in a per-tool documentation chunk", () => { + const previous = toolkit({ + tools: [ + tool({ + qualifiedName: "Github.GetNotificationSummary", + secrets: [ + "GITHUB_SERVER_URL", + "GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN", + ], + }), + ], + }); + const current = toolkit({ + tools: [ + tool({ + qualifiedName: "Github.GetNotificationSummary", + secrets: ["GITHUB_SERVER_URL"], + documentationChunks: [ + chunk({ + content: + "Requires GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN for notifications.", + }), + ], + }), + ], + }); + const result = detectStaleSecretReferences(current, previous); + expect(result).toHaveLength(1); + expect(result[0]?.location).toEqual({ + kind: "tool_chunk", + toolQualifiedName: "Github.GetNotificationSummary", + chunkIndex: 0, + }); + }); +}); + +describe("detectSecretCoverageGaps", () => { + it("returns nothing when the toolkit has no summary", () => { + const 
current = toolkit({ + tools: [tool({ secrets: ["GITHUB_SERVER_URL"] })], + }); + expect(detectSecretCoverageGaps(current)).toEqual([]); + }); + + it("returns nothing when the toolkit has no secrets", () => { + const current = toolkit({ summary: "No secrets here." }); + expect(detectSecretCoverageGaps(current)).toEqual([]); + }); + + it("flags secrets that are missing from the summary", () => { + const current = toolkit({ + summary: `Toolkit info. See ${ARCADE_SECRETS_DOC_URL} to configure.`, + tools: [ + tool({ + secrets: ["GITHUB_SERVER_URL", "GITHUB_WEBHOOK_SECRET"], + }), + ], + }); + const gaps = detectSecretCoverageGaps(current); + const missing = gaps.filter( + (gap) => gap.kind === "missing_secret_in_summary" + ); + expect(missing).toHaveLength(2); + }); + + it("flags a missing Arcade config link when secrets exist", () => { + const current = toolkit({ + summary: + "Toolkit info. Uses `GITHUB_SERVER_URL` but no configuration link.", + tools: [tool({ secrets: ["GITHUB_SERVER_URL"] })], + }); + const gaps = detectSecretCoverageGaps(current); + expect(gaps.some((gap) => gap.kind === "missing_secret_config_link")).toBe( + true + ); + }); + + it("does not flag the link when the dashboard URL is present", () => { + const current = toolkit({ + summary: + "Toolkit info. 
Set `GITHUB_SERVER_URL` in the Arcade Dashboard: https://api.arcade.dev/dashboard/auth/secrets.", + tools: [tool({ secrets: ["GITHUB_SERVER_URL"] })], + }); + const gaps = detectSecretCoverageGaps(current); + expect(gaps.some((gap) => gap.kind === "missing_secret_config_link")).toBe( + false + ); + }); +}); + +describe("groupStaleRefsByTarget", () => { + it("groups multiple removed secrets hitting the same artifact into one edit target", () => { + const previous = toolkit({ + tools: [ + tool({ + secrets: ["A_SECRET", "B_SECRET", "KEEP_SECRET"], + }), + ], + }); + const current = toolkit({ + summary: "Mentions A_SECRET and B_SECRET together.", + tools: [tool({ secrets: ["KEEP_SECRET"] })], + }); + const refs = detectStaleSecretReferences(current, previous); + const targets = groupStaleRefsByTarget(refs); + expect(targets).toHaveLength(1); + expect(targets[0]?.kind).toBe("summary"); + expect(targets[0]?.removedSecrets).toEqual(["A_SECRET", "B_SECRET"]); + }); +}); + +describe("hasCoherenceIssues", () => { + it("is true when either stale refs or coverage gaps exist", () => { + const previous = toolkit({ + tools: [tool({ secrets: ["OLD_SECRET", "KEEP"] })], + }); + const current = toolkit({ + summary: "Still says OLD_SECRET here.", + tools: [tool({ secrets: ["KEEP"] })], + }); + const issues = detectSecretCoherenceIssues(current, previous); + expect(hasCoherenceIssues(issues)).toBe(true); + }); + + it("is false when the toolkit is coherent", () => { + const current = toolkit({ + summary: `Uses \`SECRET_A\`. 
Configure via ${ARCADE_SECRETS_DOC_URL}.`, + tools: [tool({ secrets: ["SECRET_A"] })], + }); + const issues = detectSecretCoherenceIssues(current); + expect(hasCoherenceIssues(issues)).toBe(false); + }); +}); diff --git a/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts b/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts index 5ba776a36..f9ff3407d 100644 --- a/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts +++ b/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts @@ -36,3 +36,26 @@ test("porter workflow generates docs and opens a PR", () => { expect(workflowContents).toContain("[AUTO] Adding MCP Servers docs update"); expect(workflowContents).toContain("pull-requests: write"); }); + +test("porter workflow wires the secret-coherence editor", () => { + expect(workflowContents).toContain("--llm-editor-provider anthropic"); + expect(workflowContents).toContain("--llm-editor-model"); + expect(workflowContents).toContain("--llm-editor-api-key"); + expect(workflowContents).toContain("ANTHROPIC_API_KEY"); + expect(workflowContents).toContain("claude-sonnet-4-6"); +}); + +test("porter workflow opts JS actions into Node 24 to unblock the 2026-06-02 deprecation", () => { + expect(workflowContents).toContain( + 'FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"' + ); +}); + +test("workflow dispatch keeps default full-run behavior", () => { + expect(workflowContents).toContain("workflow_dispatch:"); + expect(workflowContents).toContain("--all"); + expect(workflowContents).toContain("--skip-unchanged"); + expect(workflowContents).not.toContain("providers:"); + expect(workflowContents).not.toContain("inputs.providers"); + expect(workflowContents).not.toContain("PROVIDERS_INPUT="); +});