From 4e15e1dd70860afa87a23b381e43c5e92343f16e Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Sat, 14 Mar 2026 22:32:59 -0700 Subject: [PATCH 1/3] fix: add missing refusal field to OpenAI Chat Completions responses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenAI now returns a `refusal` field (null for non-refusal responses) on all Chat Completions messages. Both the SDK types and real API include it, but llmock was omitting it — causing shape mismatches for consumers that validate response structure. --- src/__tests__/api-conformance.test.ts | 2 ++ src/__tests__/helpers.test.ts | 2 ++ src/helpers.ts | 3 ++- src/types.ts | 1 + 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/__tests__/api-conformance.test.ts b/src/__tests__/api-conformance.test.ts index 0e1e951..7f406df 100644 --- a/src/__tests__/api-conformance.test.ts +++ b/src/__tests__/api-conformance.test.ts @@ -167,6 +167,8 @@ describe("OpenAI Chat Completions conformance", () => { expect(choice).toHaveProperty("finish_reason"); expect(choice.message.role).toBe("assistant"); expect(typeof choice.message.content).toBe("string"); + expect(choice.message).toHaveProperty("refusal"); + expect(choice.message.refusal).toBeNull(); }); it("usage has prompt_tokens, completion_tokens, total_tokens as numbers", async () => { diff --git a/src/__tests__/helpers.test.ts b/src/__tests__/helpers.test.ts index 8e38418..facf8ea 100644 --- a/src/__tests__/helpers.test.ts +++ b/src/__tests__/helpers.test.ts @@ -307,6 +307,7 @@ describe("buildTextCompletion", () => { expect(result.choices[0].index).toBe(0); expect(result.choices[0].message.role).toBe("assistant"); expect(result.choices[0].message.content).toBe("Hello!"); + expect(result.choices[0].message.refusal).toBeNull(); expect(result.choices[0].finish_reason).toBe("stop"); }); @@ -331,6 +332,7 @@ describe("buildToolCallCompletion", () => { expect(result.choices).toHaveLength(1); 
expect(result.choices[0].finish_reason).toBe("tool_calls"); expect(result.choices[0].message.content).toBeNull(); + expect(result.choices[0].message.refusal).toBeNull(); }); it("maps tool calls with correct structure", () => { diff --git a/src/helpers.ts b/src/helpers.ts index faabaaa..97b8c03 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -171,7 +171,7 @@ export function buildTextCompletion(content: string, model: string): ChatComplet choices: [ { index: 0, - message: { role: "assistant", content }, + message: { role: "assistant", content, refusal: null }, finish_reason: "stop", }, ], @@ -191,6 +191,7 @@ export function buildToolCallCompletion(toolCalls: ToolCall[], model: string): C message: { role: "assistant", content: null, + refusal: null, tool_calls: toolCalls.map((tc) => ({ id: tc.id || generateToolCallId(), type: "function" as const, diff --git a/src/types.ts b/src/types.ts index 598aea1..3b833dc 100644 --- a/src/types.ts +++ b/src/types.ts @@ -170,6 +170,7 @@ export interface ChatCompletionChoice { export interface ChatCompletionMessage { role: "assistant"; content: string | null; + refusal: string | null; tool_calls?: ToolCallMessage[]; } From dc58492d9a9987bf8a85fe66e96aa0f84529327e Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Sat, 14 Mar 2026 22:33:14 -0700 Subject: [PATCH 2/3] feat: add live API drift detection test suite Three-layer triangulation between SDK types, real API responses, and llmock output to detect response shape drift across OpenAI (Chat + Responses), Anthropic Claude, and Google Gemini. 
- schema.ts: shape extraction, three-way comparison, severity classification - sdk-shapes.ts: expected shapes from SDK types - providers.ts: raw fetch clients, SSE parsing, model listing - helpers.ts: shared test fixtures and server lifecycle - 4 provider drift test files (16 tests) + model deprecation checks (3 tests) - vitest.config.drift.ts: separate config with 30s timeout - Weekly CI workflow (.github/workflows/test-drift.yml) - DRIFT.md: full documentation --- .github/workflows/test-drift.yml | 21 + DRIFT.md | 118 ++++ README.md | 2 +- package.json | 6 +- pnpm-lock.yaml | 339 ++++++++++++ src/__tests__/drift/anthropic.drift.ts | 188 +++++++ src/__tests__/drift/gemini.drift.ts | 187 +++++++ src/__tests__/drift/helpers.ts | 103 ++++ src/__tests__/drift/models.drift.ts | 100 ++++ src/__tests__/drift/openai-chat.drift.ts | 173 ++++++ src/__tests__/drift/openai-responses.drift.ts | 184 +++++++ src/__tests__/drift/providers.ts | 422 ++++++++++++++ src/__tests__/drift/schema.ts | 476 ++++++++++++++++ src/__tests__/drift/sdk-shapes.ts | 517 ++++++++++++++++++ vitest.config.drift.ts | 9 + 15 files changed, 2843 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/test-drift.yml create mode 100644 DRIFT.md create mode 100644 src/__tests__/drift/anthropic.drift.ts create mode 100644 src/__tests__/drift/gemini.drift.ts create mode 100644 src/__tests__/drift/helpers.ts create mode 100644 src/__tests__/drift/models.drift.ts create mode 100644 src/__tests__/drift/openai-chat.drift.ts create mode 100644 src/__tests__/drift/openai-responses.drift.ts create mode 100644 src/__tests__/drift/providers.ts create mode 100644 src/__tests__/drift/schema.ts create mode 100644 src/__tests__/drift/sdk-shapes.ts create mode 100644 vitest.config.drift.ts diff --git a/.github/workflows/test-drift.yml b/.github/workflows/test-drift.yml new file mode 100644 index 0000000..5eb00c2 --- /dev/null +++ b/.github/workflows/test-drift.yml @@ -0,0 +1,21 @@ +name: Drift Tests +on: + 
schedule: + - cron: "0 6 * * 1" # Weekly Monday 6am UTC + workflow_dispatch: # Manual trigger +jobs: + drift: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: pnpm/action-setup@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + - run: pnpm install --frozen-lockfile + - run: pnpm test:drift + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} diff --git a/DRIFT.md b/DRIFT.md new file mode 100644 index 0000000..2039000 --- /dev/null +++ b/DRIFT.md @@ -0,0 +1,118 @@ +# Live API Drift Detection + +llmock produces responses shaped like real LLM APIs. Providers change their APIs over time. **Drift** means the mock no longer matches reality — your tests pass against llmock but break against the real API. + +## Three-Layer Approach + +Drift detection compares three independent sources to triangulate the cause of any mismatch: + +| SDK types = Real API? | Real API = llmock? | Diagnosis | +| --------------------- | ------------------ | -------------------------------------------------------------------- | +| Yes | No | **llmock drift** — response builders need updating | +| No | No | **Provider changed before SDK update** — flag, wait for SDK catch-up | +| Yes | Yes | **No drift** — all clear | +| No | Yes | **SDK drift** — provider deprecated something SDK still references | + +Two-way comparison (mock vs real) can't distinguish between "we need to fix llmock" and "the SDK hasn't caught up yet." Three-way comparison can. + +## Running Drift Tests + +```bash +# All providers (requires all three API keys) +OPENAI_API_KEY=sk-... ANTHROPIC_API_KEY=sk-... GOOGLE_API_KEY=... pnpm test:drift + +# Single provider (others skip automatically) +OPENAI_API_KEY=sk-... pnpm test:drift + +# Strict mode — warnings also fail +STRICT_DRIFT=1 OPENAI_API_KEY=sk-... 
pnpm test:drift +``` + +Required environment variables: + +- `OPENAI_API_KEY` — OpenAI API key +- `ANTHROPIC_API_KEY` — Anthropic API key +- `GOOGLE_API_KEY` — Google AI API key + +Each provider's tests skip independently if its key is not set. You can run drift tests for just one provider. + +## Reading Results + +### Severity levels + +- **critical** — Test fails. llmock produces a different shape than the real API for a field that both the SDK and real API agree on. This means llmock needs an update. +- **warning** — Test passes (unless `STRICT_DRIFT=1`). The real API has a field that neither the SDK nor llmock knows about, or the SDK and real API disagree. Usually means a provider added something new. +- **info** — Always passes. Known intentional differences (usage fields are always zero, optional fields llmock omits, etc.). + +### Example report output + +``` +API DRIFT DETECTED: OpenAI Chat Completions (non-streaming text) + + 1. [critical] LLMOCK DRIFT — field in SDK + real API but missing from mock + Path: usage.completion_tokens_details + SDK: object { reasoning_tokens: number } + Real: object { reasoning_tokens: number, accepted_prediction_tokens: number } + Mock: + + 2. [warning] PROVIDER ADDED FIELD — in real API but not in SDK or mock + Path: system_fingerprint + SDK: + Real: string + Mock: + + 3. [info] MOCK EXTRA FIELD — in mock but not in real API + Path: choices[0].logprobs + SDK: null | object + Real: + Mock: null +``` + +## Fixing Detected Drift + +When a `critical` drift is detected: + +1. 
**Identify the response builder** — the report path tells you which provider and field: + - OpenAI Chat Completions → `src/helpers.ts` (`buildTextCompletion`, `buildToolCallCompletion`, `buildTextChunks`, `buildToolCallChunks`) + - OpenAI Responses API → `src/responses.ts` (`buildTextResponse`, `buildToolCallResponse`, `buildTextStreamEvents`, `buildToolCallStreamEvents`) + - Anthropic Claude → `src/messages.ts` (`buildClaudeTextResponse`, `buildClaudeToolCallResponse`, `buildClaudeTextStreamEvents`, `buildClaudeToolCallStreamEvents`) + - Google Gemini → `src/gemini.ts` (`buildGeminiTextResponse`, `buildGeminiToolCallResponse`, `buildGeminiTextStreamChunks`, `buildGeminiToolCallStreamChunks`) + +2. **Update the builder** — add or modify the field to match the real API shape. + +3. **Run conformance tests** — `pnpm test` to verify existing API conformance tests still pass. + +4. **Run drift tests** — `pnpm test:drift` to verify the drift is resolved. + +## Model Deprecation + +The `models.drift.ts` test scrapes model names referenced in llmock's test files, README, and fixtures, then checks each provider's model listing API to verify they still exist. + +When a model is deprecated: + +1. Update the model name in the affected test files and fixtures +2. Update `src/__tests__/drift/providers.ts` if the cheap test model changed +3. Run `pnpm test` and `pnpm test:drift` + +## Adding a New Provider + +1. Add the provider's SDK as a devDependency in `package.json` +2. Add shape extraction functions to `src/__tests__/drift/sdk-shapes.ts` +3. Add raw fetch client functions to `src/__tests__/drift/providers.ts` +4. Create `src/__tests__/drift/<provider>.drift.ts` with 4 test scenarios +5. Add model listing function to `providers.ts` and model check to `models.drift.ts` +6. 
Update the allowlist in `schema.ts` if needed + +## CI Schedule + +Drift tests run on a schedule: + +- **Weekly**: Monday 6:00 AM UTC +- **Manual**: Trigger via GitHub Actions UI (`workflow_dispatch`) +- **NOT** on PR or push — these tests hit real APIs and cost money + +See `.github/workflows/test-drift.yml`. + +## Cost + +~20 API calls per run using the cheapest available models (`gpt-4o-mini`, `claude-haiku-4-5-20251001`, `gemini-2.5-flash`) with 10-100 max tokens each. Under $0.01/week. diff --git a/README.md b/README.md index 3c97534..c45ecd8 100644 --- a/README.md +++ b/README.md @@ -673,7 +673,7 @@ Areas where llmock could grow, and explicit non-goals for the current scope. ### Testing -- **Live API conformance**: The `api-conformance` tests validate response format structure but do not run against real LLM APIs. A subset of tests that hit actual OpenAI/Anthropic/Gemini endpoints (gated behind API keys) would catch format drift as providers evolve their APIs. +- **Live API drift detection**: The `drift` test suite runs against real OpenAI, Anthropic, and Gemini APIs to catch response format drift. See [DRIFT.md](DRIFT.md) for details on the three-layer triangulation approach, how to run tests, and how to fix detected drift. Runs weekly in CI; requires API keys. - **Token counts**: Usage fields are always zero across all providers. - **Vision/image content**: Image content parts are not handled by any provider. 
diff --git a/package.json b/package.json index 9bcca87..4addd72 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@copilotkit/llmock", - "version": "1.3.1", + "version": "1.3.2", "description": "Deterministic mock LLM server for testing (OpenAI, Anthropic, Gemini)", "license": "MIT", "packageManager": "pnpm@10.28.2", @@ -36,6 +36,7 @@ "scripts": { "build": "tsdown", "test": "vitest run", + "test:drift": "vitest run --config vitest.config.drift.ts", "test:exports": "publint && attw --pack .", "lint": "eslint .", "format:check": "prettier --check .", @@ -60,6 +61,9 @@ "tsdown": "^0.12.5", "typescript": "^5.8.3", "typescript-eslint": "^8.35.1", + "@anthropic-ai/sdk": "^0.78.0", + "@google/generative-ai": "^0.24.0", + "openai": "^4.0.0", "vitest": "^3.2.1" } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 18bd495..1b8931b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -8,6 +8,9 @@ importers: .: devDependencies: + '@anthropic-ai/sdk': + specifier: ^0.78.0 + version: 0.78.0 '@arethetypeswrong/cli': specifier: ^0.17.3 version: 0.17.4 @@ -20,6 +23,9 @@ importers: '@eslint/js': specifier: ^9.30.0 version: 9.39.3 + '@google/generative-ai': + specifier: ^0.24.0 + version: 0.24.1 eslint: specifier: ^9.30.0 version: 9.39.3(jiti@2.6.1) @@ -32,6 +38,9 @@ importers: lint-staged: specifier: ^16.3.2 version: 16.3.2 + openai: + specifier: ^4.0.0 + version: 4.104.0 prettier: specifier: ^3.6.2 version: 3.8.1 @@ -56,6 +65,15 @@ packages: '@andrewbranch/untar.js@1.0.3': resolution: {integrity: sha512-Jh15/qVmrLGhkKJBdXlK1+9tY4lZruYjsgkDFj08ZmDiWVBLJcqkok7Z0/R0In+i1rScBpJlSvrTS2Lm41Pbnw==} + '@anthropic-ai/sdk@0.78.0': + resolution: {integrity: sha512-PzQhR715td/m1UaaN5hHXjYB8Gl2lF9UVhrrGrZeysiF6Rb74Wc9GCB8hzLdzmQtBd1qe89F9OptgB9Za1Ib5w==} + hasBin: true + peerDependencies: + zod: ^3.25.0 || ^4.0.0 + peerDependenciesMeta: + zod: + optional: true + '@arethetypeswrong/cli@0.17.4': resolution: {integrity: 
sha512-AeiKxtf67XD/NdOqXgBOE5TZWH3EOCt+0GkbUpekOzngc+Q/cRZ5azjWyMxISxxfp0EItgm5NoSld9p7BAA5xQ==} engines: {node: '>=18'} @@ -86,6 +104,10 @@ packages: engines: {node: '>=6.0.0'} hasBin: true + '@babel/runtime@7.28.6': + resolution: {integrity: sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==} + engines: {node: '>=6.9.0'} + '@babel/types@7.29.0': resolution: {integrity: sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==} engines: {node: '>=6.9.0'} @@ -369,6 +391,10 @@ packages: resolution: {integrity: sha512-43/qtrDUokr7LJqoF2c3+RInu/t4zfrpYdoSDfYyhg52rwLV6TnOvdG4fXm7IkSB3wErkcmJS9iEhjVtOSEjjA==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} + '@google/generative-ai@0.24.1': + resolution: {integrity: sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==} + engines: {node: '>=18.0.0'} + '@humanfs/core@0.19.1': resolution: {integrity: sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==} engines: {node: '>=18.18.0'} @@ -658,6 +684,12 @@ packages: '@types/json-schema@7.0.15': resolution: {integrity: sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==} + '@types/node-fetch@2.6.13': + resolution: {integrity: sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==} + + '@types/node@18.19.130': + resolution: {integrity: sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==} + '@types/node@25.3.3': resolution: {integrity: sha512-DpzbrH7wIcBaJibpKo9nnSQL0MTRdnWttGyE5haGwK86xgMOkFLp7vEyfQPGLOJh5wNYiJ3V9PmUMDhV9u8kkQ==} @@ -753,6 +785,10 @@ packages: resolution: {integrity: sha512-E+iruNOY8VV9s4JEbe1aNEm6MiszPRr/UfcHMz0TQh1BXSxHK+ASV1R6W4HpjBhSeS+54PIsAMCBmwD06LLsqQ==} hasBin: true + abort-controller@3.0.0: + resolution: {integrity: 
sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==} + engines: {node: '>=6.5'} + acorn-jsx@5.3.2: resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==} peerDependencies: @@ -763,6 +799,10 @@ packages: engines: {node: '>=0.4.0'} hasBin: true + agentkeepalive@4.6.0: + resolution: {integrity: sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==} + engines: {node: '>= 8.0.0'} + ajv@6.14.0: resolution: {integrity: sha512-IWrosm/yrn43eiKqkfkHis7QioDleaXQHdDVPKg0FSwwd/DuvyX79TZnFOnYpB7dcsFAMmtFztZuXPDvSePkFw==} @@ -810,6 +850,9 @@ packages: resolution: {integrity: sha512-m1Q/RaVOnTp9JxPX+F+Zn7IcLYMzM8kZofDImfsKZd8MbR+ikdOzTeztStWqfrqIxZnYWryyI9ePm3NGjnZgGw==} engines: {node: '>=20.19.0'} + asynckit@0.4.0: + resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==} + balanced-match@1.0.2: resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} @@ -835,6 +878,10 @@ packages: resolution: {integrity: sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==} engines: {node: '>=8'} + call-bind-apply-helpers@1.0.2: + resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==} + engines: {node: '>= 0.4'} + callsites@3.1.0: resolution: {integrity: sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==} engines: {node: '>=6'} @@ -900,6 +947,10 @@ packages: colorette@2.0.20: resolution: {integrity: sha512-IfEDxwoWIjkeXL1eXcDiow4UbKjhLdq6/EuSVR9GMN7KVH3r9gQ83e73hsz1Nd1T3ijd5xv1wcWRYO+D6kCI2w==} + combined-stream@1.0.8: + resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} + engines: {node: '>= 0.8'} 
+ commander@10.0.1: resolution: {integrity: sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==} engines: {node: '>=14'} @@ -971,6 +1022,10 @@ packages: defu@6.1.4: resolution: {integrity: sha512-mEQCMmwJu317oSz8CwdIOdwf3xMif1ttiM8LTufzc3g6kR+9Pe236twL8j3IYT1F7GfRgGcW6MWxzZjLIkuHIg==} + delayed-stream@1.0.0: + resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} + engines: {node: '>=0.4.0'} + diff@8.0.3: resolution: {integrity: sha512-qejHi7bcSD4hQAZE0tNAawRK1ZtafHDmMTMkrrIGgSLl7hTnQHmKCeB45xAcbfTqK2zowkM3j3bHt/4b/ARbYQ==} engines: {node: '>=0.3.1'} @@ -988,6 +1043,10 @@ packages: oxc-resolver: optional: true + dunder-proto@1.0.1: + resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==} + engines: {node: '>= 0.4'} + emoji-regex@10.6.0: resolution: {integrity: sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A==} @@ -1012,9 +1071,25 @@ packages: error-ex@1.3.4: resolution: {integrity: sha512-sqQamAnR14VgCr1A618A3sGrygcpK+HEbenA/HiEAkkUwcZIIB/tgWqHFxWgOyDh4nB4JCRimh79dR5Ywc9MDQ==} + es-define-property@1.0.1: + resolution: {integrity: sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==} + engines: {node: '>= 0.4'} + + es-errors@1.3.0: + resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==} + engines: {node: '>= 0.4'} + es-module-lexer@1.7.0: resolution: {integrity: sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==} + es-object-atoms@1.1.1: + resolution: {integrity: sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==} + engines: {node: '>= 0.4'} + + es-set-tostringtag@2.1.0: + resolution: {integrity: 
sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==} + engines: {node: '>= 0.4'} + esbuild@0.27.3: resolution: {integrity: sha512-8VwMnyGCONIs6cWue2IdpHxHnAjzxnw2Zr7MkVxB2vjmQ2ivqGFb4LEG3SMnv0Gb2F/G/2yA8zUaiL1gywDCCg==} engines: {node: '>=18'} @@ -1083,6 +1158,10 @@ packages: resolution: {integrity: sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==} engines: {node: '>=0.10.0'} + event-target-shim@5.0.1: + resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==} + engines: {node: '>=6'} + eventemitter3@5.0.4: resolution: {integrity: sha512-mlsTRyGaPBjPedk6Bvw+aqbsXDtoAyAzm5MO7JgU+yVRyMQ5O8bD4Kcci7BS85f93veegeCPkL8R4GLClnjLFw==} @@ -1137,11 +1216,25 @@ packages: flatted@3.3.4: resolution: {integrity: sha512-3+mMldrTAPdta5kjX2G2J7iX4zxtnwpdA8Tr2ZSjkyPSanvbZAcy6flmtnXbEybHrDcU9641lxrMfFuUxVz9vA==} + form-data-encoder@1.7.2: + resolution: {integrity: sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==} + + form-data@4.0.5: + resolution: {integrity: sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==} + engines: {node: '>= 6'} + + formdata-node@4.4.1: + resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==} + engines: {node: '>= 12.20'} + fsevents@2.3.3: resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} os: [darwin] + function-bind@1.1.2: + resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==} + get-caller-file@2.0.5: resolution: {integrity: sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==} engines: {node: 6.* || 8.* || >= 10.*} @@ -1150,6 
+1243,14 @@ packages: resolution: {integrity: sha512-CQ+bEO+Tva/qlmw24dCejulK5pMzVnUOFOijVogd3KQs07HnRIgp8TGipvCCRT06xeYEbpbgwaCxglFyiuIcmA==} engines: {node: '>=18'} + get-intrinsic@1.3.0: + resolution: {integrity: sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==} + engines: {node: '>= 0.4'} + + get-proto@1.0.1: + resolution: {integrity: sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==} + engines: {node: '>= 0.4'} + get-tsconfig@4.13.6: resolution: {integrity: sha512-shZT/QMiSHc/YBLxxOkMtgSid5HFoauqCE3/exfsEcwg1WkeqjG+V40yBbBrsD+jW2HDXcs28xOfcbm2jI8Ddw==} @@ -1171,16 +1272,35 @@ packages: resolution: {integrity: sha512-oahGvuMGQlPw/ivIYBjVSrWAfWLBeku5tpPE2fOPLi+WHffIWbuh2tCjhyQhTBPMf5E9jDEH4FOmTYgYwbKwtQ==} engines: {node: '>=18'} + gopd@1.2.0: + resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} + engines: {node: '>= 0.4'} + has-flag@4.0.0: resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==} engines: {node: '>=8'} + has-symbols@1.1.0: + resolution: {integrity: sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==} + engines: {node: '>= 0.4'} + + has-tostringtag@1.0.2: + resolution: {integrity: sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==} + engines: {node: '>= 0.4'} + + hasown@2.0.2: + resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} + engines: {node: '>= 0.4'} + highlight.js@10.7.3: resolution: {integrity: sha512-tzcUFauisWKNHaRkN4Wjl/ZA07gENAjFl3J/c480dprkGTg5EQstgaNFqBfUqCq54kZRIEcreTsAgF/m2quD7A==} hookable@5.5.3: resolution: {integrity: sha512-Yc+BQe8SvoXH1643Qez1zqLRmbA5rCL+sSmk6TVos0LWVfNIB7PGncdlId77WzLGSIB5KaWgTaNTs2lNVEI6VQ==} + humanize-ms@1.2.1: + resolution: 
{integrity: sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==} + husky@9.1.7: resolution: {integrity: sha512-5gs5ytaNjBrh5Ow3zrvdUUY+0VxIuWVL4i9irt6friV+BqdCfmV11CQTWMiBYWHbXhco+J1kHfTOUkePhCDvMA==} engines: {node: '>=18'} @@ -1268,6 +1388,10 @@ packages: json-parse-even-better-errors@2.3.1: resolution: {integrity: sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==} + json-schema-to-ts@3.1.1: + resolution: {integrity: sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==} + engines: {node: '>=16'} + json-schema-traverse@0.4.1: resolution: {integrity: sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==} @@ -1359,6 +1483,10 @@ packages: engines: {node: '>= 16'} hasBin: true + math-intrinsics@1.1.0: + resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} + engines: {node: '>= 0.4'} + meow@12.1.1: resolution: {integrity: sha512-BhXM0Au22RwUneMPwSCnyhTOizdWoIEPU9sp0Aqa1PnDMR5Wv2FGXYDjuzJEIX+Eo2Rb8xuYe5jrnm5QowQFkw==} engines: {node: '>=16.10'} @@ -1367,6 +1495,14 @@ packages: resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==} engines: {node: '>=8.6'} + mime-db@1.52.0: + resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==} + engines: {node: '>= 0.6'} + + mime-types@2.1.35: + resolution: {integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==} + engines: {node: '>= 0.6'} + mimic-function@5.0.1: resolution: {integrity: sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA==} engines: {node: '>=18'} @@ -1399,10 +1535,24 @@ packages: natural-compare@1.4.0: resolution: {integrity: 
sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==} + node-domexception@1.0.0: + resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} + engines: {node: '>=10.5.0'} + deprecated: Use your platform's native DOMException instead + node-emoji@2.2.0: resolution: {integrity: sha512-Z3lTE9pLaJF47NyMhd4ww1yFTAP8YhYI8SleJiHzM46Fgpm5cnNzSl9XfzFNqbaz+VlJrIj3fXQ4DeN1Rjm6cw==} engines: {node: '>=18'} + node-fetch@2.7.0: + resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==} + engines: {node: 4.x || >=6.0.0} + peerDependencies: + encoding: ^0.1.0 + peerDependenciesMeta: + encoding: + optional: true + object-assign@4.1.1: resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==} engines: {node: '>=0.10.0'} @@ -1411,6 +1561,18 @@ packages: resolution: {integrity: sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ==} engines: {node: '>=18'} + openai@4.104.0: + resolution: {integrity: sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==} + hasBin: true + peerDependencies: + ws: ^8.18.0 + zod: ^3.23.8 + peerDependenciesMeta: + ws: + optional: true + zod: + optional: true + optionator@0.9.4: resolution: {integrity: sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==} engines: {node: '>= 0.8.0'} @@ -1695,6 +1857,12 @@ packages: resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==} engines: {node: '>=8.0'} + tr46@0.0.3: + resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} + + ts-algebra@2.0.0: + resolution: {integrity: 
sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==} + ts-api-utils@2.4.0: resolution: {integrity: sha512-3TaVTaAv2gTiMB35i3FiGJaRfwb3Pyn/j3m/bfAvGe8FB7CF6u+LMYqYlDh7reQf7UNvoTvdfAqHGmPGOSsPmA==} engines: {node: '>=18.12'} @@ -1753,6 +1921,9 @@ packages: unconfig@7.5.0: resolution: {integrity: sha512-oi8Qy2JV4D3UQ0PsopR28CzdQ3S/5A1zwsUwp/rosSbfhJ5z7b90bIyTwi/F7hCLD4SGcZVjDzd4XoUQcEanvA==} + undici-types@5.26.5: + resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==} + undici-types@7.18.2: resolution: {integrity: sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==} @@ -1844,6 +2015,16 @@ packages: jsdom: optional: true + web-streams-polyfill@4.0.0-beta.3: + resolution: {integrity: sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==} + engines: {node: '>= 14'} + + webidl-conversions@3.0.1: + resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} + + whatwg-url@5.0.0: + resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==} + which@2.0.2: resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} engines: {node: '>= 8'} @@ -1903,6 +2084,10 @@ snapshots: '@andrewbranch/untar.js@1.0.3': {} + '@anthropic-ai/sdk@0.78.0': + dependencies: + json-schema-to-ts: 3.1.1 + '@arethetypeswrong/cli@0.17.4': dependencies: '@arethetypeswrong/core': 0.17.4 @@ -1946,6 +2131,8 @@ snapshots: dependencies: '@babel/types': 7.29.0 + '@babel/runtime@7.28.6': {} + '@babel/types@7.29.0': dependencies: '@babel/helper-string-parser': 7.27.1 @@ -2206,6 +2393,8 @@ snapshots: '@eslint/core': 0.17.0 levn: 0.4.1 + '@google/generative-ai@0.24.1': {} + '@humanfs/core@0.19.1': {} '@humanfs/node@0.16.7': @@ -2390,6 
+2579,15 @@ snapshots: '@types/json-schema@7.0.15': {} + '@types/node-fetch@2.6.13': + dependencies: + '@types/node': 25.3.3 + form-data: 4.0.5 + + '@types/node@18.19.130': + dependencies: + undici-types: 5.26.5 + '@types/node@25.3.3': dependencies: undici-types: 7.18.2 @@ -2532,12 +2730,20 @@ snapshots: jsonparse: 1.3.1 through: 2.3.8 + abort-controller@3.0.0: + dependencies: + event-target-shim: 5.0.1 + acorn-jsx@5.3.2(acorn@8.16.0): dependencies: acorn: 8.16.0 acorn@8.16.0: {} + agentkeepalive@4.6.0: + dependencies: + humanize-ms: 1.2.1 + ajv@6.14.0: dependencies: fast-deep-equal: 3.1.3 @@ -2581,6 +2787,8 @@ snapshots: '@babel/parser': 7.29.0 pathe: 2.0.3 + asynckit@0.4.0: {} + balanced-match@1.0.2: {} balanced-match@4.0.4: {} @@ -2602,6 +2810,11 @@ snapshots: cac@6.7.14: {} + call-bind-apply-helpers@1.0.2: + dependencies: + es-errors: 1.3.0 + function-bind: 1.1.2 + callsites@3.1.0: {} chai@5.3.3: @@ -2673,6 +2886,10 @@ snapshots: colorette@2.0.20: {} + combined-stream@1.0.8: + dependencies: + delayed-stream: 1.0.0 + commander@10.0.1: {} commander@14.0.3: {} @@ -2733,6 +2950,8 @@ snapshots: defu@6.1.4: {} + delayed-stream@1.0.0: {} + diff@8.0.3: {} dot-prop@5.3.0: @@ -2741,6 +2960,12 @@ snapshots: dts-resolver@2.1.3: {} + dunder-proto@1.0.1: + dependencies: + call-bind-apply-helpers: 1.0.2 + es-errors: 1.3.0 + gopd: 1.2.0 + emoji-regex@10.6.0: {} emoji-regex@8.0.0: {} @@ -2757,8 +2982,23 @@ snapshots: dependencies: is-arrayish: 0.2.1 + es-define-property@1.0.1: {} + + es-errors@1.3.0: {} + es-module-lexer@1.7.0: {} + es-object-atoms@1.1.1: + dependencies: + es-errors: 1.3.0 + + es-set-tostringtag@2.1.0: + dependencies: + es-errors: 1.3.0 + get-intrinsic: 1.3.0 + has-tostringtag: 1.0.2 + hasown: 2.0.2 + esbuild@0.27.3: optionalDependencies: '@esbuild/aix-ppc64': 0.27.3 @@ -2870,6 +3110,8 @@ snapshots: esutils@2.0.3: {} + event-target-shim@5.0.1: {} + eventemitter3@5.0.4: {} expect-type@1.3.0: {} @@ -2914,13 +3156,48 @@ snapshots: flatted@3.3.4: {} + 
form-data-encoder@1.7.2: {} + + form-data@4.0.5: + dependencies: + asynckit: 0.4.0 + combined-stream: 1.0.8 + es-set-tostringtag: 2.1.0 + hasown: 2.0.2 + mime-types: 2.1.35 + + formdata-node@4.4.1: + dependencies: + node-domexception: 1.0.0 + web-streams-polyfill: 4.0.0-beta.3 + fsevents@2.3.3: optional: true + function-bind@1.1.2: {} + get-caller-file@2.0.5: {} get-east-asian-width@1.5.0: {} + get-intrinsic@1.3.0: + dependencies: + call-bind-apply-helpers: 1.0.2 + es-define-property: 1.0.1 + es-errors: 1.3.0 + es-object-atoms: 1.1.1 + function-bind: 1.1.2 + get-proto: 1.0.1 + gopd: 1.2.0 + has-symbols: 1.1.0 + hasown: 2.0.2 + math-intrinsics: 1.1.0 + + get-proto@1.0.1: + dependencies: + dunder-proto: 1.0.1 + es-object-atoms: 1.1.1 + get-tsconfig@4.13.6: dependencies: resolve-pkg-maps: 1.0.0 @@ -2941,12 +3218,28 @@ snapshots: globals@14.0.0: {} + gopd@1.2.0: {} + has-flag@4.0.0: {} + has-symbols@1.1.0: {} + + has-tostringtag@1.0.2: + dependencies: + has-symbols: 1.1.0 + + hasown@2.0.2: + dependencies: + function-bind: 1.1.2 + highlight.js@10.7.3: {} hookable@5.5.3: {} + humanize-ms@1.2.1: + dependencies: + ms: 2.1.3 + husky@9.1.7: {} ignore@5.3.2: {} @@ -3004,6 +3297,11 @@ snapshots: json-parse-even-better-errors@2.3.1: {} + json-schema-to-ts@3.1.1: + dependencies: + '@babel/runtime': 7.28.6 + ts-algebra: 2.0.0 + json-schema-traverse@0.4.1: {} json-schema-traverse@1.0.0: {} @@ -3096,6 +3394,8 @@ snapshots: marked@9.1.6: {} + math-intrinsics@1.1.0: {} + meow@12.1.1: {} micromatch@4.0.8: @@ -3103,6 +3403,12 @@ snapshots: braces: 3.0.3 picomatch: 2.3.1 + mime-db@1.52.0: {} + + mime-types@2.1.35: + dependencies: + mime-db: 1.52.0 + mimic-function@5.0.1: {} minimatch@10.2.4: @@ -3129,6 +3435,8 @@ snapshots: natural-compare@1.4.0: {} + node-domexception@1.0.0: {} + node-emoji@2.2.0: dependencies: '@sindresorhus/is': 4.6.0 @@ -3136,12 +3444,28 @@ snapshots: emojilib: 2.4.0 skin-tone: 2.0.0 + node-fetch@2.7.0: + dependencies: + whatwg-url: 5.0.0 + object-assign@4.1.1: {} 
onetime@7.0.0: dependencies: mimic-function: 5.0.1 + openai@4.104.0: + dependencies: + '@types/node': 18.19.130 + '@types/node-fetch': 2.6.13 + abort-controller: 3.0.0 + agentkeepalive: 4.6.0 + form-data-encoder: 1.7.2 + formdata-node: 4.4.1 + node-fetch: 2.7.0 + transitivePeerDependencies: + - encoding + optionator@0.9.4: dependencies: deep-is: 0.1.4 @@ -3424,6 +3748,10 @@ snapshots: dependencies: is-number: 7.0.0 + tr46@0.0.3: {} + + ts-algebra@2.0.0: {} + ts-api-utils@2.4.0(typescript@5.9.3): dependencies: typescript: 5.9.3 @@ -3487,6 +3815,8 @@ snapshots: quansync: 1.0.0 unconfig-core: 7.5.0 + undici-types@5.26.5: {} + undici-types@7.18.2: {} unicode-emoji-modifier-base@1.0.0: {} @@ -3575,6 +3905,15 @@ snapshots: - tsx - yaml + web-streams-polyfill@4.0.0-beta.3: {} + + webidl-conversions@3.0.1: {} + + whatwg-url@5.0.0: + dependencies: + tr46: 0.0.3 + webidl-conversions: 3.0.1 + which@2.0.2: dependencies: isexe: 2.0.0 diff --git a/src/__tests__/drift/anthropic.drift.ts b/src/__tests__/drift/anthropic.drift.ts new file mode 100644 index 0000000..795ca26 --- /dev/null +++ b/src/__tests__/drift/anthropic.drift.ts @@ -0,0 +1,188 @@ +/** + * Anthropic Claude Messages API drift tests. + * + * Three-way comparison: SDK types × real API × llmock output. 
+ */ + +import { describe, it, expect, beforeAll, afterAll } from "vitest"; +import type { ServerInstance } from "../../server.js"; +import { + extractShape, + triangulate, + compareSSESequences, + formatDriftReport, + shouldFail, +} from "./schema.js"; +import { + anthropicMessageShape, + anthropicMessageToolCallShape, + anthropicStreamEventShapes, + anthropicToolStreamEventShapes, +} from "./sdk-shapes.js"; +import { anthropicNonStreaming, anthropicStreaming } from "./providers.js"; +import { httpPost, parseTypedSSE, startDriftServer, stopDriftServer } from "./helpers.js"; + +// --------------------------------------------------------------------------- +// Server lifecycle +// --------------------------------------------------------------------------- + +let instance: ServerInstance; +const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; + +beforeAll(async () => { + instance = await startDriftServer(); +}); + +afterAll(async () => { + await stopDriftServer(instance); +}); + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe.skipIf(!ANTHROPIC_API_KEY)("Anthropic Claude Messages drift", () => { + const config = { apiKey: ANTHROPIC_API_KEY! 
}; + + it("non-streaming text shape matches", async () => { + const sdkShape = anthropicMessageShape(); + + const [realRes, mockRes] = await Promise.all([ + anthropicNonStreaming(config, [{ role: "user", content: "Say hello" }]), + httpPost(`${instance.url}/v1/messages`, { + model: "claude-haiku-4-5-20251001", + max_tokens: 10, + messages: [{ role: "user", content: "Say hello" }], + stream: false, + }), + ]); + + const realShape = extractShape(realRes.body); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport("Anthropic Claude (non-streaming text)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); + + it("streaming text event sequence and shapes match", async () => { + const sdkEvents = anthropicStreamEventShapes(); + + const [realStream, mockStreamRes] = await Promise.all([ + anthropicStreaming(config, [{ role: "user", content: "Say hello" }]), + httpPost(`${instance.url}/v1/messages`, { + model: "claude-haiku-4-5-20251001", + max_tokens: 10, + messages: [{ role: "user", content: "Say hello" }], + stream: true, + }), + ]); + + expect(realStream.rawEvents.length, "Real API returned no SSE events").toBeGreaterThan(0); + + const mockEvents = parseTypedSSE(mockStreamRes.body); + expect(mockEvents.length, "Mock returned no SSE events").toBeGreaterThan(0); + + const mockSSEShapes = mockEvents.map((e) => ({ + type: e.type, + dataShape: extractShape(e.data), + })); + + const diffs = compareSSESequences(sdkEvents, realStream.events, mockSSEShapes); + const report = formatDriftReport("Anthropic Claude (streaming text events)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); + + it("non-streaming tool call shape matches", async () => { + const sdkShape = anthropicMessageToolCallShape(); + + const tools = [ + { 
+ name: "get_weather", + description: "Get weather", + input_schema: { + type: "object", + properties: { city: { type: "string" } }, + required: ["city"], + }, + }, + ]; + + const [realRes, mockRes] = await Promise.all([ + anthropicNonStreaming(config, [{ role: "user", content: "Weather in Paris" }], tools), + httpPost(`${instance.url}/v1/messages`, { + model: "claude-haiku-4-5-20251001", + max_tokens: 50, + messages: [{ role: "user", content: "Weather in Paris" }], + stream: false, + tools, + }), + ]); + + const realShape = extractShape(realRes.body); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport("Anthropic Claude (non-streaming tool call)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); + + it("streaming tool call event sequence matches", async () => { + const sdkEvents = [ + ...anthropicStreamEventShapes().filter( + (e) => + e.type === "message_start" || e.type === "message_delta" || e.type === "message_stop", + ), + ...anthropicToolStreamEventShapes(), + ]; + + const tools = [ + { + name: "get_weather", + description: "Get weather", + input_schema: { + type: "object", + properties: { city: { type: "string" } }, + required: ["city"], + }, + }, + ]; + + const [realStream, mockStreamRes] = await Promise.all([ + anthropicStreaming(config, [{ role: "user", content: "Weather in Paris" }], tools), + httpPost(`${instance.url}/v1/messages`, { + model: "claude-haiku-4-5-20251001", + max_tokens: 50, + messages: [{ role: "user", content: "Weather in Paris" }], + stream: true, + tools, + }), + ]); + + expect(realStream.rawEvents.length, "Real API returned no SSE events").toBeGreaterThan(0); + + const mockEvents = parseTypedSSE(mockStreamRes.body); + expect(mockEvents.length, "Mock returned no SSE events").toBeGreaterThan(0); + + const mockSSEShapes = mockEvents.map((e) => ({ + type: 
e.type, + dataShape: extractShape(e.data), + })); + + const diffs = compareSSESequences(sdkEvents, realStream.events, mockSSEShapes); + const report = formatDriftReport("Anthropic Claude (streaming tool call events)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); +}); diff --git a/src/__tests__/drift/gemini.drift.ts b/src/__tests__/drift/gemini.drift.ts new file mode 100644 index 0000000..d48e3be --- /dev/null +++ b/src/__tests__/drift/gemini.drift.ts @@ -0,0 +1,187 @@ +/** + * Google Gemini GenerateContent API drift tests. + * + * Three-way comparison: SDK types × real API × llmock output. + */ + +import { describe, it, expect, beforeAll, afterAll } from "vitest"; +import type { ServerInstance } from "../../server.js"; +import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema.js"; +import { + geminiContentResponseShape, + geminiToolCallResponseShape, + geminiStreamChunkShape, + geminiStreamLastChunkShape, +} from "./sdk-shapes.js"; +import { geminiNonStreaming, geminiStreaming } from "./providers.js"; +import { httpPost, parseDataOnlySSE, startDriftServer, stopDriftServer } from "./helpers.js"; + +// --------------------------------------------------------------------------- +// Server lifecycle +// --------------------------------------------------------------------------- + +let instance: ServerInstance; +const GOOGLE_API_KEY = process.env.GOOGLE_API_KEY; + +beforeAll(async () => { + instance = await startDriftServer(); +}); + +afterAll(async () => { + await stopDriftServer(instance); +}); + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe.skipIf(!GOOGLE_API_KEY)("Google Gemini drift", () => { + const config = { apiKey: GOOGLE_API_KEY! 
}; + + it("non-streaming text shape matches", async () => { + const sdkShape = geminiContentResponseShape(); + + const [realRes, mockRes] = await Promise.all([ + geminiNonStreaming(config, [{ role: "user", parts: [{ text: "Say hello" }] }]), + httpPost(`${instance.url}/v1beta/models/gemini-2.5-flash:generateContent`, { + contents: [{ role: "user", parts: [{ text: "Say hello" }] }], + }), + ]); + + const realShape = extractShape(realRes.body); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport("Gemini (non-streaming text)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); + + it("streaming text shape matches", async () => { + const sdkChunkShape = geminiStreamChunkShape(); + const sdkLastShape = geminiStreamLastChunkShape(); + + const [realStream, mockStreamRes] = await Promise.all([ + geminiStreaming(config, [{ role: "user", parts: [{ text: "Say hello" }] }]), + httpPost(`${instance.url}/v1beta/models/gemini-2.5-flash:streamGenerateContent`, { + contents: [{ role: "user", parts: [{ text: "Say hello" }] }], + }), + ]); + + const mockChunks = parseDataOnlySSE(mockStreamRes.body); + + expect(realStream.rawEvents.length, "Real API returned no SSE events").toBeGreaterThan(0); + expect(mockChunks.length, "Mock returned no SSE chunks").toBeGreaterThan(0); + + // Compare intermediate chunks (if multiple exist) + if (realStream.rawEvents.length > 1 && mockChunks.length > 1) { + const realChunkShape = extractShape(realStream.rawEvents[0].data); + const mockChunkShape = extractShape(mockChunks[0]); + + const diffs = triangulate(sdkChunkShape, realChunkShape, mockChunkShape); + const report = formatDriftReport("Gemini (streaming intermediate chunk)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + + // Compare 
last chunk + const realLastShape = extractShape(realStream.rawEvents[realStream.rawEvents.length - 1].data); + const mockLastShape = extractShape(mockChunks[mockChunks.length - 1]); + + const lastDiffs = triangulate(sdkLastShape, realLastShape, mockLastShape); + const lastReport = formatDriftReport("Gemini (streaming last chunk)", lastDiffs); + + if (shouldFail(lastDiffs)) { + expect.soft([], lastReport).toEqual(lastDiffs.filter((d) => d.severity === "critical")); + } + }); + + it("non-streaming tool call shape matches", async () => { + const sdkShape = geminiToolCallResponseShape(); + + const tools = [ + { + functionDeclarations: [ + { + name: "get_weather", + description: "Get weather", + parameters: { + type: "OBJECT", + properties: { + city: { type: "STRING" }, + }, + required: ["city"], + }, + }, + ], + }, + ]; + + const [realRes, mockRes] = await Promise.all([ + geminiNonStreaming(config, [{ role: "user", parts: [{ text: "Weather in Paris" }] }], tools), + httpPost(`${instance.url}/v1beta/models/gemini-2.5-flash:generateContent`, { + contents: [{ role: "user", parts: [{ text: "Weather in Paris" }] }], + tools, + }), + ]); + + const realShape = extractShape(realRes.body); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport("Gemini (non-streaming tool call)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); + + it("streaming tool call shape matches", async () => { + const sdkLastShape = geminiStreamLastChunkShape(); + + const tools = [ + { + functionDeclarations: [ + { + name: "get_weather", + description: "Get weather", + parameters: { + type: "OBJECT", + properties: { + city: { type: "STRING" }, + }, + required: ["city"], + }, + }, + ], + }, + ]; + + const [realStream, mockStreamRes] = await Promise.all([ + geminiStreaming(config, [{ role: "user", parts: [{ text: "Weather in 
Paris" }] }], tools), + httpPost(`${instance.url}/v1beta/models/gemini-2.5-flash:streamGenerateContent`, { + contents: [{ role: "user", parts: [{ text: "Weather in Paris" }] }], + tools, + }), + ]); + + const mockChunks = parseDataOnlySSE(mockStreamRes.body); + + expect(realStream.rawEvents.length, "Real API returned no SSE events").toBeGreaterThan(0); + expect(mockChunks.length, "Mock returned no SSE chunks").toBeGreaterThan(0); + + const realLastShape = extractShape(realStream.rawEvents[realStream.rawEvents.length - 1].data); + const mockLastShape = extractShape(mockChunks[mockChunks.length - 1]); + + const diffs = triangulate(sdkLastShape, realLastShape, mockLastShape); + const report = formatDriftReport("Gemini (streaming tool call)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); +}); diff --git a/src/__tests__/drift/helpers.ts b/src/__tests__/drift/helpers.ts new file mode 100644 index 0000000..44b1369 --- /dev/null +++ b/src/__tests__/drift/helpers.ts @@ -0,0 +1,103 @@ +/** + * Shared test helpers for drift detection test files. + * + * Provides httpPost, SSE parsers (for mock server output), common + * fixtures, and server lifecycle management used by all provider-specific + * drift test files. 
+ */ + +/* eslint-disable @typescript-eslint/no-explicit-any */ +import http from "node:http"; +import { createServer, type ServerInstance } from "../../server.js"; +import type { Fixture } from "../../types.js"; + +// --------------------------------------------------------------------------- +// HTTP helpers +// --------------------------------------------------------------------------- + +export async function httpPost( + url: string, + body: object, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const req = http.request( + url, + { + method: "POST", + headers: { "Content-Type": "application/json" }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c) => chunks.push(c)); + res.on("end", () => + resolve({ + status: res.statusCode!, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }), + ); + }, + ); + req.on("error", reject); + req.write(JSON.stringify(body)); + req.end(); + }); +} + +// --------------------------------------------------------------------------- +// SSE parsers +// --------------------------------------------------------------------------- + +/** Parse data-only SSE blocks (OpenAI Chat Completions, Gemini). */ +export function parseDataOnlySSE(body: string): object[] { + return body + .split("\n\n") + .filter((block) => block.startsWith("data: ") && !block.includes("[DONE]")) + .map((block) => JSON.parse(block.slice(6))); +} + +/** Parse typed SSE blocks with event: + data: (Anthropic, OpenAI Responses). 
*/
+export function parseTypedSSE(body: string): { type: string; data: Record<string, unknown> }[] {
+  return body
+    .split("\n\n")
+    .filter((block) => block.includes("event: ") && block.includes("data: "))
+    .map((block) => {
+      const eventMatch = block.match(/^event: (.+)$/m);
+      const dataMatch = block.match(/^data: (.+)$/m);
+      return {
+        type: eventMatch![1],
+        data: JSON.parse(dataMatch![1]),
+      };
+    });
+}
+
+// ---------------------------------------------------------------------------
+// Common fixtures
+// ---------------------------------------------------------------------------
+
+export const TEXT_FIXTURE: Fixture = {
+  match: { userMessage: "Say hello" },
+  response: { content: "Hello!" },
+};
+
+export const TOOL_FIXTURE: Fixture = {
+  match: { userMessage: "Weather in Paris" },
+  response: {
+    toolCalls: [{ name: "get_weather", arguments: '{"city":"Paris"}' }],
+  },
+};
+
+// ---------------------------------------------------------------------------
+// Server lifecycle
+// ---------------------------------------------------------------------------
+
+export async function startDriftServer(): Promise<ServerInstance> {
+  return createServer([TEXT_FIXTURE, TOOL_FIXTURE], {
+    port: 0,
+    chunkSize: 100,
+  });
+}
+
+export async function stopDriftServer(instance: ServerInstance): Promise<void> {
+  await new Promise<void>((r) => instance.server.close(() => r()));
+}
diff --git a/src/__tests__/drift/models.drift.ts b/src/__tests__/drift/models.drift.ts
new file mode 100644
index 0000000..8a4a7aa
--- /dev/null
+++ b/src/__tests__/drift/models.drift.ts
@@ -0,0 +1,100 @@
+/**
+ * Model deprecation checks — verify that models referenced in llmock's
+ * tests, docs, and examples still exist at each provider.
+ */
+
+import { describe, it, expect } from "vitest";
+import * as fs from "node:fs";
+import * as path from "node:path";
+import { listOpenAIModels, listAnthropicModels, listGeminiModels } from "./providers.js";
+
+// ---------------------------------------------------------------------------
+// Scrape referenced models from the codebase
+// ---------------------------------------------------------------------------
+
+const PROJECT_ROOT = path.resolve(import.meta.dirname, "..", "..", "..");
+
+function scrapeModels(pattern: RegExp, files: string[]): string[] {
+  const models = new Set<string>();
+  for (const file of files) {
+    const filePath = path.join(PROJECT_ROOT, file);
+    if (!fs.existsSync(filePath)) continue;
+    const content = fs.readFileSync(filePath, "utf-8");
+    pattern.lastIndex = 0;
+    let match;
+    while ((match = pattern.exec(content)) !== null) {
+      models.add(match[1]);
+    }
+  }
+  return [...models];
+}
+
+const sourceFiles = [
+  "src/__tests__/api-conformance.test.ts",
+  "src/__tests__/ws-api-conformance.test.ts",
+  "README.md",
+  "fixtures/example-greeting.json",
+  "fixtures/example-multi-turn.json",
+  "fixtures/example-tool-call.json",
+];
+
+// ---------------------------------------------------------------------------
+// OpenAI
+// ---------------------------------------------------------------------------
+
+describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI model availability", () => {
+  it("models used in llmock tests are still available", async () => {
+    const models = await listOpenAIModels(process.env.OPENAI_API_KEY!);
+    const referenced = scrapeModels(/\b(gpt-4o(?:-mini)?|gpt-4|gpt-3\.5-turbo)\b/g, sourceFiles);
+
+    if (referenced.length === 0) return; // no models found to check
+
+    for (const m of referenced) {
+      // OpenAI model list may include versioned variants — check prefix match
+      const found = models.some((available) => available === m || available.startsWith(`${m}-`));
+      expect(found, `Model ${m} no longer available at 
OpenAI`).toBe(true); + } + }); +}); + +// --------------------------------------------------------------------------- +// Anthropic +// --------------------------------------------------------------------------- + +describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic model availability", () => { + it("models used in llmock tests are still available", async () => { + const models = await listAnthropicModels(process.env.ANTHROPIC_API_KEY!); + const referenced = scrapeModels( + /\b(claude-3(?:\.\d+)?-(?:opus|sonnet|haiku)(?:-\d{8})?)\b/g, + sourceFiles, + ); + + if (referenced.length === 0) return; + + for (const m of referenced) { + const found = models.some((available) => available === m || available.startsWith(`${m}`)); + expect(found, `Model ${m} no longer available at Anthropic`).toBe(true); + } + }); +}); + +// --------------------------------------------------------------------------- +// Gemini +// --------------------------------------------------------------------------- + +describe.skipIf(!process.env.GOOGLE_API_KEY)("Gemini model availability", () => { + it("models used in llmock tests are still available", async () => { + const models = await listGeminiModels(process.env.GOOGLE_API_KEY!); + const referenced = scrapeModels(/\b(gemini-(?:[\w.-]+))\b/g, sourceFiles); + + if (referenced.length === 0) return; + + // Skip experimental and live-only models — they're ephemeral + const stable = referenced.filter((m) => !m.includes("-exp") && !m.endsWith("-live")); + + for (const m of stable) { + const found = models.some((available) => available === m || available.startsWith(`${m}`)); + expect(found, `Model ${m} no longer available at Gemini`).toBe(true); + } + }); +}); diff --git a/src/__tests__/drift/openai-chat.drift.ts b/src/__tests__/drift/openai-chat.drift.ts new file mode 100644 index 0000000..1b38bdc --- /dev/null +++ b/src/__tests__/drift/openai-chat.drift.ts @@ -0,0 +1,173 @@ +/** + * OpenAI Chat Completions API drift tests. 
+ * + * Three-way comparison: SDK types × real API × llmock output. + */ + +import { describe, it, expect, beforeAll, afterAll } from "vitest"; +import type { ServerInstance } from "../../server.js"; +import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema.js"; +import { + openaiChatCompletionShape, + openaiChatCompletionToolCallShape, + openaiChatCompletionChunkShape, +} from "./sdk-shapes.js"; +import { openaiChatNonStreaming, openaiChatStreaming } from "./providers.js"; +import { httpPost, parseDataOnlySSE, startDriftServer, stopDriftServer } from "./helpers.js"; + +// --------------------------------------------------------------------------- +// Server lifecycle +// --------------------------------------------------------------------------- + +let instance: ServerInstance; +const OPENAI_API_KEY = process.env.OPENAI_API_KEY; + +beforeAll(async () => { + instance = await startDriftServer(); +}); + +afterAll(async () => { + await stopDriftServer(instance); +}); + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe.skipIf(!OPENAI_API_KEY)("OpenAI Chat Completions drift", () => { + const config = { apiKey: OPENAI_API_KEY! 
}; + + it("non-streaming text shape matches", async () => { + const sdkShape = openaiChatCompletionShape(); + + const [realRes, mockRes] = await Promise.all([ + openaiChatNonStreaming(config, [{ role: "user", content: "Say hello" }]), + httpPost(`${instance.url}/v1/chat/completions`, { + model: "gpt-4o-mini", + messages: [{ role: "user", content: "Say hello" }], + stream: false, + }), + ]); + + const realShape = extractShape(realRes.body); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport("OpenAI Chat (non-streaming text)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); + + it("streaming text shape matches", async () => { + const sdkChunkShape = openaiChatCompletionChunkShape(); + + const [realStream, mockStreamRes] = await Promise.all([ + openaiChatStreaming(config, [{ role: "user", content: "Say hello" }]), + httpPost(`${instance.url}/v1/chat/completions`, { + model: "gpt-4o-mini", + messages: [{ role: "user", content: "Say hello" }], + stream: true, + }), + ]); + + const mockChunks = parseDataOnlySSE(mockStreamRes.body); + + expect(realStream.rawEvents.length, "Real API returned no SSE events").toBeGreaterThan(0); + expect(mockChunks.length, "Mock returned no SSE chunks").toBeGreaterThan(0); + + const realChunkShape = extractShape(realStream.rawEvents[0].data); + const mockChunkShape = extractShape(mockChunks[0]); + + const diffs = triangulate(sdkChunkShape, realChunkShape, mockChunkShape); + const report = formatDriftReport("OpenAI Chat (streaming text chunks)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); + + it("non-streaming tool call shape matches", async () => { + const sdkShape = openaiChatCompletionToolCallShape(); + + const tools = [ + { + type: "function", + function: { + name: 
"get_weather", + description: "Get weather", + parameters: { + type: "object", + properties: { city: { type: "string" } }, + required: ["city"], + }, + }, + }, + ]; + + const [realRes, mockRes] = await Promise.all([ + openaiChatNonStreaming(config, [{ role: "user", content: "Weather in Paris" }], tools), + httpPost(`${instance.url}/v1/chat/completions`, { + model: "gpt-4o-mini", + messages: [{ role: "user", content: "Weather in Paris" }], + stream: false, + tools, + }), + ]); + + const realShape = extractShape(realRes.body); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport("OpenAI Chat (non-streaming tool call)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); + + it("streaming tool call shape matches", async () => { + const sdkChunkShape = openaiChatCompletionChunkShape(); + + const tools = [ + { + type: "function", + function: { + name: "get_weather", + description: "Get weather", + parameters: { + type: "object", + properties: { city: { type: "string" } }, + required: ["city"], + }, + }, + }, + ]; + + const [realStream, mockStreamRes] = await Promise.all([ + openaiChatStreaming(config, [{ role: "user", content: "Weather in Paris" }], tools), + httpPost(`${instance.url}/v1/chat/completions`, { + model: "gpt-4o-mini", + messages: [{ role: "user", content: "Weather in Paris" }], + stream: true, + tools, + }), + ]); + + const mockChunks = parseDataOnlySSE(mockStreamRes.body); + + expect(realStream.rawEvents.length, "Real API returned no SSE events").toBeGreaterThan(0); + expect(mockChunks.length, "Mock returned no SSE chunks").toBeGreaterThan(0); + + const realChunkShape = extractShape(realStream.rawEvents[0].data); + const mockChunkShape = extractShape(mockChunks[0]); + + const diffs = triangulate(sdkChunkShape, realChunkShape, mockChunkShape); + const report = 
formatDriftReport("OpenAI Chat (streaming tool call chunks)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); +}); diff --git a/src/__tests__/drift/openai-responses.drift.ts b/src/__tests__/drift/openai-responses.drift.ts new file mode 100644 index 0000000..88aa639 --- /dev/null +++ b/src/__tests__/drift/openai-responses.drift.ts @@ -0,0 +1,184 @@ +/** + * OpenAI Responses API drift tests. + * + * Three-way comparison: SDK types × real API × llmock output. + */ + +import { describe, it, expect, beforeAll, afterAll } from "vitest"; +import type { ServerInstance } from "../../server.js"; +import { + extractShape, + triangulate, + compareSSESequences, + formatDriftReport, + shouldFail, +} from "./schema.js"; +import { + openaiResponsesNonStreamingShape, + openaiResponsesTextEventShapes, + openaiResponsesToolCallEventShapes, +} from "./sdk-shapes.js"; +import { openaiResponsesNonStreaming, openaiResponsesStreaming } from "./providers.js"; +import { httpPost, parseTypedSSE, startDriftServer, stopDriftServer } from "./helpers.js"; + +// --------------------------------------------------------------------------- +// Server lifecycle +// --------------------------------------------------------------------------- + +let instance: ServerInstance; +const OPENAI_API_KEY = process.env.OPENAI_API_KEY; + +beforeAll(async () => { + instance = await startDriftServer(); +}); + +afterAll(async () => { + await stopDriftServer(instance); +}); + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe.skipIf(!OPENAI_API_KEY)("OpenAI Responses API drift", () => { + const config = { apiKey: OPENAI_API_KEY! 
}; + + it("non-streaming text shape matches", async () => { + const sdkShape = openaiResponsesNonStreamingShape(); + + const [realRes, mockRes] = await Promise.all([ + openaiResponsesNonStreaming(config, [{ role: "user", content: "Say hello" }]), + httpPost(`${instance.url}/v1/responses`, { + model: "gpt-4o-mini", + input: [{ role: "user", content: "Say hello" }], + stream: false, + }), + ]); + + const realShape = extractShape(realRes.body); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport("OpenAI Responses (non-streaming text)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); + + it("streaming text event sequence and shapes match", async () => { + const sdkEvents = openaiResponsesTextEventShapes(); + + const [realStream, mockStreamRes] = await Promise.all([ + openaiResponsesStreaming(config, [{ role: "user", content: "Say hello" }]), + httpPost(`${instance.url}/v1/responses`, { + model: "gpt-4o-mini", + input: [{ role: "user", content: "Say hello" }], + stream: true, + }), + ]); + + expect(realStream.rawEvents.length, "Real API returned no SSE events").toBeGreaterThan(0); + + const mockEvents = parseTypedSSE(mockStreamRes.body); + expect(mockEvents.length, "Mock returned no SSE events").toBeGreaterThan(0); + + const mockSSEShapes = mockEvents.map((e) => ({ + type: e.type, + dataShape: extractShape(e.data), + })); + + const diffs = compareSSESequences(sdkEvents, realStream.events, mockSSEShapes); + const report = formatDriftReport("OpenAI Responses (streaming text events)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); + + it("non-streaming tool call shape matches", async () => { + const sdkShape = openaiResponsesNonStreamingShape(); + + const tools = [ + { + type: "function", + name: 
"get_weather", + description: "Get weather", + parameters: { + type: "object", + properties: { city: { type: "string" } }, + required: ["city"], + }, + }, + ]; + + const [realRes, mockRes] = await Promise.all([ + openaiResponsesNonStreaming(config, [{ role: "user", content: "Weather in Paris" }], tools), + httpPost(`${instance.url}/v1/responses`, { + model: "gpt-4o-mini", + input: [{ role: "user", content: "Weather in Paris" }], + stream: false, + tools, + }), + ]); + + const realShape = extractShape(realRes.body); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport("OpenAI Responses (non-streaming tool call)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); + + it("streaming tool call event sequence matches", async () => { + const sdkEvents = [ + ...openaiResponsesTextEventShapes().filter( + (e) => e.type === "response.created" || e.type === "response.completed", + ), + ...openaiResponsesToolCallEventShapes(), + ]; + + const tools = [ + { + type: "function", + name: "get_weather", + description: "Get weather", + parameters: { + type: "object", + properties: { city: { type: "string" } }, + required: ["city"], + }, + }, + ]; + + const [realStream, mockStreamRes] = await Promise.all([ + openaiResponsesStreaming(config, [{ role: "user", content: "Weather in Paris" }], tools), + httpPost(`${instance.url}/v1/responses`, { + model: "gpt-4o-mini", + input: [{ role: "user", content: "Weather in Paris" }], + stream: true, + tools, + }), + ]); + + expect(realStream.rawEvents.length, "Real API returned no SSE events").toBeGreaterThan(0); + + const mockEvents = parseTypedSSE(mockStreamRes.body); + expect(mockEvents.length, "Mock returned no SSE events").toBeGreaterThan(0); + + const mockSSEShapes = mockEvents.map((e) => ({ + type: e.type, + dataShape: extractShape(e.data), + })); + + const 
diffs = compareSSESequences(sdkEvents, realStream.events, mockSSEShapes); + const report = formatDriftReport("OpenAI Responses (streaming tool call events)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + }); +}); diff --git a/src/__tests__/drift/providers.ts b/src/__tests__/drift/providers.ts new file mode 100644 index 0000000..82dcd54 --- /dev/null +++ b/src/__tests__/drift/providers.ts @@ -0,0 +1,422 @@ +/** + * Raw fetch() clients for real provider APIs. + * + * Uses fetch directly (no SDKs) to avoid SDK normalization masking real API + * quirks. SSE parsing, retry logic, and model listing endpoints. + */ + +import { extractShape, type SSEEventShape } from "./schema.js"; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +interface ProviderConfig { + apiKey: string; +} + +interface FetchResult { + status: number; + body: unknown; + raw: string; +} + +interface StreamResult { + status: number; + events: SSEEventShape[]; + rawEvents: { type: string; data: unknown }[]; +} + +// --------------------------------------------------------------------------- +// Retry helper +// --------------------------------------------------------------------------- + +const RETRYABLE_STATUSES = new Set([429, 500, 502, 503]); + +async function fetchWithRetry(url: string, init: RequestInit, maxRetries = 3): Promise { + let lastError: Error | null = null; + for (let attempt = 0; attempt < maxRetries; attempt++) { + try { + const res = await fetch(url, init); + if (RETRYABLE_STATUSES.has(res.status) && attempt < maxRetries - 1) { + const backoff = Math.pow(2, attempt) * 1000; + await new Promise((r) => setTimeout(r, backoff)); + continue; + } + return res; + } catch (err) { + lastError = err as Error; + if (attempt < maxRetries - 1) { + const backoff = Math.pow(2, attempt) * 1000; + 
await new Promise((r) => setTimeout(r, backoff)); + } + } + } + throw lastError ?? new Error("fetch failed after retries"); +} + +// --------------------------------------------------------------------------- +// Response parsing +// --------------------------------------------------------------------------- + +function assertOk(raw: string, status: number, context: string): void { + if (status >= 400) { + throw new Error(`${context}: API returned ${status}: ${raw.slice(0, 300)}`); + } +} + +function parseJsonResponse(raw: string, status: number, context: string): unknown { + if (!raw) throw new Error(`${context}: empty response (status ${status})`); + assertOk(raw, status, context); + try { + return JSON.parse(raw); + } catch { + throw new Error(`${context}: failed to parse JSON (status ${status}): ${raw.slice(0, 200)}`); + } +} + +// --------------------------------------------------------------------------- +// SSE parsing +// --------------------------------------------------------------------------- + +/** Normalize \r\n to \n for SSE parsing (some providers use \r\n) */ +function normalizeLineEndings(text: string): string { + return text.replace(/\r\n/g, "\n"); +} + +/** Parse data-only SSE (OpenAI Chat Completions, Gemini) */ +function parseDataOnlySSE(text: string): { data: unknown }[] { + return normalizeLineEndings(text) + .split("\n\n") + .filter((block) => block.startsWith("data: ") && !block.includes("[DONE]")) + .map((block) => { + // Rejoin continuation lines (data split across lines) + const json = block + .split("\n") + .map((line) => (line.startsWith("data: ") ? 
line.slice(6) : line)) + .join(""); + return { data: JSON.parse(json) }; + }); +} + +/** Parse typed SSE (event: + data: format — Responses API, Claude) */ +function parseTypedSSE(text: string): { type: string; data: unknown }[] { + return normalizeLineEndings(text) + .split("\n\n") + .filter((block) => block.includes("event: ") && block.includes("data: ")) + .map((block) => { + const eventMatch = block.match(/^event: (.+)$/m); + const dataMatch = block.match(/^data: (.+)$/m); + return { + type: eventMatch![1], + data: JSON.parse(dataMatch![1]), + }; + }); +} + +function toSSEEventShapes(events: { type: string; data: unknown }[]): SSEEventShape[] { + return events.map((e) => ({ + type: e.type, + dataShape: extractShape(e.data), + })); +} + +// --------------------------------------------------------------------------- +// OpenAI +// --------------------------------------------------------------------------- + +export async function openaiChatNonStreaming( + config: ProviderConfig, + messages: { role: string; content: string }[], + tools?: object[], +): Promise { + const body: Record = { + model: "gpt-4o-mini", + messages, + stream: false, + max_tokens: 10, + }; + if (tools) body.tools = tools; + + const res = await fetchWithRetry("https://api.openai.com/v1/chat/completions", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${config.apiKey}`, + }, + body: JSON.stringify(body), + }); + + const raw = await res.text(); + return { status: res.status, body: parseJsonResponse(raw, res.status, "OpenAI Chat"), raw }; +} + +export async function openaiChatStreaming( + config: ProviderConfig, + messages: { role: string; content: string }[], + tools?: object[], +): Promise { + const body: Record = { + model: "gpt-4o-mini", + messages, + stream: true, + max_tokens: 10, + }; + if (tools) body.tools = tools; + + const res = await fetchWithRetry("https://api.openai.com/v1/chat/completions", { + method: "POST", + headers: { + 
"Content-Type": "application/json", + Authorization: `Bearer ${config.apiKey}`, + }, + body: JSON.stringify(body), + }); + + const raw = await res.text(); + assertOk(raw, res.status, "OpenAI Chat streaming"); + const parsed = parseDataOnlySSE(raw); + const rawEvents = parsed.map((p) => ({ + type: "chat.completion.chunk", + data: p.data, + })); + return { + status: res.status, + events: toSSEEventShapes(rawEvents), + rawEvents, + }; +} + +export async function openaiResponsesNonStreaming( + config: ProviderConfig, + input: object[], + tools?: object[], +): Promise { + const body: Record = { + model: "gpt-4o-mini", + input, + stream: false, + max_output_tokens: 50, + }; + if (tools) body.tools = tools; + + const res = await fetchWithRetry("https://api.openai.com/v1/responses", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${config.apiKey}`, + }, + body: JSON.stringify(body), + }); + + const raw = await res.text(); + return { + status: res.status, + body: parseJsonResponse(raw, res.status, "OpenAI Responses"), + raw, + }; +} + +export async function openaiResponsesStreaming( + config: ProviderConfig, + input: object[], + tools?: object[], +): Promise { + const body: Record = { + model: "gpt-4o-mini", + input, + stream: true, + max_output_tokens: 50, + }; + if (tools) body.tools = tools; + + const res = await fetchWithRetry("https://api.openai.com/v1/responses", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${config.apiKey}`, + }, + body: JSON.stringify(body), + }); + + const raw = await res.text(); + assertOk(raw, res.status, "OpenAI Responses streaming"); + const rawEvents = parseTypedSSE(raw); + return { + status: res.status, + events: toSSEEventShapes(rawEvents), + rawEvents, + }; +} + +// --------------------------------------------------------------------------- +// Anthropic Claude +// --------------------------------------------------------------------------- + 
+export async function anthropicNonStreaming( + config: ProviderConfig, + messages: { role: string; content: string }[], + tools?: object[], +): Promise { + const body: Record = { + model: "claude-haiku-4-5-20251001", + messages, + max_tokens: 10, + stream: false, + }; + if (tools) body.tools = tools; + + const res = await fetchWithRetry("https://api.anthropic.com/v1/messages", { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-api-key": config.apiKey, + "anthropic-version": "2023-06-01", + }, + body: JSON.stringify(body), + }); + + const raw = await res.text(); + return { status: res.status, body: parseJsonResponse(raw, res.status, "Anthropic"), raw }; +} + +export async function anthropicStreaming( + config: ProviderConfig, + messages: { role: string; content: string }[], + tools?: object[], +): Promise { + const body: Record = { + model: "claude-haiku-4-5-20251001", + messages, + max_tokens: 10, + stream: true, + }; + if (tools) body.tools = tools; + + const res = await fetchWithRetry("https://api.anthropic.com/v1/messages", { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-api-key": config.apiKey, + "anthropic-version": "2023-06-01", + }, + body: JSON.stringify(body), + }); + + const raw = await res.text(); + assertOk(raw, res.status, "Anthropic streaming"); + const rawEvents = parseTypedSSE(raw); + return { + status: res.status, + events: toSSEEventShapes(rawEvents), + rawEvents, + }; +} + +// --------------------------------------------------------------------------- +// Google Gemini +// --------------------------------------------------------------------------- + +export async function geminiNonStreaming( + config: ProviderConfig, + contents: object[], + tools?: object[], +): Promise { + // Gemini 2.5+ uses thinking tokens from the output budget, so we need + // more headroom than other providers to get actual content back + const body: Record = { + contents, + generationConfig: { maxOutputTokens: 100 }, + 
}; + if (tools) body.tools = tools; + + // Gemini requires API key as query parameter per Google's REST API design + const url = `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key=${config.apiKey}`; + const res = await fetchWithRetry(url, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); + + const raw = await res.text(); + return { status: res.status, body: parseJsonResponse(raw, res.status, "Gemini"), raw }; +} + +export async function geminiStreaming( + config: ProviderConfig, + contents: object[], + tools?: object[], +): Promise { + const body: Record = { + contents, + generationConfig: { maxOutputTokens: 100 }, + }; + if (tools) body.tools = tools; + + const url = `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse&key=${config.apiKey}`; + const res = await fetchWithRetry(url, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); + + const raw = await res.text(); + assertOk(raw, res.status, "Gemini streaming"); + const parsed = parseDataOnlySSE(raw); + const rawEvents = parsed.map((p) => ({ + type: "gemini.chunk", + data: p.data, + })); + return { + status: res.status, + events: toSSEEventShapes(rawEvents), + rawEvents, + }; +} + +// --------------------------------------------------------------------------- +// Model listing +// --------------------------------------------------------------------------- + +export async function listOpenAIModels(apiKey: string): Promise { + const res = await fetchWithRetry("https://api.openai.com/v1/models", { + method: "GET", + headers: { Authorization: `Bearer ${apiKey}` }, + }); + + const raw = await res.text(); + const json = parseJsonResponse(raw, res.status, "OpenAI model list") as { + data: { id: string }[]; + }; + return json.data.map((m) => m.id); +} + +export async function listAnthropicModels(apiKey: string): 
Promise { + const res = await fetchWithRetry("https://api.anthropic.com/v1/models", { + method: "GET", + headers: { + "x-api-key": apiKey, + "anthropic-version": "2023-06-01", + }, + }); + + const raw = await res.text(); + const json = parseJsonResponse(raw, res.status, "Anthropic model list") as { + data: { id: string }[]; + }; + return json.data.map((m) => m.id); +} + +export async function listGeminiModels(apiKey: string): Promise { + const res = await fetchWithRetry( + `https://generativelanguage.googleapis.com/v1beta/models?key=${apiKey}`, + { method: "GET" }, + ); + + const raw = await res.text(); + const json = parseJsonResponse(raw, res.status, "Gemini model list") as { + models: { name: string }[]; + }; + // Gemini returns "models/gemini-2.5-flash" — strip prefix + return json.models.map((m) => m.name.replace(/^models\//, "")); +} diff --git a/src/__tests__/drift/schema.ts b/src/__tests__/drift/schema.ts new file mode 100644 index 0000000..5ee4476 --- /dev/null +++ b/src/__tests__/drift/schema.ts @@ -0,0 +1,476 @@ +/** + * Shape extraction, three-way comparison, severity classification, and reporting + * for drift detection between SDK types, real API responses, and llmock output. 
+ */ + +// --------------------------------------------------------------------------- +// Shape types +// --------------------------------------------------------------------------- + +export type ShapeNode = + | { kind: "null" } + | { kind: "string" } + | { kind: "number" } + | { kind: "boolean" } + | { kind: "array"; element: ShapeNode | null } + | { kind: "object"; fields: Record }; + +export type DriftSeverity = "critical" | "warning" | "info"; + +export interface ShapeDiff { + path: string; + severity: DriftSeverity; + issue: string; + expected: string; // from SDK types + real: string; // from real API + mock: string; // from llmock +} + +export interface SSEEventShape { + type: string; + dataShape: ShapeNode; +} + +// --------------------------------------------------------------------------- +// Shape extraction +// --------------------------------------------------------------------------- + +export function extractShape(value: unknown): ShapeNode { + if (value === null || value === undefined) { + return { kind: "null" }; + } + if (typeof value === "string") return { kind: "string" }; + if (typeof value === "number") return { kind: "number" }; + if (typeof value === "boolean") return { kind: "boolean" }; + if (Array.isArray(value)) { + if (value.length === 0) return { kind: "array", element: null }; + // Merge shapes of all elements into a unified shape + return { kind: "array", element: mergeShapes(value.map(extractShape)) }; + } + if (typeof value === "object") { + const fields: Record = {}; + for (const [k, v] of Object.entries(value as Record)) { + fields[k] = extractShape(v); + } + return { kind: "object", fields }; + } + return { kind: "null" }; +} + +function mergeShapes(shapes: ShapeNode[]): ShapeNode { + if (shapes.length === 0) return { kind: "null" }; + if (shapes.length === 1) return shapes[0]; + + // If all same kind, merge recursively + const kinds = new Set(shapes.map((s) => s.kind)); + if (kinds.size === 1) { + const kind = shapes[0].kind; 
+ if (kind === "object") { + const allFields = new Set(); + for (const s of shapes) { + if (s.kind === "object") { + for (const k of Object.keys(s.fields)) allFields.add(k); + } + } + const merged: Record = {}; + for (const field of allFields) { + const fieldShapes = shapes + .filter((s) => s.kind === "object" && field in s.fields) + .map((s) => (s as { kind: "object"; fields: Record }).fields[field]); + merged[field] = fieldShapes.length > 0 ? mergeShapes(fieldShapes) : { kind: "null" }; + } + return { kind: "object", fields: merged }; + } + if (kind === "array") { + const elements = shapes + .filter((s) => s.kind === "array" && s.element !== null) + .map((s) => (s as { kind: "array"; element: ShapeNode | null }).element!); + return { kind: "array", element: elements.length > 0 ? mergeShapes(elements) : null }; + } + return shapes[0]; + } + + // Mixed kinds — return the first non-null shape + return shapes.find((s) => s.kind !== "null") ?? { kind: "null" }; +} + +// --------------------------------------------------------------------------- +// Shape description (for reports) +// --------------------------------------------------------------------------- + +export function describeShape(shape: ShapeNode | null): string { + if (shape === null) return ""; + switch (shape.kind) { + case "null": + return "null"; + case "string": + return "string"; + case "number": + return "number"; + case "boolean": + return "boolean"; + case "array": + return `array<${describeShape(shape.element)}>`; + case "object": { + const entries = Object.entries(shape.fields); + if (entries.length === 0) return "object {}"; + if (entries.length <= 3) { + const inner = entries.map(([k, v]) => `${k}: ${describeShape(v)}`).join(", "); + return `object { ${inner} }`; + } + const first3 = entries + .slice(0, 3) + .map(([k, v]) => `${k}: ${describeShape(v)}`) + .join(", "); + return `object { ${first3}, ... 
+${entries.length - 3} }`; + } + } +} + +// --------------------------------------------------------------------------- +// Two-way comparison +// --------------------------------------------------------------------------- + +export function compareShapes(a: ShapeNode, b: ShapeNode, path = ""): ShapeDiff[] { + const diffs: ShapeDiff[] = []; + + if (a.kind !== b.kind) { + diffs.push({ + path: path || "(root)", + severity: "critical", + issue: `Type mismatch: ${a.kind} vs ${b.kind}`, + expected: describeShape(a), + real: describeShape(b), + mock: "", + }); + return diffs; + } + + if (a.kind === "object" && b.kind === "object") { + const allKeys = new Set([...Object.keys(a.fields), ...Object.keys(b.fields)]); + for (const key of allKeys) { + const childPath = path ? `${path}.${key}` : key; + const inA = key in a.fields; + const inB = key in b.fields; + + if (inA && !inB) { + diffs.push({ + path: childPath, + severity: "warning", + issue: "Field in first but not second", + expected: describeShape(a.fields[key]), + real: "", + mock: "", + }); + } else if (!inA && inB) { + diffs.push({ + path: childPath, + severity: "warning", + issue: "Field in second but not first", + expected: "", + real: describeShape(b.fields[key]), + mock: "", + }); + } else { + diffs.push(...compareShapes(a.fields[key], b.fields[key], childPath)); + } + } + } + + if (a.kind === "array" && b.kind === "array") { + if (a.element && b.element) { + diffs.push(...compareShapes(a.element, b.element, `${path || "(root)"}[]`)); + } + } + + return diffs; +} + +// --------------------------------------------------------------------------- +// Three-way triangulation +// --------------------------------------------------------------------------- + +/** Known intentional differences that should never trigger failures */ +const ALLOWLISTED_PATHS = new Set([ + "usage", + "usage.prompt_tokens", + "usage.completion_tokens", + "usage.total_tokens", + "usage.input_tokens", + "usage.output_tokens", + 
"usage.completion_tokens_details", + "usage.prompt_tokens_details", + "usage.cache_creation_input_tokens", + "usage.cache_read_input_tokens", + "usageMetadata", + "usageMetadata.promptTokenCount", + "usageMetadata.candidatesTokenCount", + "usageMetadata.totalTokenCount", + "usageMetadata.cachedContentTokenCount", + "system_fingerprint", + "logprobs", + "choices[].logprobs", + "service_tier", + "x_groq", + // Gemini streaming metadata fields vary + "modelVersion", + "avgLogprobs", +]); + +function isAllowlisted(path: string): boolean { + if (ALLOWLISTED_PATHS.has(path)) return true; + // Normalize array indices: choices[0].x → choices[].x + const normalized = path.replace(/\[\d+\]/g, "[]"); + return ALLOWLISTED_PATHS.has(normalized); +} + +export function triangulate( + sdk: ShapeNode | null, + real: ShapeNode | null, + mock: ShapeNode | null, +): ShapeDiff[] { + return triangulateAt("", sdk, real, mock); +} + +function triangulateAt( + path: string, + sdk: ShapeNode | null, + real: ShapeNode | null, + mock: ShapeNode | null, +): ShapeDiff[] { + const diffs: ShapeDiff[] = []; + const displayPath = path || "(root)"; + + const sdkKind = sdk?.kind ?? null; + const realKind = real?.kind ?? null; + const mockKind = mock?.kind ?? null; + + // All absent — nothing to compare + if (!sdk && !real && !mock) return diffs; + + // Field in SDK + real but not mock → llmock drift (critical) + if (sdk && real && !mock) { + diffs.push({ + path: displayPath, + severity: isAllowlisted(path) ? "info" : "critical", + issue: "LLMOCK DRIFT — field in SDK + real API but missing from mock", + expected: describeShape(sdk), + real: describeShape(real), + mock: "", + }); + return diffs; + } + + // Field in real but not SDK or mock → provider added something new + if (!sdk && real && !mock) { + diffs.push({ + path: displayPath, + severity: isAllowlisted(path) ? 
"info" : "warning", + issue: "PROVIDER ADDED FIELD — in real API but not in SDK or mock", + expected: "", + real: describeShape(real), + mock: "", + }); + return diffs; + } + + // Field in SDK but not real → possibly deprecated/optional + if (sdk && !real) { + diffs.push({ + path: displayPath, + severity: "info", + issue: "SDK EXTRA — field in SDK but not in real API response (optional or deprecated)", + expected: describeShape(sdk), + real: "", + mock: describeShape(mock), + }); + return diffs; + } + + // Field in mock but not real → mock has extra field + if (!sdk && !real && mock) { + diffs.push({ + path: displayPath, + severity: "info", + issue: "MOCK EXTRA FIELD — in mock but not in real API", + expected: "", + real: "", + mock: describeShape(mock), + }); + return diffs; + } + + // All three present — check type mismatches + if (real && mock && realKind !== mockKind) { + // Allow null vs other type (optional fields) + if (realKind !== "null" && mockKind !== "null") { + diffs.push({ + path: displayPath, + severity: isAllowlisted(path) ? "info" : "critical", + issue: `TYPE MISMATCH between real API and mock: ${realKind} vs ${mockKind}`, + expected: describeShape(sdk), + real: describeShape(real), + mock: describeShape(mock), + }); + return diffs; + } + } + + if (sdk && real && sdkKind !== realKind) { + if (sdkKind !== "null" && realKind !== "null") { + diffs.push({ + path: displayPath, + severity: isAllowlisted(path) ? "info" : "warning", + issue: `SDK STALE — type mismatch between SDK and real API: ${sdkKind} vs ${realKind}`, + expected: describeShape(sdk), + real: describeShape(real), + mock: describeShape(mock), + }); + } + } + + // Recurse into object fields + if (realKind === "object" || sdkKind === "object" || mockKind === "object") { + const sdkFields = sdk?.kind === "object" ? sdk.fields : {}; + const realFields = real?.kind === "object" ? real.fields : {}; + const mockFields = mock?.kind === "object" ? 
mock.fields : {}; + + const allKeys = new Set([ + ...Object.keys(sdkFields), + ...Object.keys(realFields), + ...Object.keys(mockFields), + ]); + + for (const key of allKeys) { + const childPath = path ? `${path}.${key}` : key; + diffs.push( + ...triangulateAt( + childPath, + sdkFields[key] ?? null, + realFields[key] ?? null, + mockFields[key] ?? null, + ), + ); + } + } + + // Recurse into array elements + if (realKind === "array" || sdkKind === "array" || mockKind === "array") { + const sdkElem = sdk?.kind === "array" ? sdk.element : null; + const realElem = real?.kind === "array" ? real.element : null; + const mockElem = mock?.kind === "array" ? mock.element : null; + + if (sdkElem || realElem || mockElem) { + diffs.push(...triangulateAt(`${path || "(root)"}[]`, sdkElem, realElem, mockElem)); + } + } + + return diffs; +} + +// --------------------------------------------------------------------------- +// SSE event sequence comparison +// --------------------------------------------------------------------------- + +export function compareSSESequences( + sdk: SSEEventShape[], + real: SSEEventShape[], + mock: SSEEventShape[], +): ShapeDiff[] { + const diffs: ShapeDiff[] = []; + + // Compare event type sequences + const realTypes = real.map((e) => e.type); + const mockTypes = mock.map((e) => e.type); + + // Check for event types in real but not mock + const realTypeSet = new Set(realTypes); + const mockTypeSet = new Set(mockTypes); + + // Transport-level SSE events that are not part of the response shape + const SSE_TRANSPORT_EVENTS = new Set(["ping"]); + + for (const type of realTypeSet) { + if (!mockTypeSet.has(type)) { + diffs.push({ + path: `SSE:${type}`, + severity: SSE_TRANSPORT_EVENTS.has(type) ? "info" : "critical", + issue: SSE_TRANSPORT_EVENTS.has(type) + ? 
`TRANSPORT EVENT — real API emits "${type}" (keepalive), mock does not` + : `LLMOCK DRIFT — real API emits event type "${type}" but mock does not`, + expected: type, + real: type, + mock: "", + }); + } + } + + for (const type of mockTypeSet) { + if (!realTypeSet.has(type)) { + diffs.push({ + path: `SSE:${type}`, + severity: "info", + issue: `MOCK EXTRA EVENT — mock emits event type "${type}" but real API does not`, + expected: "", + real: "", + mock: type, + }); + } + } + + // Compare shapes of matching event types + for (const type of realTypeSet) { + if (!mockTypeSet.has(type)) continue; + const realEvent = real.find((e) => e.type === type); + const mockEvent = mock.find((e) => e.type === type); + const sdkEvent = sdk.find((e) => e.type === type); + + if (realEvent && mockEvent) { + const eventDiffs = triangulate( + sdkEvent?.dataShape ?? null, + realEvent.dataShape, + mockEvent.dataShape, + ); + for (const d of eventDiffs) { + diffs.push({ + ...d, + path: `SSE:${type}.${d.path}`, + }); + } + } + } + + return diffs; +} + +// --------------------------------------------------------------------------- +// Report formatting +// --------------------------------------------------------------------------- + +export function formatDriftReport(context: string, diffs: ShapeDiff[]): string { + if (diffs.length === 0) return `No drift detected: ${context}`; + + const lines: string[] = []; + lines.push(`\nAPI DRIFT DETECTED: ${context}\n`); + + for (let i = 0; i < diffs.length; i++) { + const d = diffs[i]; + lines.push(` ${i + 1}. 
[${d.severity}] ${d.issue}`); + lines.push(` Path: ${d.path}`); + lines.push(` SDK: ${d.expected}`); + lines.push(` Real: ${d.real}`); + lines.push(` Mock: ${d.mock}`); + lines.push(""); + } + + return lines.join("\n"); +} + +// --------------------------------------------------------------------------- +// Strict mode check +// --------------------------------------------------------------------------- + +export function shouldFail(diffs: ShapeDiff[]): boolean { + const strict = process.env.STRICT_DRIFT === "1"; + return diffs.some((d) => d.severity === "critical" || (strict && d.severity === "warning")); +} diff --git a/src/__tests__/drift/sdk-shapes.ts b/src/__tests__/drift/sdk-shapes.ts new file mode 100644 index 0000000..5ff9aec --- /dev/null +++ b/src/__tests__/drift/sdk-shapes.ts @@ -0,0 +1,517 @@ +/** + * Extract expected shapes from SDK types by constructing minimal conformant + * objects and running extractShape() on them. + * + * This gives us the "expected" shape layer without needing the TypeScript + * compiler API. Each function creates a minimal valid instance with all + * required fields populated with representative values. 
+ */ + +import { extractShape, type ShapeNode, type SSEEventShape } from "./schema.js"; + +// --------------------------------------------------------------------------- +// OpenAI Chat Completions +// --------------------------------------------------------------------------- + +export function openaiChatCompletionShape(): ShapeNode { + return extractShape({ + id: "chatcmpl-abc123", + object: "chat.completion", + created: 1700000000, + model: "gpt-4o-mini", + choices: [ + { + index: 0, + message: { + role: "assistant", + content: "Hello!", + refusal: null, + }, + logprobs: null, + finish_reason: "stop", + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + completion_tokens_details: { + reasoning_tokens: 0, + accepted_prediction_tokens: 0, + rejected_prediction_tokens: 0, + }, + prompt_tokens_details: { + cached_tokens: 0, + }, + }, + system_fingerprint: "fp_abc123", + service_tier: "default", + }); +} + +export function openaiChatCompletionToolCallShape(): ShapeNode { + return extractShape({ + id: "chatcmpl-abc123", + object: "chat.completion", + created: 1700000000, + model: "gpt-4o-mini", + choices: [ + { + index: 0, + message: { + role: "assistant", + content: null, + tool_calls: [ + { + id: "call_abc123", + type: "function", + function: { + name: "get_weather", + arguments: '{"city":"SF"}', + }, + }, + ], + refusal: null, + }, + logprobs: null, + finish_reason: "tool_calls", + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + system_fingerprint: "fp_abc123", + }); +} + +export function openaiChatCompletionChunkShape(): ShapeNode { + return extractShape({ + id: "chatcmpl-abc123", + object: "chat.completion.chunk", + created: 1700000000, + model: "gpt-4o-mini", + choices: [ + { + index: 0, + delta: { + role: "assistant", + content: "", + }, + logprobs: null, + finish_reason: null, + }, + ], + system_fingerprint: "fp_abc123", + }); +} + +// 
--------------------------------------------------------------------------- +// OpenAI Responses API +// --------------------------------------------------------------------------- + +export function openaiResponsesTextEventShapes(): SSEEventShape[] { + return [ + { + type: "response.created", + dataShape: extractShape({ + type: "response.created", + response: { + id: "resp_abc123", + object: "response", + created_at: 1700000000, + model: "gpt-4o-mini", + status: "in_progress", + output: [], + }, + }), + }, + { + type: "response.in_progress", + dataShape: extractShape({ + type: "response.in_progress", + response: { + id: "resp_abc123", + object: "response", + created_at: 1700000000, + model: "gpt-4o-mini", + status: "in_progress", + output: [], + }, + }), + }, + { + type: "response.output_item.added", + dataShape: extractShape({ + type: "response.output_item.added", + output_index: 0, + item: { + type: "message", + id: "msg_abc123", + status: "in_progress", + role: "assistant", + content: [], + }, + }), + }, + { + type: "response.content_part.added", + dataShape: extractShape({ + type: "response.content_part.added", + output_index: 0, + content_index: 0, + part: { type: "output_text", text: "" }, + }), + }, + { + type: "response.output_text.delta", + dataShape: extractShape({ + type: "response.output_text.delta", + item_id: "msg_abc123", + output_index: 0, + content_index: 0, + delta: "Hello", + }), + }, + { + type: "response.output_text.done", + dataShape: extractShape({ + type: "response.output_text.done", + output_index: 0, + content_index: 0, + text: "Hello!", + }), + }, + { + type: "response.content_part.done", + dataShape: extractShape({ + type: "response.content_part.done", + output_index: 0, + content_index: 0, + part: { type: "output_text", text: "Hello!" 
}, + }), + }, + { + type: "response.output_item.done", + dataShape: extractShape({ + type: "response.output_item.done", + output_index: 0, + item: { + type: "message", + id: "msg_abc123", + status: "completed", + role: "assistant", + content: [{ type: "output_text", text: "Hello!" }], + }, + }), + }, + { + type: "response.completed", + dataShape: extractShape({ + type: "response.completed", + response: { + id: "resp_abc123", + object: "response", + created_at: 1700000000, + model: "gpt-4o-mini", + status: "completed", + output: [ + { + type: "message", + id: "msg_abc123", + status: "completed", + role: "assistant", + content: [{ type: "output_text", text: "Hello!" }], + }, + ], + usage: { + input_tokens: 10, + output_tokens: 5, + total_tokens: 15, + }, + }, + }), + }, + ]; +} + +export function openaiResponsesToolCallEventShapes(): SSEEventShape[] { + return [ + { + type: "response.output_item.added", + dataShape: extractShape({ + type: "response.output_item.added", + output_index: 0, + item: { + type: "function_call", + id: "fc_abc123", + call_id: "call_abc123", + name: "get_weather", + arguments: "", + status: "in_progress", + }, + }), + }, + { + type: "response.function_call_arguments.delta", + dataShape: extractShape({ + type: "response.function_call_arguments.delta", + item_id: "fc_abc123", + output_index: 0, + delta: '{"city":', + }), + }, + { + type: "response.function_call_arguments.done", + dataShape: extractShape({ + type: "response.function_call_arguments.done", + output_index: 0, + arguments: '{"city":"SF"}', + }), + }, + ]; +} + +export function openaiResponsesNonStreamingShape(): ShapeNode { + return extractShape({ + id: "resp_abc123", + object: "response", + created_at: 1700000000, + model: "gpt-4o-mini", + status: "completed", + output: [ + { + type: "message", + id: "msg_abc123", + status: "completed", + role: "assistant", + content: [{ type: "output_text", text: "Hello!" 
}], + }, + ], + usage: { + input_tokens: 10, + output_tokens: 5, + total_tokens: 15, + }, + }); +} + +// --------------------------------------------------------------------------- +// Anthropic Claude Messages +// --------------------------------------------------------------------------- + +export function anthropicMessageShape(): ShapeNode { + return extractShape({ + id: "msg_abc123", + type: "message", + role: "assistant", + content: [{ type: "text", text: "Hello!" }], + model: "claude-3-haiku-20240307", + stop_reason: "end_turn", + stop_sequence: null, + usage: { + input_tokens: 10, + output_tokens: 5, + }, + }); +} + +export function anthropicMessageToolCallShape(): ShapeNode { + return extractShape({ + id: "msg_abc123", + type: "message", + role: "assistant", + content: [ + { + type: "tool_use", + id: "toolu_abc123", + name: "get_weather", + input: { city: "SF" }, + }, + ], + model: "claude-3-haiku-20240307", + stop_reason: "tool_use", + stop_sequence: null, + usage: { + input_tokens: 10, + output_tokens: 5, + }, + }); +} + +export function anthropicStreamEventShapes(): SSEEventShape[] { + return [ + { + type: "message_start", + dataShape: extractShape({ + type: "message_start", + message: { + id: "msg_abc123", + type: "message", + role: "assistant", + content: [], + model: "claude-3-haiku-20240307", + stop_reason: null, + stop_sequence: null, + usage: { input_tokens: 10, output_tokens: 0 }, + }, + }), + }, + { + type: "content_block_start", + dataShape: extractShape({ + type: "content_block_start", + index: 0, + content_block: { type: "text", text: "" }, + }), + }, + { + type: "content_block_delta", + dataShape: extractShape({ + type: "content_block_delta", + index: 0, + delta: { type: "text_delta", text: "Hello" }, + }), + }, + { + type: "content_block_stop", + dataShape: extractShape({ + type: "content_block_stop", + index: 0, + }), + }, + { + type: "message_delta", + dataShape: extractShape({ + type: "message_delta", + delta: { stop_reason: "end_turn", 
stop_sequence: null }, + usage: { output_tokens: 5 }, + }), + }, + { + type: "message_stop", + dataShape: extractShape({ + type: "message_stop", + }), + }, + ]; +} + +export function anthropicToolStreamEventShapes(): SSEEventShape[] { + return [ + { + type: "content_block_start", + dataShape: extractShape({ + type: "content_block_start", + index: 0, + content_block: { + type: "tool_use", + id: "toolu_abc123", + name: "get_weather", + input: {}, + }, + }), + }, + { + type: "content_block_delta", + dataShape: extractShape({ + type: "content_block_delta", + index: 0, + delta: { type: "input_json_delta", partial_json: '{"city":' }, + }), + }, + ]; +} + +// --------------------------------------------------------------------------- +// Google Gemini +// --------------------------------------------------------------------------- + +export function geminiContentResponseShape(): ShapeNode { + return extractShape({ + candidates: [ + { + content: { + role: "model", + parts: [{ text: "Hello!" }], + }, + finishReason: "STOP", + index: 0, + safetyRatings: [ + { + category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", + probability: "NEGLIGIBLE", + }, + ], + }, + ], + usageMetadata: { + promptTokenCount: 10, + candidatesTokenCount: 5, + totalTokenCount: 15, + }, + modelVersion: "gemini-1.5-flash", + }); +} + +export function geminiToolCallResponseShape(): ShapeNode { + return extractShape({ + candidates: [ + { + content: { + role: "model", + parts: [ + { + functionCall: { + name: "get_weather", + args: { city: "SF" }, + }, + }, + ], + }, + finishReason: "STOP", + index: 0, + }, + ], + usageMetadata: { + promptTokenCount: 10, + candidatesTokenCount: 5, + totalTokenCount: 15, + }, + }); +} + +export function geminiStreamChunkShape(): ShapeNode { + return extractShape({ + candidates: [ + { + content: { + role: "model", + parts: [{ text: "Hello" }], + }, + index: 0, + }, + ], + }); +} + +export function geminiStreamLastChunkShape(): ShapeNode { + return extractShape({ + candidates: [ + { + 
content: { + role: "model", + parts: [{ text: "!" }], + }, + finishReason: "STOP", + index: 0, + }, + ], + usageMetadata: { + promptTokenCount: 10, + candidatesTokenCount: 5, + totalTokenCount: 15, + }, + }); +} diff --git a/vitest.config.drift.ts b/vitest.config.drift.ts new file mode 100644 index 0000000..99de20b --- /dev/null +++ b/vitest.config.drift.ts @@ -0,0 +1,9 @@ +import { defineConfig } from "vitest/config"; +export default defineConfig({ + test: { + environment: "node", + globals: true, + include: ["src/__tests__/drift/**/*.drift.ts"], + testTimeout: 30000, + }, +}); From 7a961f8d55fb5bf3d1dcdfa625f859a97ffb4a0b Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Sat, 14 Mar 2026 22:33:19 -0700 Subject: [PATCH 3/3] chore: bump version to 1.3.2 --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2fee7ba..9311d17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # @copilotkit/llmock +## 1.3.2 + +### Patch Changes + +- Fix missing `refusal` field on OpenAI Chat Completions responses — both the SDK and real API return `refusal: null` on non-refusal messages, but llmock was omitting it +- Live API drift detection test suite: three-layer triangulation between SDK types, real API responses, and llmock output across OpenAI (Chat + Responses), Anthropic Claude, and Google Gemini +- Weekly CI workflow for automated drift checks +- `DRIFT.md` documentation for the drift detection system + ## 1.3.1 ### Patch Changes