Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions apps/code/src/main/services/agent/service.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,12 @@ vi.mock("@posthog/agent/posthog-api", () => ({
}));

vi.mock("@posthog/agent/gateway-models", () => ({
DEFAULT_GATEWAY_MODEL: "claude-opus-4-8",
DEFAULT_CODEX_MODEL: "gpt-5.5",
fetchGatewayModels: vi.fn().mockResolvedValue([]),
formatGatewayModelName: vi.fn(),
getProviderName: vi.fn(),
isBlockedModelId: vi.fn().mockReturnValue(false),
}));

vi.mock("@posthog/agent/adapters/claude/session/jsonl-hydration", () => ({
Expand Down
3 changes: 2 additions & 1 deletion apps/code/src/main/services/llm-gateway/schemas.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { DEFAULT_GATEWAY_MODEL } from "@posthog/agent/gateway-models";
import { z } from "zod";

export const llmMessageSchema = z.object({
Expand All @@ -11,7 +12,7 @@ export const promptInput = z.object({
system: z.string().optional(),
messages: z.array(llmMessageSchema),
maxTokens: z.number().optional(),
model: z.string().default("claude-haiku-4-5"),
model: z.string().default(DEFAULT_GATEWAY_MODEL),
});

export type PromptInput = z.infer<typeof promptInput>;
Expand Down
3 changes: 2 additions & 1 deletion apps/code/src/main/services/llm-gateway/service.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { DEFAULT_GATEWAY_MODEL } from "@posthog/agent/gateway-models";
import {
getGatewayInvalidatePlanCacheUrl,
getGatewayUsageUrl,
Expand Down Expand Up @@ -51,7 +52,7 @@ export class LlmGatewayService {
const {
system,
maxTokens,
model = "claude-haiku-4-5",
model = DEFAULT_GATEWAY_MODEL,
signal,
timeoutMs = 60_000,
} = options;
Expand Down
4 changes: 2 additions & 2 deletions apps/code/src/renderer/api/posthogClient.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,11 @@ describe("PostHogAPIClient", () => {
await expect(
client.runTaskInCloud("task-123", "feature/legacy-effort", {
adapter: "claude",
model: "claude-opus-4-6",
model: "claude-opus-4-8",
reasoningLevel: "minimal",
}),
).rejects.toThrow(
"Reasoning effort 'minimal' is not supported for claude model 'claude-opus-4-6'.",
"Reasoning effort 'minimal' is not supported for claude model 'claude-opus-4-8'.",
);

expect(post).not.toHaveBeenCalled();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,22 @@ const mockModelOption = {
id: "model",
name: "Model",
type: "select" as const,
currentValue: "gpt-5.4",
currentValue: "gpt-5.5",
options: [
{
group: "recommended",
name: "Recommended",
options: [
{ value: "gpt-5.4", name: "GPT 5.4" },
{ value: "gpt-5.5", name: "gpt-5.5" },
{ value: "claude-sonnet-4-6", name: "Claude Sonnet 4.6" },
],
},
{
group: "other",
name: "Other",
options: [
{ value: "claude-opus-4-6", name: "Claude Opus 4.6" },
{ value: "claude-opus-4-8", name: "Claude Opus 4.8" },
{ value: "o3-pro", name: "o3-pro" },
{ value: "claude-haiku-4-5", name: "Claude Haiku 4.5" },
],
},
],
Expand Down
3 changes: 2 additions & 1 deletion apps/mobile/src/features/inbox/components/TinderView.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
} from "react-native-safe-area-context";
import { MarkdownText } from "@/features/chat/components/MarkdownText";
import { createTask, runTaskInCloud } from "@/features/tasks/api";
import { DEFAULT_MODEL } from "@/features/tasks/composer/options";
import type {
CreateTaskOptions,
RepositoryOption,
Expand Down Expand Up @@ -206,7 +207,7 @@ export function TinderView({
await runTaskInCloud(task.id, {
pendingUserMessage: prompt,
runtimeAdapter: "claude",
model: "claude-opus-4-7",
model: DEFAULT_MODEL,
initialPermissionMode: "plan",
runSource: "signal_report",
signalReportId: report.id,
Expand Down
2 changes: 1 addition & 1 deletion apps/mobile/src/features/tasks/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ export interface RunTaskInCloudOptions {
mode?: "interactive" | "background";
/** Adapter to use on the cloud runner. Currently only "claude" on mobile. */
runtimeAdapter?: "claude" | "codex";
/** Gateway model ID, e.g. "claude-opus-4-7". */
/** Gateway model ID, e.g. "claude-opus-4-8". */
model?: string;
/** Reasoning effort: "low" | "medium" | "high" (model-dependent). */
reasoningEffort?: string;
Expand Down
12 changes: 3 additions & 9 deletions apps/mobile/src/features/tasks/composer/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ export interface ModelOption {

export const MODELS: ModelOption[] = [
{
value: "claude-opus-4-7",
label: "Claude Opus 4.7",
value: "claude-opus-4-8",
label: "Claude Opus 4.8",
description: "Most capable, slower",
supportsReasoning: true,
},
Expand All @@ -48,12 +48,6 @@ export const MODELS: ModelOption[] = [
description: "Balanced",
supportsReasoning: true,
},
{
value: "claude-haiku-4-5",
label: "Claude Haiku 4.5",
description: "Fastest",
supportsReasoning: false,
},
];

export const REASONING_LEVELS: {
Expand All @@ -68,7 +62,7 @@ export const REASONING_LEVELS: {
];

export const DEFAULT_EXECUTION_MODE: ExecutionMode = "plan";
export const DEFAULT_MODEL = "claude-opus-4-7";
export const DEFAULT_MODEL = "claude-opus-4-8";
export const DEFAULT_REASONING: ReasoningEffort = "high";

export function modelLabel(value: string): string {
Expand Down
2 changes: 1 addition & 1 deletion packages/agent/src/adapters/claude/claude-agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1182,7 +1182,7 @@ export class ClaudeAcpAgent extends BaseAcpAgent {

// For model options, fall back to alias resolution when exact match fails.
// This lets callers use human-friendly aliases like "opus" or "sonnet"
// instead of full model IDs like "claude-opus-4-6".
// instead of full model IDs like "claude-opus-4-8".
if (!validValue && params.configId === "model") {
const resolved = resolveModelPreference(params.value, allValues);
if (resolved) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ describe("conversationTurnsToJsonlEntries", () => {
{ type: "text", text: "running" },
]);
expect(conv[0].message.stop_reason).toBeNull();
expect(conv[0].message.model).toBe("claude-opus-4-6");
expect(conv[0].message.model).toBe("claude-opus-4-8");
expect(conv[0].message.id).toMatch(/^msg_01[A-Za-z0-9]{24}$/);

expect(conv[1].type).toBe("assistant");
Expand Down Expand Up @@ -490,13 +490,13 @@ describe("conversationTurnsToJsonlEntries", () => {
{ role: "user", content: [{ type: "text", text: "hi" }] },
{ role: "assistant", content: [{ type: "text", text: "hello" }] },
],
{ sessionId: "s", cwd: "/", model: "claude-opus-4-6", version: "3.0.0" },
{ sessionId: "s", cwd: "/", model: "claude-opus-4-7", version: "3.0.0" },
);

const conv = parseConversationEntries(lines);
expect(conv[0].version).toBe("3.0.0");
expect(conv[1].version).toBe("3.0.0");
expect(conv[1].message.model).toBe("claude-opus-4-6");
expect(conv[1].message.model).toBe("claude-opus-4-7");
});

it("passes gitBranch, slug and permissionMode from config", () => {
Expand Down Expand Up @@ -728,7 +728,7 @@ describe("end-to-end: S3 log entries -> JSONL output", () => {
// All assistant blocks in same turn share message.id
expect(msg1.id).toBe(msg2.id);
expect(msg2.id).toBe(msg3.id);
expect(msg3.model).toBe("claude-opus-4-6");
expect(msg3.model).toBe("claude-opus-4-8");
expect(msg3.id).toMatch(/^msg_01[A-Za-z0-9]{24}$/);

// Verify Bash tool_result entry
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import * as fs from "node:fs/promises";
import * as os from "node:os";
import * as path from "node:path";
import type { ContentBlock } from "@agentclientprotocol/sdk";
import { DEFAULT_GATEWAY_MODEL } from "../../../gateway-models";
import type { PostHogAPIClient } from "../../../posthog-api";
import type { StoredEntry } from "../../../types";
import { supports1MContext } from "./models";
Expand Down Expand Up @@ -312,7 +313,7 @@ export function conversationTurnsToJsonlEntries(
): string[] {
const lines: string[] = [];
let parentUuid: string | null = null;
const model = config.model ?? "claude-opus-4-6";
const model = config.model ?? DEFAULT_GATEWAY_MODEL;
const version = config.version ?? "2.1.63";
const gitBranch = config.gitBranch ?? "";
const slug = config.slug ?? generateSlug();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ const rawModelOptions = {
describe("applyAvailableModelsAllowlist", () => {
it("falls back to the unfiltered gateway list when every allowlisted model is unknown", () => {
expect(
applyAvailableModelsAllowlist(rawModelOptions, ["claude-opus-4-5"]),
applyAvailableModelsAllowlist(rawModelOptions, ["claude-unknown-model"]),
).toEqual(rawModelOptions);
});

Expand Down
36 changes: 23 additions & 13 deletions packages/agent/src/adapters/claude/session/models.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,41 +14,53 @@ describe("toSdkModelId", () => {
expect(toSdkModelId("claude-opus-4-7")).toBe("opus");
expect(toSdkModelId("claude-opus-4-8")).toBe("opus");
expect(toSdkModelId("claude-sonnet-4-6")).toBe("sonnet");
expect(toSdkModelId("claude-haiku-4-5")).toBe("haiku");
});

it("passes unknown IDs through unchanged", () => {
expect(toSdkModelId("custom-model")).toBe("custom-model");
});

it("passes deprecated gateway IDs through unchanged", () => {
expect(toSdkModelId("claude-opus-4-6")).toBe("claude-opus-4-6");
expect(toSdkModelId("claude-sonnet-4-5")).toBe("claude-sonnet-4-5");
expect(toSdkModelId("claude-haiku-4-5")).toBe("claude-haiku-4-5");
});
});

describe("model capability flags", () => {
it("flags 1M context support", () => {
expect(supports1MContext("claude-opus-4-6")).toBe(false);
expect(supports1MContext("claude-opus-4-7")).toBe(true);
expect(supports1MContext("claude-sonnet-4-6")).toBe(true);
expect(supports1MContext("claude-haiku-4-5")).toBe(false);
});

it("flags effort support and xhigh-effort support", () => {
expect(supportsEffort("claude-opus-4-5")).toBe(true);
expect(supportsEffort("claude-opus-4-5")).toBe(false);
expect(supportsEffort("claude-opus-4-6")).toBe(false);
expect(supportsXhighEffort("claude-opus-4-7")).toBe(true);
expect(supportsXhighEffort("claude-opus-4-5")).toBe(false);
expect(supportsXhighEffort("claude-opus-4-6")).toBe(false);
expect(supportsEffort("claude-haiku-4-5")).toBe(false);
});

it("excludes MCP injection only for Haiku", () => {
it("allows MCP injection for supported Claude models", () => {
expect(supportsMcpInjection("claude-opus-4-7")).toBe(true);
expect(supportsMcpInjection("claude-sonnet-4-6")).toBe(true);
});

it("keeps deprecated Haiku sessions excluded from MCP injection", () => {
expect(supportsMcpInjection("claude-haiku-4-5")).toBe(false);
});
});

describe("getEffortOptions", () => {
it("returns null for models without effort support", () => {
expect(getEffortOptions("claude-haiku-4-5")).toBeNull();
expect(getEffortOptions("claude-opus-4-6")).toBeNull();
});

it("returns low/medium/high for effort-supporting models", () => {
const opts = getEffortOptions("claude-opus-4-5");
const opts = getEffortOptions("claude-sonnet-4-6");
expect(opts?.map((o) => o.value)).toEqual(["low", "medium", "high"]);
});

Expand All @@ -68,9 +80,7 @@ describe("resolveModelPreference", () => {
const options = [
{ value: "claude-opus-4-8", name: "Claude Opus 4.8" },
{ value: "claude-opus-4-7", name: "Claude Opus 4.7" },
{ value: "claude-opus-4-6", name: "Claude Opus 4.6" },
{ value: "claude-sonnet-4-6", name: "Claude Sonnet 4.6" },
{ value: "claude-haiku-4-5", name: "Claude Haiku 4.5" },
];

it("returns null for empty preference", () => {
Expand All @@ -85,8 +95,8 @@ describe("resolveModelPreference", () => {
});

it("matches case-insensitively on display name", () => {
expect(resolveModelPreference("claude haiku 4.5", options)).toBe(
"claude-haiku-4-5",
expect(resolveModelPreference("claude sonnet 4.6", options)).toBe(
"claude-sonnet-4-6",
);
});

Expand All @@ -100,11 +110,11 @@ describe("resolveModelPreference", () => {

it("refuses cross-version alias matches", () => {
const optionsWithAlias = [
{ value: "opus", name: "Claude Opus 4.7" },
{ value: "claude-opus-4-6", name: "Claude Opus 4.6" },
{ value: "opus", name: "Claude Opus 4.8" },
{ value: "claude-opus-4-7", name: "Claude Opus 4.7" },
];
expect(resolveModelPreference("claude-opus-4-6", optionsWithAlias)).toBe(
"claude-opus-4-6",
expect(resolveModelPreference("claude-opus-4-7", optionsWithAlias)).toBe(
"claude-opus-4-7",
);
});

Expand Down
14 changes: 3 additions & 11 deletions packages/agent/src/adapters/claude/session/models.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,16 @@
export const DEFAULT_MODEL = "opus";

const GATEWAY_TO_SDK_MODEL: Record<string, string> = {
"claude-opus-4-5": "opus",
"claude-opus-4-6": "opus",
"claude-opus-4-7": "opus",
"claude-opus-4-8": "opus",
"claude-sonnet-4-5": "sonnet",
"claude-sonnet-4-6": "sonnet",
"claude-haiku-4-5": "haiku",
};

/**
 * Translates a gateway model ID into the identifier registered for it in
 * `GATEWAY_TO_SDK_MODEL`.
 *
 * @param modelId - Gateway model ID to look up.
 * @returns The mapped value when the table has an entry for `modelId`;
 *   otherwise `modelId` itself, unchanged.
 */
export function toSdkModelId(modelId: string): string {
  const sdkAlias = GATEWAY_TO_SDK_MODEL[modelId];
  // Only a missing (null/undefined) lookup falls back to the caller's ID.
  return sdkAlias ?? modelId;
}

const MODELS_WITH_1M_CONTEXT = new Set([
"claude-opus-4-6",
"claude-opus-4-7",
"claude-opus-4-8",
"claude-sonnet-4-6",
Expand All @@ -26,15 +21,12 @@ export function supports1MContext(modelId: string): boolean {
}

const MODELS_WITH_EFFORT = new Set([
"claude-opus-4-5",
"claude-opus-4-6",
"claude-opus-4-7",
"claude-opus-4-8",
"claude-sonnet-4-6",
]);

const MODELS_WITH_XHIGH_EFFORT = new Set([
"claude-opus-4-6",
"claude-opus-4-7",
"claude-opus-4-8",
]);
Expand Down Expand Up @@ -78,7 +70,7 @@ export function getEffortOptions(modelId: string): EffortOption[] | null {
}

// Model alias resolution — lets callers use human-friendly aliases like
// "opus" or "sonnet" instead of full model IDs like "claude-opus-4-6".
// "opus" or "sonnet" instead of full model IDs like "claude-opus-4-8".

const MODEL_CONTEXT_HINT_PATTERN = /\[(\d+m)\]$/i;

Expand Down Expand Up @@ -112,8 +104,8 @@ interface ModelOption {
}

// Captures a model family version such as `4-6` or `4.7` so we can keep
// `claude-opus-4-6` from being copied onto the SDK's `opus` alias when that
// alias currently resolves to a different family version (e.g. Opus 4.7).
// `claude-opus-4-7` from being copied onto the SDK's `opus` alias when that
// alias currently resolves to a different family version (e.g. Opus 4.8).
const MODEL_FAMILY_VERSION_PATTERN = /\b(\d+)[-.](\d+)\b/;

function extractModelFamilyVersion(s: string | undefined): string | null {
Expand Down
2 changes: 1 addition & 1 deletion packages/agent/src/adapters/claude/session/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ function buildHooks(
}

/**
* Read-only Haiku-powered exploration agent. Registered under the `ph-explore`
* Read-only exploration agent. Registered under the `ph-explore`
* name rather than `Explore` to work around a Claude Agent SDK bug where
* `options.agents` cannot shadow built-in agent definitions. The
* `createSubagentRewriteHook` rewrites `subagent_type: "Explore"` to
Expand Down
8 changes: 8 additions & 0 deletions packages/agent/src/adapters/codex/models.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { describe, expect, it } from "vitest";
import { formatCodexModelName } from "./models";

// Pins the display name produced for a Codex model ID: the ID is expected
// back in lowercase, with no additional prettifying applied.
describe("formatCodexModelName", () => {
  it("uses raw lowercase model ids", () => {
    const formatted = formatCodexModelName("GPT-5.5");

    expect(formatted).toBe("gpt-5.5");
  });
});
Loading
Loading