diff --git a/apps/web/src/app/api/telegram/webhook/route.test.ts b/apps/web/src/app/api/telegram/webhook/route.test.ts index e8f630a..1d56d4e 100644 --- a/apps/web/src/app/api/telegram/webhook/route.test.ts +++ b/apps/web/src/app/api/telegram/webhook/route.test.ts @@ -31,7 +31,8 @@ const { classifyTurnWithResponsesMock, summarizeConversationMemoryWithResponsesMock, respondToConversationTurnWithResponsesMock, - recoverConfirmedMutationWithResponsesMock + recoverConfirmedMutationWithResponsesMock, + extractSlotsWithResponsesMock } = vi.hoisted(() => ({ editTelegramMessageMock: vi.fn(), sendTelegramMessageMock: vi.fn(), @@ -40,7 +41,8 @@ const { classifyTurnWithResponsesMock: vi.fn(), summarizeConversationMemoryWithResponsesMock: vi.fn(), respondToConversationTurnWithResponsesMock: vi.fn(), - recoverConfirmedMutationWithResponsesMock: vi.fn() + recoverConfirmedMutationWithResponsesMock: vi.fn(), + extractSlotsWithResponsesMock: vi.fn() })); vi.mock("@atlas/integrations", async () => { @@ -84,6 +86,7 @@ vi.mock("@atlas/integrations", async () => { recoverConfirmedMutationWithResponses: recoverConfirmedMutationWithResponsesMock, classifyTurnWithResponses: classifyTurnWithResponsesMock, routeTurnWithResponses: routeTurnWithResponsesMock, + extractSlotsWithResponses: extractSlotsWithResponsesMock, sendTelegramChatAction: sendTelegramChatActionMock, sendTelegramMessage: sendTelegramMessageMock, summarizeConversationMemoryWithResponses: summarizeConversationMemoryWithResponsesMock @@ -283,6 +286,15 @@ beforeEach(async () => { summarizeConversationMemoryWithResponsesMock.mockReset(); respondToConversationTurnWithResponsesMock.mockReset(); recoverConfirmedMutationWithResponsesMock.mockReset(); + extractSlotsWithResponsesMock.mockReset(); + extractSlotsWithResponsesMock.mockResolvedValue({ + time: { hour: 9, minute: 0 }, + day: { kind: "relative", value: "tomorrow" }, + duration: null, + target: null, + confidence: { day: 0.95, time: 0.95 }, + unresolvable: [] + }); routeTurnWithResponsesMock.mockResolvedValue({ route: "mutation", reason: "Direct scheduling request." @@ -715,8 +727,16 @@ describe("telegram webhook route", () => { expect(listInboxItemsForTests()).toHaveLength(1); }); - it("normalizes a Telegram text message and hands it to app services", async () => { + it("normalizes a Telegram text message and routes to clarification when slots are missing", async () => { process.env.TELEGRAM_WEBHOOK_SECRET = "test-webhook-secret"; + extractSlotsWithResponsesMock.mockResolvedValueOnce({ + time: null, + day: null, + duration: null, + target: null, + confidence: {}, + unresolvable: [] + }); const response = await handleTelegramWebhook( buildRequest({ @@ -773,42 +793,23 @@ describe("telegram webhook route", () => { processingStatus: "received", linkedTaskIds: [] }, - processing: { - outcome: "planned" - }, - outboundDelivery: { - status: "edited", - attempts: 1 - } - }); - expect(response.body).toMatchObject({ - outboundDelivery: { - idempotencyKey: expect.any(String), - message: { - message_id: 88 + routing: { + interpretation: { + turnType: "planning_request", + ambiguity: "high" + }, + policy: { + action: "ask_clarification" } + }, + processing: { + outcome: "conversation_replied" } }); expect(listIncomingBotEventsForTests()).toHaveLength(1); - expect(listOutgoingBotEventsForTests()).toHaveLength(1); expect(listInboxItemsForTests()).toHaveLength(1); - expect(listPlannerRunsForTests()).toHaveLength(1); - expect(listTasksForTests()).toHaveLength(1); - expect(listScheduleBlocksForTests()).toHaveLength(1); - expect(sendTelegramChatActionMock).toHaveBeenCalledWith({ - chatId: "999", - action: "typing" - }); - expect(sendTelegramMessageMock).toHaveBeenCalledTimes(1); - expect(sendTelegramMessageMock).toHaveBeenCalledWith({ - chatId: "999", - text: "Checking your schedule" - }); - expect(editTelegramMessageMock).toHaveBeenCalledWith({ - chatId: "999", - messageId: 88, - text: expect.stringContaining("Scheduled 'Review launch checklist'") - }); + expect(listPlannerRunsForTests()).toHaveLength(0); + expect(listTasksForTests()).toHaveLength(0); }); it("preserves mutation behavior when the turn router explicitly returns mutation", async () => { @@ -865,6 +866,14 @@ describe("telegram webhook route", () => { it("does not keep clear scheduling requests in discuss-first mode", async () => { process.env.TELEGRAM_WEBHOOK_SECRET = "test-webhook-secret"; + extractSlotsWithResponsesMock.mockResolvedValueOnce({ + time: { hour: 18, minute: 0 }, + day: { kind: "relative", value: "tomorrow" }, + duration: { minutes: 60 }, + target: null, + confidence: { day: 0.95, time: 0.95, duration: 0.9 }, + unresolvable: [] + }); const response = await handleTelegramWebhook( buildRequest({ diff --git a/apps/web/src/lib/server/decide-turn-policy.ts b/apps/web/src/lib/server/decide-turn-policy.ts index e2b447c..82508d6 100644 --- a/apps/web/src/lib/server/decide-turn-policy.ts +++ b/apps/web/src/lib/server/decide-turn-policy.ts @@ -1,9 +1,10 @@ import { + containsWriteVerb, + deriveAmbiguity, + deriveConsentRequirement, type CommitPolicyOutput, - type ConversationEntity, type TurnAmbiguity, type TurnClassifierOutput, - type TurnInterpretationType, type TurnPolicyDecision, type TurnRoutingInput } from "@atlas/core"; @@ -33,7 +34,12 @@ type StructuredWriteReadiness = export function decideTurnPolicy(input: DecideTurnPolicyInput): TurnPolicyDecision { const { classification, commitResult } = input; const targetEntityId = classification.resolvedEntityIds[0]; - const ambiguity = deriveAmbiguity(classification, commitResult); + const ambiguity = deriveAmbiguity({ + classifierConfidence: classification.confidence, + missingSlots: commitResult.missingSlots, + needsClarification: commitResult.needsClarification, + blockingSlots: [] + }); switch (classification.turnType) { case "informational": @@ -109,17 +115,6 @@ export function decideTurnPolicy(input: DecideTurnPolicyInput): TurnPolicyDecisi } } -function deriveAmbiguity( - classification: TurnClassifierOutput, - commitResult: CommitPolicyOutput -): TurnAmbiguity { - if (classification.confidence < 0.6) return "high"; - if (commitResult.missingSlots.length > 0) return "high"; - if (commitResult.needsClarification.length > 0) return "high"; - if (classification.confidence < 0.8) return "low"; - return "none"; -} - function deriveStructuredWriteReadiness( input: DecideTurnPolicyInput, ambiguity: TurnAmbiguity @@ -152,8 +147,6 @@ function deriveStructuredWriteReadiness( }; } - // Bug 4 fix: if this is a clarification answer and the proposal is already confirmed, - // skip re-presenting and go straight to execution if (classification.turnType === "clarification_answer") { const entityRegistry = input.routingContext.entityRegistry ?? []; const alreadyConfirmed = entityRegistry.some( @@ -171,13 +164,19 @@ function deriveStructuredWriteReadiness( } } - const consentRequirement = deriveConsentRequirement(input); + const consentRequirement = deriveConsentRequirement({ + classification, + entityRegistry: input.routingContext.entityRegistry ?? [], + normalizedText: input.routingContext.normalizedText + }); if (consentRequirement.required) { return { state: "ready_needs_consent", reason: consentRequirement.reason, - ...(consentRequirement.targetProposalId ? { targetProposalId: consentRequirement.targetProposalId } : {}) + ...(consentRequirement.required && "targetProposalId" in consentRequirement + ? { targetProposalId: consentRequirement.targetProposalId } + : {}) }; } @@ -231,145 +230,3 @@ function buildPolicyFromStructuredReadiness( }; } } - -function deriveConsentRequirement(input: DecideTurnPolicyInput) { - const { classification } = input; - // Bug 2 fix: match "presented" status in addition to "active" - const activeProposal = (input.routingContext.entityRegistry ?? []).find( - (entity): entity is Extract => - entity.kind === "proposal_option" && - (entity.status === "active" || entity.status === "presented") && - entity.id === classification.resolvedProposalId && - entity.data.confirmationRequired === true - ); - - if (!activeProposal) { - return { - required: false, - reason: "Deterministic product rules do not require additional consent." - }; - } - - if (!matchesProposalTarget(activeProposal.data.targetEntityId ?? null, classification.resolvedEntityIds)) { - return { - required: false, - reason: "Deterministic product rules do not require additional consent." - }; - } - - const compatibility = deriveProposalCompatibility(input, activeProposal); - - if (!compatibility.compatible) { - return { - required: true, - reason: compatibility.reason - }; - } - - return { - required: true, - reason: "Write request is ready, but deterministic product policy still requires user consent.", - targetProposalId: activeProposal.id - }; -} - -function matchesProposalTarget(targetEntityId: string | null, resolvedEntityIds: string[]) { - if (!targetEntityId || resolvedEntityIds.length === 0) { - return true; - } - - return resolvedEntityIds.includes(targetEntityId); -} - -function deriveProposalCompatibility( - input: DecideTurnPolicyInput, - proposal: Extract -) { - if (input.classification.turnType === "clarification_answer") { - return { - compatible: true, - reason: "Clarification answers may continue the same consent-required proposal." - }; - } - - const currentActionKind = deriveActionKind(input.routingContext.normalizedText, input.classification.turnType); - const proposalActionKind = deriveActionKind( - proposal.data.originatingTurnText ?? proposal.data.replyText, - inferProposalTurnType(proposal) - ); - - if (currentActionKind !== proposalActionKind) { - return { - compatible: false, - reason: "The new turn changes the action type, so it needs fresh consent." - }; - } - - const currentFingerprint = deriveParameterFingerprint(input.routingContext.normalizedText); - const proposalFingerprint = deriveParameterFingerprint(proposal.data.originatingTurnText ?? proposal.data.replyText); - - if (currentFingerprint.explicit && proposalFingerprint.explicit && currentFingerprint.value !== proposalFingerprint.value) { - return { - compatible: false, - reason: "The new turn changes proposal parameters, so it needs fresh consent." - }; - } - - return { - compatible: true, - reason: "The pending proposal still matches the current turn." - }; -} - -function inferProposalTurnType( - proposal: Extract -): TurnInterpretationType { - const source = (proposal.data.originatingTurnText ?? proposal.data.replyText).toLowerCase(); - - if (/\b(move|reschedule|shift|push|pull|complete|archive|cancel|delete|update|change|mark)\b/.test(source)) { - return "edit_request"; - } - - return "planning_request"; -} - -function deriveActionKind(text: string, turnType: TurnInterpretationType) { - if (turnType === "edit_request") { - return "edit"; - } - - if (turnType === "planning_request") { - return "plan"; - } - - const lower = text.toLowerCase(); - - if (/\b(move|reschedule|shift|push|pull|complete|archive|cancel|delete|update|change|mark)\b/.test(lower)) { - return "edit"; - } - - return "plan"; -} - -function deriveParameterFingerprint(text: string) { - const lower = text.toLowerCase(); - const dayTokens = lower.match( - /\b(today|tonight|tomorrow|tmr|monday|tuesday|wednesday|thursday|friday|saturday|sunday|weekend|next week|next month|morning|afternoon|evening)\b/g - ) ?? []; - const timeTokens = - lower.match(/\b\d{1,2}(?::\d{2})?\s?(?:am|pm)?\b|\bnoon\b|\bmidnight\b/g) ?? []; - const durationTokens = - lower.match(/\bfor\s+\d+\s*(?:minutes?|mins?|hours?|hrs?)\b|\b\d+\s*(?:minutes?|mins?|hours?|hrs?)\b/g) ?? []; - const fingerprintParts = [...dayTokens, ...timeTokens, ...durationTokens].map((part) => part.trim()).sort(); - - return { - explicit: fingerprintParts.length > 0, - value: fingerprintParts.join("|") - }; -} - -function containsWriteVerb(text: string) { - return /\b(schedule|plan|move|reschedule|shift|create|add|book|put|mark|complete|archive|cancel|delete|change|update)\b/i.test( - text - ); -} diff --git a/apps/web/src/lib/server/interpret-turn.test.ts b/apps/web/src/lib/server/interpret-turn.test.ts deleted file mode 100644 index 2a89fc1..0000000 --- a/apps/web/src/lib/server/interpret-turn.test.ts +++ /dev/null @@ -1,403 +0,0 @@ -import { describe, expect, it, vi } from "vitest"; - -import type { TurnClassifierOutput } from "@atlas/core"; - -import { interpretTurn } from "./interpret-turn"; - -vi.mock("./llm-classifier", () => ({ - classifyTurn: vi.fn() -})); - -import { classifyTurn } from "./llm-classifier"; - -const mockClassifyTurn = vi.mocked(classifyTurn); - -function mockClassification(output: Partial) { - const full: TurnClassifierOutput = { - turnType: "unknown", - confidence: 0.5, - resolvedEntityIds: [], - ...output - }; - mockClassifyTurn.mockResolvedValue(full); -} - -describe("interpretTurn", () => { - it("exposes only the native input signature", () => { - expect(interpretTurn.length).toBe(1); - }); - - it("treats complete schedule requests as planning requests", async () => { - mockClassification({ - turnType: "planning_request", - confidence: 0.95 - }); - - const result = await interpretTurn({ - rawText: "Schedule gym tomorrow at 6pm for 1 hour", - normalizedText: "Schedule gym tomorrow at 6pm for 1 hour", - recentTurns: [] - }); - - expect(result).toMatchObject({ - turnType: "planning_request", - ambiguity: "none" - }); - expect(result.confidence).toBeGreaterThan(0.9); - }); - - it("treats focused move requests as edit requests", async () => { - mockClassification({ - turnType: "edit_request", - confidence: 0.9, - resolvedEntityIds: ["task-1"] - }); - - const result = await interpretTurn({ - rawText: "Move that to Friday at 3pm", - normalizedText: "Move that to Friday at 3pm", - recentTurns: [], - discourseState: { - focus_entity_id: "task-1", - currently_editable_entity_id: "task-1", - last_user_mentioned_entity_ids: [], - last_presented_items: [], - pending_clarifications: [], - mode: "editing" - } - }); - - expect(result).toMatchObject({ - turnType: "edit_request", - resolvedEntityIds: ["task-1"], - ambiguity: "none" - }); - }); - - it("treats yes with one pending proposal as confirmation", async () => { - // This case is handled by the heuristic pre-filter in classifyTurn, - // so the mock returns confirmation directly - mockClassification({ - turnType: "confirmation", - confidence: 0.97, - resolvedEntityIds: ["task-1"], - resolvedProposalId: "proposal-1" - }); - - const result = await interpretTurn({ - rawText: "Yes", - normalizedText: "Yes", - recentTurns: [], - entityRegistry: [ - { - id: "proposal-1", - conversationId: "conversation-1", - kind: "proposal_option", - label: "Schedule it at 3pm", - status: "active", - createdAt: "2026-03-20T16:00:00.000Z", - updatedAt: "2026-03-20T16:00:00.000Z", - data: { - route: "conversation_then_mutation", - replyText: "Would you like me to schedule it at 3pm?", - confirmationRequired: true, - targetEntityId: "task-1", - mutationInputSource: null, - originatingTurnText: "Schedule dentist reminder" - } - } - ] - }); - - expect(result).toMatchObject({ - turnType: "confirmation", - resolvedProposalId: "proposal-1", - resolvedEntityIds: ["task-1"], - ambiguity: "none" - }); - }); - - it("does not treat yes with multiple proposals as recoverable confirmation", async () => { - mockClassification({ - turnType: "unknown", - confidence: 0.36 - }); - - const result = await interpretTurn({ - rawText: "Yes", - normalizedText: "Yes", - recentTurns: [], - entityRegistry: [ - { - id: "proposal-1", - conversationId: "conversation-1", - kind: "proposal_option", - label: "Schedule it at 3pm", - status: "active", - createdAt: "2026-03-20T16:00:00.000Z", - updatedAt: "2026-03-20T16:00:00.000Z", - data: { - route: "conversation_then_mutation", - replyText: "Would you like me to schedule it at 3pm?", - confirmationRequired: true - } - }, - { - id: "proposal-2", - conversationId: "conversation-1", - kind: "proposal_option", - label: "Schedule it at 4pm", - status: "active", - createdAt: "2026-03-20T16:00:00.000Z", - updatedAt: "2026-03-20T16:00:00.000Z", - data: { - route: "conversation_then_mutation", - replyText: "Would you like me to schedule it at 4pm?", - confirmationRequired: true - } - } - ] - }); - - expect(result).toMatchObject({ - turnType: "unknown", - ambiguity: "high" - }); - }); - - it("does not treat yes with only clarification state as confirmation", async () => { - mockClassification({ - turnType: "unknown", - confidence: 0.32 - }); - - const result = await interpretTurn({ - rawText: "Yes", - normalizedText: "Yes", - recentTurns: [], - entityRegistry: [ - { - id: "clar-1", - conversationId: "conversation-1", - kind: "clarification", - label: "Need a time", - status: "active", - createdAt: "2026-03-20T16:00:00.000Z", - updatedAt: "2026-03-20T16:00:00.000Z", - data: { - prompt: "It sounds like you want me to block out time for planning your Malaysia trip at 3:15 PM for 15 minutes. I can proceed with that now.", - reason: null, - open: true - } - } - ], - discourseState: { - focus_entity_id: "clar-1", - currently_editable_entity_id: null, - last_user_mentioned_entity_ids: [], - last_presented_items: [], - pending_clarifications: [ - { - id: "clar-1", - slot: "unknown", - question: "It sounds like you want me to block out time for planning your Malaysia trip at 3:15 PM for 15 minutes. I can proceed with that now.", - status: "pending", - blocking: true, - createdAt: "2026-03-20T16:00:00.000Z", - createdTurnId: "assistant:1" - } - ], - mode: "clarifying" - } - }); - - expect(result).toMatchObject({ - turnType: "unknown", - ambiguity: "high" - }); - }); - - it("treats short replies to pending clarifications as clarification answers", async () => { - mockClassification({ - turnType: "clarification_answer", - confidence: 0.88 - }); - - const result = await interpretTurn({ - rawText: "Tomorrow afternoon", - normalizedText: "Tomorrow afternoon", - recentTurns: [], - discourseState: { - focus_entity_id: null, - currently_editable_entity_id: null, - last_user_mentioned_entity_ids: [], - last_presented_items: [], - pending_clarifications: [ - { - id: "clar-1", - slot: "time", - question: "What time tomorrow?", - status: "pending", - blocking: true, - createdAt: "2026-03-20T16:00:00.000Z", - createdTurnId: "assistant:1" - } - ], - mode: "clarifying" - } - }); - - expect(result).toMatchObject({ - turnType: "clarification_answer", - missingSlots: ["time"] - }); - }); - - it("clears blocking slots from missingSlots when clarification answer has high confidence", async () => { - mockClassification({ - turnType: "clarification_answer", - confidence: 0.91 - }); - - const result = await interpretTurn({ - rawText: "5pm", - normalizedText: "5pm", - recentTurns: [], - discourseState: { - focus_entity_id: null, - currently_editable_entity_id: null, - last_user_mentioned_entity_ids: [], - last_presented_items: [], - pending_clarifications: [ - { - id: "clar-1", - slot: "time", - question: "What time should I schedule it?", - status: "pending", - blocking: true, - createdAt: "2026-03-20T16:00:00.000Z", - createdTurnId: "assistant:1" - } - ], - mode: "clarifying" - } - }); - - expect(result).toMatchObject({ - turnType: "clarification_answer" - }); - // Blocking slots are still reported as missingSlots (commit policy handles resolution in Phase 4) - expect(result.missingSlots).toEqual(["time"]); - }); - - it("treats punctuated consent as confirmation when one active proposal exists", async () => { - mockClassification({ - turnType: "confirmation", - confidence: 0.97, - resolvedProposalId: "proposal-1" - }); - - const result = await interpretTurn({ - rawText: "Ok,", - normalizedText: "Ok,", - recentTurns: [], - entityRegistry: [ - { - id: "proposal-1", - conversationId: "conversation-1", - kind: "proposal_option", - label: "Schedule it at 5pm", - status: "active", - createdAt: "2026-03-20T16:00:00.000Z", - updatedAt: "2026-03-20T16:00:00.000Z", - data: { - route: "conversation_then_mutation", - replyText: "Would you like me to schedule it at 5pm?", - confirmationRequired: true, - targetEntityId: null, - mutationInputSource: null - } - } - ] - }); - - expect(result).toMatchObject({ - turnType: "confirmation", - resolvedProposalId: "proposal-1", - ambiguity: "none" - }); - }); - - it("treats informational questions as informational", async () => { - mockClassification({ - turnType: "informational", - confidence: 0.93 - }); - - const result = await interpretTurn({ - rawText: "What do I have tomorrow?", - normalizedText: "What do I have tomorrow?", - recentTurns: [] - }); - - expect(result).toMatchObject({ - turnType: "informational", - ambiguity: "none" - }); - }); - - it("treats ambiguous short replies without context as unknown", async () => { - mockClassification({ - turnType: "unknown", - confidence: 0.3 - }); - - const result = await interpretTurn({ - rawText: "Maybe", - normalizedText: "Maybe", - recentTurns: [] - }); - - expect(result).toMatchObject({ - turnType: "unknown", - ambiguity: "high" - }); - }); - - it("marks underspecified write turns with blocking clarification as high ambiguity", async () => { - mockClassification({ - turnType: "edit_request", - confidence: 0.7, - resolvedEntityIds: ["task-1"] - }); - - const result = await interpretTurn({ - rawText: "Move it", - normalizedText: "Move it", - recentTurns: [], - discourseState: { - focus_entity_id: "task-1", - currently_editable_entity_id: "task-1", - last_user_mentioned_entity_ids: [], - last_presented_items: [], - pending_clarifications: [ - { - id: "clar-1", - slot: "time", - question: "What time should I move it to?", - status: "pending", - blocking: true, - createdAt: "2026-03-20T16:00:00.000Z", - createdTurnId: "assistant:1" - } - ], - mode: "clarifying" - } - }); - - expect(result).toMatchObject({ - ambiguity: "high", - missingSlots: ["time"] - }); - }); -}); diff --git a/apps/web/src/lib/server/interpret-turn.ts b/apps/web/src/lib/server/interpret-turn.ts deleted file mode 100644 index 59d9228..0000000 --- a/apps/web/src/lib/server/interpret-turn.ts +++ /dev/null @@ -1,82 +0,0 @@ -import { - createEmptyDiscourseState, - getActivePendingClarifications, - type TurnAmbiguity, - type TurnInterpretation, - type TurnRoutingInput -} from "@atlas/core"; - -import { classifyTurn } from "./llm-classifier"; - -export type InterpretTurnInput = TurnRoutingInput; - -export async function interpretTurn(input: InterpretTurnInput): Promise { - const parsedInput = parseInterpretTurnInput(input); - const discourseState = parsedInput.discourseState ?? createEmptyDiscourseState(); - const activeClarifications = getActivePendingClarifications(discourseState); - const blockingSlots = unique( - activeClarifications.filter((c) => c.blocking).map((c) => c.slot) - ); - - const classification = await classifyTurn({ - normalizedText: parsedInput.normalizedText, - discourseState, - entityRegistry: parsedInput.entityRegistry - }); - - const ambiguity = deriveAmbiguityFromConfidence(classification.confidence, blockingSlots); - - return { - turnType: classification.turnType, - confidence: classification.confidence, - resolvedEntityIds: classification.resolvedEntityIds, - ...(classification.resolvedProposalId ? { resolvedProposalId: classification.resolvedProposalId } : {}), - ambiguity, - ...(ambiguity !== "none" - ? { ambiguityReason: deriveAmbiguityReason(classification.turnType, ambiguity, blockingSlots) } - : {}), - ...(blockingSlots.length > 0 ? { missingSlots: blockingSlots } : {}) - }; -} - -function parseInterpretTurnInput(input: InterpretTurnInput): InterpretTurnInput { - if (!input.rawText.trim() || !input.normalizedText.trim()) { - throw new Error("Turn interpretation input must include non-empty rawText and normalizedText."); - } - - return input; -} - -function deriveAmbiguityFromConfidence(confidence: number, blockingSlots: string[]): TurnAmbiguity { - if (blockingSlots.length > 0) return "high"; - if (confidence < 0.6) return "high"; - if (confidence < 0.8) return "low"; - return "none"; -} - -function deriveAmbiguityReason( - turnType: TurnInterpretation["turnType"], - ambiguity: TurnAmbiguity, - blockingSlots: string[] -): string { - if (blockingSlots.length > 0) { - return "Blocking clarification slots are still open."; - } - - if (ambiguity === "high") { - switch (turnType) { - case "unknown": - return "The turn did not map cleanly to a recognized intent pattern."; - case "clarification_answer": - return "Clarification answer classification has low confidence."; - default: - return "Classification confidence is too low for reliable routing."; - } - } - - return "Classification confidence is moderate."; -} - -function unique(values: string[]) { - return Array.from(new Set(values)); -} diff --git a/apps/web/src/lib/server/turn-router.test.ts b/apps/web/src/lib/server/turn-router.test.ts index 9c3cb80..06d9495 100644 --- a/apps/web/src/lib/server/turn-router.test.ts +++ b/apps/web/src/lib/server/turn-router.test.ts @@ -17,8 +17,10 @@ vi.mock("./slot-extractor", () => ({ })); import { classifyTurn } from "./llm-classifier"; +import { extractSlots } from "./slot-extractor"; const mockClassifyTurn = vi.mocked(classifyTurn); +const mockExtractSlots = vi.mocked(extractSlots); function mockClassification(output: Partial) { const full: TurnClassifierOutput = { @@ -36,6 +38,11 @@ describe("turn router", () => { turnType: "planning_request", confidence: 0.95 }); + mockExtractSlots.mockResolvedValueOnce({ + extractedValues: { day: "tomorrow", time: "18:00" }, + confidence: { day: 0.95, time: 0.95 }, + unresolvable: [] + }); const result = await routeMessageTurn({ rawText: "Schedule gym tomorrow at 6pm for 1 hour", diff --git a/apps/web/src/lib/server/turn-router.ts b/apps/web/src/lib/server/turn-router.ts index 947b189..c0ff967 100644 --- a/apps/web/src/lib/server/turn-router.ts +++ b/apps/web/src/lib/server/turn-router.ts @@ -7,6 +7,7 @@ import { type DiscourseState, type RoutedTurn, type SlotKey, + deriveAmbiguity, type TurnAmbiguity, type TurnClassifierOutput, type TurnInterpretation, @@ -14,7 +15,8 @@ import { type TurnRoute, type TurnRoutingInput, type WriteContract, - type CommitPolicyOutput + type CommitPolicyOutput, + SLOT_COMMITTING_TURN_TYPES } from "@atlas/core"; import { decideTurnPolicy } from "./decide-turn-policy"; @@ -24,14 +26,8 @@ import { extractSlots } from "./slot-extractor"; export type TurnRouterInput = TurnRoutingInput; export type TurnRouterResult = RoutedTurn; -const SLOT_COMMITTING_TURN_TYPES = new Set([ - "clarification_answer", - "planning_request", - "edit_request" -]); - const DEFAULT_CONTRACT: WriteContract = { - requiredSlots: [], + requiredSlots: ["day", "time"], intentKind: "plan" }; @@ -50,13 +46,10 @@ export async function routeMessageTurn(input: TurnRouterInput): Promise resolved[slot] === undefined); +function derivePendingSlots(contract: WriteContract, resolvedSlots: Record): SlotKey[] { + return contract.requiredSlots.filter((slot) => resolvedSlots[slot] === undefined); } function deriveConversationContext(recentTurns: ConversationTurn[]): string { @@ -114,7 +104,12 @@ function buildInterpretation( ...commitResult.needsClarification, ...blockingSlots ]); - const ambiguity = deriveAmbiguity(classification, commitResult, blockingSlots); + const ambiguity = deriveAmbiguity({ + classifierConfidence: classification.confidence, + missingSlots: commitResult.missingSlots, + needsClarification: commitResult.needsClarification, + blockingSlots + }); return { turnType: classification.turnType, @@ -129,19 +124,6 @@ function buildInterpretation( }; } -function deriveAmbiguity( - classification: TurnClassifierOutput, - commitResult: CommitPolicyOutput, - blockingSlots: string[] -): TurnAmbiguity { - if (blockingSlots.length > 0) return "high"; - if (classification.confidence < 0.6) return "high"; - if (commitResult.missingSlots.length > 0) return "high"; - if (commitResult.needsClarification.length > 0) return "high"; - if (classification.confidence < 0.8) return "low"; - return "none"; -} - function deriveAmbiguityReason( turnType: TurnInterpretation["turnType"], ambiguity: TurnAmbiguity, diff --git a/docs/current-work.md b/docs/current-work.md index 17e784c..bdb5964 100644 --- a/docs/current-work.md +++ b/docs/current-work.md @@ -266,20 +266,49 @@ DB rollout hardening is now part of the active release path: `packages/db` owns ## Next Handoff -- Next implementation task: stabilize the planner contract and prompt iteration loop now that the live eval harness is in place. -- Start by using `pnpm eval:planner` and `pnpm eval:all` as the gating loop for any planner or router change. -- Keep the next slice focused: - - eliminate remaining planner output flakiness around `scheduleConstraint` shape so live evals do not intermittently fail on otherwise valid scheduling intent - - add deterministic contract coverage for the planner normalization path that converts the API-facing response shape into the strict runtime schema - - verify the generated `*.prompt-improvement.md` artifacts are useful in practice on at least one real failing eval case, and refine the brief format if it is too noisy or too narrow - - keep `manual-eval-report.json` as the canonical full-suite artifact and suite-specific report/brief files as targeted iteration tools only -- After that slice, return to the locked-down Google Calendar production-readiness work: +PR #53 (`codex/turn-routing-refactor`) implements the 3-layer turn routing pipeline (classify → extract → commit). The 4 multi-turn loop bugs are fixed. A code review surfaced the following issues to address before merge: + +### Medium — fix before merge + +1. **`interpretTurn.ts` is now a divergent partial pipeline.** `turn-router.ts` runs the full 3-layer pipeline and builds `TurnInterpretation` via `buildInterpretation()`. `interpretTurn.ts` does its own classification + ambiguity derivation without slot extraction or commit policy. The two files derive ambiguity differently — `turn-router.ts` considers `commitResult.missingSlots` and `needsClarification`, `interpretTurn.ts` does not. Either consolidate into one entry point or remove `interpretTurn` if nothing calls it directly anymore. + +2. **`SLOT_COMMITTING_TURN_TYPES` is duplicated** in `packages/core/src/commit-policy.ts:20` and `apps/web/src/lib/server/turn-router.ts:27`. Export from one location (core) and import in the other. If they diverge, the pipeline gate breaks silently. + +3. **No hour/minute range validation in `slot-normalizer.ts`.** The zod schema validates `z.number().int()` but not 0–23 for hour or 0–59 for minute. If the LLM returns `{ hour: 25, minute: 70 }`, it becomes `"25:70"` — an invalid time committed as a resolved slot. Add range constraints to the schema or a guard in the normalizer. + +4. **Business logic growing in `decide-turn-policy.ts`** (`deriveConsentRequirement`, `deriveProposalCompatibility`, `deriveParameterFingerprint`, `deriveActionKind`) — these are product-level decision functions that should live in `packages/core` per the architecture rules. The file grew from ~70 to ~375 lines. + +5. **Slot extraction is gated on `pending_write_contract` being truthy** (`turn-router.ts:53-55`), not just on turn type. The first planning request in a conversation (before any contract is set) skips extraction entirely. `DEFAULT_CONTRACT` is defined but only used for `applyCommitPolicy`. This may cause the initial "schedule gym tomorrow at 6pm" to get no extracted slots. Decide whether this is intentional or whether extraction should run against `DEFAULT_CONTRACT` when no contract exists. + +### Low — address in follow-up + +6. **`resolved_slots` is `.optional()` in the discourse state schema** but `createEmptyDiscourseState` always initializes it to `{}`. Defensive `?? {}` fallbacks are scattered throughout. Make it required in the schema. + +7. **Nullable confidence types in schema vs non-nullable in policy.** `slotConfidenceSchema` uses `.nullable().optional()` per slot but `CommitPolicyInput` types confidence as `Partial>`. The `compactConfidence` bridge function handles the mismatch, but the schema should be non-nullable to match. + +8. **`TurnTrace` and `_provenance` from the spec are not implemented.** The observability and provenance section calls for fire-and-forget turn traces and a provenance map on `ResolvedSlots`. These may be intentionally deferred but should be tracked. + +9. **No integration test for multi-turn slot accumulation.** Commit policy tests verify slot preservation in isolation, but no test exercises the full path where `committedSlots` are persisted through `conversation-state.ts` and used as `priorResolvedSlots` in a subsequent turn. + +10. **`deriveParameterFingerprint` regex** (`\b\d{1,2}(?::\d{2})?\s?(?:am|pm)?\b`) matches bare numbers like "3" in any context, which may trigger unnecessary re-consent in proposal compatibility checks. + +### Recommended merge sequence + +1. Fix items 1–5 on the existing branch +2. Run `pnpm typecheck && pnpm --filter @atlas/core test && pnpm --filter @atlas/web test` +3. Merge PR #53 +4. Open follow-up issue for items 6–10 + +### After merge — next priorities + +- Stabilize the planner contract and prompt iteration loop using `pnpm eval:planner` and `pnpm eval:all` as the gating loop +- Return to locked-down Google Calendar production-readiness work: - validate the Telegram-to-browser Google link handoff in a real deployed environment - finish disconnect/revocation UX and operator visibility for linked accounts - - decide and implement retention jobs for `bot_events` and `planner_runs` rather than leaving them as undocumented indefinite operational history + - decide and implement retention jobs for `bot_events` and `planner_runs` - add abuse controls and rate limiting around the remaining public surfaces - - refactor `telegram-webhook.ts` so pre-ingress Google-link gating is a small app-layer helper instead of inline policy inside the main webhook function - - split `google-calendar.ts` into narrower app services rather than keeping link flow, runtime adapter resolution, token refresh, and reconciliation in one module + - refactor `telegram-webhook.ts` so pre-ingress Google-link gating is a small app-layer helper + - split `google-calendar.ts` into narrower app services - separate pre-ingress lazy-link replies from inbox-linked follow-up delivery semantics ### Locked runtime semantics diff --git a/packages/core/src/ambiguity.ts b/packages/core/src/ambiguity.ts new file mode 100644 index 0000000..ba68857 --- /dev/null +++ b/packages/core/src/ambiguity.ts @@ -0,0 +1,17 @@ +import type { TurnAmbiguity } from "./index"; + +export type DeriveAmbiguityInput = { + classifierConfidence: number; + missingSlots: string[]; + needsClarification: string[]; + blockingSlots: string[]; +}; + +export function deriveAmbiguity(input: DeriveAmbiguityInput): TurnAmbiguity { + if (input.blockingSlots.length > 0) return "high"; + if (input.classifierConfidence < 0.6) return "high"; + if (input.missingSlots.length > 0) return "high"; + if (input.needsClarification.length > 0) return "high"; + if (input.classifierConfidence < 0.8) return "low"; + return "none"; +} \ No newline at end of file diff --git a/packages/core/src/commit-policy.ts b/packages/core/src/commit-policy.ts index c50f092..f9250cd 100644 --- a/packages/core/src/commit-policy.ts +++ b/packages/core/src/commit-policy.ts @@ -16,7 +16,7 @@ export type CommitPolicyOutput = { missingSlots: SlotKey[]; }; -const SLOT_COMMITTING_TURN_TYPES = new Set([ +export const SLOT_COMMITTING_TURN_TYPES = new Set([ "clarification_answer", "planning_request", "edit_request" diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index fb07974..3880a68 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -7,8 +7,10 @@ import { resolvedSlotsSchema } from "./discourse-state"; +export * from "./ambiguity"; export * from "./commit-policy"; export * from "./discourse-state"; +export * from "./proposal-rules"; export * from "./slot-normalizer"; export * from "./telegram"; @@ -850,12 +852,12 @@ const slotConfidenceSchema = z.object({ }); export const rawSlotExtractionSchema = z.object({ - time: z.object({ hour: z.number().int(), minute: z.number().int() }).nullable(), + time: z.object({ hour: z.number().int().min(0).max(23), minute: z.number().int().min(0).max(59) }).nullable(), day: z.object({ kind: z.enum(["relative", "weekday", "absolute"]), value: z.string() }).nullable(), - duration: z.object({ minutes: z.number().int() }).nullable(), + duration: z.object({ minutes: z.number().int().min(0) }).nullable(), target: z.object({ entityId: z.string() }).nullable(), confidence: slotConfidenceSchema, unresolvable: z.array(slotKeySchema) diff --git a/packages/core/src/proposal-rules.ts b/packages/core/src/proposal-rules.ts new file mode 100644 index 0000000..5de994c --- /dev/null +++ b/packages/core/src/proposal-rules.ts @@ -0,0 +1,159 @@ +import type { + ConversationEntity, + TurnClassifierOutput, + TurnInterpretationType +} from "./index"; + +type ProposalOption = Extract; + +export type ConsentRequirementInput = { + classification: TurnClassifierOutput; + entityRegistry: ConversationEntity[]; + normalizedText: string; +}; + +export function deriveConsentRequirement(input: ConsentRequirementInput) { + const { classification } = input; + const activeProposal = input.entityRegistry.find( + (entity): entity is ProposalOption => + entity.kind === "proposal_option" && + (entity.status === "active" || entity.status === "presented") && + entity.id === classification.resolvedProposalId && + entity.data.confirmationRequired === true + ); + + if (!activeProposal) { + return { + required: false as const, + reason: "Deterministic product rules do not require additional consent." + }; + } + + if (!matchesProposalTarget(activeProposal.data.targetEntityId ?? null, classification.resolvedEntityIds)) { + return { + required: false as const, + reason: "Deterministic product rules do not require additional consent." + }; + } + + const compatibility = deriveProposalCompatibility( + classification.turnType, + input.normalizedText, + activeProposal + ); + + if (!compatibility.compatible) { + return { + required: true as const, + reason: compatibility.reason + }; + } + + return { + required: true as const, + reason: "Write request is ready, but deterministic product policy still requires user consent.", + targetProposalId: activeProposal.id + }; +} + +export function matchesProposalTarget(targetEntityId: string | null, resolvedEntityIds: string[]) { + if (!targetEntityId || resolvedEntityIds.length === 0) { + return true; + } + + return resolvedEntityIds.includes(targetEntityId); +} + +export function deriveProposalCompatibility( + turnType: TurnInterpretationType, + normalizedText: string, + proposal: ProposalOption +) { + if (turnType === "clarification_answer") { + return { + compatible: true, + reason: "Clarification answers may continue the same consent-required proposal." + }; + } + + const currentActionKind = deriveActionKind(normalizedText, turnType); + const proposalActionKind = deriveActionKind( + proposal.data.originatingTurnText ?? proposal.data.replyText, + inferProposalTurnType(proposal) + ); + + if (currentActionKind !== proposalActionKind) { + return { + compatible: false, + reason: "The new turn changes the action type, so it needs fresh consent." + }; + } + + const currentFingerprint = deriveParameterFingerprint(normalizedText); + const proposalFingerprint = deriveParameterFingerprint(proposal.data.originatingTurnText ?? proposal.data.replyText); + + if (currentFingerprint.explicit && proposalFingerprint.explicit && currentFingerprint.value !== proposalFingerprint.value) { + return { + compatible: false, + reason: "The new turn changes proposal parameters, so it needs fresh consent." + }; + } + + return { + compatible: true, + reason: "The pending proposal still matches the current turn." + }; +} + +export function inferProposalTurnType( + proposal: ProposalOption +): TurnInterpretationType { + const source = (proposal.data.originatingTurnText ?? proposal.data.replyText).toLowerCase(); + + if (/\b(move|reschedule|shift|push|pull|complete|archive|cancel|delete|update|change|mark)\b/.test(source)) { + return "edit_request"; + } + + return "planning_request"; +} + +export function deriveActionKind(text: string, turnType: TurnInterpretationType) { + if (turnType === "edit_request") { + return "edit"; + } + + if (turnType === "planning_request") { + return "plan"; + } + + const lower = text.toLowerCase(); + + if (/\b(move|reschedule|shift|push|pull|complete|archive|cancel|delete|update|change|mark)\b/.test(lower)) { + return "edit"; + } + + return "plan"; +} + +export function deriveParameterFingerprint(text: string) { + const lower = text.toLowerCase(); + const dayTokens = lower.match( + /\b(today|tonight|tomorrow|tmr|monday|tuesday|wednesday|thursday|friday|saturday|sunday|weekend|next week|next month|morning|afternoon|evening)\b/g + ) ?? []; + const timeTokens = + lower.match(/\b\d{1,2}(?::\d{2})?\s?(?:am|pm)?\b|\bnoon\b|\bmidnight\b/g) ?? []; + const durationTokens = + lower.match(/\bfor\s+\d+\s*(?:minutes?|mins?|hours?|hrs?)\b|\b\d+\s*(?:minutes?|mins?|hours?|hrs?)\b/g) ?? []; + const fingerprintParts = [...dayTokens, ...timeTokens, ...durationTokens].map((part) => part.trim()).sort(); + + return { + explicit: fingerprintParts.length > 0, + value: fingerprintParts.join("|") + }; +} + +export function containsWriteVerb(text: string) { + return /\b(schedule|plan|move|reschedule|shift|create|add|book|put|mark|complete|archive|cancel|delete|change|update)\b/i.test( + text + ); +} diff --git a/packages/core/src/slot-normalizer.test.ts b/packages/core/src/slot-normalizer.test.ts index b398d48..c10d4d3 100644 --- a/packages/core/src/slot-normalizer.test.ts +++ b/packages/core/src/slot-normalizer.test.ts @@ -46,6 +46,22 @@ describe("normalizeRawExtraction", () => { }); }); + it("drops time when hour is out of range", () => { + expect(normalizeRawExtraction(extraction({ time: { hour: 25, minute: 0 } }))).toEqual({}); + }); + + it("drops time when minute is out of range", () => { + expect(normalizeRawExtraction(extraction({ time: { hour: 10, minute: 70 } }))).toEqual({}); + }); + + it("drops time when hour is negative", () => { + expect(normalizeRawExtraction(extraction({ time: { hour: -1, minute: 0 } }))).toEqual({}); + }); + + it("drops duration when minutes is negative", () => { + expect(normalizeRawExtraction(extraction({ duration: { minutes: -30 } }))).toEqual({}); + }); + it("returns empty object when no slots present", () => { expect(normalizeRawExtraction(extraction({}))).toEqual({}); }); diff --git a/packages/core/src/slot-normalizer.ts b/packages/core/src/slot-normalizer.ts index d7f1c4c..7720843 100644 --- a/packages/core/src/slot-normalizer.ts +++ b/packages/core/src/slot-normalizer.ts @@ -3,7 +3,7 @@ import type { RawSlotExtraction, ResolvedSlots } from "./index"; export function normalizeRawExtraction(raw: RawSlotExtraction): Partial { const result: Partial = {}; - if (raw.time) { + if (raw.time && raw.time.hour >= 0 && raw.time.hour <= 23 && raw.time.minute >= 0 && raw.time.minute <= 59) { result.time = `${String(raw.time.hour).padStart(2, "0")}:${String(raw.time.minute).padStart(2, "0")}`; } @@ -11,7 +11,7 @@ export function normalizeRawExtraction(raw: RawSlotExtraction): Partial= 0) { result.duration = raw.duration.minutes; }