diff --git a/apps/website/app/api/embeddings/openai/small/route.ts b/apps/website/app/api/embeddings/route.ts similarity index 55% rename from apps/website/app/api/embeddings/openai/small/route.ts rename to apps/website/app/api/embeddings/route.ts index 11da80051..faea18b80 100644 --- a/apps/website/app/api/embeddings/openai/small/route.ts +++ b/apps/website/app/api/embeddings/route.ts @@ -1,47 +1,25 @@ import { NextRequest, NextResponse } from "next/server"; -import OpenAI from "openai"; import cors from "~/utils/llm/cors"; - -const apiKey = process.env.OPENAI_API_KEY; - -if (!apiKey) { - console.error( - "Missing OPENAI_API_KEY environment variable. The embeddings API will not function.", - ); -} - -const openai = apiKey ? new OpenAI({ apiKey }) : null; +import { genericEmbedding } from "~/utils/supabase/apiUtils"; type RequestBody = { input: string | string[]; model?: string; dimensions?: number; + provider?: string; encoding_format?: "float" | "base64"; }; -const OPENAI_REQUEST_TIMEOUT_MS = 30000; - export const POST = async (req: NextRequest): Promise => { let response: NextResponse; - if (!apiKey) { - response = NextResponse.json( - { - error: "Server configuration error.", - details: "Embeddings service is not configured.", - }, - { status: 500 }, - ); - return cors(req, response) as NextResponse; - } - try { const body: RequestBody = await req.json(); const { input, model = "text-embedding-3-small", dimensions, - encoding_format = "float", + provider = "openai", } = body; if (!input || (Array.isArray(input) && input.length === 0)) { @@ -52,27 +30,20 @@ export const POST = async (req: NextRequest): Promise => { return cors(req, response) as NextResponse; } - const options: OpenAI.EmbeddingCreateParams = { - model, + const embeddings = await genericEmbedding( input, + model, + provider, dimensions, - encoding_format, - }; - - const embeddingsPromise = openai!.embeddings.create(options); - const timeoutPromise = new Promise((_, reject) => - setTimeout( - () => 
// Source file: apps/website/app/api/supabase/rpc/search-content/route.ts
import { createClient } from "~/utils/supabase/server";
import { NextResponse, NextRequest } from "next/server";
import type { SupabaseClient } from "@supabase/supabase-js";
import cors from "~/utils/llm/cors";
import type { Database } from "@repo/database/types.gen.ts";
import { get_known_embedding } from "~/utils/supabase/dbUtils";
import { genericEmbedding } from "~/utils/supabase/apiUtils";

/**
 * JSON payload accepted by this route.
 *
 * Exactly one of `queryEmbedding` / `queryText` must be supplied, and
 * `currentDocumentId` and `subsetPlatformIds` are mutually exclusive.
 */
type RequestBody = {
  currentDocumentId?: number;
  queryEmbedding?: number[];
  queryText?: string;
  subsetPlatformIds?: string[];
  provider?: string;
  model?: string;
  dimensions?: number;
  limit?: number;
  threshold?: number;
};

type RpcResponseItem =
  Database["public"]["Functions"]["match_content_embeddings"]["Returns"];

/**
 * Outcome of a search: rows on success, otherwise a message together with
 * the HTTP status the handler should report for it.
 */
type MatchResult = {
  data?: RpcResponseItem;
  error?: string;
  status?: number;
};

/** Where the search vector comes from, or why the request is unusable. */
type QuerySource =
  | { kind: "embedding"; embedding: number[] }
  | { kind: "text"; text: string }
  | { kind: "invalid"; error: string };

/** Builds a failure caused by the caller's input (reported as HTTP 400). */
const clientError = (error: string): MatchResult => ({ error, status: 400 });

/**
 * Decides whether to use the caller's vector or to embed the caller's text.
 * A missing, non-array or empty `queryEmbedding` counts as "not supplied".
 */
const resolveQuerySource = (
  queryEmbedding: number[] | undefined,
  queryText: string | undefined,
  dimensions: number,
): QuerySource => {
  if (Array.isArray(queryEmbedding) && queryEmbedding.length > 0) {
    if (queryText) {
      // TODO: Allow hybrid search
      return {
        kind: "invalid",
        error: "Do not provide both query text and embedding",
      };
    }
    if (queryEmbedding.length !== dimensions) {
      return { kind: "invalid", error: "Wrong dimensionality" };
    }
    return { kind: "embedding", embedding: queryEmbedding };
  }
  if (!queryText) {
    return { kind: "invalid", error: "Provide either query text or embedding" };
  }
  return { kind: "text", text: queryText };
};

/**
 * Validates a search request, obtains the query vector and runs the matching
 * Supabase RPC.
 *
 * All cheap validation happens before any embedding is generated, so a
 * malformed request (or an empty `subsetPlatformIds`) never triggers an
 * embedding call.
 *
 * @param supabase - Client used to invoke the RPC.
 * @param query - Parsed request body; omitted fields fall back to the
 *   defaults in the destructuring below.
 * @returns `data` on success; otherwise `error` plus `status` (400 for
 *   problems with the request, 500 for embedding or RPC failures).
 */
async function callMatchEmbeddingsRpc(
  supabase: SupabaseClient,
  query: RequestBody,
): Promise<MatchResult> {
  const {
    currentDocumentId,
    queryEmbedding,
    queryText,
    subsetPlatformIds,
    provider = "openai",
    model = "text-embedding-3-small",
    dimensions = 1536,
    limit = 20,
    threshold = 0.8,
  } = query;

  // The lookup result itself is not needed here; it only proves that this
  // provider/model/dimensions combination is a known one.
  if (get_known_embedding(model, dimensions, provider) === undefined) {
    return clientError("Invalid model information");
  }

  const source = resolveQuerySource(queryEmbedding, queryText, dimensions);
  if (source.kind === "invalid") {
    return clientError(source.error);
  }

  if (subsetPlatformIds !== undefined) {
    if (currentDocumentId !== undefined) {
      return clientError(
        "Do not define both currentDocumentId and subsetPlatformIds",
      );
    }
    if (!Array.isArray(subsetPlatformIds)) {
      console.log(
        "[API Route] callMatchEmbeddingsRpc: Invalid subsetPlatformIds.",
      );
      return clientError("Invalid subsetPlatformIds");
    }
    // Nothing can match an empty subset, so skip both the embedding call and
    // the RPC.
    if (subsetPlatformIds.length === 0) {
      console.log(
        "[API Route] callMatchEmbeddingsRpc: subsetPlatformIds is empty, returning empty array without calling RPC.",
      );
      return { data: [] };
    }
  }

  let embedding: number[];
  if (source.kind === "embedding") {
    embedding = source.embedding;
  } else {
    const generated = await genericEmbedding(
      source.text,
      model,
      provider,
      dimensions,
    );
    // A single text input must yield one flat vector, not a list of vectors.
    if (
      generated === undefined ||
      generated.length === 0 ||
      Array.isArray(generated[0])
    ) {
      return {
        error: "Could not get the embedding for this text",
        status: 500,
      };
    }
    embedding = generated as number[];
  }

  if (subsetPlatformIds !== undefined) {
    // NOTE(review): `limit` and `threshold` are not forwarded to this RPC;
    // confirm whether match_embeddings_for_subset_nodes accepts them.
    const response = await supabase.rpc("match_embeddings_for_subset_nodes", {
      p_query_embedding: JSON.stringify(embedding),
      p_subset_roam_uids: subsetPlatformIds,
    });
    return {
      data: response.data || undefined,
      error: response.error?.message,
      status: response.error ? 500 : undefined,
    };
  }

  const response = await supabase.rpc("match_content_embeddings", {
    current_document_id: currentDocumentId,
    match_count: limit,
    match_threshold: threshold,
    query_embedding: JSON.stringify(embedding),
  });
  return {
    data: response.data || undefined,
    error: response.error?.message,
    status: response.error ? 500 : undefined,
  };
}

/**
 * POST handler: parses the JSON body, runs the embedding search and returns
 * the matches as JSON with CORS headers applied.
 *
 * Responds 400 for malformed JSON, a non-object body or invalid parameters,
 * and 500 for embedding, RPC or unexpected failures. Field types inside the
 * body are not validated beyond what callMatchEmbeddingsRpc checks.
 */
export async function POST(request: NextRequest): Promise<NextResponse> {
  console.log(
    "[API Route] POST /api/supabase/rpc/search-content: Request received",
  );
  let response: NextResponse;

  try {
    // Created inside the try so that a client-setup failure still produces a
    // JSON error response with CORS headers.
    const supabase = await createClient();
    const body: unknown = await request.json();
    console.log("[API Route] POST: Parsed request body:", body);

    if (typeof body !== "object" || body === null || Array.isArray(body)) {
      // Valid JSON that is not an object (null, a number, an array, ...)
      // cannot carry the expected fields.
      response = NextResponse.json(
        { error: "Invalid request body: expected a JSON object" },
        { status: 400 },
      );
    } else {
      console.log("[API Route] POST: Calling callMatchEmbeddingsRpc.");
      const { data, error, status } = await callMatchEmbeddingsRpc(
        supabase,
        body as RequestBody,
      );
      console.log("[API Route] POST: Received from callMatchEmbeddingsRpc:", {
        dataLength: data?.length,
        error,
      });

      if (error) {
        console.error(
          "[API Route] POST: Error after callMatchEmbeddingsRpc:",
          error,
        );
        response = NextResponse.json({ error }, { status: status ?? 500 });
      } else {
        console.log(
          "[API Route] POST: Successfully processed request. Sending data back. Data length:",
          data?.length,
        );
        response = NextResponse.json(data, { status: 200 });
      }
    }
  } catch (e: unknown) {
    const message = e instanceof Error ? e.message : String(e);
    const stack = e instanceof Error ? e.stack : undefined;
    console.error(
      "[API Route] POST: Exception in POST handler:",
      message,
      stack,
    );
    if (e instanceof SyntaxError && message.toLowerCase().includes("json")) {
      response = NextResponse.json(
        { error: "Invalid JSON in request body" },
        { status: 400 },
      );
    } else {
      response = NextResponse.json(
        { error: "An unexpected error occurred processing your request." },
        { status: 500 },
      );
    }
  }
  console.log(
    "[API Route] POST: Sending final response with status:",
    response.status,
  );
  return cors(request, response) as NextResponse;
}

/** CORS preflight handler: replies 204 with the CORS headers applied. */
export async function OPTIONS(request: NextRequest): Promise<NextResponse> {
  const response = new NextResponse(null, { status: 204 });
  return cors(request, response) as NextResponse;
}
/** Maximum time to wait for OpenAI before giving up, in milliseconds. */
const OPENAI_REQUEST_TIMEOUT_MS = 30000;

/**
 * Requests embeddings from OpenAI, rejecting if no answer arrives within
 * OPENAI_REQUEST_TIMEOUT_MS.
 *
 * @param input - One text or a list of texts to embed.
 * @param model - Model name passed through to the embeddings request.
 * @param dimensions - Optional output size; forwarded only when truthy, so
 *   `undefined` and `0` both leave the choice to the API.
 * @returns One vector for a string input (`undefined` if the response holds
 *   no rows), or one vector per entry for an array input.
 * @throws Error when the client was not initialised or the request times
 *   out; errors raised by the client itself propagate unchanged.
 */
const openaiEmbedding = async (
  input: string | string[],
  model: string,
  dimensions?: number,
): Promise<number[] | number[][] | undefined> => {
  if (!openai) {
    throw new Error("OpenAI client not initialized. Check OPENAI_API_KEY.");
  }

  const options: OpenAI.EmbeddingCreateParams = {
    model,
    input,
    ...(dimensions ? { dimensions } : {}),
  };

  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeoutPromise = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error("OpenAI API request timeout")),
      OPENAI_REQUEST_TIMEOUT_MS,
    );
  });

  try {
    // The race only stops waiting; it does not cancel the in-flight request.
    const response = await Promise.race([
      openai.embeddings.create(options),
      timeoutPromise,
    ]);
    const embeddings = response.data.map((d) => d.embedding);
    return Array.isArray(input) ? embeddings : embeddings[0];
  } finally {
    // Without this the timer stays pending for the full timeout after every
    // call, keeping the event loop busy for no reason.
    clearTimeout(timer);
  }
};

/**
 * Provider-agnostic entry point for computing embeddings.
 *
 * @param input - One text or a list of texts to embed.
 * @param model - Provider-specific model name.
 * @param provider - Embedding provider; an empty value means "openai".
 * @param dimensions - Optional output size, passed to the provider.
 * @returns The embedding(s), or `undefined` when the provider is not
 *   supported (only "openai" is handled here).
 */
export const genericEmbedding = async (
  input: string | string[],
  model: string,
  provider: string,
  dimensions?: number,
): Promise<number[] | number[][] | undefined> => {
  const effectiveProvider = provider || "openai";
  if (effectiveProvider === "openai") {
    return openaiEmbedding(input, model, dimensions);
  }
  return undefined;
};
/**
 * Maps a `${provider}-${model}-${dimensions}` key to the matching key of
 * KNOWN_EMBEDDING_TABLES.
 */
const KNOWN_EMBEDDINGS: { [key: string]: string } = {
  "openai-text-embedding-3-small-1536": "openai_text_embedding_3_small_1536",
};

/**
 * Looks up the embedding table registered for a provider/model/dimensions
 * combination.
 *
 * @param model - Embedding model name, e.g. "text-embedding-3-small".
 * @param dimensions - Vector size of the embedding.
 * @param provider - Embedding provider; an empty value means "openai".
 * @returns The table description, or `undefined` when the combination is
 *   not registered.
 */
export function get_known_embedding(
  model: string,
  dimensions: number,
  provider: string,
): EmbeddingTableData | undefined {
  const embeddingName =
    KNOWN_EMBEDDINGS[`${provider || "openai"}-${model}-${dimensions}`];
  return embeddingName === undefined
    ? undefined
    : KNOWN_EMBEDDING_TABLES[embeddingName];
}

// Matches text of the form "Key (<columns>)=(<values>) already exists." and
// captures the column list and the value list. The second group must be a
// negated class ([^)]): with the caret escaped it only accepted literal "^"
// and ")" characters, so ordinary values never matched.
const UNIQUE_KEY_RE = /^Key \(([^)]+)\)=\(([^)]+)\) already exists\.$/;
// Captures the constraint name from a duplicate-key violation message.
const UNIQUE_INDEX_RE =
  /duplicate key value violates unique constraint "(\w+)"/;