From 37aa7e01d304ca54803556f2c68d101dcc050c01 Mon Sep 17 00:00:00 2001 From: sid597 Date: Sun, 17 Aug 2025 16:04:26 +0530 Subject: [PATCH 01/15] sync --- apps/roam/src/utils/cleanupOrphanedNodes.ts | 332 +++++++++++++-- .../roam/src/utils/fetchEmbeddingsForNodes.ts | 27 +- .../src/utils/getAllDiscourseNodesSince.ts | 177 ++++++-- apps/roam/src/utils/syncDgNodesToSupabase.ts | 397 ++++++++++++++++++ .../upsertNodesAsContentWithEmbeddings.ts | 168 ++++++++ 5 files changed, 1034 insertions(+), 67 deletions(-) create mode 100644 apps/roam/src/utils/syncDgNodesToSupabase.ts create mode 100644 apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts diff --git a/apps/roam/src/utils/cleanupOrphanedNodes.ts b/apps/roam/src/utils/cleanupOrphanedNodes.ts index dfdb78e39..31d6f3418 100644 --- a/apps/roam/src/utils/cleanupOrphanedNodes.ts +++ b/apps/roam/src/utils/cleanupOrphanedNodes.ts @@ -1,28 +1,92 @@ +import { + getSupabaseContext, + getLoggedInClient, + type SupabaseContext, +} from "./supabaseContext"; import { type SupabaseClient } from "@supabase/supabase-js"; -import { getSupabaseContext, getLoggedInClient } from "./supabaseContext"; +import { type Database } from "@repo/database/types.gen"; + +type DGSupabaseClient = SupabaseClient; const getAllNodesFromSupabase = async ( - spaceId: number, - supabaseClient: SupabaseClient, + supabaseClient: DGSupabaseClient, + context: SupabaseContext, ): Promise => { try { - const allNodeInstanceIds = await supabaseClient + if (!context) { + console.error("Failed to get Supabase context"); + return []; + } + const { spaceId } = context; + + const { data: schemas, error: schemasError } = await supabaseClient .from("Concept") + .select("id") + .eq("space_id", spaceId) + .eq("is_schema", true) + .eq("arity", 0); + + if (schemasError) { + console.error( + "Failed to get all discourse node schemas from Supabase:", + schemasError, + ); + return []; + } + + const schemaIds = schemas.map((s) => s.id); + let nodeResult: string[] = []; + + 
if (schemaIds.length > 0) { + const conceptResponse = await supabaseClient + .from("Concept") + .select( + ` + Content!inner ( + source_local_id + ) + `, + ) + .eq("space_id", spaceId) + .eq("is_schema", false) + .in("schema_id", schemaIds) + .not("Content.source_local_id", "is", null); + + if (conceptResponse.error) { + console.error( + "Failed to get concepts from Supabase:", + conceptResponse.error, + ); + return []; + } + nodeResult = + conceptResponse.data + ?.map((c) => c.Content?.source_local_id) + .filter((id): id is string => !!id) || []; + } + + const blockContentResponse = await supabaseClient + .from("Content") .select("source_local_id") - .not("schema_id", "is", null); + .eq("space_id", spaceId) + .eq("scale", "block") + .not("source_local_id", "is", null); - if (allNodeInstanceIds.error) { + if (blockContentResponse.error) { console.error( - "Failed to get all discourse node instances from Supabase:", - allNodeInstanceIds.error, + "Failed to get block content from Supabase:", + blockContentResponse.error, ); return []; } - const result = - allNodeInstanceIds.data + + const blockResult = + blockContentResponse.data ?.map((c) => c.source_local_id) .filter((id): id is string => !!id) || []; + const result = [...new Set([...nodeResult, ...blockResult])]; + return result; } catch (error) { console.error("Error in getAllNodesFromSupabase:", error); @@ -30,6 +94,49 @@ const getAllNodesFromSupabase = async ( } }; +const getAllNodeSchemasFromSupabase = async ( + supabaseClient: DGSupabaseClient, + context: SupabaseContext, +): Promise => { + try { + if (!context) { + console.error("Failed to get Supabase context"); + return []; + } + + const { data, error } = await supabaseClient + .from("Concept") + .select( + ` + Content!inner ( + source_local_id + ) + `, + ) + .eq("space_id", context.spaceId) + .eq("is_schema", true) + .eq("arity", 0) + .not("Content.source_local_id", "is", null); + + if (error) { + console.error( + "Failed to get all discourse node schemas 
from Supabase:", + error, + ); + return []; + } + + return ( + data + ?.map((c) => c.Content?.source_local_id) + .filter((id): id is string => !!id) || [] + ); + } catch (error) { + console.error("Error in getAllNodeSchemasFromSupabase:", error); + return []; + } +}; + const getNonExistentRoamUids = (nodeUids: string[]): string[] => { try { if (nodeUids.length === 0) { @@ -53,7 +160,7 @@ const getNonExistentRoamUids = (nodeUids: string[]): string[] => { const deleteNodesFromSupabase = async ( uids: string[], spaceId: number, - supabaseClient: SupabaseClient, + supabaseClient: DGSupabaseClient, ): Promise => { try { const { data: contentData, error: contentError } = await supabaseClient @@ -96,25 +203,200 @@ const deleteNodesFromSupabase = async ( } }; -export const cleanupOrphanedNodes = async (): Promise => { - const context = await getSupabaseContext(); - if (!context) { - console.error("Failed to get Supabase context"); - return; +const deleteNodeSchemasFromSupabase = async ( + uids: string[], +): Promise => { + try { + const context = await getSupabaseContext(); + if (!context) { + console.error("Failed to get Supabase context"); + return 0; + } + if (uids.length === 0) return 0; + + const supabaseClient = await getLoggedInClient(); + const { spaceId } = context; + + const { data: schemaContentData, error: contentLookupError } = + await supabaseClient + .from("Content") + .select("id, source_local_id") + .eq("space_id", spaceId) + .in("source_local_id", uids); + + if (contentLookupError) { + console.error( + "deleteNodeSchemasFromSupabase: content lookup failed:", + contentLookupError, + ); + return 0; + } + + if (!schemaContentData || schemaContentData.length === 0) { + return 0; + } + + const schemaContentIds = schemaContentData.map((c) => c.id); + + const { data: schemaConceptData, error: schemaConceptError } = + await supabaseClient + .from("Concept") + .select("id") + .eq("space_id", spaceId) + .eq("is_schema", true) + .in("represented_by_id", 
schemaContentIds); + + if (schemaConceptError) { + console.error( + "deleteNodeSchemasFromSupabase: schema concept lookup failed:", + schemaConceptError, + ); + return 0; + } + + const schemaConceptIds = (schemaConceptData || []).map((c) => c.id); + + let instanceConceptIds: number[] = []; + let instanceContentIds: number[] = []; + let instanceSourceLocalIds: string[] = []; + + if (schemaConceptIds.length > 0) { + const { data: instanceConceptData, error: instanceConceptError } = + await supabaseClient + .from("Concept") + .select("id, represented_by_id") + .eq("space_id", spaceId) + .eq("is_schema", false) + .in("schema_id", schemaConceptIds); + + if (instanceConceptError) { + console.error( + "deleteNodeSchemasFromSupabase: instance concept lookup failed:", + instanceConceptError, + ); + return 0; + } + + instanceConceptIds = (instanceConceptData || []).map((ic) => ic.id); + instanceContentIds = (instanceConceptData || []) + .map((ic) => ic.represented_by_id) + .filter((x): x is number => typeof x === "number"); + + if (instanceContentIds.length > 0) { + const { data: instanceContentData, error: instanceContentLookupError } = + await supabaseClient + .from("Content") + .select("source_local_id") + .in("id", instanceContentIds); + + if (instanceContentLookupError) { + console.error( + "deleteNodeSchemasFromSupabase: instance content lookup failed:", + instanceContentLookupError, + ); + return 0; + } + instanceSourceLocalIds = (instanceContentData || []) + .map((c) => c.source_local_id) + .filter((id): id is string => !!id); + } + } + + if (instanceConceptIds.length > 0) { + const { error: deleteInstanceConceptError } = await supabaseClient + .from("Concept") + .delete() + .in("id", instanceConceptIds); + if (deleteInstanceConceptError) { + console.error( + "deleteNodeSchemasFromSupabase: delete instance concepts failed:", + deleteInstanceConceptError, + ); + return 0; + } + } + + if (schemaConceptIds.length > 0) { + const { error: deleteSchemaConceptError } = 
await supabaseClient + .from("Concept") + .delete() + .in("id", schemaConceptIds); + if (deleteSchemaConceptError) { + console.error( + "deleteNodeSchemasFromSupabase: delete schema concepts failed:", + deleteSchemaConceptError, + ); + return 0; + } + } + + const allContentIds = [...schemaContentIds, ...instanceContentIds]; + if (allContentIds.length > 0) { + const { error: deleteContentError } = await supabaseClient + .from("Content") + .delete() + .in("id", allContentIds); + if (deleteContentError) { + console.error( + "deleteNodeSchemasFromSupabase: delete content failed:", + deleteContentError, + ); + return 0; + } + } + + const docLocalIds = [...uids, ...instanceSourceLocalIds]; + let deletedDocsCount = 0; + if (docLocalIds.length > 0) { + const { error: docError, count } = await supabaseClient + .from("Document") + .delete({ count: "exact" }) + .eq("space_id", spaceId) + .in("source_local_id", docLocalIds); + if (docError) { + console.error( + "deleteNodeSchemasFromSupabase: delete documents failed:", + docError, + ); + return 0; + } + deletedDocsCount = count ?? 
0; + } + + return deletedDocsCount; + } catch (error) { + console.error("Error in deleteNodeSchemasFromSupabase:", error); + return 0; } - const spaceId = context.spaceId; +}; - const supabaseClient = await getLoggedInClient(); +export const cleanupOrphanedNodes = async ( + supabaseClient: DGSupabaseClient, + context: SupabaseContext, +): Promise => { try { - const supabaseUids = await getAllNodesFromSupabase(spaceId, supabaseClient); - if (supabaseUids.length === 0) { - return; + const supabaseUids = await getAllNodesFromSupabase(supabaseClient, context); + if (supabaseUids.length > 0) { + const orphanedUids = getNonExistentRoamUids(supabaseUids); + if (orphanedUids.length > 0) { + await deleteNodesFromSupabase( + orphanedUids, + context.spaceId, + supabaseClient, + ); + } } - const orphanedUids = getNonExistentRoamUids(supabaseUids); - if (orphanedUids.length === 0) { - return; + + const supabaseSchemaUids = await getAllNodeSchemasFromSupabase( + supabaseClient, + context, + ); + if (supabaseSchemaUids.length > 0) { + const orphanedSchemaUids = getNonExistentRoamUids(supabaseSchemaUids); + if (orphanedSchemaUids.length > 0) { + await deleteNodeSchemasFromSupabase(orphanedSchemaUids); + } } - await deleteNodesFromSupabase(orphanedUids, spaceId, supabaseClient); } catch (error) { console.error("Error in cleanupOrphanedNodes:", error); } diff --git a/apps/roam/src/utils/fetchEmbeddingsForNodes.ts b/apps/roam/src/utils/fetchEmbeddingsForNodes.ts index 3d01e78a7..bff740f8f 100644 --- a/apps/roam/src/utils/fetchEmbeddingsForNodes.ts +++ b/apps/roam/src/utils/fetchEmbeddingsForNodes.ts @@ -1,16 +1,8 @@ -type DiscourseGraphContent = { - author_local_id: string; - source_local_id: string; - scale: string; - created: string; - last_modified: string; - text: string; - model: string; - vector: number[]; -}; +import { RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; -const EMBEDDING_BATCH_SIZE = 100; +const EMBEDDING_BATCH_SIZE = 200; const API_URL = 
`https://discoursegraphs.com/api/embeddings/openai/small`; +const EMBEDDING_MODEL = "openai_text_embedding_3_small_1536"; type EmbeddingApiResponse = { data: { @@ -19,13 +11,18 @@ type EmbeddingApiResponse = { }; export const fetchEmbeddingsForNodes = async ( - nodes: DiscourseGraphContent[], -): Promise => { + nodes: RoamDiscourseNodeData[], +): Promise => { const allEmbeddings: number[][] = []; - const allNodesTexts = nodes.map((node) => node.text); + const allNodesTexts = nodes.map((node) => + node.node_title ? `${node.node_title} ${node.text}` : node.text, + ); for (let i = 0; i < allNodesTexts.length; i += EMBEDDING_BATCH_SIZE) { const batch = allNodesTexts.slice(i, i + EMBEDDING_BATCH_SIZE); + console.log( + `fetchEmbeddingsForNodes: Fetching batch ${i / EMBEDDING_BATCH_SIZE + 1} of ${allNodesTexts.length / EMBEDDING_BATCH_SIZE}`, + ); const response = await fetch(API_URL, { method: "POST", @@ -67,7 +64,7 @@ export const fetchEmbeddingsForNodes = async ( } return nodes.map((node, i) => ({ ...node, - model: "openai_text_embedding_3_small_1536", + model: EMBEDDING_MODEL, vector: allEmbeddings[i], })); }; diff --git a/apps/roam/src/utils/getAllDiscourseNodesSince.ts b/apps/roam/src/utils/getAllDiscourseNodesSince.ts index b474f224e..115efc850 100644 --- a/apps/roam/src/utils/getAllDiscourseNodesSince.ts +++ b/apps/roam/src/utils/getAllDiscourseNodesSince.ts @@ -1,48 +1,171 @@ -import getDiscourseNodes from "./getDiscourseNodes"; +import getDiscourseNodes, { DiscourseNode } from "./getDiscourseNodes"; import findDiscourseNode from "./findDiscourseNode"; +import { OnloadArgs } from "roamjs-components/types"; +import getDiscourseNodeFormatExpression from "./getDiscourseNodeFormatExpression"; -type RoamDiscourseNodeData = { +type ISODateString = string; + +export type RoamDiscourseNodeData = { author_local_id: string; + author_name: string; source_local_id: string; created: string; + vector: number[]; last_modified: string; - author_name: string; text: string; + 
type: string; + node_title?: string; }; -type ISODateString = string; +export type DiscourseNodesSinceResult = { + pageNodes: RoamDiscourseNodeData[]; + blockNodes: RoamDiscourseNodeData[]; +}; + +export const getDiscourseNodeTypeWithSettingsBlockNodes = ( + node: DiscourseNode, + sinceMs: number, + extensionAPI: OnloadArgs["extensionAPI"], +): RoamDiscourseNodeData[] => { + const settingsKey = `discourse-graph-node-rule-${node.type}`; + const settings = extensionAPI.settings.get(settingsKey) as { + embeddingRef: string; + isFirstChild: boolean; + }; + const regex = getDiscourseNodeFormatExpression(node.format); + const regexPattern = regex.source.replace(/\\/g, "\\\\").replace(/"/g, '\\"'); + const firstChildUid = + settings.embeddingRef?.match(/\(\((.*?)\)\)/)?.[1] ?? ""; + const queryBlock = `[ + :find ?childString ?nodeUid ?nodeCreateTime ?nodeEditTime ?author_local_id ?type ?author_name ?node-title + :keys text source_local_id created last_modified author_local_id type author_name node_title + :in $ ?firstChildUid ?type ?since + :where + [(re-pattern "${regexPattern}") ?title-regex] + [?node :node/title ?node-title] + [(re-find ?title-regex ?node-title)] + [?node :block/uid ?nodeUid] + [?node :create/time ?nodeCreateTime] + [?node :edit/time ?nodeEditTime] + [?s :block/uid ?firstChildUid] + [?s :block/string ?firstChildString] + [?bg :block/page ?node] + [?bg :block/string ?firstChildString] + [?bg :block/children ?child] + [?child :block/order 0] + [?child :block/string ?childString] + [?child :edit/time ?childEditTime] + [?child :create/user ?user-eid] + [?user-eid :user/uid ?author_local_id] + [?child :edit/user ?eu] + [?eu :user/display-name ?author_name] + [or + [(> ?childEditTime ?since)] + [(> ?nodeEditTime ?since)]] + ]`; + + const blockNode = window.roamAlphaAPI.data.q( + queryBlock, + String(firstChildUid), + String(node.type), + sinceMs, + ) as unknown as Omit[]; + return blockNode.map((node) => ({ ...node, vector: [] })); +}; export const 
getAllDiscourseNodesSince = async ( since: ISODateString, + nodeTypes: DiscourseNode[], + extensionAPI: OnloadArgs["extensionAPI"], ): Promise => { const sinceMs = new Date(since).getTime(); + const result: RoamDiscourseNodeData[] = []; + + if (nodeTypes.length > 0) { + for (const node of nodeTypes) { + const blockNode = getDiscourseNodeTypeWithSettingsBlockNodes( + node, + sinceMs, + extensionAPI, + ); + if (blockNode) { + result.push(...blockNode); + } + } + } - const query = `[:find ?uid ?create-time ?edit-time ?user-uuid ?username ?title - :keys source_local_id created last_modified author_local_id author_name text - :in $ ?since - :where - [?e :node/title ?title] - [?e :block/uid ?uid] - [?e :create/user ?user-id] - [?user-id :user/uid ?user-uuid] - [?user-id :user/display-name ?username] - [?e :create/time ?create-time] - [?e :edit/time ?edit-time] - [(> ?edit-time ?since)]]`; - - // @ts-ignore - backend to be added to roamjs-components - const result = (await window.roamAlphaAPI.data.backend.q( + const query = `[ + :find ?node-title ?uid ?nodeCreateTime ?nodeEditTime ?author_local_id ?author_name + :keys text source_local_id created last_modified author_local_id author_name + :in $ ?since + :where + [?node :node/title ?node-title] + [?node :block/uid ?uid] + [?node :create/time ?nodeCreateTime] + [?node :edit/time ?nodeEditTime] + [?node :create/user ?user-eid] + [?user-eid :user/uid ?author_local_id] + [?node :edit/user ?eu] + [(get-else $ ?eu :user/display-name "Unknown-person") ?author_name] + [(> ?nodeEditTime ?since)] +]`; + + //@ts-ignore - backend to be added to roamjs-components + const allNodes = (await window.roamAlphaAPI.data.backend.q( query, sinceMs, - )) as unknown[][] as RoamDiscourseNodeData[]; + )) as unknown as RoamDiscourseNodeData[]; const discourseNodes = getDiscourseNodes(); + const nodeTypesSet = new Set(nodeTypes.map((nodeType) => nodeType.type)); + + result.push( + ...allNodes + .map((entity) => { + if (!entity.source_local_id) { + 
return null; + } + const node = findDiscourseNode(entity.source_local_id, discourseNodes); + if ( + !node || + node.backedBy === "default" || + !entity.text || + entity.text.trim() === "" || + nodeTypesSet.has(node.type) + ) { + return null; + } + return { + ...entity, + type: node.type, + }; + }) + .filter((n): n is RoamDiscourseNodeData => n !== null), + ); + return result; +}; + +export const nodeTypeSince = async ( + since: ISODateString, + nodeTypes: DiscourseNode[], +) => { + const sinceMs = new Date(since).getTime(); + const filterMap = await Promise.all( + nodeTypes.map((node) => { + const query = ` + [:find ?node-title + :in $ ?since ?type + :where + [?node :block/uid ?type] + [?node :node/title ?node-title] + [?node :edit/time ?nodeEditTime] + [(> ?nodeEditTime ?since)]] + `; + const result = window.roamAlphaAPI.data.q(query, sinceMs, node.type); + return result.length > 0; + }), + ); - return result.filter((entity) => { - if (!entity.source_local_id) return false; - const node = findDiscourseNode(entity.source_local_id, discourseNodes); - if (!node) return false; - if (node.backedBy === "default") return false; - return Boolean(entity.text && entity.text.trim() !== ""); - }); + const nodesSince = nodeTypes.filter((_, index) => filterMap[index]); + return nodesSince; }; diff --git a/apps/roam/src/utils/syncDgNodesToSupabase.ts b/apps/roam/src/utils/syncDgNodesToSupabase.ts new file mode 100644 index 000000000..b45c43cef --- /dev/null +++ b/apps/roam/src/utils/syncDgNodesToSupabase.ts @@ -0,0 +1,397 @@ +import { + getAllDiscourseNodesSince, + nodeTypeSince, +} from "./getAllDiscourseNodesSince"; +import { cleanupOrphanedNodes } from "./cleanupOrphanedNodes"; +import { + getLoggedInClient, + getSupabaseContext, + SupabaseContext, +} from "./supabaseContext"; +import { fetchEmbeddingsForNodes } from "./fetchEmbeddingsForNodes"; +import { LocalContentDataInput } from "@repo/database/inputTypes"; +import { RoamDiscourseNodeData } from 
"./getAllDiscourseNodesSince"; +import getDiscourseRelations from "./getDiscourseRelations"; +import getDiscourseNodes, { DiscourseNode } from "./getDiscourseNodes"; +import { + discourseNodeBlockToLocalConcept, + discourseNodeSchemaToLocalConcept, + orderConceptsByDependency, + discourseRelationSchemaToLocalConcept, + discourseRelationDataToLocalConcept, +} from "./conceptConversion"; +import getDiscourseRelationTriples from "./getDiscourseRelationTriples"; +import { OnloadArgs } from "roamjs-components/types"; +import { DGSupabaseClient } from "@repo/ui/lib/supabase/client"; + +const SYNC_FUNCTION = "embedding"; +const SYNC_INTERVAL = "45s"; +const SYNC_TIMEOUT = "20s"; +const BATCH_SIZE = 200; +const DEFAULT_TIME = "1970-01-01"; +const EMBEDDING_MODEL = "openai_text_embedding_3_small_1536"; + +type SyncTaskInfo = { + lastUpdateTime: string | null; + spaceId: number; + worker: string; + shouldProceed: boolean; +}; + +export const endSyncTask = async ( + worker: string, + status: "complete" | "failed", +): Promise => { + try { + const supabaseClient = await getLoggedInClient(); + const context = await getSupabaseContext(); + if (!context) { + console.error("endSyncTask: Unable to obtain Supabase context."); + return; + } + const { error } = await supabaseClient.rpc("end_sync_task", { + s_target: context.spaceId, + s_function: "embedding", + s_worker: worker, + s_status: status, + }); + if (error) { + console.error("endSyncTask: Error calling end_sync_task:", error); + } + } catch (error) { + console.error("endSyncTask: Error calling end_sync_task:", error); + } +}; + +export const proposeSyncTask = async (): Promise => { + try { + const supabaseClient = await getLoggedInClient(); + const context = await getSupabaseContext(); + if (!context) { + console.error("proposeSyncTask: Unable to obtain Supabase context."); + return { + lastUpdateTime: null, + spaceId: 0, + worker: "", + shouldProceed: false, + }; + } + const worker = window.roamAlphaAPI.user.uid(); + + 
const { data, error } = await supabaseClient.rpc("propose_sync_task", { + s_target: context.spaceId, + s_function: SYNC_FUNCTION, + s_worker: worker, + task_interval: SYNC_INTERVAL, + timeout: SYNC_TIMEOUT, + }); + + const { spaceId } = context; + + if (error) { + console.error( + `proposeSyncTask: propose_sync_task failed – ${error.message}`, + ); + return { lastUpdateTime: null, spaceId, worker, shouldProceed: false }; + } + + if (typeof data === "string") { + const timestamp = new Date(data); + const now = new Date(); + + if (timestamp > now) { + console.log( + "proposeSyncTask: Another worker is already running this task", + ); + return { lastUpdateTime: null, spaceId, worker, shouldProceed: false }; + } else { + return { lastUpdateTime: data, spaceId, worker, shouldProceed: true }; + } + } + + return { lastUpdateTime: null, spaceId, worker, shouldProceed: true }; + } catch (error) { + console.error( + `proposeSyncTask: Unexpected error while contacting sync-task API:`, + error, + ); + return { + lastUpdateTime: null, + spaceId: 0, + worker: "", + shouldProceed: false, + }; + } +}; + +const upsertNodeSchemaToContent = async ( + nodeTypesUids: string[], + spaceId: number, + userId: number, + supabaseClient: DGSupabaseClient, +) => { + const query = `[ + :find ?uid ?create-time ?edit-time ?user-uuid ?title ?author-name + :keys source_local_id created last_modified author_local_id text author_name + :in $ [?uid ...] 
+ :where + [?e :block/uid ?uid] + [?e :node/title ?title] + [?e :create/user ?user-eid] + [?user-eid :user/uid ?user-uuid] + [?e :create/time ?create-time] + [?e :edit/time ?edit-time] + [?e :edit/user ?eu] + [(get-else $ ?eu :user/display-name "Unknown-person") ?author-name] + + ] + `; + //@ts-ignore - backend to be added to roamjs-components + const result = await window.roamAlphaAPI.data.backend.q( + query, + nodeTypesUids, + ) as unknown as RoamDiscourseNodeData[]; + + const contentData: LocalContentDataInput[] = result.map((node) => ({ + author_id: userId, + account_local_id: node.author_local_id, + source_local_id: node.source_local_id, + created: new Date(node.created || Date.now()).toISOString(), + last_modified: new Date(node.last_modified || Date.now()).toISOString(), + text: node.text, + embedding_inline: { + model: EMBEDDING_MODEL, + vector: node.vector, + }, + scale: "document", + })); + const { error } = await supabaseClient.rpc("upsert_content", { + data: contentData as any, + v_space_id: spaceId, + v_creator_id: userId, + content_as_document: true, + }); + if (error) { + console.error("upsert_content failed:", error); + } +}; + +export const convertDgToSupabaseConcepts = async ( + nodesSince: RoamDiscourseNodeData[], + since: string, + allNodeTypes: DiscourseNode[], + supabaseClient: DGSupabaseClient, + context: SupabaseContext, +) => { + const nodeTypes = await nodeTypeSince(since, allNodeTypes); + await upsertNodeSchemaToContent( + nodeTypes.map((node) => node.type), + context.spaceId, + context.userId, + supabaseClient, + ); + + const nodesTypesToLocalConcepts = nodeTypes.map((node) => { + return discourseNodeSchemaToLocalConcept(context, node); + }); + + const relationSchemas = getDiscourseRelations(); + + const relationsToEmbed = relationSchemas.map((relation) => { + const localConcept = discourseRelationSchemaToLocalConcept( + context, + relation, + ); + return localConcept; + }); + + const nodeBlockToLocalConcepts = nodesSince.map((node) => { 
+ const localConcept = discourseNodeBlockToLocalConcept(context, { + nodeUid: node.source_local_id, + schemaUid: node.type, + text: node.node_title ? `${node.node_title} ${node.text}` : node.text, + }); + return localConcept; + }); + + const relationTriples = getDiscourseRelationTriples(); + const relationLabelToId = Object.fromEntries( + relationSchemas.map((r) => [r.label, r.id]), + ); + const relationBlockToLocalConcepts = relationTriples + .map(({ relation, source, target }) => { + const relationSchemaUid = relationLabelToId[relation]; + if (!relationSchemaUid) { + return null; + } + return discourseRelationDataToLocalConcept(context, relationSchemaUid, { + source, + target, + }); + }) + .filter((x): x is NonNullable => x !== null); + + const conceptsToUpsert = [ + ...nodesTypesToLocalConcepts, + ...relationsToEmbed, + ...nodeBlockToLocalConcepts, + ...relationBlockToLocalConcepts, + ]; + const { ordered } = orderConceptsByDependency(conceptsToUpsert); + const { error } = await supabaseClient.rpc("upsert_concepts", { + data: ordered, + v_space_id: context.spaceId, + }); + if (error) { + throw new Error( + `upsert_concepts failed: ${JSON.stringify(error, null, 2)}`, + ); + } +}; + +export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( + roamNodes: RoamDiscourseNodeData[], + supabaseClient: DGSupabaseClient, + context: SupabaseContext, +): Promise => { + const { spaceId, userId } = context; + + if (roamNodes.length === 0) { + return; + } + + let nodesWithEmbeddings: RoamDiscourseNodeData[]; + try { + nodesWithEmbeddings = await fetchEmbeddingsForNodes(roamNodes); + } catch (error: any) { + console.error( + `upsertNodesToSupabaseAsContentWithEmbeddings: Embedding service failed – ${error.message}`, + ); + return; + } + + if (nodesWithEmbeddings.length !== roamNodes.length) { + console.error( + "upsertNodesToSupabaseAsContentWithEmbeddings: Mismatch between node and embedding counts.", + ); + return; + } + + const chunk = (array: T[], size: number): 
T[][] => { + const chunks: T[][] = []; + for (let i = 0; i < array.length; i += size) { + chunks.push(array.slice(i, i + size)); + } + return chunks; + }; + + const uploadBatches = async (batches: RoamDiscourseNodeData[][]) => { + for (let idx = 0; idx < batches.length; idx++) { + const batch = batches[idx]; + + const contents: LocalContentDataInput[] = batch.map((node) => { + const variant = node.node_title ? "direct_and_description" : "direct"; + const text = node.node_title + ? `${node.node_title} ${node.text}` + : node.text; + + return { + author_id: userId, + account_local_id: node.author_local_id, + source_local_id: node.source_local_id, + created: new Date(node.created || Date.now()).toISOString(), + last_modified: new Date( + node.last_modified || Date.now(), + ).toISOString(), + text: text, + variant: variant, + embedding_inline: { + model: EMBEDDING_MODEL, + vector: node.vector, + }, + scale: "document", + }; + }); + + const { error } = await supabaseClient.rpc("upsert_content", { + data: contents as any, + v_space_id: spaceId, + v_creator_id: userId, + content_as_document: true, + }); + + if (error) { + console.error(`upsert_content failed for batch ${idx + 1}:`, error); + throw error; + } + } + }; + + await uploadBatches(chunk(nodesWithEmbeddings, BATCH_SIZE)); +}; + +const getDgNodeTypes = (extensionAPI: OnloadArgs["extensionAPI"]) => { + const allDgNodeTypes = getDiscourseNodes().filter( + (n) => n.backedBy === "user", + ); + const dgNodeTypesWithSettings = allDgNodeTypes.filter((n) => { + const settingsKey = `discourse-graph-node-rule-${n.type}`; + const settings = extensionAPI.settings.get(settingsKey) as + | { + isFirstChild?: boolean; + embeddingRef?: string; + } + | undefined; + return settings?.isFirstChild || settings?.embeddingRef; + }); + return { allDgNodeTypes, dgNodeTypesWithSettings }; +}; + +export const createOrUpdateDiscourseEmbedding = async ( + extensionAPI: OnloadArgs["extensionAPI"], +) => { + const { shouldProceed, lastUpdateTime, 
worker } = await proposeSyncTask(); + + if (!shouldProceed) { + console.log( + "createOrUpdateDiscourseEmbedding: Task already running or failed to acquire lock. Exiting.", + ); + return; + } + + try { + const time = lastUpdateTime === null ? DEFAULT_TIME : lastUpdateTime; + const { allDgNodeTypes, dgNodeTypesWithSettings } = + getDgNodeTypes(extensionAPI); + + const allNodeInstances = await getAllDiscourseNodesSince( + time, + dgNodeTypesWithSettings, + extensionAPI, + ); + const supabaseClient = await getLoggedInClient(); + const context = await getSupabaseContext(); + if (!context) { + console.error("No Supabase context found."); + return; + } + await upsertNodesToSupabaseAsContentWithEmbeddings( + allNodeInstances, + supabaseClient, + context, + ); + await convertDgToSupabaseConcepts( + allNodeInstances, + time, + allDgNodeTypes, + supabaseClient, + context, + ); + await cleanupOrphanedNodes(supabaseClient, context); + await endSyncTask(worker, "complete"); + } catch (error) { + console.error("createOrUpdateDiscourseEmbedding: Process failed:", error); + await endSyncTask(worker, "failed"); + throw error; + } +}; diff --git a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts new file mode 100644 index 000000000..fb929c086 --- /dev/null +++ b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts @@ -0,0 +1,168 @@ +import { RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; +import { SupabaseContext } from "./supabaseContext"; +import { LocalContentDataInput } from "@repo/database/inputTypes"; +import { DGSupabaseClient } from "@repo/ui/lib/supabase/client"; + + +const EMBEDDING_BATCH_SIZE = 200; +const API_URL = `https://discoursegraphs.com/api/embeddings/openai/small`; + +type EmbeddingApiResponse = { + data: { + embedding: number[]; + }[]; +}; + +export const fetchEmbeddingsForNodes = async ( + nodes: RoamDiscourseNodeData[], +): Promise => { + const allEmbeddings: number[][] = 
[]; + console.log("nodes", nodes); + const allNodesTexts = nodes.map((node) => + node.node_title ? `${node.node_title} ${node.text}` : node.text, + ); + + for (let i = 0; i < allNodesTexts.length; i += EMBEDDING_BATCH_SIZE) { + const batch = allNodesTexts.slice(i, i + EMBEDDING_BATCH_SIZE); + console.log( + `fetchEmbeddingsForNodes: Fetching batch ${i / EMBEDDING_BATCH_SIZE + 1} of ${allNodesTexts.length / EMBEDDING_BATCH_SIZE}`, + ); + + const response = await fetch(API_URL, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ input: batch }), + }); + + if (!response.ok) { + let errorData; + try { + errorData = await response.json(); + } catch (e) { + errorData = { + error: `Server responded with ${response.status}: ${await response.text()}`, + }; + } + throw new Error( + `API Error (${response.status}) processing batch ${ + i / EMBEDDING_BATCH_SIZE + 1 + }: ${errorData.error || "Failed to fetch embeddings"}`, + ); + } + + const data: EmbeddingApiResponse = await response.json(); + if (!data || !Array.isArray(data.data)) { + throw new Error( + `Invalid API response format for batch ${ + i / EMBEDDING_BATCH_SIZE + 1 + }. Expected 'data' array.`, + ); + } + const batchEmbeddings = data.data.map((item) => item.embedding); + allEmbeddings.push(...batchEmbeddings); + } + if (nodes.length !== allEmbeddings.length) { + throw new Error( + `Mismatch between nodes (${nodes.length}) and embeddings (${allEmbeddings.length})`, + ); + } + return nodes.map((node, i) => ({ + ...node, + model: "openai_text_embedding_3_small_1536", + vector: allEmbeddings[i], + })); +}; + +const uploadBatches = async ( + batches: RoamDiscourseNodeData[][], + supabaseClient: DGSupabaseClient, + context: SupabaseContext, +) => { + const { spaceId, userId } = context; + for (let idx = 0; idx < batches.length; idx++) { + const batch = batches[idx]; + + const contents: LocalContentDataInput[] = batch.map((node) => { + const variant = node.node_title ? 
"direct_and_description" : "direct"; + const text = node.node_title + ? `${node.node_title} ${node.text}` + : node.text; + + return { + author_id: userId, + account_local_id: node.author_local_id, + source_local_id: node.source_local_id, + created: new Date(node.created || Date.now()).toISOString(), + last_modified: new Date(node.last_modified || Date.now()).toISOString(), + text: text, + variant: variant, + embedding_inline: { + model: "openai_text_embedding_3_small_1536", + vector: node.vector, + }, + scale: "document", + }; + }); + + const { error } = await supabaseClient.rpc("upsert_content", { + data: contents as any, + v_space_id: spaceId, + v_creator_id: userId, + content_as_document: true, + }); + + if (error) { + console.error(`upsert_content failed for batch ${idx + 1}:`, error); + throw error; + } + } +}; + +export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( + roamNodes: RoamDiscourseNodeData[], + supabaseClient: DGSupabaseClient, + context: SupabaseContext, +): Promise => { + if (!context) { + console.error("No Supabase context found."); + return; + } + const { spaceId, userId } = context; + + if (roamNodes.length === 0) { + return; + } + + let nodesWithEmbeddings: RoamDiscourseNodeData[]; + try { + nodesWithEmbeddings = await fetchEmbeddingsForNodes(roamNodes); + } catch (error: any) { + console.error( + `upsertNodesToSupabaseAsContentWithEmbeddings: Embedding service failed – ${error.message}`, + ); + return; + } + + if (nodesWithEmbeddings.length !== roamNodes.length) { + console.error( + "upsertNodesToSupabaseAsContentWithEmbeddings: Mismatch between node and embedding counts.", + ); + return; + } + + const batchSize = 200; + + const chunk = (array: T[], size: number): T[][] => { + const chunks: T[][] = []; + for (let i = 0; i < array.length; i += size) { + chunks.push(array.slice(i, i + size)); + } + return chunks; + }; + + await uploadBatches( + chunk(nodesWithEmbeddings, batchSize), + supabaseClient, + context, + ); +}; From 
4f2807b8a7ef4b9e969b12c3b2c419dfe8bea8b5 Mon Sep 17 00:00:00 2001 From: sid597 Date: Sun, 17 Aug 2025 22:46:04 +0530 Subject: [PATCH 02/15] address coderabbit code --- apps/roam/src/utils/cleanupOrphanedNodes.ts | 45 +++++++------------ .../roam/src/utils/fetchEmbeddingsForNodes.ts | 2 +- apps/roam/src/utils/syncDgNodesToSupabase.ts | 7 +-- 3 files changed, 21 insertions(+), 33 deletions(-) diff --git a/apps/roam/src/utils/cleanupOrphanedNodes.ts b/apps/roam/src/utils/cleanupOrphanedNodes.ts index 31d6f3418..9bb850f26 100644 --- a/apps/roam/src/utils/cleanupOrphanedNodes.ts +++ b/apps/roam/src/utils/cleanupOrphanedNodes.ts @@ -1,8 +1,4 @@ -import { - getSupabaseContext, - getLoggedInClient, - type SupabaseContext, -} from "./supabaseContext"; +import { type SupabaseContext } from "./supabaseContext"; import { type SupabaseClient } from "@supabase/supabase-js"; import { type Database } from "@repo/database/types.gen"; @@ -10,14 +6,9 @@ type DGSupabaseClient = SupabaseClient; const getAllNodesFromSupabase = async ( supabaseClient: DGSupabaseClient, - context: SupabaseContext, + spaceId: number, ): Promise => { try { - if (!context) { - console.error("Failed to get Supabase context"); - return []; - } - const { spaceId } = context; const { data: schemas, error: schemasError } = await supabaseClient .from("Concept") @@ -96,14 +87,9 @@ const getAllNodesFromSupabase = async ( const getAllNodeSchemasFromSupabase = async ( supabaseClient: DGSupabaseClient, - context: SupabaseContext, + spaceId: number, ): Promise => { try { - if (!context) { - console.error("Failed to get Supabase context"); - return []; - } - const { data, error } = await supabaseClient .from("Concept") .select( @@ -113,7 +99,7 @@ const getAllNodeSchemasFromSupabase = async ( ) `, ) - .eq("space_id", context.spaceId) + .eq("space_id", spaceId) .eq("is_schema", true) .eq("arity", 0) .not("Content.source_local_id", "is", null); @@ -205,18 +191,12 @@ const deleteNodesFromSupabase = async ( const 
deleteNodeSchemasFromSupabase = async ( uids: string[], + supabaseClient: DGSupabaseClient, + spaceId: number, ): Promise => { try { - const context = await getSupabaseContext(); - if (!context) { - console.error("Failed to get Supabase context"); - return 0; - } if (uids.length === 0) return 0; - const supabaseClient = await getLoggedInClient(); - const { spaceId } = context; - const { data: schemaContentData, error: contentLookupError } = await supabaseClient .from("Content") @@ -375,7 +355,10 @@ export const cleanupOrphanedNodes = async ( context: SupabaseContext, ): Promise => { try { - const supabaseUids = await getAllNodesFromSupabase(supabaseClient, context); + const supabaseUids = await getAllNodesFromSupabase( + supabaseClient, + context.spaceId, + ); if (supabaseUids.length > 0) { const orphanedUids = getNonExistentRoamUids(supabaseUids); if (orphanedUids.length > 0) { @@ -389,12 +372,16 @@ export const cleanupOrphanedNodes = async ( const supabaseSchemaUids = await getAllNodeSchemasFromSupabase( supabaseClient, - context, + context.spaceId, ); if (supabaseSchemaUids.length > 0) { const orphanedSchemaUids = getNonExistentRoamUids(supabaseSchemaUids); if (orphanedSchemaUids.length > 0) { - await deleteNodeSchemasFromSupabase(orphanedSchemaUids); + await deleteNodeSchemasFromSupabase( + orphanedSchemaUids, + supabaseClient, + context.spaceId, + ); } } } catch (error) { diff --git a/apps/roam/src/utils/fetchEmbeddingsForNodes.ts b/apps/roam/src/utils/fetchEmbeddingsForNodes.ts index bff740f8f..61727dc34 100644 --- a/apps/roam/src/utils/fetchEmbeddingsForNodes.ts +++ b/apps/roam/src/utils/fetchEmbeddingsForNodes.ts @@ -21,7 +21,7 @@ export const fetchEmbeddingsForNodes = async ( for (let i = 0; i < allNodesTexts.length; i += EMBEDDING_BATCH_SIZE) { const batch = allNodesTexts.slice(i, i + EMBEDDING_BATCH_SIZE); console.log( - `fetchEmbeddingsForNodes: Fetching batch ${i / EMBEDDING_BATCH_SIZE + 1} of ${allNodesTexts.length / EMBEDDING_BATCH_SIZE}`, + 
`fetchEmbeddingsForNodes: Fetching batch ${i / EMBEDDING_BATCH_SIZE + 1} of ${Math.ceil(allNodesTexts.length / EMBEDDING_BATCH_SIZE)}`, ); const response = await fetch(API_URL, { diff --git a/apps/roam/src/utils/syncDgNodesToSupabase.ts b/apps/roam/src/utils/syncDgNodesToSupabase.ts index b45c43cef..95880e317 100644 --- a/apps/roam/src/utils/syncDgNodesToSupabase.ts +++ b/apps/roam/src/utils/syncDgNodesToSupabase.ts @@ -147,10 +147,10 @@ const upsertNodeSchemaToContent = async ( ] `; //@ts-ignore - backend to be added to roamjs-components - const result = await window.roamAlphaAPI.data.backend.q( + const result = (await window.roamAlphaAPI.data.backend.q( query, nodeTypesUids, - ) as unknown as RoamDiscourseNodeData[]; + )) as unknown as RoamDiscourseNodeData[]; const contentData: LocalContentDataInput[] = result.map((node) => ({ author_id: userId, @@ -238,7 +238,7 @@ export const convertDgToSupabaseConcepts = async ( ...relationBlockToLocalConcepts, ]; const { ordered } = orderConceptsByDependency(conceptsToUpsert); - const { error } = await supabaseClient.rpc("upsert_concepts", { + const { error } = await supabaseClient.rpc("upsert_concepts", { data: ordered, v_space_id: context.spaceId, }); @@ -373,6 +373,7 @@ export const createOrUpdateDiscourseEmbedding = async ( const context = await getSupabaseContext(); if (!context) { console.error("No Supabase context found."); + await endSyncTask(worker, "failed"); return; } await upsertNodesToSupabaseAsContentWithEmbeddings( From 3e430c9dff093b02bfff27598d17b64dd501f2a2 Mon Sep 17 00:00:00 2001 From: sid597 Date: Tue, 19 Aug 2025 21:56:16 +0530 Subject: [PATCH 03/15] address lint errors --- .../roam/src/utils/fetchEmbeddingsForNodes.ts | 11 ++- .../src/utils/getAllDiscourseNodesSince.ts | 1 + apps/roam/src/utils/syncDgNodesToSupabase.ts | 67 +++++++++++-------- .../upsertNodesAsContentWithEmbeddings.ts | 15 +++-- 4 files changed, 53 insertions(+), 41 deletions(-) diff --git 
a/apps/roam/src/utils/fetchEmbeddingsForNodes.ts b/apps/roam/src/utils/fetchEmbeddingsForNodes.ts index 61727dc34..8e3ace77e 100644 --- a/apps/roam/src/utils/fetchEmbeddingsForNodes.ts +++ b/apps/roam/src/utils/fetchEmbeddingsForNodes.ts @@ -1,3 +1,4 @@ +/* eslint-disable @typescript-eslint/naming-convention */ import { RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; const EMBEDDING_BATCH_SIZE = 200; @@ -20,10 +21,6 @@ export const fetchEmbeddingsForNodes = async ( for (let i = 0; i < allNodesTexts.length; i += EMBEDDING_BATCH_SIZE) { const batch = allNodesTexts.slice(i, i + EMBEDDING_BATCH_SIZE); - console.log( - `fetchEmbeddingsForNodes: Fetching batch ${i / EMBEDDING_BATCH_SIZE + 1} of ${Math.ceil(allNodesTexts.length / EMBEDDING_BATCH_SIZE)}`, - ); - const response = await fetch(API_URL, { method: "POST", headers: { "Content-Type": "application/json" }, @@ -31,9 +28,9 @@ export const fetchEmbeddingsForNodes = async ( }); if (!response.ok) { - let errorData; + let errorData: { error: string }; try { - errorData = await response.json(); + errorData = (await response.json()) as { error: string }; } catch (e) { errorData = { error: `Server responded with ${response.status}: ${await response.text()}`, @@ -46,7 +43,7 @@ export const fetchEmbeddingsForNodes = async ( ); } - const data: EmbeddingApiResponse = await response.json(); + const data = (await response.json()) as EmbeddingApiResponse; if (!data || !Array.isArray(data.data)) { throw new Error( `Invalid API response format for batch ${ diff --git a/apps/roam/src/utils/getAllDiscourseNodesSince.ts b/apps/roam/src/utils/getAllDiscourseNodesSince.ts index 115efc850..242ac456e 100644 --- a/apps/roam/src/utils/getAllDiscourseNodesSince.ts +++ b/apps/roam/src/utils/getAllDiscourseNodesSince.ts @@ -1,3 +1,4 @@ +/* eslint-disable @typescript-eslint/naming-convention */ import getDiscourseNodes, { DiscourseNode } from "./getDiscourseNodes"; import findDiscourseNode from "./findDiscourseNode"; import { 
OnloadArgs } from "roamjs-components/types"; diff --git a/apps/roam/src/utils/syncDgNodesToSupabase.ts b/apps/roam/src/utils/syncDgNodesToSupabase.ts index 95880e317..fd9c489ed 100644 --- a/apps/roam/src/utils/syncDgNodesToSupabase.ts +++ b/apps/roam/src/utils/syncDgNodesToSupabase.ts @@ -1,3 +1,4 @@ +/* eslint-disable @typescript-eslint/naming-convention */ import { getAllDiscourseNodesSince, nodeTypeSince, @@ -124,12 +125,17 @@ export const proposeSyncTask = async (): Promise => { } }; -const upsertNodeSchemaToContent = async ( - nodeTypesUids: string[], - spaceId: number, - userId: number, - supabaseClient: DGSupabaseClient, -) => { +const upsertNodeSchemaToContent = async ({ + nodeTypesUids, + spaceId, + userId, + supabaseClient, +}: { + nodeTypesUids: string[]; + spaceId: number; + userId: number; + supabaseClient: DGSupabaseClient; +}) => { const query = `[ :find ?uid ?create-time ?edit-time ?user-uuid ?title ?author-name :keys source_local_id created last_modified author_local_id text author_name @@ -166,7 +172,7 @@ const upsertNodeSchemaToContent = async ( scale: "document", })); const { error } = await supabaseClient.rpc("upsert_content", { - data: contentData as any, + data: contentData, v_space_id: spaceId, v_creator_id: userId, content_as_document: true, @@ -176,20 +182,26 @@ const upsertNodeSchemaToContent = async ( } }; -export const convertDgToSupabaseConcepts = async ( - nodesSince: RoamDiscourseNodeData[], - since: string, - allNodeTypes: DiscourseNode[], - supabaseClient: DGSupabaseClient, - context: SupabaseContext, -) => { +export const convertDgToSupabaseConcepts = async ({ + nodesSince, + since, + allNodeTypes, + supabaseClient, + context, +}: { + nodesSince: RoamDiscourseNodeData[]; + since: string; + allNodeTypes: DiscourseNode[]; + supabaseClient: DGSupabaseClient; + context: SupabaseContext; +}) => { const nodeTypes = await nodeTypeSince(since, allNodeTypes); - await upsertNodeSchemaToContent( - nodeTypes.map((node) => node.type), - 
context.spaceId, - context.userId, + await upsertNodeSchemaToContent({ + nodeTypesUids: nodeTypes.map((node) => node.type), + spaceId: context.spaceId, + userId: context.userId, supabaseClient, - ); + }); const nodesTypesToLocalConcepts = nodeTypes.map((node) => { return discourseNodeSchemaToLocalConcept(context, node); @@ -263,9 +275,10 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( let nodesWithEmbeddings: RoamDiscourseNodeData[]; try { nodesWithEmbeddings = await fetchEmbeddingsForNodes(roamNodes); - } catch (error: any) { + } catch (error) { + const message = error instanceof Error ? error.message : String(error); console.error( - `upsertNodesToSupabaseAsContentWithEmbeddings: Embedding service failed – ${error.message}`, + `upsertNodesToSupabaseAsContentWithEmbeddings: Embedding service failed – ${message}`, ); return; } @@ -314,7 +327,7 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( }); const { error } = await supabaseClient.rpc("upsert_content", { - data: contents as any, + data: contents, v_space_id: spaceId, v_creator_id: userId, content_as_document: true, @@ -381,13 +394,13 @@ export const createOrUpdateDiscourseEmbedding = async ( supabaseClient, context, ); - await convertDgToSupabaseConcepts( - allNodeInstances, - time, - allDgNodeTypes, + await convertDgToSupabaseConcepts({ + nodesSince: allNodeInstances, + since: time, + allNodeTypes: allDgNodeTypes, supabaseClient, context, - ); + }); await cleanupOrphanedNodes(supabaseClient, context); await endSyncTask(worker, "complete"); } catch (error) { diff --git a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts index fb929c086..d31f1367e 100644 --- a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts +++ b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts @@ -1,8 +1,9 @@ +/* eslint-disable @typescript-eslint/naming-convention */ import { RoamDiscourseNodeData } from 
"./getAllDiscourseNodesSince"; import { SupabaseContext } from "./supabaseContext"; import { LocalContentDataInput } from "@repo/database/inputTypes"; import { DGSupabaseClient } from "@repo/ui/lib/supabase/client"; - +import { Json } from "@repo/database/types.gen"; const EMBEDDING_BATCH_SIZE = 200; const API_URL = `https://discoursegraphs.com/api/embeddings/openai/small`; @@ -37,7 +38,7 @@ export const fetchEmbeddingsForNodes = async ( if (!response.ok) { let errorData; try { - errorData = await response.json(); + errorData = (await response.json()) as { error: string }; } catch (e) { errorData = { error: `Server responded with ${response.status}: ${await response.text()}`, @@ -50,7 +51,7 @@ export const fetchEmbeddingsForNodes = async ( ); } - const data: EmbeddingApiResponse = await response.json(); + const data = (await response.json()) as EmbeddingApiResponse; if (!data || !Array.isArray(data.data)) { throw new Error( `Invalid API response format for batch ${ @@ -105,7 +106,7 @@ const uploadBatches = async ( }); const { error } = await supabaseClient.rpc("upsert_content", { - data: contents as any, + data: contents as unknown as Json, v_space_id: spaceId, v_creator_id: userId, content_as_document: true, @@ -127,7 +128,6 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( console.error("No Supabase context found."); return; } - const { spaceId, userId } = context; if (roamNodes.length === 0) { return; @@ -136,9 +136,10 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( let nodesWithEmbeddings: RoamDiscourseNodeData[]; try { nodesWithEmbeddings = await fetchEmbeddingsForNodes(roamNodes); - } catch (error: any) { + } catch (error: unknown) { + const errorMessage = error instanceof Error ? 
error.message : String(error); console.error( - `upsertNodesToSupabaseAsContentWithEmbeddings: Embedding service failed – ${error.message}`, + `upsertNodesToSupabaseAsContentWithEmbeddings: Embedding service failed – ${errorMessage}`, ); return; } From 0425eeefa88799b5b1b554200b274b492ca6b92a Mon Sep 17 00:00:00 2001 From: sid597 Date: Tue, 19 Aug 2025 21:58:48 +0530 Subject: [PATCH 04/15] use async instead of backend --- apps/roam/src/components/DiscourseContextOverlay.tsx | 2 +- apps/roam/src/utils/getAllDiscourseNodesSince.ts | 2 +- apps/roam/src/utils/syncDgNodesToSupabase.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/roam/src/components/DiscourseContextOverlay.tsx b/apps/roam/src/components/DiscourseContextOverlay.tsx index 25220de50..64dd8c6c8 100644 --- a/apps/roam/src/components/DiscourseContextOverlay.tsx +++ b/apps/roam/src/components/DiscourseContextOverlay.tsx @@ -39,7 +39,7 @@ const getOverlayInfo = async (tag: string): Promise => { relations, }), // @ts-ignore - backend to be added to roamjs-components - window.roamAlphaAPI.data.backend.q( + window.roamAlphaAPI.data.async.q( `[:find ?a :where [?b :node/title "${normalizePageTitle(tag)}"] [?a :block/refs ?b]]`, ), ]); diff --git a/apps/roam/src/utils/getAllDiscourseNodesSince.ts b/apps/roam/src/utils/getAllDiscourseNodesSince.ts index 242ac456e..0c0d6b75a 100644 --- a/apps/roam/src/utils/getAllDiscourseNodesSince.ts +++ b/apps/roam/src/utils/getAllDiscourseNodesSince.ts @@ -112,7 +112,7 @@ export const getAllDiscourseNodesSince = async ( ]`; //@ts-ignore - backend to be added to roamjs-components - const allNodes = (await window.roamAlphaAPI.data.backend.q( + const allNodes = (await window.roamAlphaAPI.data.async.q( query, sinceMs, )) as unknown as RoamDiscourseNodeData[]; diff --git a/apps/roam/src/utils/syncDgNodesToSupabase.ts b/apps/roam/src/utils/syncDgNodesToSupabase.ts index fd9c489ed..f2095781e 100644 --- a/apps/roam/src/utils/syncDgNodesToSupabase.ts +++ 
b/apps/roam/src/utils/syncDgNodesToSupabase.ts @@ -153,7 +153,7 @@ const upsertNodeSchemaToContent = async ({ ] `; //@ts-ignore - backend to be added to roamjs-components - const result = (await window.roamAlphaAPI.data.backend.q( + const result = (await window.roamAlphaAPI.data.async.q( query, nodeTypesUids, )) as unknown as RoamDiscourseNodeData[]; From 5622c64e65a8204a913c82a5880e5fe476010afa Mon Sep 17 00:00:00 2001 From: sid597 Date: Thu, 21 Aug 2025 23:52:05 +0530 Subject: [PATCH 05/15] address review --- .../roam/src/utils/fetchEmbeddingsForNodes.ts | 67 -------------- .../src/utils/getAllDiscourseNodesSince.ts | 43 +++++---- apps/roam/src/utils/syncDgNodesToSupabase.ts | 75 ++++++---------- .../upsertNodesAsContentWithEmbeddings.ts | 87 ++++++++++--------- packages/database/package.json | 1 + 5 files changed, 95 insertions(+), 178 deletions(-) delete mode 100644 apps/roam/src/utils/fetchEmbeddingsForNodes.ts diff --git a/apps/roam/src/utils/fetchEmbeddingsForNodes.ts b/apps/roam/src/utils/fetchEmbeddingsForNodes.ts deleted file mode 100644 index 8e3ace77e..000000000 --- a/apps/roam/src/utils/fetchEmbeddingsForNodes.ts +++ /dev/null @@ -1,67 +0,0 @@ -/* eslint-disable @typescript-eslint/naming-convention */ -import { RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; - -const EMBEDDING_BATCH_SIZE = 200; -const API_URL = `https://discoursegraphs.com/api/embeddings/openai/small`; -const EMBEDDING_MODEL = "openai_text_embedding_3_small_1536"; - -type EmbeddingApiResponse = { - data: { - embedding: number[]; - }[]; -}; - -export const fetchEmbeddingsForNodes = async ( - nodes: RoamDiscourseNodeData[], -): Promise => { - const allEmbeddings: number[][] = []; - const allNodesTexts = nodes.map((node) => - node.node_title ? 
`${node.node_title} ${node.text}` : node.text, - ); - - for (let i = 0; i < allNodesTexts.length; i += EMBEDDING_BATCH_SIZE) { - const batch = allNodesTexts.slice(i, i + EMBEDDING_BATCH_SIZE); - const response = await fetch(API_URL, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ input: batch }), - }); - - if (!response.ok) { - let errorData: { error: string }; - try { - errorData = (await response.json()) as { error: string }; - } catch (e) { - errorData = { - error: `Server responded with ${response.status}: ${await response.text()}`, - }; - } - throw new Error( - `API Error (${response.status}) processing batch ${ - i / EMBEDDING_BATCH_SIZE + 1 - }: ${errorData.error || "Failed to fetch embeddings"}`, - ); - } - - const data = (await response.json()) as EmbeddingApiResponse; - if (!data || !Array.isArray(data.data)) { - throw new Error( - `Invalid API response format for batch ${ - i / EMBEDDING_BATCH_SIZE + 1 - }. Expected 'data' array.`, - ); - } - const batchEmbeddings = data.data.map((item) => item.embedding); - allEmbeddings.push(...batchEmbeddings); - } - if (nodes.length !== allEmbeddings.length) { - throw new Error( - `Mismatch between nodes (${nodes.length}) and embeddings (${allEmbeddings.length})`, - ); - } - return nodes.map((node, i) => ({ - ...node, - model: EMBEDDING_MODEL, - vector: allEmbeddings[i], - })); -}; diff --git a/apps/roam/src/utils/getAllDiscourseNodesSince.ts b/apps/roam/src/utils/getAllDiscourseNodesSince.ts index 0c0d6b75a..bb72063a8 100644 --- a/apps/roam/src/utils/getAllDiscourseNodesSince.ts +++ b/apps/roam/src/utils/getAllDiscourseNodesSince.ts @@ -11,7 +11,6 @@ export type RoamDiscourseNodeData = { author_name: string; source_local_id: string; created: string; - vector: number[]; last_modified: string; text: string; type: string; @@ -70,8 +69,8 @@ export const getDiscourseNodeTypeWithSettingsBlockNodes = ( String(firstChildUid), String(node.type), sinceMs, - ) as unknown as 
Omit[]; - return blockNode.map((node) => ({ ...node, vector: [] })); + ) as unknown as RoamDiscourseNodeData[]; + return blockNode; }; export const getAllDiscourseNodesSince = async ( @@ -121,27 +120,27 @@ export const getAllDiscourseNodesSince = async ( const nodeTypesSet = new Set(nodeTypes.map((nodeType) => nodeType.type)); result.push( - ...allNodes - .map((entity) => { - if (!entity.source_local_id) { - return null; - } - const node = findDiscourseNode(entity.source_local_id, discourseNodes); - if ( - !node || - node.backedBy === "default" || - !entity.text || - entity.text.trim() === "" || - nodeTypesSet.has(node.type) - ) { - return null; - } - return { + ...allNodes.flatMap((entity) => { + if (!entity.source_local_id) { + return []; + } + const node = findDiscourseNode(entity.source_local_id, discourseNodes); + if ( + !node || + node.backedBy === "default" || + !entity.text || + entity.text.trim() === "" || + nodeTypesSet.has(node.type) + ) { + return []; + } + return [ + { ...entity, type: node.type, - }; - }) - .filter((n): n is RoamDiscourseNodeData => n !== null), + }, + ]; + }), ); return result; }; diff --git a/apps/roam/src/utils/syncDgNodesToSupabase.ts b/apps/roam/src/utils/syncDgNodesToSupabase.ts index f2095781e..2feeee774 100644 --- a/apps/roam/src/utils/syncDgNodesToSupabase.ts +++ b/apps/roam/src/utils/syncDgNodesToSupabase.ts @@ -9,7 +9,6 @@ import { getSupabaseContext, SupabaseContext, } from "./supabaseContext"; -import { fetchEmbeddingsForNodes } from "./fetchEmbeddingsForNodes"; import { LocalContentDataInput } from "@repo/database/inputTypes"; import { RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; import getDiscourseRelations from "./getDiscourseRelations"; @@ -24,13 +23,15 @@ import { import getDiscourseRelationTriples from "./getDiscourseRelationTriples"; import { OnloadArgs } from "roamjs-components/types"; import { DGSupabaseClient } from "@repo/ui/lib/supabase/client"; +import { fetchEmbeddingsForNodes } from 
"./upsertNodesAsContentWithEmbeddings"; +import { Json } from "@repo/database/types.gen"; +import { convertRoamNodeToLocalContent } from "./upsertNodesAsContentWithEmbeddings"; const SYNC_FUNCTION = "embedding"; const SYNC_INTERVAL = "45s"; const SYNC_TIMEOUT = "20s"; const BATCH_SIZE = 200; const DEFAULT_TIME = "1970-01-01"; -const EMBEDDING_MODEL = "openai_text_embedding_3_small_1536"; type SyncTaskInfo = { lastUpdateTime: string | null; @@ -78,6 +79,15 @@ export const proposeSyncTask = async (): Promise => { }; } const worker = window.roamAlphaAPI.user.uid(); + if (!worker) { + console.error("proposeSyncTask: Unable to obtain user UID."); + return { + lastUpdateTime: null, + spaceId: 0, + worker: "", + shouldProceed: false, + }; + } const { data, error } = await supabaseClient.rpc("propose_sync_task", { s_target: context.spaceId, @@ -158,21 +168,12 @@ const upsertNodeSchemaToContent = async ({ nodeTypesUids, )) as unknown as RoamDiscourseNodeData[]; - const contentData: LocalContentDataInput[] = result.map((node) => ({ - author_id: userId, - account_local_id: node.author_local_id, - source_local_id: node.source_local_id, - created: new Date(node.created || Date.now()).toISOString(), - last_modified: new Date(node.last_modified || Date.now()).toISOString(), - text: node.text, - embedding_inline: { - model: EMBEDDING_MODEL, - vector: node.vector, - }, - scale: "document", - })); + const contentData: LocalContentDataInput[] = convertRoamNodeToLocalContent({ + nodes: result, + userId, + }); const { error } = await supabaseClient.rpc("upsert_content", { - data: contentData, + data: contentData as Json, v_space_id: spaceId, v_creator_id: userId, content_as_document: true, @@ -266,15 +267,19 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( supabaseClient: DGSupabaseClient, context: SupabaseContext, ): Promise => { - const { spaceId, userId } = context; + const { userId } = context; if (roamNodes.length === 0) { return; } + const 
allNodeInstancesAsLocalContent = convertRoamNodeToLocalContent({ + nodes: roamNodes, + userId: context.userId, + }); - let nodesWithEmbeddings: RoamDiscourseNodeData[]; + let nodesWithEmbeddings: LocalContentDataInput[]; try { - nodesWithEmbeddings = await fetchEmbeddingsForNodes(roamNodes); + nodesWithEmbeddings = await fetchEmbeddingsForNodes(allNodeInstancesAsLocalContent); } catch (error) { const message = error instanceof Error ? error.message : String(error); console.error( @@ -283,7 +288,7 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( return; } - if (nodesWithEmbeddings.length !== roamNodes.length) { + if (nodesWithEmbeddings.length !== allNodeInstancesAsLocalContent.length) { console.error( "upsertNodesToSupabaseAsContentWithEmbeddings: Mismatch between node and embedding counts.", ); @@ -298,37 +303,13 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( return chunks; }; - const uploadBatches = async (batches: RoamDiscourseNodeData[][]) => { + const uploadBatches = async (batches: LocalContentDataInput[][]) => { for (let idx = 0; idx < batches.length; idx++) { const batch = batches[idx]; - const contents: LocalContentDataInput[] = batch.map((node) => { - const variant = node.node_title ? "direct_and_description" : "direct"; - const text = node.node_title - ? 
`${node.node_title} ${node.text}` - : node.text; - - return { - author_id: userId, - account_local_id: node.author_local_id, - source_local_id: node.source_local_id, - created: new Date(node.created || Date.now()).toISOString(), - last_modified: new Date( - node.last_modified || Date.now(), - ).toISOString(), - text: text, - variant: variant, - embedding_inline: { - model: EMBEDDING_MODEL, - vector: node.vector, - }, - scale: "document", - }; - }); - const { error } = await supabaseClient.rpc("upsert_content", { - data: contents, - v_space_id: spaceId, + data: batch as Json, + v_space_id: context.spaceId, v_creator_id: userId, content_as_document: true, }); diff --git a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts index d31f1367e..f00704201 100644 --- a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts +++ b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts @@ -4,9 +4,10 @@ import { SupabaseContext } from "./supabaseContext"; import { LocalContentDataInput } from "@repo/database/inputTypes"; import { DGSupabaseClient } from "@repo/ui/lib/supabase/client"; import { Json } from "@repo/database/types.gen"; +import { nextApiRoot } from "@repo/ui/lib/execContext"; const EMBEDDING_BATCH_SIZE = 200; -const API_URL = `https://discoursegraphs.com/api/embeddings/openai/small`; +const EMBEDDING_MODEL = "openai_text_embedding_3_small_1536"; type EmbeddingApiResponse = { data: { @@ -14,22 +15,40 @@ type EmbeddingApiResponse = { }[]; }; +export const convertRoamNodeToLocalContent = ({ + nodes, + userId, +}: { + nodes: RoamDiscourseNodeData[]; + userId: number; +}): LocalContentDataInput[] => { + return nodes.map((node) => { + const variant = node.node_title ? "direct_and_description" : "direct"; + const text = node.node_title + ? 
`${node.node_title} ${node.text}` + : node.text; + return { + author_id: userId, + author_local_id: node.author_local_id, + source_local_id: node.source_local_id, + created: new Date(node.created || Date.now()).toISOString(), + last_modified: new Date(node.last_modified || Date.now()).toISOString(), + text: text, + variant: variant, + scale: "document", + }; + }); +}; + export const fetchEmbeddingsForNodes = async ( - nodes: RoamDiscourseNodeData[], -): Promise => { + nodes: LocalContentDataInput[], +): Promise => { const allEmbeddings: number[][] = []; - console.log("nodes", nodes); - const allNodesTexts = nodes.map((node) => - node.node_title ? `${node.node_title} ${node.text}` : node.text, - ); + const allNodesTexts = nodes.map((node) => node.text || ""); for (let i = 0; i < allNodesTexts.length; i += EMBEDDING_BATCH_SIZE) { const batch = allNodesTexts.slice(i, i + EMBEDDING_BATCH_SIZE); - console.log( - `fetchEmbeddingsForNodes: Fetching batch ${i / EMBEDDING_BATCH_SIZE + 1} of ${allNodesTexts.length / EMBEDDING_BATCH_SIZE}`, - ); - - const response = await fetch(API_URL, { + const response = await fetch(nextApiRoot() + "/embeddings/openai/small", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ input: batch }), @@ -67,46 +86,26 @@ export const fetchEmbeddingsForNodes = async ( `Mismatch between nodes (${nodes.length}) and embeddings (${allEmbeddings.length})`, ); } + return nodes.map((node, i) => ({ ...node, - model: "openai_text_embedding_3_small_1536", - vector: allEmbeddings[i], + embedding_inline: { + model: EMBEDDING_MODEL, + vector: allEmbeddings[i], + }, })); }; const uploadBatches = async ( - batches: RoamDiscourseNodeData[][], + batches: LocalContentDataInput[][], supabaseClient: DGSupabaseClient, context: SupabaseContext, ) => { const { spaceId, userId } = context; for (let idx = 0; idx < batches.length; idx++) { const batch = batches[idx]; - - const contents: LocalContentDataInput[] = batch.map((node) => { - 
const variant = node.node_title ? "direct_and_description" : "direct"; - const text = node.node_title - ? `${node.node_title} ${node.text}` - : node.text; - - return { - author_id: userId, - account_local_id: node.author_local_id, - source_local_id: node.source_local_id, - created: new Date(node.created || Date.now()).toISOString(), - last_modified: new Date(node.last_modified || Date.now()).toISOString(), - text: text, - variant: variant, - embedding_inline: { - model: "openai_text_embedding_3_small_1536", - vector: node.vector, - }, - scale: "document", - }; - }); - const { error } = await supabaseClient.rpc("upsert_content", { - data: contents as unknown as Json, + data: batch as unknown as Json, v_space_id: spaceId, v_creator_id: userId, content_as_document: true, @@ -124,7 +123,7 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( supabaseClient: DGSupabaseClient, context: SupabaseContext, ): Promise => { - if (!context) { + if (!context?.userId) { console.error("No Supabase context found."); return; } @@ -132,10 +131,14 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( if (roamNodes.length === 0) { return; } + const localContentNodes = convertRoamNodeToLocalContent({ + nodes: roamNodes, + userId: context.userId, + }); - let nodesWithEmbeddings: RoamDiscourseNodeData[]; + let nodesWithEmbeddings: LocalContentDataInput[]; try { - nodesWithEmbeddings = await fetchEmbeddingsForNodes(roamNodes); + nodesWithEmbeddings = await fetchEmbeddingsForNodes(localContentNodes); } catch (error: unknown) { const errorMessage = error instanceof Error ? 
error.message : String(error); console.error( diff --git a/packages/database/package.json b/packages/database/package.json index 97fa9d2cf..c4e4bb2d7 100644 --- a/packages/database/package.json +++ b/packages/database/package.json @@ -7,6 +7,7 @@ "exports": { "./types.gen.ts": "./types.gen.ts", "./types.gen": "./types.gen.ts", + "./inputTypes": "./inputTypes.ts", "./dbDotEnv": "./dbDotEnv.ts" }, "scripts": { From e3f7352ff36ed87b171f21ad41a51ee369dc3e0a Mon Sep 17 00:00:00 2001 From: sid597 Date: Sat, 23 Aug 2025 00:00:29 +0530 Subject: [PATCH 06/15] bug fixes --- apps/roam/src/index.ts | 4 ++ apps/roam/src/utils/supabaseContext.ts | 7 ++-- apps/roam/src/utils/syncDgNodesToSupabase.ts | 37 ++----------------- .../upsertNodesAsContentWithEmbeddings.ts | 7 ++++ .../supabase/functions/create-space/index.ts | 17 +++++++-- 5 files changed, 31 insertions(+), 41 deletions(-) diff --git a/apps/roam/src/index.ts b/apps/roam/src/index.ts index 959ac3bb5..7e862999e 100644 --- a/apps/roam/src/index.ts +++ b/apps/roam/src/index.ts @@ -27,6 +27,7 @@ import { installDiscourseFloatingMenu, removeDiscourseFloatingMenu, } from "./components/DiscourseFloatingMenu"; +import { createOrUpdateDiscourseEmbedding } from "./utils/syncDgNodesToSupabase"; const initPostHog = () => { posthog.init("phc_SNMmBqwNfcEpNduQ41dBUjtGNEUEKAy6jTn63Fzsrax", { @@ -134,6 +135,9 @@ export default runExtension(async (onloadArgs) => { getDiscourseNodes: getDiscourseNodes, }; + // TODO: REMOVE AFTER TESTING + await createOrUpdateDiscourseEmbedding(onloadArgs.extensionAPI); + installDiscourseFloatingMenu(onloadArgs.extensionAPI); return { diff --git a/apps/roam/src/utils/supabaseContext.ts b/apps/roam/src/utils/supabaseContext.ts index 700126478..e66059f34 100644 --- a/apps/roam/src/utils/supabaseContext.ts +++ b/apps/roam/src/utils/supabaseContext.ts @@ -27,11 +27,10 @@ export type SupabaseContext = { let _contextCache: SupabaseContext | null = null; -const settingsConfigPageUid = getPageUidByPageTitle( - 
DISCOURSE_CONFIG_PAGE_TITLE, -); - const getOrCreateSpacePassword = () => { + const settingsConfigPageUid = getPageUidByPageTitle( + DISCOURSE_CONFIG_PAGE_TITLE, + ); const props = getBlockProps(settingsConfigPageUid); const existing: string | unknown = props["space-user-password"]; if (existing && typeof existing === "string") return existing; diff --git a/apps/roam/src/utils/syncDgNodesToSupabase.ts b/apps/roam/src/utils/syncDgNodesToSupabase.ts index 2feeee774..07b767efb 100644 --- a/apps/roam/src/utils/syncDgNodesToSupabase.ts +++ b/apps/roam/src/utils/syncDgNodesToSupabase.ts @@ -11,16 +11,12 @@ import { } from "./supabaseContext"; import { LocalContentDataInput } from "@repo/database/inputTypes"; import { RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; -import getDiscourseRelations from "./getDiscourseRelations"; import getDiscourseNodes, { DiscourseNode } from "./getDiscourseNodes"; import { discourseNodeBlockToLocalConcept, discourseNodeSchemaToLocalConcept, orderConceptsByDependency, - discourseRelationSchemaToLocalConcept, - discourseRelationDataToLocalConcept, } from "./conceptConversion"; -import getDiscourseRelationTriples from "./getDiscourseRelationTriples"; import { OnloadArgs } from "roamjs-components/types"; import { DGSupabaseClient } from "@repo/ui/lib/supabase/client"; import { fetchEmbeddingsForNodes } from "./upsertNodesAsContentWithEmbeddings"; @@ -208,16 +204,6 @@ export const convertDgToSupabaseConcepts = async ({ return discourseNodeSchemaToLocalConcept(context, node); }); - const relationSchemas = getDiscourseRelations(); - - const relationsToEmbed = relationSchemas.map((relation) => { - const localConcept = discourseRelationSchemaToLocalConcept( - context, - relation, - ); - return localConcept; - }); - const nodeBlockToLocalConcepts = nodesSince.map((node) => { const localConcept = discourseNodeBlockToLocalConcept(context, { nodeUid: node.source_local_id, @@ -227,28 +213,9 @@ export const convertDgToSupabaseConcepts = async 
({ return localConcept; }); - const relationTriples = getDiscourseRelationTriples(); - const relationLabelToId = Object.fromEntries( - relationSchemas.map((r) => [r.label, r.id]), - ); - const relationBlockToLocalConcepts = relationTriples - .map(({ relation, source, target }) => { - const relationSchemaUid = relationLabelToId[relation]; - if (!relationSchemaUid) { - return null; - } - return discourseRelationDataToLocalConcept(context, relationSchemaUid, { - source, - target, - }); - }) - .filter((x): x is NonNullable => x !== null); - const conceptsToUpsert = [ ...nodesTypesToLocalConcepts, - ...relationsToEmbed, ...nodeBlockToLocalConcepts, - ...relationBlockToLocalConcepts, ]; const { ordered } = orderConceptsByDependency(conceptsToUpsert); const { error } = await supabaseClient.rpc("upsert_concepts", { @@ -279,7 +246,9 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( let nodesWithEmbeddings: LocalContentDataInput[]; try { - nodesWithEmbeddings = await fetchEmbeddingsForNodes(allNodeInstancesAsLocalContent); + nodesWithEmbeddings = await fetchEmbeddingsForNodes( + allNodeInstancesAsLocalContent, + ); } catch (error) { const message = error instanceof Error ? 
error.message : String(error); console.error( diff --git a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts index f00704201..610347f0d 100644 --- a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts +++ b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts @@ -5,6 +5,7 @@ import { LocalContentDataInput } from "@repo/database/inputTypes"; import { DGSupabaseClient } from "@repo/ui/lib/supabase/client"; import { Json } from "@repo/database/types.gen"; import { nextApiRoot } from "@repo/ui/lib/execContext"; +import getCurrentUserDisplayName from "roamjs-components/queries/getCurrentUserDisplayName"; const EMBEDDING_BATCH_SIZE = 200; const EMBEDDING_MODEL = "openai_text_embedding_3_small_1536"; @@ -36,6 +37,12 @@ export const convertRoamNodeToLocalContent = ({ text: text, variant: variant, scale: "document", + document_inline: { + source_local_id: node.source_local_id, + created: new Date(node.created || Date.now()).toISOString(), + last_modified: new Date(node.last_modified || Date.now()).toISOString(), + author_local_id: node.author_local_id, + }, }; }); }; diff --git a/packages/database/supabase/functions/create-space/index.ts b/packages/database/supabase/functions/create-space/index.ts index ca4e31ddd..31e34bcbd 100644 --- a/packages/database/supabase/functions/create-space/index.ts +++ b/packages/database/supabase/functions/create-space/index.ts @@ -181,8 +181,19 @@ const processAndGetOrCreateSpace = async ( return result; }; +const corsHeaders = { + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Headers": + "authorization,x-client-info,apikey,content-type", +}; + // @ts-ignore Deno is not visible to the IDE Deno.serve(async (req) => { + // Handle pre-flight requests + if (req.method === "OPTIONS") { + return new Response("ok", { headers: corsHeaders }); + } + const input = await req.json(); // TODO: We should check whether the request comes from a vetted source, like // 
the roam or obsidian plugin. A combination of CSRF, headers, etc. @@ -193,7 +204,7 @@ Deno.serve(async (req) => { if (!url || !key) { return new Response("Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY", { status: 500, - headers: { "Content-Type": "application/json" }, + headers: { "Content-Type": "application/json", ...corsHeaders }, }); } const supabase: DGSupabaseClient = createClient(url, key); @@ -203,12 +214,12 @@ Deno.serve(async (req) => { const status = error.code === "invalid space" ? 400 : 500; return new Response(JSON.stringify(error), { status, - headers: { "Content-Type": "application/json" }, + headers: { "Content-Type": "application/json", ...corsHeaders }, }); } return new Response(JSON.stringify(data), { - headers: { "Content-Type": "application/json" }, + headers: { "Content-Type": "application/json", ...corsHeaders }, }); }); From 0f2938f48a88ae4daf2ede8743bb742678b2420d Mon Sep 17 00:00:00 2001 From: sid597 Date: Sat, 23 Aug 2025 00:00:29 +0530 Subject: [PATCH 07/15] bug fixes --- apps/roam/src/index.ts | 4 ++ apps/roam/src/utils/supabaseContext.ts | 7 ++-- apps/roam/src/utils/syncDgNodesToSupabase.ts | 37 ++----------------- .../upsertNodesAsContentWithEmbeddings.ts | 6 +++ .../supabase/functions/create-space/index.ts | 17 +++++++-- 5 files changed, 30 insertions(+), 41 deletions(-) diff --git a/apps/roam/src/index.ts b/apps/roam/src/index.ts index 959ac3bb5..7e862999e 100644 --- a/apps/roam/src/index.ts +++ b/apps/roam/src/index.ts @@ -27,6 +27,7 @@ import { installDiscourseFloatingMenu, removeDiscourseFloatingMenu, } from "./components/DiscourseFloatingMenu"; +import { createOrUpdateDiscourseEmbedding } from "./utils/syncDgNodesToSupabase"; const initPostHog = () => { posthog.init("phc_SNMmBqwNfcEpNduQ41dBUjtGNEUEKAy6jTn63Fzsrax", { @@ -134,6 +135,9 @@ export default runExtension(async (onloadArgs) => { getDiscourseNodes: getDiscourseNodes, }; + // TODO: REMOVE AFTER TESTING + await 
createOrUpdateDiscourseEmbedding(onloadArgs.extensionAPI); + installDiscourseFloatingMenu(onloadArgs.extensionAPI); return { diff --git a/apps/roam/src/utils/supabaseContext.ts b/apps/roam/src/utils/supabaseContext.ts index 700126478..e66059f34 100644 --- a/apps/roam/src/utils/supabaseContext.ts +++ b/apps/roam/src/utils/supabaseContext.ts @@ -27,11 +27,10 @@ export type SupabaseContext = { let _contextCache: SupabaseContext | null = null; -const settingsConfigPageUid = getPageUidByPageTitle( - DISCOURSE_CONFIG_PAGE_TITLE, -); - const getOrCreateSpacePassword = () => { + const settingsConfigPageUid = getPageUidByPageTitle( + DISCOURSE_CONFIG_PAGE_TITLE, + ); const props = getBlockProps(settingsConfigPageUid); const existing: string | unknown = props["space-user-password"]; if (existing && typeof existing === "string") return existing; diff --git a/apps/roam/src/utils/syncDgNodesToSupabase.ts b/apps/roam/src/utils/syncDgNodesToSupabase.ts index 2feeee774..07b767efb 100644 --- a/apps/roam/src/utils/syncDgNodesToSupabase.ts +++ b/apps/roam/src/utils/syncDgNodesToSupabase.ts @@ -11,16 +11,12 @@ import { } from "./supabaseContext"; import { LocalContentDataInput } from "@repo/database/inputTypes"; import { RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; -import getDiscourseRelations from "./getDiscourseRelations"; import getDiscourseNodes, { DiscourseNode } from "./getDiscourseNodes"; import { discourseNodeBlockToLocalConcept, discourseNodeSchemaToLocalConcept, orderConceptsByDependency, - discourseRelationSchemaToLocalConcept, - discourseRelationDataToLocalConcept, } from "./conceptConversion"; -import getDiscourseRelationTriples from "./getDiscourseRelationTriples"; import { OnloadArgs } from "roamjs-components/types"; import { DGSupabaseClient } from "@repo/ui/lib/supabase/client"; import { fetchEmbeddingsForNodes } from "./upsertNodesAsContentWithEmbeddings"; @@ -208,16 +204,6 @@ export const convertDgToSupabaseConcepts = async ({ return 
discourseNodeSchemaToLocalConcept(context, node); }); - const relationSchemas = getDiscourseRelations(); - - const relationsToEmbed = relationSchemas.map((relation) => { - const localConcept = discourseRelationSchemaToLocalConcept( - context, - relation, - ); - return localConcept; - }); - const nodeBlockToLocalConcepts = nodesSince.map((node) => { const localConcept = discourseNodeBlockToLocalConcept(context, { nodeUid: node.source_local_id, @@ -227,28 +213,9 @@ export const convertDgToSupabaseConcepts = async ({ return localConcept; }); - const relationTriples = getDiscourseRelationTriples(); - const relationLabelToId = Object.fromEntries( - relationSchemas.map((r) => [r.label, r.id]), - ); - const relationBlockToLocalConcepts = relationTriples - .map(({ relation, source, target }) => { - const relationSchemaUid = relationLabelToId[relation]; - if (!relationSchemaUid) { - return null; - } - return discourseRelationDataToLocalConcept(context, relationSchemaUid, { - source, - target, - }); - }) - .filter((x): x is NonNullable => x !== null); - const conceptsToUpsert = [ ...nodesTypesToLocalConcepts, - ...relationsToEmbed, ...nodeBlockToLocalConcepts, - ...relationBlockToLocalConcepts, ]; const { ordered } = orderConceptsByDependency(conceptsToUpsert); const { error } = await supabaseClient.rpc("upsert_concepts", { @@ -279,7 +246,9 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( let nodesWithEmbeddings: LocalContentDataInput[]; try { - nodesWithEmbeddings = await fetchEmbeddingsForNodes(allNodeInstancesAsLocalContent); + nodesWithEmbeddings = await fetchEmbeddingsForNodes( + allNodeInstancesAsLocalContent, + ); } catch (error) { const message = error instanceof Error ? 
error.message : String(error); console.error( diff --git a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts index f00704201..dbebc9572 100644 --- a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts +++ b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts @@ -36,6 +36,12 @@ export const convertRoamNodeToLocalContent = ({ text: text, variant: variant, scale: "document", + document_inline: { + source_local_id: node.source_local_id, + created: new Date(node.created || Date.now()).toISOString(), + last_modified: new Date(node.last_modified || Date.now()).toISOString(), + author_local_id: node.author_local_id, + }, }; }); }; diff --git a/packages/database/supabase/functions/create-space/index.ts b/packages/database/supabase/functions/create-space/index.ts index ca4e31ddd..31e34bcbd 100644 --- a/packages/database/supabase/functions/create-space/index.ts +++ b/packages/database/supabase/functions/create-space/index.ts @@ -181,8 +181,19 @@ const processAndGetOrCreateSpace = async ( return result; }; +const corsHeaders = { + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Headers": + "authorization,x-client-info,apikey,content-type", +}; + // @ts-ignore Deno is not visible to the IDE Deno.serve(async (req) => { + // Handle pre-flight requests + if (req.method === "OPTIONS") { + return new Response("ok", { headers: corsHeaders }); + } + const input = await req.json(); // TODO: We should check whether the request comes from a vetted source, like // the roam or obsidian plugin. A combination of CSRF, headers, etc. 
@@ -193,7 +204,7 @@ Deno.serve(async (req) => { if (!url || !key) { return new Response("Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY", { status: 500, - headers: { "Content-Type": "application/json" }, + headers: { "Content-Type": "application/json", ...corsHeaders }, }); } const supabase: DGSupabaseClient = createClient(url, key); @@ -203,12 +214,12 @@ Deno.serve(async (req) => { const status = error.code === "invalid space" ? 400 : 500; return new Response(JSON.stringify(error), { status, - headers: { "Content-Type": "application/json" }, + headers: { "Content-Type": "application/json", ...corsHeaders }, }); } return new Response(JSON.stringify(data), { - headers: { "Content-Type": "application/json" }, + headers: { "Content-Type": "application/json", ...corsHeaders }, }); }); From 4422b0a5ca28f3fea2e75f0e203ac279e7559f4b Mon Sep 17 00:00:00 2001 From: sid597 Date: Sat, 23 Aug 2025 00:04:29 +0530 Subject: [PATCH 08/15] unused import --- apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts index 610347f0d..dbebc9572 100644 --- a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts +++ b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts @@ -5,7 +5,6 @@ import { LocalContentDataInput } from "@repo/database/inputTypes"; import { DGSupabaseClient } from "@repo/ui/lib/supabase/client"; import { Json } from "@repo/database/types.gen"; import { nextApiRoot } from "@repo/ui/lib/execContext"; -import getCurrentUserDisplayName from "roamjs-components/queries/getCurrentUserDisplayName"; const EMBEDDING_BATCH_SIZE = 200; const EMBEDDING_MODEL = "openai_text_embedding_3_small_1536"; From ec8109d39559078fbdf979ca5c8c03e476349458 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Sat, 23 Aug 2025 10:21:30 -0400 Subject: [PATCH 09/15] Add an input type for platform accounts Add functions 
to upsert platform accounts (individually or in bulk) Keep the old create_account_in_space function as a shim for now. Use the new upsert_account in upsert_documents and upsert_content, allowing for more complete inline information. --- .../20250823135620_bulk_account_upsert.sql | 351 ++++++++++++++++++ .../database/supabase/schemas/account.sql | 77 +++- .../database/supabase/schemas/content.sql | 32 +- packages/database/types.gen.ts | 30 +- 4 files changed, 461 insertions(+), 29 deletions(-) create mode 100644 packages/database/supabase/migrations/20250823135620_bulk_account_upsert.sql diff --git a/packages/database/supabase/migrations/20250823135620_bulk_account_upsert.sql b/packages/database/supabase/migrations/20250823135620_bulk_account_upsert.sql new file mode 100644 index 000000000..b48fcdc10 --- /dev/null +++ b/packages/database/supabase/migrations/20250823135620_bulk_account_upsert.sql @@ -0,0 +1,351 @@ +CREATE TYPE public.account_local_input AS ( +-- PlatformAccount columns +name VARCHAR, +account_local_id VARCHAR, +-- local values +email VARCHAR, +email_trusted BOOLEAN, +space_editor BOOLEAN +) ; + +CREATE OR REPLACE FUNCTION public.upsert_account_in_space( + space_id_ BIGINT, + local_account public.account_local_input +) RETURNS BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + platform_ public."Platform"; + account_id_ BIGINT; +BEGIN + SELECT platform INTO STRICT platform_ FROM public."Space" WHERE id = space_id_; + INSERT INTO public."PlatformAccount" AS pa ( + account_local_id, name, platform + ) VALUES ( + local_account.account_local_id, local_account.name, platform_ + ) ON CONFLICT (account_local_id, platform) DO UPDATE SET + name = coalesce(local_account.name, pa.name) + RETURNING id INTO STRICT account_id_; + INSERT INTO public."SpaceAccess" as sa (space_id, account_id, editor) values (space_id_, account_id_, COALESCE(local_account.space_editor, true)) + ON CONFLICT (space_id, account_id) + DO UPDATE SET editor = 
COALESCE(local_account.space_editor, sa.editor, true); + IF local_account.email IS NOT NULL THEN + -- TODO: how to distinguish basic untrusted from platform placeholder email? + INSERT INTO public."AgentIdentifier" as ai (account_id, value, identifier_type, trusted) VALUES (account_id_, local_account.email, 'email', COALESCE(local_account.email_trusted, false)) + ON CONFLICT (value, identifier_type, account_id) + DO UPDATE SET trusted = COALESCE(local_account.email_trusted, ai.trusted, false); + END IF; + RETURN account_id_; +END; +$$; + +CREATE OR REPLACE FUNCTION public.upsert_accounts_in_space ( +space_id_ BIGINT, +accounts JSONB +) RETURNS SETOF BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + platform_ public."Platform"; + account_id_ BIGINT; + account_row JSONB; + local_account public.account_local_input; +BEGIN + SELECT platform INTO STRICT platform_ FROM public."Space" WHERE id = space_id_; + FOR account_row IN SELECT * FROM jsonb_array_elements(accounts) + LOOP + local_account := jsonb_populate_record(NULL::public.account_local_input, account_row); + RETURN NEXT public.upsert_account_in_space(space_id, local_account); + END LOOP; +END; +$$; + +-- legacy +CREATE OR REPLACE FUNCTION public.create_account_in_space ( +space_id_ BIGINT, +account_local_id_ varchar, +name_ varchar, +email_ varchar = null, +email_trusted boolean = true, +editor_ boolean = true +) RETURNS BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE sql +AS $$ + SELECT public.upsert_account_in_space(space_id_, ROW(name_, account_local_id_ ,email_, email_trusted, editor_)::public.account_local_input); +$$ ; + +ALTER TYPE public.document_local_input ALTER ATTRIBUTE author_inline TYPE public.account_local_input ; + +ALTER TYPE public.content_local_input ALTER ATTRIBUTE author_inline TYPE public.account_local_input ; +ALTER TYPE public.content_local_input ALTER ATTRIBUTE creator_inline TYPE public.account_local_input ; + +CREATE OR REPLACE FUNCTION 
public._local_document_to_db_document(data public.document_local_input) +RETURNS public."Document" LANGUAGE plpgsql STABLE +SET search_path = '' +AS $$ +DECLARE + document public."Document"%ROWTYPE; + reference_content JSONB := jsonb_build_object(); + key varchar; + value JSONB; + ref_single_val BIGINT; + ref_array_val BIGINT[]; +BEGIN + document := jsonb_populate_record(NULL::public."Document", to_jsonb(data)); + IF data.author_local_id IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = data.author_local_id INTO document.author_id; + ELSIF account_local_id(author_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(author_inline(data)) INTO document.author_id; + END IF; + IF data.space_url IS NOT NULL THEN + SELECT id FROM public."Space" + WHERE url = data.space_url INTO document.space_id; + END IF; + -- now avoid null defaults + IF document.metadata IS NULL then + document.metadata := '{}'; + END IF; + RETURN document; +END; +$$; + +CREATE OR REPLACE FUNCTION public._local_content_to_db_content(data public.content_local_input) +RETURNS public."Content" STABLE +SET search_path = '' +LANGUAGE plpgsql AS $$ +DECLARE + content public."Content"%ROWTYPE; + reference_content JSONB := jsonb_build_object(); + key varchar; + value JSONB; + ref_single_val BIGINT; + ref_array_val BIGINT[]; +BEGIN + content := jsonb_populate_record(NULL::public."Content", to_jsonb(data)); + IF data.document_local_id IS NOT NULL THEN + SELECT id FROM public."Document" + WHERE source_local_id = data.document_local_id INTO content.document_id; + END IF; + IF data.creator_local_id IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = data.creator_local_id INTO content.creator_id; + ELSIF account_local_id(creator_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(creator_inline(data)) INTO 
content.creator_id; + END IF; + IF data.author_local_id IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = data.author_local_id INTO content.author_id; + ELSIF account_local_id(author_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(author_inline(data)) INTO content.author_id; + END IF; + IF data.part_of_local_id IS NOT NULL THEN + SELECT id FROM public."Content" + WHERE source_local_id = data.part_of_local_id INTO content.part_of_id; + END IF; + IF data.space_url IS NOT NULL THEN + SELECT id FROM public."Space" + WHERE url = data.space_url INTO content.space_id; + END IF; + -- now avoid null defaults + IF content.metadata IS NULL then + content.metadata := '{}'; + END IF; + RETURN content; +END; +$$; + +CREATE OR REPLACE FUNCTION public.upsert_documents(v_space_id bigint, data jsonb) +RETURNS SETOF BIGINT +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + v_platform public."Platform"; + local_document public.document_local_input; + db_document public."Document"%ROWTYPE; + document_row JSONB; + upsert_id BIGINT; +BEGIN + SELECT platform INTO STRICT v_platform FROM public."Space" WHERE id=v_space_id; + FOR document_row IN SELECT * FROM jsonb_array_elements(data) + LOOP + local_document := jsonb_populate_record(NULL::public.document_local_input, document_row); + local_document.space_id := v_space_id; + IF account_local_id(author_inline(local_document)) IS NOT NULL THEN + SELECT public.create_account_in_space( + v_space_id, + account_local_id(author_inline(local_document)), + name(author_inline(local_document)) + ) INTO STRICT upsert_id; + local_document.author_id := upsert_id; + END IF; + db_document := public._local_document_to_db_document(local_document); + IF (db_document.author_id IS NULL AND author_inline(local_document) IS NOT NULL) THEN + db_document.author_id := upsert_account_in_space(v_space_id, author_inline(local_document)); + END IF; + INSERT 
INTO public."Document" ( + space_id, + source_local_id, + url, + created, + metadata, + last_modified, + author_id, + contents + ) VALUES ( + db_document.space_id, + db_document.source_local_id, + db_document.url, + db_document.created, + db_document.metadata, + db_document.last_modified, + db_document.author_id, + db_document.contents + ) + ON CONFLICT (space_id, source_local_id) DO UPDATE SET + author_id = COALESCE(db_document.author_id, EXCLUDED.author_id), + created = COALESCE(db_document.created, EXCLUDED.created), + last_modified = COALESCE(db_document.last_modified, EXCLUDED.last_modified), + url = COALESCE(db_document.url, EXCLUDED.url), + metadata = COALESCE(db_document.metadata, EXCLUDED.metadata) + RETURNING id INTO STRICT upsert_id; + RETURN NEXT upsert_id; + END LOOP; +END; +$$; + +CREATE OR REPLACE FUNCTION public.upsert_content(v_space_id bigint, data jsonb, v_creator_id BIGINT, content_as_document boolean DEFAULT true) +RETURNS SETOF BIGINT +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + v_platform public."Platform"; + db_document public."Document"%ROWTYPE; + document_id BIGINT; + local_content public.content_local_input; + db_content public."Content"%ROWTYPE; + content_row JSONB; + upsert_id BIGINT; +BEGIN + SELECT platform INTO STRICT v_platform FROM public."Space" WHERE id=v_space_id; + FOR content_row IN SELECT * FROM jsonb_array_elements(data) + LOOP + local_content := jsonb_populate_record(NULL::public.content_local_input, content_row); + local_content.space_id := v_space_id; + db_content := public._local_content_to_db_content(local_content); + IF account_local_id(author_inline(local_content)) IS NOT NULL THEN + SELECT public.create_account_in_space( + v_space_id, + account_local_id(author_inline(local_content)), + name(author_inline(local_content)) + ) INTO STRICT upsert_id; + db_content.author_id := upsert_id; + END IF; + IF account_local_id(creator_inline(local_content)) IS NOT NULL THEN + SELECT public.create_account_in_space( + 
v_space_id, + account_local_id(creator_inline(local_content)), + name(creator_inline(local_content)) + ) INTO STRICT upsert_id; + db_content.creator_id := upsert_id; + END IF; + IF content_as_document THEN + db_content.scale = 'document'; + END IF; + IF content_as_document AND document_id(db_content) IS NULL AND source_local_id(document_inline(local_content)) IS NULL THEN + local_content.document_inline.space_id := v_space_id; + local_content.document_inline.source_local_id := db_content.source_local_id; + local_content.document_inline.last_modified := db_content.last_modified; + local_content.document_inline.created := db_content.created; + local_content.document_inline.author_id := db_content.author_id; + END IF; + IF source_local_id(document_inline(local_content)) IS NOT NULL THEN + db_document := public._local_document_to_db_document(document_inline(local_content)); + IF (db_document.author_id IS NULL AND author_inline(local_content) IS NOT NULL) THEN + db_document.author_id := upsert_account_in_space(v_space_id, author_inline(local_content)); + END IF; + INSERT INTO public."Document" ( + space_id, + source_local_id, + url, + created, + metadata, + last_modified, + author_id, + contents + ) VALUES ( + COALESCE(db_document.space_id, v_space_id), + db_document.source_local_id, + db_document.url, + db_document.created, + COALESCE(db_document.metadata, '{}'::jsonb), + db_document.last_modified, + db_document.author_id, + db_document.contents + ) + ON CONFLICT (space_id, source_local_id) DO UPDATE SET + url = COALESCE(db_document.url, EXCLUDED.url), + created = COALESCE(db_document.created, EXCLUDED.created), + metadata = COALESCE(db_document.metadata, EXCLUDED.metadata), + last_modified = COALESCE(db_document.last_modified, EXCLUDED.last_modified), + author_id = COALESCE(db_document.author_id, EXCLUDED.author_id), + contents = COALESCE(db_document.contents, EXCLUDED.contents) + RETURNING id INTO STRICT document_id; + db_content.document_id := document_id; + END IF; 
+ INSERT INTO public."Content" ( + document_id, + source_local_id, + variant, + author_id, + creator_id, + created, + text, + metadata, + scale, + space_id, + last_modified, + part_of_id + ) VALUES ( + db_content.document_id, + db_content.source_local_id, + COALESCE(db_content.variant, 'direct'::public."ContentVariant"), + db_content.author_id, + db_content.creator_id, + db_content.created, + db_content.text, + COALESCE(db_content.metadata, '{}'::jsonb), + db_content.scale, + db_content.space_id, + db_content.last_modified, + db_content.part_of_id + ) + ON CONFLICT (space_id, source_local_id, variant) DO UPDATE SET + document_id = COALESCE(db_content.document_id, EXCLUDED.document_id), + author_id = COALESCE(db_content.author_id, EXCLUDED.author_id), + creator_id = COALESCE(db_content.creator_id, EXCLUDED.creator_id), + created = COALESCE(db_content.created, EXCLUDED.created), + text = COALESCE(db_content.text, EXCLUDED.text), + metadata = COALESCE(db_content.metadata, EXCLUDED.metadata), + scale = COALESCE(db_content.scale, EXCLUDED.scale), + last_modified = COALESCE(db_content.last_modified, EXCLUDED.last_modified), + part_of_id = COALESCE(db_content.part_of_id, EXCLUDED.part_of_id) + RETURNING id INTO STRICT upsert_id; + IF model(embedding_inline(local_content)) IS NOT NULL THEN + PERFORM public.upsert_content_embedding(upsert_id, model(embedding_inline(local_content)), vector(embedding_inline(local_content))); + END IF; + RETURN NEXT upsert_id; + END LOOP; +END; +$$; diff --git a/packages/database/supabase/schemas/account.sql b/packages/database/supabase/schemas/account.sql index d06f765ce..5920df38c 100644 --- a/packages/database/supabase/schemas/account.sql +++ b/packages/database/supabase/schemas/account.sql @@ -98,13 +98,19 @@ GRANT ALL ON TABLE public."SpaceAccess" TO anon; GRANT ALL ON TABLE public."SpaceAccess" TO authenticated; GRANT ALL ON TABLE public."SpaceAccess" TO service_role; -CREATE OR REPLACE FUNCTION public.create_account_in_space( +CREATE 
TYPE public.account_local_input AS ( + -- PlatformAccount columns + name VARCHAR, + account_local_id VARCHAR, + -- local values + email VARCHAR, + email_trusted BOOLEAN, + space_editor BOOLEAN +); + +CREATE OR REPLACE FUNCTION public.upsert_account_in_space( space_id_ BIGINT, - account_local_id_ varchar, - name_ varchar, - email_ varchar = null, - email_trusted boolean = true, - editor_ boolean = true + local_account public.account_local_input ) RETURNS BIGINT SECURITY DEFINER SET search_path = '' @@ -114,26 +120,67 @@ DECLARE platform_ public."Platform"; account_id_ BIGINT; BEGIN - SELECT platform INTO platform_ STRICT FROM public."Space" WHERE id = space_id_; + SELECT platform INTO STRICT platform_ FROM public."Space" WHERE id = space_id_; INSERT INTO public."PlatformAccount" AS pa ( account_local_id, name, platform ) VALUES ( - account_local_id_, name_, platform_ + local_account.account_local_id, local_account.name, platform_ ) ON CONFLICT (account_local_id, platform) DO UPDATE SET - name = coalesce(name_, pa.name) + name = coalesce(local_account.name, pa.name) RETURNING id INTO STRICT account_id_; - INSERT INTO public."SpaceAccess" (space_id, account_id, editor) values (space_id_, account_id_, editor_) + INSERT INTO public."SpaceAccess" as sa (space_id, account_id, editor) values (space_id_, account_id_, COALESCE(local_account.space_editor, true)) ON CONFLICT (space_id, account_id) - DO UPDATE SET editor = editor_; - IF email_ IS NOT NULL THEN - INSERT INTO public."AgentIdentifier" (account_id, value, identifier_type, trusted) VALUES (account_id_, email_, 'email', email_trusted) - ON CONFLICT (value, identifier_type, account_id) - DO UPDATE SET trusted = email_trusted; + DO UPDATE SET editor = COALESCE(local_account.space_editor, sa.editor, true); + IF local_account.email IS NOT NULL THEN + -- TODO: how to distinguish basic untrusted from platform placeholder email? 
+ INSERT INTO public."AgentIdentifier" as ai (account_id, value, identifier_type, trusted) VALUES (account_id_, local_account.email, 'email', COALESCE(local_account.email_trusted, false)) + ON CONFLICT (value, identifier_type, account_id) + DO UPDATE SET trusted = COALESCE(local_account.email_trusted, ai.trusted, false); END IF; RETURN account_id_; END; $$; +CREATE OR REPLACE FUNCTION public.upsert_accounts_in_space ( +space_id_ BIGINT, +accounts JSONB +) RETURNS SETOF BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + platform_ public."Platform"; + account_id_ BIGINT; + account_row JSONB; + local_account public.account_local_input; +BEGIN + SELECT platform INTO STRICT platform_ FROM public."Space" WHERE id = space_id_; + FOR account_row IN SELECT * FROM jsonb_array_elements(accounts) + LOOP + local_account := jsonb_populate_record(NULL::public.account_local_input, account_row); + RETURN NEXT public.upsert_account_in_space(space_id, local_account); + END LOOP; +END; +$$; + +-- legacy +CREATE OR REPLACE FUNCTION public.create_account_in_space( + space_id_ BIGINT, + account_local_id_ varchar, + name_ varchar, + email_ varchar = null, + email_trusted boolean = true, + editor_ boolean = true +) RETURNS BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE sql +AS $$ + SELECT public.upsert_account_in_space(space_id_, ROW(name_, account_local_id_ ,email_, email_trusted, editor_)::public.account_local_input); +$$; + + CREATE OR REPLACE FUNCTION public.my_account(account_id BIGINT) RETURNS boolean STABLE SECURITY DEFINER SET search_path = '' diff --git a/packages/database/supabase/schemas/content.sql b/packages/database/supabase/schemas/content.sql index cde97f9cc..8bc6382c4 100644 --- a/packages/database/supabase/schemas/content.sql +++ b/packages/database/supabase/schemas/content.sql @@ -175,7 +175,7 @@ CREATE TYPE public.document_local_input AS ( author_local_id character varying, space_url character varying, -- inline values - 
author_inline public."PlatformAccount" + author_inline public.account_local_input ); CREATE TYPE public.inline_embedding_input AS ( @@ -187,7 +187,6 @@ CREATE TYPE public.content_local_input AS ( -- content columns document_id bigint, source_local_id character varying, - variant public."ContentVariant", author_id bigint, creator_id bigint, created timestamp without time zone, @@ -205,9 +204,10 @@ CREATE TYPE public.content_local_input AS ( space_url character varying, -- inline values document_inline public.document_local_input, - author_inline public."PlatformAccount", - creator_inline public."PlatformAccount", - embedding_inline public.inline_embedding_input + author_inline public.account_local_input, + creator_inline public.account_local_input, + embedding_inline public.inline_embedding_input, + variant public."ContentVariant" ); @@ -228,6 +228,9 @@ BEGIN IF data.author_local_id IS NOT NULL THEN SELECT id FROM public."PlatformAccount" WHERE account_local_id = data.author_local_id INTO document.author_id; + ELSIF account_local_id(author_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(author_inline(data)) INTO document.author_id; END IF; IF data.space_url IS NOT NULL THEN SELECT id FROM public."Space" @@ -264,10 +267,16 @@ BEGIN IF data.creator_local_id IS NOT NULL THEN SELECT id FROM public."PlatformAccount" WHERE account_local_id = data.creator_local_id INTO content.creator_id; + ELSIF account_local_id(creator_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(creator_inline(data)) INTO content.creator_id; END IF; IF data.author_local_id IS NOT NULL THEN SELECT id FROM public."PlatformAccount" WHERE account_local_id = data.author_local_id INTO content.author_id; + ELSIF account_local_id(author_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(author_inline(data)) 
INTO content.author_id; END IF; IF data.part_of_local_id IS NOT NULL THEN SELECT id FROM public."Content" @@ -315,6 +324,9 @@ BEGIN local_document.author_id := upsert_id; END IF; db_document := public._local_document_to_db_document(local_document); + IF (db_document.author_id IS NULL AND author_inline(local_document) IS NOT NULL) THEN + db_document.author_id := upsert_account_in_space(v_space_id, author_inline(local_document)); + END IF; INSERT INTO public."Document" ( space_id, source_local_id, @@ -374,6 +386,7 @@ COMMENT ON FUNCTION public.upsert_content_embedding IS 'single content embedding -- This may trigger creation of PlatformAccounts and Documents appropriately. CREATE OR REPLACE FUNCTION public.upsert_content(v_space_id bigint, data jsonb, v_creator_id BIGINT, content_as_document boolean DEFAULT true) RETURNS SETOF BIGINT +SET search_path = '' LANGUAGE plpgsql AS $$ DECLARE @@ -397,7 +410,7 @@ BEGIN account_local_id(author_inline(local_content)), name(author_inline(local_content)) ) INTO STRICT upsert_id; - local_content.author_id := upsert_id; + db_content.author_id := upsert_id; END IF; IF account_local_id(creator_inline(local_content)) IS NOT NULL THEN SELECT public.create_account_in_space( @@ -405,7 +418,7 @@ BEGIN account_local_id(creator_inline(local_content)), name(creator_inline(local_content)) ) INTO STRICT upsert_id; - local_content.creator_id := upsert_id; + db_content.creator_id := upsert_id; END IF; IF content_as_document THEN db_content.scale = 'document'; @@ -418,7 +431,10 @@ BEGIN local_content.document_inline.author_id := db_content.author_id; END IF; IF source_local_id(document_inline(local_content)) IS NOT NULL THEN - db_document := _local_document_to_db_document(document_inline(local_content)); + db_document := public._local_document_to_db_document(document_inline(local_content)); + IF (db_document.author_id IS NULL AND author_inline(local_content) IS NOT NULL) THEN + db_document.author_id := upsert_account_in_space(v_space_id, 
author_inline(local_content)); + END IF; INSERT INTO public."Document" ( space_id, source_local_id, diff --git a/packages/database/types.gen.ts b/packages/database/types.gen.ts index 372814d9d..3ace923cf 100644 --- a/packages/database/types.gen.ts +++ b/packages/database/types.gen.ts @@ -639,10 +639,10 @@ export type Database = { query_embedding: string } Returns: { + content_id: number + roam_uid: string similarity: number text_content: string - roam_uid: string - content_id: number }[] } match_embeddings_for_subset_nodes: { @@ -650,8 +650,8 @@ export type Database = { Returns: { content_id: number roam_uid: string - text_content: string similarity: number + text_content: string }[] } my_account: { @@ -672,6 +672,17 @@ export type Database = { Args: { p_account_id: number } Returns: boolean } + upsert_account_in_space: { + Args: { + local_account: Database["public"]["CompositeTypes"]["account_local_input"] + space_id_: number + } + Returns: number + } + upsert_accounts_in_space: { + Args: { accounts: Json; space_id_: number } + Returns: number[] + } upsert_concepts: { Args: { data: Json; v_space_id: number } Returns: number[] @@ -745,6 +756,13 @@ export type Database = { task_status: "active" | "timeout" | "complete" | "failed" } CompositeTypes: { + account_local_input: { + name: string | null + account_local_id: string | null + email: string | null + email_trusted: boolean | null + space_editor: boolean | null + } concept_local_input: { epistemic_status: Database["public"]["Enums"]["EpistemicStatus"] | null name: string | null @@ -785,10 +803,10 @@ export type Database = { | Database["public"]["CompositeTypes"]["document_local_input"] | null author_inline: - | Database["public"]["Tables"]["PlatformAccount"]["Row"] + | Database["public"]["CompositeTypes"]["account_local_input"] | null creator_inline: - | Database["public"]["Tables"]["PlatformAccount"]["Row"] + | Database["public"]["CompositeTypes"]["account_local_input"] | null embedding_inline: | 
Database["public"]["CompositeTypes"]["inline_embedding_input"] @@ -807,7 +825,7 @@ export type Database = { author_local_id: string | null space_url: string | null author_inline: - | Database["public"]["Tables"]["PlatformAccount"]["Row"] + | Database["public"]["CompositeTypes"]["account_local_input"] | null } inline_embedding_input: { From bdb01808bb9471dc83429d16b6a3bc297a5c55ec Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Sat, 23 Aug 2025 10:21:30 -0400 Subject: [PATCH 10/15] Add an input type for platform accounts Add functions to upsert platform accounts (individually or in bulk) Keep the old create_account_in_space function as a shim for now. Use the new upsert_account in upsert_documents and upsert_content, allowing for more complete inline information. --- packages/database/src/dbTypes.ts | 24 +- .../20250823135620_bulk_account_upsert.sql | 351 ++++++++++++++++++ .../database/supabase/schemas/account.sql | 77 +++- .../database/supabase/schemas/content.sql | 32 +- 4 files changed, 458 insertions(+), 26 deletions(-) create mode 100644 packages/database/supabase/migrations/20250823135620_bulk_account_upsert.sql diff --git a/packages/database/src/dbTypes.ts b/packages/database/src/dbTypes.ts index 172fecf34..05b113b06 100644 --- a/packages/database/src/dbTypes.ts +++ b/packages/database/src/dbTypes.ts @@ -673,6 +673,17 @@ export type Database = { Args: { p_account_id: number } Returns: boolean } + upsert_account_in_space: { + Args: { + local_account: Database["public"]["CompositeTypes"]["account_local_input"] + space_id_: number + } + Returns: number + } + upsert_accounts_in_space: { + Args: { accounts: Json; space_id_: number } + Returns: number[] + } upsert_concepts: { Args: { data: Json; v_space_id: number } Returns: number[] @@ -746,6 +757,13 @@ export type Database = { task_status: "active" | "timeout" | "complete" | "failed" } CompositeTypes: { + account_local_input: { + name: string | null + account_local_id: string | null + email: string | 
null + email_trusted: boolean | null + space_editor: boolean | null + } concept_local_input: { epistemic_status: Database["public"]["Enums"]["EpistemicStatus"] | null name: string | null @@ -786,10 +804,10 @@ export type Database = { | Database["public"]["CompositeTypes"]["document_local_input"] | null author_inline: - | Database["public"]["Tables"]["PlatformAccount"]["Row"] + | Database["public"]["CompositeTypes"]["account_local_input"] | null creator_inline: - | Database["public"]["Tables"]["PlatformAccount"]["Row"] + | Database["public"]["CompositeTypes"]["account_local_input"] | null embedding_inline: | Database["public"]["CompositeTypes"]["inline_embedding_input"] @@ -808,7 +826,7 @@ export type Database = { author_local_id: string | null space_url: string | null author_inline: - | Database["public"]["Tables"]["PlatformAccount"]["Row"] + | Database["public"]["CompositeTypes"]["account_local_input"] | null } inline_embedding_input: { diff --git a/packages/database/supabase/migrations/20250823135620_bulk_account_upsert.sql b/packages/database/supabase/migrations/20250823135620_bulk_account_upsert.sql new file mode 100644 index 000000000..b48fcdc10 --- /dev/null +++ b/packages/database/supabase/migrations/20250823135620_bulk_account_upsert.sql @@ -0,0 +1,351 @@ +CREATE TYPE public.account_local_input AS ( +-- PlatformAccount columns +name VARCHAR, +account_local_id VARCHAR, +-- local values +email VARCHAR, +email_trusted BOOLEAN, +space_editor BOOLEAN +) ; + +CREATE OR REPLACE FUNCTION public.upsert_account_in_space( + space_id_ BIGINT, + local_account public.account_local_input +) RETURNS BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + platform_ public."Platform"; + account_id_ BIGINT; +BEGIN + SELECT platform INTO STRICT platform_ FROM public."Space" WHERE id = space_id_; + INSERT INTO public."PlatformAccount" AS pa ( + account_local_id, name, platform + ) VALUES ( + local_account.account_local_id, local_account.name, 
platform_ + ) ON CONFLICT (account_local_id, platform) DO UPDATE SET + name = coalesce(local_account.name, pa.name) + RETURNING id INTO STRICT account_id_; + INSERT INTO public."SpaceAccess" as sa (space_id, account_id, editor) values (space_id_, account_id_, COALESCE(local_account.space_editor, true)) + ON CONFLICT (space_id, account_id) + DO UPDATE SET editor = COALESCE(local_account.space_editor, sa.editor, true); + IF local_account.email IS NOT NULL THEN + -- TODO: how to distinguish basic untrusted from platform placeholder email? + INSERT INTO public."AgentIdentifier" as ai (account_id, value, identifier_type, trusted) VALUES (account_id_, local_account.email, 'email', COALESCE(local_account.email_trusted, false)) + ON CONFLICT (value, identifier_type, account_id) + DO UPDATE SET trusted = COALESCE(local_account.email_trusted, ai.trusted, false); + END IF; + RETURN account_id_; +END; +$$; + +CREATE OR REPLACE FUNCTION public.upsert_accounts_in_space ( +space_id_ BIGINT, +accounts JSONB +) RETURNS SETOF BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + platform_ public."Platform"; + account_id_ BIGINT; + account_row JSONB; + local_account public.account_local_input; +BEGIN + SELECT platform INTO STRICT platform_ FROM public."Space" WHERE id = space_id_; + FOR account_row IN SELECT * FROM jsonb_array_elements(accounts) + LOOP + local_account := jsonb_populate_record(NULL::public.account_local_input, account_row); + RETURN NEXT public.upsert_account_in_space(space_id_, local_account); + END LOOP; +END; +$$; + +-- legacy +CREATE OR REPLACE FUNCTION public.create_account_in_space ( +space_id_ BIGINT, +account_local_id_ varchar, +name_ varchar, +email_ varchar = null, +email_trusted boolean = true, +editor_ boolean = true +) RETURNS BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE sql +AS $$ + SELECT public.upsert_account_in_space(space_id_, ROW(name_, account_local_id_ ,email_, email_trusted, 
editor_)::public.account_local_input); +$$ ; + +ALTER TYPE public.document_local_input ALTER ATTRIBUTE author_inline TYPE public.account_local_input ; + +ALTER TYPE public.content_local_input ALTER ATTRIBUTE author_inline TYPE public.account_local_input ; +ALTER TYPE public.content_local_input ALTER ATTRIBUTE creator_inline TYPE public.account_local_input ; + +CREATE OR REPLACE FUNCTION public._local_document_to_db_document(data public.document_local_input) +RETURNS public."Document" LANGUAGE plpgsql STABLE +SET search_path = '' +AS $$ +DECLARE + document public."Document"%ROWTYPE; + reference_content JSONB := jsonb_build_object(); + key varchar; + value JSONB; + ref_single_val BIGINT; + ref_array_val BIGINT[]; +BEGIN + document := jsonb_populate_record(NULL::public."Document", to_jsonb(data)); + IF data.author_local_id IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = data.author_local_id INTO document.author_id; + ELSIF account_local_id(author_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(author_inline(data)) INTO document.author_id; + END IF; + IF data.space_url IS NOT NULL THEN + SELECT id FROM public."Space" + WHERE url = data.space_url INTO document.space_id; + END IF; + -- now avoid null defaults + IF document.metadata IS NULL then + document.metadata := '{}'; + END IF; + RETURN document; +END; +$$; + +CREATE OR REPLACE FUNCTION public._local_content_to_db_content(data public.content_local_input) +RETURNS public."Content" STABLE +SET search_path = '' +LANGUAGE plpgsql AS $$ +DECLARE + content public."Content"%ROWTYPE; + reference_content JSONB := jsonb_build_object(); + key varchar; + value JSONB; + ref_single_val BIGINT; + ref_array_val BIGINT[]; +BEGIN + content := jsonb_populate_record(NULL::public."Content", to_jsonb(data)); + IF data.document_local_id IS NOT NULL THEN + SELECT id FROM public."Document" + WHERE source_local_id = 
data.document_local_id INTO content.document_id; + END IF; + IF data.creator_local_id IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = data.creator_local_id INTO content.creator_id; + ELSIF account_local_id(creator_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(creator_inline(data)) INTO content.creator_id; + END IF; + IF data.author_local_id IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = data.author_local_id INTO content.author_id; + ELSIF account_local_id(author_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(author_inline(data)) INTO content.author_id; + END IF; + IF data.part_of_local_id IS NOT NULL THEN + SELECT id FROM public."Content" + WHERE source_local_id = data.part_of_local_id INTO content.part_of_id; + END IF; + IF data.space_url IS NOT NULL THEN + SELECT id FROM public."Space" + WHERE url = data.space_url INTO content.space_id; + END IF; + -- now avoid null defaults + IF content.metadata IS NULL then + content.metadata := '{}'; + END IF; + RETURN content; +END; +$$; + +CREATE OR REPLACE FUNCTION public.upsert_documents(v_space_id bigint, data jsonb) +RETURNS SETOF BIGINT +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + v_platform public."Platform"; + local_document public.document_local_input; + db_document public."Document"%ROWTYPE; + document_row JSONB; + upsert_id BIGINT; +BEGIN + SELECT platform INTO STRICT v_platform FROM public."Space" WHERE id=v_space_id; + FOR document_row IN SELECT * FROM jsonb_array_elements(data) + LOOP + local_document := jsonb_populate_record(NULL::public.document_local_input, document_row); + local_document.space_id := v_space_id; + IF account_local_id(author_inline(local_document)) IS NOT NULL THEN + SELECT public.create_account_in_space( + v_space_id, + 
account_local_id(author_inline(local_document)), + name(author_inline(local_document)) + ) INTO STRICT upsert_id; + local_document.author_id := upsert_id; + END IF; + db_document := public._local_document_to_db_document(local_document); + IF (db_document.author_id IS NULL AND author_inline(local_document) IS NOT NULL) THEN + db_document.author_id := upsert_account_in_space(v_space_id, author_inline(local_document)); + END IF; + INSERT INTO public."Document" ( + space_id, + source_local_id, + url, + created, + metadata, + last_modified, + author_id, + contents + ) VALUES ( + db_document.space_id, + db_document.source_local_id, + db_document.url, + db_document.created, + db_document.metadata, + db_document.last_modified, + db_document.author_id, + db_document.contents + ) + ON CONFLICT (space_id, source_local_id) DO UPDATE SET + author_id = COALESCE(db_document.author_id, EXCLUDED.author_id), + created = COALESCE(db_document.created, EXCLUDED.created), + last_modified = COALESCE(db_document.last_modified, EXCLUDED.last_modified), + url = COALESCE(db_document.url, EXCLUDED.url), + metadata = COALESCE(db_document.metadata, EXCLUDED.metadata) + RETURNING id INTO STRICT upsert_id; + RETURN NEXT upsert_id; + END LOOP; +END; +$$; + +CREATE OR REPLACE FUNCTION public.upsert_content(v_space_id bigint, data jsonb, v_creator_id BIGINT, content_as_document boolean DEFAULT true) +RETURNS SETOF BIGINT +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + v_platform public."Platform"; + db_document public."Document"%ROWTYPE; + document_id BIGINT; + local_content public.content_local_input; + db_content public."Content"%ROWTYPE; + content_row JSONB; + upsert_id BIGINT; +BEGIN + SELECT platform INTO STRICT v_platform FROM public."Space" WHERE id=v_space_id; + FOR content_row IN SELECT * FROM jsonb_array_elements(data) + LOOP + local_content := jsonb_populate_record(NULL::public.content_local_input, content_row); + local_content.space_id := v_space_id; + db_content := 
public._local_content_to_db_content(local_content); + IF account_local_id(author_inline(local_content)) IS NOT NULL THEN + SELECT public.create_account_in_space( + v_space_id, + account_local_id(author_inline(local_content)), + name(author_inline(local_content)) + ) INTO STRICT upsert_id; + db_content.author_id := upsert_id; + END IF; + IF account_local_id(creator_inline(local_content)) IS NOT NULL THEN + SELECT public.create_account_in_space( + v_space_id, + account_local_id(creator_inline(local_content)), + name(creator_inline(local_content)) + ) INTO STRICT upsert_id; + db_content.creator_id := upsert_id; + END IF; + IF content_as_document THEN + db_content.scale = 'document'; + END IF; + IF content_as_document AND document_id(db_content) IS NULL AND source_local_id(document_inline(local_content)) IS NULL THEN + local_content.document_inline.space_id := v_space_id; + local_content.document_inline.source_local_id := db_content.source_local_id; + local_content.document_inline.last_modified := db_content.last_modified; + local_content.document_inline.created := db_content.created; + local_content.document_inline.author_id := db_content.author_id; + END IF; + IF source_local_id(document_inline(local_content)) IS NOT NULL THEN + db_document := public._local_document_to_db_document(document_inline(local_content)); + IF (db_document.author_id IS NULL AND author_inline(local_content) IS NOT NULL) THEN + db_document.author_id := upsert_account_in_space(v_space_id, author_inline(local_content)); + END IF; + INSERT INTO public."Document" ( + space_id, + source_local_id, + url, + created, + metadata, + last_modified, + author_id, + contents + ) VALUES ( + COALESCE(db_document.space_id, v_space_id), + db_document.source_local_id, + db_document.url, + db_document.created, + COALESCE(db_document.metadata, '{}'::jsonb), + db_document.last_modified, + db_document.author_id, + db_document.contents + ) + ON CONFLICT (space_id, source_local_id) DO UPDATE SET + url = 
COALESCE(db_document.url, EXCLUDED.url), + created = COALESCE(db_document.created, EXCLUDED.created), + metadata = COALESCE(db_document.metadata, EXCLUDED.metadata), + last_modified = COALESCE(db_document.last_modified, EXCLUDED.last_modified), + author_id = COALESCE(db_document.author_id, EXCLUDED.author_id), + contents = COALESCE(db_document.contents, EXCLUDED.contents) + RETURNING id INTO STRICT document_id; + db_content.document_id := document_id; + END IF; + INSERT INTO public."Content" ( + document_id, + source_local_id, + variant, + author_id, + creator_id, + created, + text, + metadata, + scale, + space_id, + last_modified, + part_of_id + ) VALUES ( + db_content.document_id, + db_content.source_local_id, + COALESCE(db_content.variant, 'direct'::public."ContentVariant"), + db_content.author_id, + db_content.creator_id, + db_content.created, + db_content.text, + COALESCE(db_content.metadata, '{}'::jsonb), + db_content.scale, + db_content.space_id, + db_content.last_modified, + db_content.part_of_id + ) + ON CONFLICT (space_id, source_local_id, variant) DO UPDATE SET + document_id = COALESCE(db_content.document_id, EXCLUDED.document_id), + author_id = COALESCE(db_content.author_id, EXCLUDED.author_id), + creator_id = COALESCE(db_content.creator_id, EXCLUDED.creator_id), + created = COALESCE(db_content.created, EXCLUDED.created), + text = COALESCE(db_content.text, EXCLUDED.text), + metadata = COALESCE(db_content.metadata, EXCLUDED.metadata), + scale = COALESCE(db_content.scale, EXCLUDED.scale), + last_modified = COALESCE(db_content.last_modified, EXCLUDED.last_modified), + part_of_id = COALESCE(db_content.part_of_id, EXCLUDED.part_of_id) + RETURNING id INTO STRICT upsert_id; + IF model(embedding_inline(local_content)) IS NOT NULL THEN + PERFORM public.upsert_content_embedding(upsert_id, model(embedding_inline(local_content)), vector(embedding_inline(local_content))); + END IF; + RETURN NEXT upsert_id; + END LOOP; +END; +$$; diff --git 
a/packages/database/supabase/schemas/account.sql b/packages/database/supabase/schemas/account.sql index d06f765ce..5920df38c 100644 --- a/packages/database/supabase/schemas/account.sql +++ b/packages/database/supabase/schemas/account.sql @@ -98,13 +98,19 @@ GRANT ALL ON TABLE public."SpaceAccess" TO anon; GRANT ALL ON TABLE public."SpaceAccess" TO authenticated; GRANT ALL ON TABLE public."SpaceAccess" TO service_role; -CREATE OR REPLACE FUNCTION public.create_account_in_space( +CREATE TYPE public.account_local_input AS ( + -- PlatformAccount columns + name VARCHAR, + account_local_id VARCHAR, + -- local values + email VARCHAR, + email_trusted BOOLEAN, + space_editor BOOLEAN +); + +CREATE OR REPLACE FUNCTION public.upsert_account_in_space( space_id_ BIGINT, - account_local_id_ varchar, - name_ varchar, - email_ varchar = null, - email_trusted boolean = true, - editor_ boolean = true + local_account public.account_local_input ) RETURNS BIGINT SECURITY DEFINER SET search_path = '' @@ -114,26 +120,67 @@ DECLARE platform_ public."Platform"; account_id_ BIGINT; BEGIN - SELECT platform INTO platform_ STRICT FROM public."Space" WHERE id = space_id_; + SELECT platform INTO STRICT platform_ FROM public."Space" WHERE id = space_id_; INSERT INTO public."PlatformAccount" AS pa ( account_local_id, name, platform ) VALUES ( - account_local_id_, name_, platform_ + local_account.account_local_id, local_account.name, platform_ ) ON CONFLICT (account_local_id, platform) DO UPDATE SET - name = coalesce(name_, pa.name) + name = coalesce(local_account.name, pa.name) RETURNING id INTO STRICT account_id_; - INSERT INTO public."SpaceAccess" (space_id, account_id, editor) values (space_id_, account_id_, editor_) + INSERT INTO public."SpaceAccess" as sa (space_id, account_id, editor) values (space_id_, account_id_, COALESCE(local_account.space_editor, true)) ON CONFLICT (space_id, account_id) - DO UPDATE SET editor = editor_; - IF email_ IS NOT NULL THEN - INSERT INTO 
public."AgentIdentifier" (account_id, value, identifier_type, trusted) VALUES (account_id_, email_, 'email', email_trusted) - ON CONFLICT (value, identifier_type, account_id) - DO UPDATE SET trusted = email_trusted; + DO UPDATE SET editor = COALESCE(local_account.space_editor, sa.editor, true); + IF local_account.email IS NOT NULL THEN + -- TODO: how to distinguish basic untrusted from platform placeholder email? + INSERT INTO public."AgentIdentifier" as ai (account_id, value, identifier_type, trusted) VALUES (account_id_, local_account.email, 'email', COALESCE(local_account.email_trusted, false)) + ON CONFLICT (value, identifier_type, account_id) + DO UPDATE SET trusted = COALESCE(local_account.email_trusted, ai.trusted, false); END IF; RETURN account_id_; END; $$; +CREATE OR REPLACE FUNCTION public.upsert_accounts_in_space ( +space_id_ BIGINT, +accounts JSONB +) RETURNS SETOF BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + platform_ public."Platform"; + account_id_ BIGINT; + account_row JSONB; + local_account public.account_local_input; +BEGIN + SELECT platform INTO STRICT platform_ FROM public."Space" WHERE id = space_id_; + FOR account_row IN SELECT * FROM jsonb_array_elements(accounts) + LOOP + local_account := jsonb_populate_record(NULL::public.account_local_input, account_row); + RETURN NEXT public.upsert_account_in_space(space_id, local_account); + END LOOP; +END; +$$; + +-- legacy +CREATE OR REPLACE FUNCTION public.create_account_in_space( + space_id_ BIGINT, + account_local_id_ varchar, + name_ varchar, + email_ varchar = null, + email_trusted boolean = true, + editor_ boolean = true +) RETURNS BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE sql +AS $$ + SELECT public.upsert_account_in_space(space_id_, ROW(name_, account_local_id_ ,email_, email_trusted, editor_)::public.account_local_input); +$$; + + CREATE OR REPLACE FUNCTION public.my_account(account_id BIGINT) RETURNS boolean STABLE SECURITY DEFINER SET 
search_path = '' diff --git a/packages/database/supabase/schemas/content.sql b/packages/database/supabase/schemas/content.sql index cde97f9cc..8bc6382c4 100644 --- a/packages/database/supabase/schemas/content.sql +++ b/packages/database/supabase/schemas/content.sql @@ -175,7 +175,7 @@ CREATE TYPE public.document_local_input AS ( author_local_id character varying, space_url character varying, -- inline values - author_inline public."PlatformAccount" + author_inline public.account_local_input ); CREATE TYPE public.inline_embedding_input AS ( @@ -187,7 +187,6 @@ CREATE TYPE public.content_local_input AS ( -- content columns document_id bigint, source_local_id character varying, - variant public."ContentVariant", author_id bigint, creator_id bigint, created timestamp without time zone, @@ -205,9 +204,10 @@ CREATE TYPE public.content_local_input AS ( space_url character varying, -- inline values document_inline public.document_local_input, - author_inline public."PlatformAccount", - creator_inline public."PlatformAccount", - embedding_inline public.inline_embedding_input + author_inline public.account_local_input, + creator_inline public.account_local_input, + embedding_inline public.inline_embedding_input, + variant public."ContentVariant" ); @@ -228,6 +228,9 @@ BEGIN IF data.author_local_id IS NOT NULL THEN SELECT id FROM public."PlatformAccount" WHERE account_local_id = data.author_local_id INTO document.author_id; + ELSIF account_local_id(author_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(author_inline(data)) INTO document.author_id; END IF; IF data.space_url IS NOT NULL THEN SELECT id FROM public."Space" @@ -264,10 +267,16 @@ BEGIN IF data.creator_local_id IS NOT NULL THEN SELECT id FROM public."PlatformAccount" WHERE account_local_id = data.creator_local_id INTO content.creator_id; + ELSIF account_local_id(creator_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + 
WHERE account_local_id = account_local_id(creator_inline(data)) INTO content.creator_id; END IF; IF data.author_local_id IS NOT NULL THEN SELECT id FROM public."PlatformAccount" WHERE account_local_id = data.author_local_id INTO content.author_id; + ELSIF account_local_id(author_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(author_inline(data)) INTO content.author_id; END IF; IF data.part_of_local_id IS NOT NULL THEN SELECT id FROM public."Content" @@ -315,6 +324,9 @@ BEGIN local_document.author_id := upsert_id; END IF; db_document := public._local_document_to_db_document(local_document); + IF (db_document.author_id IS NULL AND author_inline(local_document) IS NOT NULL) THEN + db_document.author_id := upsert_account_in_space(v_space_id, author_inline(local_document)); + END IF; INSERT INTO public."Document" ( space_id, source_local_id, @@ -374,6 +386,7 @@ COMMENT ON FUNCTION public.upsert_content_embedding IS 'single content embedding -- This may trigger creation of PlatformAccounts and Documents appropriately. 
CREATE OR REPLACE FUNCTION public.upsert_content(v_space_id bigint, data jsonb, v_creator_id BIGINT, content_as_document boolean DEFAULT true) RETURNS SETOF BIGINT +SET search_path = '' LANGUAGE plpgsql AS $$ DECLARE @@ -397,7 +410,7 @@ BEGIN account_local_id(author_inline(local_content)), name(author_inline(local_content)) ) INTO STRICT upsert_id; - local_content.author_id := upsert_id; + db_content.author_id := upsert_id; END IF; IF account_local_id(creator_inline(local_content)) IS NOT NULL THEN SELECT public.create_account_in_space( @@ -405,7 +418,7 @@ BEGIN account_local_id(creator_inline(local_content)), name(creator_inline(local_content)) ) INTO STRICT upsert_id; - local_content.creator_id := upsert_id; + db_content.creator_id := upsert_id; END IF; IF content_as_document THEN db_content.scale = 'document'; @@ -418,7 +431,10 @@ BEGIN local_content.document_inline.author_id := db_content.author_id; END IF; IF source_local_id(document_inline(local_content)) IS NOT NULL THEN - db_document := _local_document_to_db_document(document_inline(local_content)); + db_document := public._local_document_to_db_document(document_inline(local_content)); + IF (db_document.author_id IS NULL AND author_inline(local_content) IS NOT NULL) THEN + db_document.author_id := upsert_account_in_space(v_space_id, author_inline(local_content)); + END IF; INSERT INTO public."Document" ( space_id, source_local_id, From 18a0d92adb0a0031f042dd4a05603c1dad78540d Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Sat, 23 Aug 2025 10:21:30 -0400 Subject: [PATCH 11/15] Add an input type for platform accounts Add functions to upsert platform accounts (individually or in bulk) Keep the old create_account_in_space function as a shim for now. Use the new upsert_account in upsert_documents and upsert_content, allowing for more complete inline information. 
--- packages/database/src/dbTypes.ts | 24 +- .../20250823135620_bulk_account_upsert.sql | 351 ++++++++++++++++++ .../database/supabase/schemas/account.sql | 77 +++- .../database/supabase/schemas/content.sql | 32 +- 4 files changed, 458 insertions(+), 26 deletions(-) create mode 100644 packages/database/supabase/migrations/20250823135620_bulk_account_upsert.sql diff --git a/packages/database/src/dbTypes.ts b/packages/database/src/dbTypes.ts index 172fecf34..05b113b06 100644 --- a/packages/database/src/dbTypes.ts +++ b/packages/database/src/dbTypes.ts @@ -673,6 +673,17 @@ export type Database = { Args: { p_account_id: number } Returns: boolean } + upsert_account_in_space: { + Args: { + local_account: Database["public"]["CompositeTypes"]["account_local_input"] + space_id_: number + } + Returns: number + } + upsert_accounts_in_space: { + Args: { accounts: Json; space_id_: number } + Returns: number[] + } upsert_concepts: { Args: { data: Json; v_space_id: number } Returns: number[] @@ -746,6 +757,13 @@ export type Database = { task_status: "active" | "timeout" | "complete" | "failed" } CompositeTypes: { + account_local_input: { + name: string | null + account_local_id: string | null + email: string | null + email_trusted: boolean | null + space_editor: boolean | null + } concept_local_input: { epistemic_status: Database["public"]["Enums"]["EpistemicStatus"] | null name: string | null @@ -786,10 +804,10 @@ export type Database = { | Database["public"]["CompositeTypes"]["document_local_input"] | null author_inline: - | Database["public"]["Tables"]["PlatformAccount"]["Row"] + | Database["public"]["CompositeTypes"]["account_local_input"] | null creator_inline: - | Database["public"]["Tables"]["PlatformAccount"]["Row"] + | Database["public"]["CompositeTypes"]["account_local_input"] | null embedding_inline: | Database["public"]["CompositeTypes"]["inline_embedding_input"] @@ -808,7 +826,7 @@ export type Database = { author_local_id: string | null space_url: string | null 
author_inline: - | Database["public"]["Tables"]["PlatformAccount"]["Row"] + | Database["public"]["CompositeTypes"]["account_local_input"] | null } inline_embedding_input: { diff --git a/packages/database/supabase/migrations/20250823135620_bulk_account_upsert.sql b/packages/database/supabase/migrations/20250823135620_bulk_account_upsert.sql new file mode 100644 index 000000000..397481c0b --- /dev/null +++ b/packages/database/supabase/migrations/20250823135620_bulk_account_upsert.sql @@ -0,0 +1,351 @@ +CREATE TYPE public.account_local_input AS ( +-- PlatformAccount columns +name VARCHAR, +account_local_id VARCHAR, +-- local values +email VARCHAR, +email_trusted BOOLEAN, +space_editor BOOLEAN +) ; + +CREATE OR REPLACE FUNCTION public.upsert_account_in_space( + space_id_ BIGINT, + local_account public.account_local_input +) RETURNS BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + platform_ public."Platform"; + account_id_ BIGINT; +BEGIN + SELECT platform INTO STRICT platform_ FROM public."Space" WHERE id = space_id_; + INSERT INTO public."PlatformAccount" AS pa ( + account_local_id, name, platform + ) VALUES ( + local_account.account_local_id, local_account.name, platform_ + ) ON CONFLICT (account_local_id, platform) DO UPDATE SET + name = coalesce(local_account.name, pa.name) + RETURNING id INTO STRICT account_id_; + INSERT INTO public."SpaceAccess" as sa (space_id, account_id, editor) values (space_id_, account_id_, COALESCE(local_account.space_editor, true)) + ON CONFLICT (space_id, account_id) + DO UPDATE SET editor = COALESCE(local_account.space_editor, sa.editor, true); + IF local_account.email IS NOT NULL THEN + -- TODO: how to distinguish basic untrusted from platform placeholder email? 
+ INSERT INTO public."AgentIdentifier" as ai (account_id, value, identifier_type, trusted) VALUES (account_id_, local_account.email, 'email', COALESCE(local_account.email_trusted, false)) + ON CONFLICT (value, identifier_type, account_id) + DO UPDATE SET trusted = COALESCE(local_account.email_trusted, ai.trusted, false); + END IF; + RETURN account_id_; +END; +$$; + +CREATE OR REPLACE FUNCTION public.upsert_accounts_in_space ( +space_id_ BIGINT, +accounts JSONB +) RETURNS SETOF BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + platform_ public."Platform"; + account_id_ BIGINT; + account_row JSONB; + local_account public.account_local_input; +BEGIN + SELECT platform INTO STRICT platform_ FROM public."Space" WHERE id = space_id_; + FOR account_row IN SELECT * FROM jsonb_array_elements(accounts) + LOOP + local_account := jsonb_populate_record(NULL::public.account_local_input, account_row); + RETURN NEXT public.upsert_account_in_space(space_id_, local_account); + END LOOP; +END; +$$; + +-- legacy +CREATE OR REPLACE FUNCTION public.create_account_in_space ( +space_id_ BIGINT, +account_local_id_ varchar, +name_ varchar, +email_ varchar = null, +email_trusted boolean = true, +editor_ boolean = true +) RETURNS BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE sql +AS $$ + SELECT public.upsert_account_in_space(space_id_, ROW(name_, account_local_id_ ,email_, email_trusted, editor_)::public.account_local_input); +$$ ; + +ALTER TYPE public.document_local_input ALTER ATTRIBUTE author_inline TYPE public.account_local_input ; + +ALTER TYPE public.content_local_input ALTER ATTRIBUTE author_inline TYPE public.account_local_input ; +ALTER TYPE public.content_local_input ALTER ATTRIBUTE creator_inline TYPE public.account_local_input ; + +CREATE OR REPLACE FUNCTION public._local_document_to_db_document(data public.document_local_input) +RETURNS public."Document" LANGUAGE plpgsql STABLE +SET search_path = '' +AS $$ +DECLARE + document 
public."Document"%ROWTYPE; + reference_content JSONB := jsonb_build_object(); + key varchar; + value JSONB; + ref_single_val BIGINT; + ref_array_val BIGINT[]; +BEGIN + document := jsonb_populate_record(NULL::public."Document", to_jsonb(data)); + IF data.author_local_id IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = data.author_local_id INTO document.author_id; + ELSIF account_local_id(author_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(author_inline(data)) INTO document.author_id; + END IF; + IF data.space_url IS NOT NULL THEN + SELECT id FROM public."Space" + WHERE url = data.space_url INTO document.space_id; + END IF; + -- now avoid null defaults + IF document.metadata IS NULL then + document.metadata := '{}'; + END IF; + RETURN document; +END; +$$; + +CREATE OR REPLACE FUNCTION public._local_content_to_db_content(data public.content_local_input) +RETURNS public."Content" STABLE +SET search_path = '' +LANGUAGE plpgsql AS $$ +DECLARE + content public."Content"%ROWTYPE; + reference_content JSONB := jsonb_build_object(); + key varchar; + value JSONB; + ref_single_val BIGINT; + ref_array_val BIGINT[]; +BEGIN + content := jsonb_populate_record(NULL::public."Content", to_jsonb(data)); + IF data.document_local_id IS NOT NULL THEN + SELECT id FROM public."Document" + WHERE source_local_id = data.document_local_id INTO content.document_id; + END IF; + IF data.creator_local_id IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = data.creator_local_id INTO content.creator_id; + ELSIF account_local_id(creator_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(creator_inline(data)) INTO content.creator_id; + END IF; + IF data.author_local_id IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = data.author_local_id INTO content.author_id; 
+ ELSIF account_local_id(author_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(author_inline(data)) INTO content.author_id; + END IF; + IF data.part_of_local_id IS NOT NULL THEN + SELECT id FROM public."Content" + WHERE source_local_id = data.part_of_local_id INTO content.part_of_id; + END IF; + IF data.space_url IS NOT NULL THEN + SELECT id FROM public."Space" + WHERE url = data.space_url INTO content.space_id; + END IF; + -- now avoid null defaults + IF content.metadata IS NULL then + content.metadata := '{}'; + END IF; + RETURN content; +END; +$$; + +CREATE OR REPLACE FUNCTION public.upsert_documents(v_space_id bigint, data jsonb) +RETURNS SETOF BIGINT +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + v_platform public."Platform"; + local_document public.document_local_input; + db_document public."Document"%ROWTYPE; + document_row JSONB; + upsert_id BIGINT; +BEGIN + SELECT platform INTO STRICT v_platform FROM public."Space" WHERE id=v_space_id; + FOR document_row IN SELECT * FROM jsonb_array_elements(data) + LOOP + local_document := jsonb_populate_record(NULL::public.document_local_input, document_row); + local_document.space_id := v_space_id; + IF account_local_id(author_inline(local_document)) IS NOT NULL THEN + SELECT public.create_account_in_space( + v_space_id, + account_local_id(author_inline(local_document)), + name(author_inline(local_document)) + ) INTO STRICT upsert_id; + local_document.author_id := upsert_id; + END IF; + db_document := public._local_document_to_db_document(local_document); + IF (db_document.author_id IS NULL AND author_inline(local_document) IS NOT NULL) THEN + db_document.author_id := upsert_account_in_space(v_space_id, author_inline(local_document)); + END IF; + INSERT INTO public."Document" ( + space_id, + source_local_id, + url, + created, + metadata, + last_modified, + author_id, + contents + ) VALUES ( + db_document.space_id, + 
db_document.source_local_id, + db_document.url, + db_document.created, + db_document.metadata, + db_document.last_modified, + db_document.author_id, + db_document.contents + ) + ON CONFLICT (space_id, source_local_id) DO UPDATE SET + author_id = COALESCE(db_document.author_id, EXCLUDED.author_id), + created = COALESCE(db_document.created, EXCLUDED.created), + last_modified = COALESCE(db_document.last_modified, EXCLUDED.last_modified), + url = COALESCE(db_document.url, EXCLUDED.url), + metadata = COALESCE(db_document.metadata, EXCLUDED.metadata) + RETURNING id INTO STRICT upsert_id; + RETURN NEXT upsert_id; + END LOOP; +END; +$$; + +CREATE OR REPLACE FUNCTION public.upsert_content(v_space_id bigint, data jsonb, v_creator_id BIGINT, content_as_document boolean DEFAULT true) +RETURNS SETOF BIGINT +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + v_platform public."Platform"; + db_document public."Document"%ROWTYPE; + document_id BIGINT; + local_content public.content_local_input; + db_content public."Content"%ROWTYPE; + content_row JSONB; + upsert_id BIGINT; +BEGIN + SELECT platform INTO STRICT v_platform FROM public."Space" WHERE id=v_space_id; + FOR content_row IN SELECT * FROM jsonb_array_elements(data) + LOOP + local_content := jsonb_populate_record(NULL::public.content_local_input, content_row); + local_content.space_id := v_space_id; + db_content := public._local_content_to_db_content(local_content); + IF account_local_id(author_inline(local_content)) IS NOT NULL THEN + SELECT public.create_account_in_space( + v_space_id, + account_local_id(author_inline(local_content)), + name(author_inline(local_content)) + ) INTO STRICT upsert_id; + db_content.author_id := upsert_id; + END IF; + IF account_local_id(creator_inline(local_content)) IS NOT NULL THEN + SELECT public.create_account_in_space( + v_space_id, + account_local_id(creator_inline(local_content)), + name(creator_inline(local_content)) + ) INTO STRICT upsert_id; + db_content.creator_id := upsert_id; 
+ END IF; + IF content_as_document THEN + db_content.scale = 'document'; + END IF; + IF content_as_document AND document_id(db_content) IS NULL AND source_local_id(document_inline(local_content)) IS NULL THEN + local_content.document_inline.space_id := v_space_id; + local_content.document_inline.source_local_id := db_content.source_local_id; + local_content.document_inline.last_modified := db_content.last_modified; + local_content.document_inline.created := db_content.created; + local_content.document_inline.author_id := db_content.author_id; + END IF; + IF source_local_id(document_inline(local_content)) IS NOT NULL THEN + db_document := public._local_document_to_db_document(document_inline(local_content)); + IF (db_document.author_id IS NULL AND author_inline(local_content) IS NOT NULL) THEN + db_document.author_id := upsert_account_in_space(v_space_id, author_inline(local_content)); + END IF; + INSERT INTO public."Document" ( + space_id, + source_local_id, + url, + created, + metadata, + last_modified, + author_id, + contents + ) VALUES ( + COALESCE(db_document.space_id, v_space_id), + db_document.source_local_id, + db_document.url, + db_document.created, + COALESCE(db_document.metadata, '{}'::jsonb), + db_document.last_modified, + db_document.author_id, + db_document.contents + ) + ON CONFLICT (space_id, source_local_id) DO UPDATE SET + url = COALESCE(db_document.url, EXCLUDED.url), + created = COALESCE(db_document.created, EXCLUDED.created), + metadata = COALESCE(db_document.metadata, EXCLUDED.metadata), + last_modified = COALESCE(db_document.last_modified, EXCLUDED.last_modified), + author_id = COALESCE(db_document.author_id, EXCLUDED.author_id), + contents = COALESCE(db_document.contents, EXCLUDED.contents) + RETURNING id INTO STRICT document_id; + db_content.document_id := document_id; + END IF; + INSERT INTO public."Content" ( + document_id, + source_local_id, + variant, + author_id, + creator_id, + created, + text, + metadata, + scale, + space_id, + 
last_modified, + part_of_id + ) VALUES ( + db_content.document_id, + db_content.source_local_id, + COALESCE(db_content.variant, 'direct'::public."ContentVariant"), + db_content.author_id, + db_content.creator_id, + db_content.created, + db_content.text, + COALESCE(db_content.metadata, '{}'::jsonb), + db_content.scale, + db_content.space_id, + db_content.last_modified, + db_content.part_of_id + ) + ON CONFLICT (space_id, source_local_id, variant) DO UPDATE SET + document_id = COALESCE(db_content.document_id, EXCLUDED.document_id), + author_id = COALESCE(db_content.author_id, EXCLUDED.author_id), + creator_id = COALESCE(db_content.creator_id, EXCLUDED.creator_id), + created = COALESCE(db_content.created, EXCLUDED.created), + text = COALESCE(db_content.text, EXCLUDED.text), + metadata = COALESCE(db_content.metadata, EXCLUDED.metadata), + scale = COALESCE(db_content.scale, EXCLUDED.scale), + last_modified = COALESCE(db_content.last_modified, EXCLUDED.last_modified), + part_of_id = COALESCE(db_content.part_of_id, EXCLUDED.part_of_id) + RETURNING id INTO STRICT upsert_id; + IF model(embedding_inline(local_content)) IS NOT NULL THEN + PERFORM public.upsert_content_embedding(upsert_id, model(embedding_inline(local_content)), vector(embedding_inline(local_content))); + END IF; + RETURN NEXT upsert_id; + END LOOP; +END; +$$; diff --git a/packages/database/supabase/schemas/account.sql b/packages/database/supabase/schemas/account.sql index d06f765ce..5ad9ef2bd 100644 --- a/packages/database/supabase/schemas/account.sql +++ b/packages/database/supabase/schemas/account.sql @@ -98,13 +98,19 @@ GRANT ALL ON TABLE public."SpaceAccess" TO anon; GRANT ALL ON TABLE public."SpaceAccess" TO authenticated; GRANT ALL ON TABLE public."SpaceAccess" TO service_role; -CREATE OR REPLACE FUNCTION public.create_account_in_space( +CREATE TYPE public.account_local_input AS ( + -- PlatformAccount columns + name VARCHAR, + account_local_id VARCHAR, + -- local values + email VARCHAR, + email_trusted 
BOOLEAN, + space_editor BOOLEAN +); + +CREATE OR REPLACE FUNCTION public.upsert_account_in_space( space_id_ BIGINT, - account_local_id_ varchar, - name_ varchar, - email_ varchar = null, - email_trusted boolean = true, - editor_ boolean = true + local_account public.account_local_input ) RETURNS BIGINT SECURITY DEFINER SET search_path = '' @@ -114,26 +120,67 @@ DECLARE platform_ public."Platform"; account_id_ BIGINT; BEGIN - SELECT platform INTO platform_ STRICT FROM public."Space" WHERE id = space_id_; + SELECT platform INTO STRICT platform_ FROM public."Space" WHERE id = space_id_; INSERT INTO public."PlatformAccount" AS pa ( account_local_id, name, platform ) VALUES ( - account_local_id_, name_, platform_ + local_account.account_local_id, local_account.name, platform_ ) ON CONFLICT (account_local_id, platform) DO UPDATE SET - name = coalesce(name_, pa.name) + name = coalesce(local_account.name, pa.name) RETURNING id INTO STRICT account_id_; - INSERT INTO public."SpaceAccess" (space_id, account_id, editor) values (space_id_, account_id_, editor_) + INSERT INTO public."SpaceAccess" as sa (space_id, account_id, editor) values (space_id_, account_id_, COALESCE(local_account.space_editor, true)) ON CONFLICT (space_id, account_id) - DO UPDATE SET editor = editor_; - IF email_ IS NOT NULL THEN - INSERT INTO public."AgentIdentifier" (account_id, value, identifier_type, trusted) VALUES (account_id_, email_, 'email', email_trusted) - ON CONFLICT (value, identifier_type, account_id) - DO UPDATE SET trusted = email_trusted; + DO UPDATE SET editor = COALESCE(local_account.space_editor, sa.editor, true); + IF local_account.email IS NOT NULL THEN + -- TODO: how to distinguish basic untrusted from platform placeholder email? 
+ INSERT INTO public."AgentIdentifier" as ai (account_id, value, identifier_type, trusted) VALUES (account_id_, local_account.email, 'email', COALESCE(local_account.email_trusted, false)) + ON CONFLICT (value, identifier_type, account_id) + DO UPDATE SET trusted = COALESCE(local_account.email_trusted, ai.trusted, false); END IF; RETURN account_id_; END; $$; +CREATE OR REPLACE FUNCTION public.upsert_accounts_in_space ( +space_id_ BIGINT, +accounts JSONB +) RETURNS SETOF BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE plpgsql +AS $$ +DECLARE + platform_ public."Platform"; + account_id_ BIGINT; + account_row JSONB; + local_account public.account_local_input; +BEGIN + SELECT platform INTO STRICT platform_ FROM public."Space" WHERE id = space_id_; + FOR account_row IN SELECT * FROM jsonb_array_elements(accounts) + LOOP + local_account := jsonb_populate_record(NULL::public.account_local_input, account_row); + RETURN NEXT public.upsert_account_in_space(space_id_, local_account); + END LOOP; +END; +$$; + +-- legacy +CREATE OR REPLACE FUNCTION public.create_account_in_space( + space_id_ BIGINT, + account_local_id_ varchar, + name_ varchar, + email_ varchar = null, + email_trusted boolean = true, + editor_ boolean = true +) RETURNS BIGINT +SECURITY DEFINER +SET search_path = '' +LANGUAGE sql +AS $$ + SELECT public.upsert_account_in_space(space_id_, ROW(name_, account_local_id_ ,email_, email_trusted, editor_)::public.account_local_input); +$$; + + CREATE OR REPLACE FUNCTION public.my_account(account_id BIGINT) RETURNS boolean STABLE SECURITY DEFINER SET search_path = '' diff --git a/packages/database/supabase/schemas/content.sql b/packages/database/supabase/schemas/content.sql index cde97f9cc..8bc6382c4 100644 --- a/packages/database/supabase/schemas/content.sql +++ b/packages/database/supabase/schemas/content.sql @@ -175,7 +175,7 @@ CREATE TYPE public.document_local_input AS ( author_local_id character varying, space_url character varying, -- inline values - 
author_inline public."PlatformAccount" + author_inline public.account_local_input ); CREATE TYPE public.inline_embedding_input AS ( @@ -187,7 +187,6 @@ CREATE TYPE public.content_local_input AS ( -- content columns document_id bigint, source_local_id character varying, - variant public."ContentVariant", author_id bigint, creator_id bigint, created timestamp without time zone, @@ -205,9 +204,10 @@ CREATE TYPE public.content_local_input AS ( space_url character varying, -- inline values document_inline public.document_local_input, - author_inline public."PlatformAccount", - creator_inline public."PlatformAccount", - embedding_inline public.inline_embedding_input + author_inline public.account_local_input, + creator_inline public.account_local_input, + embedding_inline public.inline_embedding_input, + variant public."ContentVariant" ); @@ -228,6 +228,9 @@ BEGIN IF data.author_local_id IS NOT NULL THEN SELECT id FROM public."PlatformAccount" WHERE account_local_id = data.author_local_id INTO document.author_id; + ELSIF account_local_id(author_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(author_inline(data)) INTO document.author_id; END IF; IF data.space_url IS NOT NULL THEN SELECT id FROM public."Space" @@ -264,10 +267,16 @@ BEGIN IF data.creator_local_id IS NOT NULL THEN SELECT id FROM public."PlatformAccount" WHERE account_local_id = data.creator_local_id INTO content.creator_id; + ELSIF account_local_id(creator_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(creator_inline(data)) INTO content.creator_id; END IF; IF data.author_local_id IS NOT NULL THEN SELECT id FROM public."PlatformAccount" WHERE account_local_id = data.author_local_id INTO content.author_id; + ELSIF account_local_id(author_inline(data)) IS NOT NULL THEN + SELECT id FROM public."PlatformAccount" + WHERE account_local_id = account_local_id(author_inline(data)) 
INTO content.author_id; END IF; IF data.part_of_local_id IS NOT NULL THEN SELECT id FROM public."Content" @@ -315,6 +324,9 @@ BEGIN local_document.author_id := upsert_id; END IF; db_document := public._local_document_to_db_document(local_document); + IF (db_document.author_id IS NULL AND author_inline(local_document) IS NOT NULL) THEN + db_document.author_id := upsert_account_in_space(v_space_id, author_inline(local_document)); + END IF; INSERT INTO public."Document" ( space_id, source_local_id, @@ -374,6 +386,7 @@ COMMENT ON FUNCTION public.upsert_content_embedding IS 'single content embedding -- This may trigger creation of PlatformAccounts and Documents appropriately. CREATE OR REPLACE FUNCTION public.upsert_content(v_space_id bigint, data jsonb, v_creator_id BIGINT, content_as_document boolean DEFAULT true) RETURNS SETOF BIGINT +SET search_path = '' LANGUAGE plpgsql AS $$ DECLARE @@ -397,7 +410,7 @@ BEGIN account_local_id(author_inline(local_content)), name(author_inline(local_content)) ) INTO STRICT upsert_id; - local_content.author_id := upsert_id; + db_content.author_id := upsert_id; END IF; IF account_local_id(creator_inline(local_content)) IS NOT NULL THEN SELECT public.create_account_in_space( @@ -405,7 +418,7 @@ BEGIN account_local_id(creator_inline(local_content)), name(creator_inline(local_content)) ) INTO STRICT upsert_id; - local_content.creator_id := upsert_id; + db_content.creator_id := upsert_id; END IF; IF content_as_document THEN db_content.scale = 'document'; @@ -418,7 +431,10 @@ BEGIN local_content.document_inline.author_id := db_content.author_id; END IF; IF source_local_id(document_inline(local_content)) IS NOT NULL THEN - db_document := _local_document_to_db_document(document_inline(local_content)); + db_document := public._local_document_to_db_document(document_inline(local_content)); + IF (db_document.author_id IS NULL AND author_inline(local_content) IS NOT NULL) THEN + db_document.author_id := upsert_account_in_space(v_space_id, 
author_inline(local_content)); + END IF; INSERT INTO public."Document" ( space_id, source_local_id, From a0ea409573c0fab135d14c1137527ef7bd6d3b70 Mon Sep 17 00:00:00 2001 From: sid597 Date: Sun, 24 Aug 2025 14:24:15 +0530 Subject: [PATCH 12/15] bulk upsert accounts, use database function imports --- apps/roam/src/utils/cleanupOrphanedNodes.ts | 2 +- apps/roam/src/utils/supabaseContext.ts | 1 - apps/roam/src/utils/syncDgNodesToSupabase.ts | 46 +++++++++++++++++-- .../upsertNodesAsContentWithEmbeddings.ts | 16 ++----- 4 files changed, 46 insertions(+), 19 deletions(-) diff --git a/apps/roam/src/utils/cleanupOrphanedNodes.ts b/apps/roam/src/utils/cleanupOrphanedNodes.ts index 9bb850f26..741313dc2 100644 --- a/apps/roam/src/utils/cleanupOrphanedNodes.ts +++ b/apps/roam/src/utils/cleanupOrphanedNodes.ts @@ -1,6 +1,6 @@ import { type SupabaseContext } from "./supabaseContext"; import { type SupabaseClient } from "@supabase/supabase-js"; -import { type Database } from "@repo/database/types.gen"; +import { type Database } from "@repo/database/dbTypes"; type DGSupabaseClient = SupabaseClient; diff --git a/apps/roam/src/utils/supabaseContext.ts b/apps/roam/src/utils/supabaseContext.ts index f14e52d36..0341d2ebc 100644 --- a/apps/roam/src/utils/supabaseContext.ts +++ b/apps/roam/src/utils/supabaseContext.ts @@ -2,7 +2,6 @@ import getCurrentUserEmail from "roamjs-components/queries/getCurrentUserEmail"; import getCurrentUserDisplayName from "roamjs-components/queries/getCurrentUserDisplayName"; import getPageUidByPageTitle from "roamjs-components/queries/getPageUidByPageTitle"; import getRoamUrl from "roamjs-components/dom/getRoamUrl"; - import type { Enums } from "@repo/database/dbTypes"; import { DISCOURSE_CONFIG_PAGE_TITLE } from "~/utils/renderNodeConfigPage"; import getBlockProps from "~/utils/getBlockProps"; diff --git a/apps/roam/src/utils/syncDgNodesToSupabase.ts b/apps/roam/src/utils/syncDgNodesToSupabase.ts index 07b767efb..a6ded9fbc 100644 --- 
a/apps/roam/src/utils/syncDgNodesToSupabase.ts +++ b/apps/roam/src/utils/syncDgNodesToSupabase.ts @@ -18,11 +18,14 @@ import { orderConceptsByDependency, } from "./conceptConversion"; import { OnloadArgs } from "roamjs-components/types"; -import { DGSupabaseClient } from "@repo/ui/lib/supabase/client"; +import { DGSupabaseClient } from "@repo/database/lib/client"; import { fetchEmbeddingsForNodes } from "./upsertNodesAsContentWithEmbeddings"; -import { Json } from "@repo/database/types.gen"; +import { Database, Json } from "@repo/database/dbTypes"; import { convertRoamNodeToLocalContent } from "./upsertNodesAsContentWithEmbeddings"; +type AccountLocalInput = + Database["public"]["CompositeTypes"]["account_local_input"]; + const SYNC_FUNCTION = "embedding"; const SYNC_INTERVAL = "45s"; const SYNC_TIMEOUT = "20s"; @@ -166,7 +169,6 @@ const upsertNodeSchemaToContent = async ({ const contentData: LocalContentDataInput[] = convertRoamNodeToLocalContent({ nodes: result, - userId, }); const { error } = await supabaseClient.rpc("upsert_content", { data: contentData as Json, @@ -241,7 +243,6 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( } const allNodeInstancesAsLocalContent = convertRoamNodeToLocalContent({ nodes: roamNodes, - userId: context.userId, }); let nodesWithEmbeddings: LocalContentDataInput[]; @@ -310,6 +311,41 @@ const getDgNodeTypes = (extensionAPI: OnloadArgs["extensionAPI"]) => { return { allDgNodeTypes, dgNodeTypesWithSettings }; }; +const getAllUsers = async (): Promise => { + const query = `[:find ?author_local_id ?author_name + :keys author_local_id name + :where + [?user-eid :user/uid ?author_local_id] + [(get-else $ ?user-eid :user/display-name "") ?author_name] +]`; + //@ts-ignore - backend to be added to roamjs-components + const result = (await window.roamAlphaAPI.data.async.q(query)) as unknown as { + author_local_id: string; + name: string; + }[]; + return result.map((user) => ({ + account_local_id: user.author_local_id, + 
name: user.name, + email: null, + email_trusted: null, + space_editor: null, + })); +}; + +const upsertUsers = async ( + users: AccountLocalInput[], + supabaseClient: DGSupabaseClient, + context: SupabaseContext, +) => { + const { error } = await supabaseClient.rpc("upsert_accounts_in_space", { + accounts: users, + space_id_: context.spaceId, + }); + if (error) { + console.error("upsert_accounts_in_space failed:", error); + } +}; + export const createOrUpdateDiscourseEmbedding = async ( extensionAPI: OnloadArgs["extensionAPI"], ) => { @@ -323,6 +359,7 @@ export const createOrUpdateDiscourseEmbedding = async ( } try { + const allUsers = await getAllUsers(); const time = lastUpdateTime === null ? DEFAULT_TIME : lastUpdateTime; const { allDgNodeTypes, dgNodeTypesWithSettings } = getDgNodeTypes(extensionAPI); @@ -339,6 +376,7 @@ export const createOrUpdateDiscourseEmbedding = async ( await endSyncTask(worker, "failed"); return; } + await upsertUsers(allUsers, supabaseClient, context); await upsertNodesToSupabaseAsContentWithEmbeddings( allNodeInstances, supabaseClient, diff --git a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts index dbebc9572..abee954a8 100644 --- a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts +++ b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts @@ -2,9 +2,9 @@ import { RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; import { SupabaseContext } from "./supabaseContext"; import { LocalContentDataInput } from "@repo/database/inputTypes"; -import { DGSupabaseClient } from "@repo/ui/lib/supabase/client"; -import { Json } from "@repo/database/types.gen"; -import { nextApiRoot } from "@repo/ui/lib/execContext"; +import { DGSupabaseClient } from "@repo/database/lib/client"; +import { Json } from "@repo/database/dbTypes"; +import { nextApiRoot } from "@repo/utils/execContext"; const EMBEDDING_BATCH_SIZE = 200; const EMBEDDING_MODEL = 
"openai_text_embedding_3_small_1536"; @@ -17,10 +17,8 @@ type EmbeddingApiResponse = { export const convertRoamNodeToLocalContent = ({ nodes, - userId, }: { nodes: RoamDiscourseNodeData[]; - userId: number; }): LocalContentDataInput[] => { return nodes.map((node) => { const variant = node.node_title ? "direct_and_description" : "direct"; @@ -28,7 +26,6 @@ export const convertRoamNodeToLocalContent = ({ ? `${node.node_title} ${node.text}` : node.text; return { - author_id: userId, author_local_id: node.author_local_id, source_local_id: node.source_local_id, created: new Date(node.created || Date.now()).toISOString(), @@ -36,12 +33,6 @@ export const convertRoamNodeToLocalContent = ({ text: text, variant: variant, scale: "document", - document_inline: { - source_local_id: node.source_local_id, - created: new Date(node.created || Date.now()).toISOString(), - last_modified: new Date(node.last_modified || Date.now()).toISOString(), - author_local_id: node.author_local_id, - }, }; }); }; @@ -139,7 +130,6 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( } const localContentNodes = convertRoamNodeToLocalContent({ nodes: roamNodes, - userId: context.userId, }); let nodesWithEmbeddings: LocalContentDataInput[]; From 29e815113528984bd84d6bd62dd3eb9c693dcef3 Mon Sep 17 00:00:00 2001 From: sid597 Date: Sun, 24 Aug 2025 14:43:19 +0530 Subject: [PATCH 13/15] add comment for future --- apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts index abee954a8..74e28f858 100644 --- a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts +++ b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts @@ -51,6 +51,11 @@ export const fetchEmbeddingsForNodes = async ( body: JSON.stringify({ input: batch }), }); + // TODO: Future: 
https://github.com/DiscourseGraphs/discourse-graph/pull/343#discussion_r2285566007 + //At some point there were a lot of transient errors with openAI, and retrying was expected. Do you know if this is still the case? + // One case where I know this would still be true is if we ever run into request throttling, in which case we probably want incremental backoff. + // I know we're far from that much usage, but that will become an issue with more adopters. Punting that should at least be a conscious decision. + if (!response.ok) { let errorData; try { From a11142a7e8a9ce8012992168a9c4dfff79d4ea88 Mon Sep 17 00:00:00 2001 From: sid597 Date: Sun, 24 Aug 2025 20:27:30 +0530 Subject: [PATCH 14/15] use better import method --- apps/roam/src/utils/getAllDiscourseNodesSince.ts | 4 ++-- apps/roam/src/utils/syncDgNodesToSupabase.ts | 14 +++++++------- .../utils/upsertNodesAsContentWithEmbeddings.ts | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/apps/roam/src/utils/getAllDiscourseNodesSince.ts b/apps/roam/src/utils/getAllDiscourseNodesSince.ts index bb72063a8..929d34e68 100644 --- a/apps/roam/src/utils/getAllDiscourseNodesSince.ts +++ b/apps/roam/src/utils/getAllDiscourseNodesSince.ts @@ -1,7 +1,7 @@ /* eslint-disable @typescript-eslint/naming-convention */ -import getDiscourseNodes, { DiscourseNode } from "./getDiscourseNodes"; +import getDiscourseNodes, { type DiscourseNode } from "./getDiscourseNodes"; import findDiscourseNode from "./findDiscourseNode"; -import { OnloadArgs } from "roamjs-components/types"; +import { type OnloadArgs } from "roamjs-components/types"; import getDiscourseNodeFormatExpression from "./getDiscourseNodeFormatExpression"; type ISODateString = string; diff --git a/apps/roam/src/utils/syncDgNodesToSupabase.ts b/apps/roam/src/utils/syncDgNodesToSupabase.ts index a6ded9fbc..a94ab962f 100644 --- a/apps/roam/src/utils/syncDgNodesToSupabase.ts +++ b/apps/roam/src/utils/syncDgNodesToSupabase.ts @@ -7,20 +7,20 @@ import { 
cleanupOrphanedNodes } from "./cleanupOrphanedNodes"; import { getLoggedInClient, getSupabaseContext, - SupabaseContext, + type SupabaseContext, } from "./supabaseContext"; -import { LocalContentDataInput } from "@repo/database/inputTypes"; -import { RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; -import getDiscourseNodes, { DiscourseNode } from "./getDiscourseNodes"; +import { type LocalContentDataInput } from "@repo/database/inputTypes"; +import { type RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; +import getDiscourseNodes, { type DiscourseNode } from "./getDiscourseNodes"; import { discourseNodeBlockToLocalConcept, discourseNodeSchemaToLocalConcept, orderConceptsByDependency, } from "./conceptConversion"; -import { OnloadArgs } from "roamjs-components/types"; -import { DGSupabaseClient } from "@repo/database/lib/client"; +import { type OnloadArgs } from "roamjs-components/types"; +import { type DGSupabaseClient } from "@repo/database/lib/client"; import { fetchEmbeddingsForNodes } from "./upsertNodesAsContentWithEmbeddings"; -import { Database, Json } from "@repo/database/dbTypes"; +import { type Database, type Json } from "@repo/database/dbTypes"; import { convertRoamNodeToLocalContent } from "./upsertNodesAsContentWithEmbeddings"; type AccountLocalInput = diff --git a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts index 74e28f858..53f61ab17 100644 --- a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts +++ b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts @@ -1,9 +1,9 @@ /* eslint-disable @typescript-eslint/naming-convention */ -import { RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; -import { SupabaseContext } from "./supabaseContext"; -import { LocalContentDataInput } from "@repo/database/inputTypes"; -import { DGSupabaseClient } from "@repo/database/lib/client"; -import { Json } from "@repo/database/dbTypes"; +import { type 
RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; +import { type SupabaseContext } from "./supabaseContext"; +import { type LocalContentDataInput } from "@repo/database/inputTypes"; +import { type DGSupabaseClient } from "@repo/database/lib/client"; +import { type Json } from "@repo/database/dbTypes"; import { nextApiRoot } from "@repo/utils/execContext"; const EMBEDDING_BATCH_SIZE = 200; From 7d93ceef06796570b0499c94c6e0e83ac85f9f34 Mon Sep 17 00:00:00 2001 From: sid597 Date: Sun, 24 Aug 2025 22:16:01 +0530 Subject: [PATCH 15/15] commonjs-to-esm --- apps/roam/src/utils/cleanupOrphanedNodes.ts | 47 +++++++++++-------- apps/roam/src/utils/syncDgNodesToSupabase.ts | 11 ++--- .../upsertNodesAsContentWithEmbeddings.ts | 9 ++-- 3 files changed, 37 insertions(+), 30 deletions(-) diff --git a/apps/roam/src/utils/cleanupOrphanedNodes.ts b/apps/roam/src/utils/cleanupOrphanedNodes.ts index 741313dc2..49f70684b 100644 --- a/apps/roam/src/utils/cleanupOrphanedNodes.ts +++ b/apps/roam/src/utils/cleanupOrphanedNodes.ts @@ -1,15 +1,12 @@ import { type SupabaseContext } from "./supabaseContext"; -import { type SupabaseClient } from "@supabase/supabase-js"; -import { type Database } from "@repo/database/dbTypes"; - -type DGSupabaseClient = SupabaseClient; +// https://linear.app/discourse-graphs/issue/ENG-766/upgrade-all-commonjs-to-esm +type DGSupabaseClient = any; const getAllNodesFromSupabase = async ( supabaseClient: DGSupabaseClient, spaceId: number, ): Promise => { try { - const { data: schemas, error: schemasError } = await supabaseClient .from("Concept") .select("id") @@ -25,7 +22,7 @@ const getAllNodesFromSupabase = async ( return []; } - const schemaIds = schemas.map((s) => s.id); + const schemaIds = schemas.map((s: { id: string }) => s.id); let nodeResult: string[] = []; if (schemaIds.length > 0) { @@ -52,8 +49,11 @@ const getAllNodesFromSupabase = async ( } nodeResult = conceptResponse.data - ?.map((c) => c.Content?.source_local_id) - .filter((id): id is string 
=> !!id) || []; + ?.map( + (c: { Content?: { source_local_id: string } }) => + c.Content?.source_local_id, + ) + .filter((id: string): id is string => !!id) || []; } const blockContentResponse = await supabaseClient @@ -73,8 +73,8 @@ const getAllNodesFromSupabase = async ( const blockResult = blockContentResponse.data - ?.map((c) => c.source_local_id) - .filter((id): id is string => !!id) || []; + ?.map((c: { source_local_id: string }) => c.source_local_id) + .filter((id: string): id is string => !!id) || []; const result = [...new Set([...nodeResult, ...blockResult])]; @@ -114,8 +114,11 @@ const getAllNodeSchemasFromSupabase = async ( return ( data - ?.map((c) => c.Content?.source_local_id) - .filter((id): id is string => !!id) || [] + ?.map( + (c: { Content?: { source_local_id: string } }) => + c.Content?.source_local_id, + ) + .filter((id: string): id is string => !!id) || [] ); } catch (error) { console.error("Error in getAllNodeSchemasFromSupabase:", error); @@ -159,7 +162,7 @@ const deleteNodesFromSupabase = async ( console.error("Failed to get content from Supabase:", contentError); } - const contentIds = contentData?.map((c) => c.id) || []; + const contentIds = contentData?.map((c: { id: string }) => c.id) || []; if (contentIds.length > 0) { const { error: conceptError } = await supabaseClient @@ -216,7 +219,7 @@ const deleteNodeSchemasFromSupabase = async ( return 0; } - const schemaContentIds = schemaContentData.map((c) => c.id); + const schemaContentIds = schemaContentData.map((c: { id: string }) => c.id); const { data: schemaConceptData, error: schemaConceptError } = await supabaseClient @@ -234,7 +237,9 @@ const deleteNodeSchemasFromSupabase = async ( return 0; } - const schemaConceptIds = (schemaConceptData || []).map((c) => c.id); + const schemaConceptIds = (schemaConceptData || []).map( + (c: { id: string }) => c.id, + ); let instanceConceptIds: number[] = []; let instanceContentIds: number[] = []; @@ -257,10 +262,12 @@ const 
deleteNodeSchemasFromSupabase = async ( return 0; } - instanceConceptIds = (instanceConceptData || []).map((ic) => ic.id); + instanceConceptIds = (instanceConceptData || []).map( + (ic: { id: string }) => ic.id, + ); instanceContentIds = (instanceConceptData || []) - .map((ic) => ic.represented_by_id) - .filter((x): x is number => typeof x === "number"); + .map((ic: { represented_by_id: number }) => ic.represented_by_id) + .filter((x: number): x is number => typeof x === "number"); if (instanceContentIds.length > 0) { const { data: instanceContentData, error: instanceContentLookupError } = @@ -277,8 +284,8 @@ const deleteNodeSchemasFromSupabase = async ( return 0; } instanceSourceLocalIds = (instanceContentData || []) - .map((c) => c.source_local_id) - .filter((id): id is string => !!id); + .map((c: { source_local_id: string }) => c.source_local_id) + .filter((id: string): id is string => !!id); } } diff --git a/apps/roam/src/utils/syncDgNodesToSupabase.ts b/apps/roam/src/utils/syncDgNodesToSupabase.ts index a94ab962f..61af1d921 100644 --- a/apps/roam/src/utils/syncDgNodesToSupabase.ts +++ b/apps/roam/src/utils/syncDgNodesToSupabase.ts @@ -9,7 +9,6 @@ import { getSupabaseContext, type SupabaseContext, } from "./supabaseContext"; -import { type LocalContentDataInput } from "@repo/database/inputTypes"; import { type RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; import getDiscourseNodes, { type DiscourseNode } from "./getDiscourseNodes"; import { @@ -18,13 +17,13 @@ import { orderConceptsByDependency, } from "./conceptConversion"; import { type OnloadArgs } from "roamjs-components/types"; -import { type DGSupabaseClient } from "@repo/database/lib/client"; import { fetchEmbeddingsForNodes } from "./upsertNodesAsContentWithEmbeddings"; -import { type Database, type Json } from "@repo/database/dbTypes"; import { convertRoamNodeToLocalContent } from "./upsertNodesAsContentWithEmbeddings"; - -type AccountLocalInput = - 
Database["public"]["CompositeTypes"]["account_local_input"]; +// https://linear.app/discourse-graphs/issue/ENG-766/upgrade-all-commonjs-to-esm +type LocalContentDataInput = any; +type DGSupabaseClient = any; +type Json = any; +type AccountLocalInput = any; const SYNC_FUNCTION = "embedding"; const SYNC_INTERVAL = "45s"; diff --git a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts index 53f61ab17..8f6509c9e 100644 --- a/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts +++ b/apps/roam/src/utils/upsertNodesAsContentWithEmbeddings.ts @@ -1,10 +1,11 @@ /* eslint-disable @typescript-eslint/naming-convention */ import { type RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; import { type SupabaseContext } from "./supabaseContext"; -import { type LocalContentDataInput } from "@repo/database/inputTypes"; -import { type DGSupabaseClient } from "@repo/database/lib/client"; -import { type Json } from "@repo/database/dbTypes"; -import { nextApiRoot } from "@repo/utils/execContext"; +// https://linear.app/discourse-graphs/issue/ENG-766/upgrade-all-commonjs-to-esm +const { nextApiRoot } = require("@repo/utils/execContext"); +type LocalContentDataInput = any; +type DGSupabaseClient = any; +type Json = any; const EMBEDDING_BATCH_SIZE = 200; const EMBEDDING_MODEL = "openai_text_embedding_3_small_1536";