diff --git a/packages/database/schema.yaml b/packages/database/schema.yaml index 114d914be..9526ef1b7 100644 --- a/packages/database/schema.yaml +++ b/packages/database/schema.yaml @@ -22,6 +22,12 @@ enums: quote: sentence: phrase: + ContentVariant: + description: Is the text taken as-is, or is it a computed variant? + permissible_values: + direct: + direct_and_children: + direct_and_description: Validation: description: Whether a given value was given by a person, or suggested by an automated agent (and then possibly infirmed.) permissible_values: @@ -139,7 +145,7 @@ classes: active: range: boolean required: true - ifabsent: 'true' + ifabsent: "true" agent_type: required: true range: AgentType @@ -147,7 +153,7 @@ classes: metadata: range: JSON description: Additional platform-specific information about the account - ifabsent: '{}' + ifabsent: "{}" dg_account: range: Users unique_keys: @@ -298,7 +304,7 @@ classes: obsolete: description: Whether this embedding is obsolete (becauses the Content was modified) range: boolean - ifabsent: 'false' + ifabsent: "false" ContentEmbedding_openai_text_embedding_3_small_1536: is_a: ContentEmbedding description: The table for the openai text_embedding_3_small model (1536 dimensions) @@ -318,7 +324,7 @@ classes: arity: range: integer required: true - ifabsent: '0' + ifabsent: "0" description: The number of roles in this relation; nodes have zero, binary relations have 2, etc. schema: range: ConceptSchema @@ -327,16 +333,16 @@ classes: description: "Aspects of the concept that reference other concepts. `{[key: string]: number|number[]}`" range: JSON required: true - ifabsent: '{}' + ifabsent: "{}" literal_content: range: JSON required: true description: "Aspects of the concept that have literal values. 
`{[key: string]: any}`" - ifabsent: '{}' + ifabsent: "{}" is_schema: range: boolean required: true - ifabsent: 'false' + ifabsent: "false" represented_by: description: This concept is explicitly represented by a given content unit range: Content @@ -445,13 +451,13 @@ slots: position: description: The ordinal position of the content within its parent, wrt other content units of the same scale range: integer - ifabsent: '0' + ifabsent: "0" required: true char_position: description: The character position of the content within its parent. # Does not apply to outline sub-elements range: integer - ifabsent: '0' + ifabsent: "0" validation: range: Validation required: true diff --git a/packages/database/supabase/migrations/20250718131540_content_variant.sql b/packages/database/supabase/migrations/20250718131540_content_variant.sql new file mode 100644 index 000000000..2bb2f294e --- /dev/null +++ b/packages/database/supabase/migrations/20250718131540_content_variant.sql @@ -0,0 +1,137 @@ +CREATE TYPE public."ContentVariant" AS ENUM ( + 'direct', + 'direct_and_children', + 'direct_and_description' +); + +ALTER TYPE public."ContentVariant" OWNER TO postgres; + +ALTER TABLE public."Content" ADD COLUMN variant public."ContentVariant" NOT NULL DEFAULT 'direct'; + +DROP INDEX IF EXISTS public."content_space_and_local_id_idx"; + +CREATE UNIQUE INDEX content_space_local_id_variant_idx ON public."Content" USING btree (space_id, source_local_id, variant); + +ALTER TYPE public.content_local_input ADD ATTRIBUTE variant public."ContentVariant"; +/* upsert_content: for each element of the JSON array `data`, creates the inline author and creator accounts and the inline document when they are given, then inserts the Content row or updates the row with the same (space_id, source_local_id, variant), stores the inline embedding when one is given, and returns the Content id of every element in input order. */ +CREATE OR REPLACE FUNCTION public.upsert_content(v_space_id bigint, data jsonb, v_creator_id BIGINT, content_as_document boolean DEFAULT true) +RETURNS SETOF BIGINT +LANGUAGE plpgsql +AS $$ +DECLARE + v_platform public."Platform"; + db_document public."Document"%ROWTYPE; + document_id BIGINT; + local_content public.content_local_input; + db_content public."Content"%ROWTYPE; + content_row JSONB; + upsert_id BIGINT; +BEGIN + SELECT platform 
INTO STRICT v_platform FROM public."Space" WHERE id=v_space_id; /* STRICT makes this raise unless exactly one Space row matches; v_platform is not read again in this function */ + FOR content_row IN SELECT * FROM jsonb_array_elements(data) + LOOP + local_content := jsonb_populate_record(NULL::public.content_local_input, content_row); + local_content.space_id := v_space_id; + db_content := public._local_content_to_db_content(local_content); + IF account_local_id(author_inline(local_content)) IS NOT NULL THEN + SELECT public.create_account_in_space( + v_space_id, + account_local_id(author_inline(local_content)), + name(author_inline(local_content)) + ) INTO STRICT upsert_id; + local_content.author_id := upsert_id; db_content.author_id := upsert_id; /* db_content was already derived from local_content above, so the new account id has to be written to it as well; otherwise neither the Content row nor the synthesised inline document receives the inline author. NOTE(review): the copy of this function in supabase/schemas/content.sql has these lines outside the hunks of this patch - mirror the change there */ + END IF; + IF account_local_id(creator_inline(local_content)) IS NOT NULL THEN + SELECT public.create_account_in_space( + v_space_id, + account_local_id(creator_inline(local_content)), + name(creator_inline(local_content)) + ) INTO STRICT upsert_id; + local_content.creator_id := upsert_id; db_content.creator_id := upsert_id; /* same reason as for the inline author: db_content is what gets inserted */ + END IF; + IF content_as_document THEN + db_content.scale = 'document'; + END IF; + IF content_as_document AND document_id(db_content) IS NULL AND source_local_id(document_inline(local_content)) IS NULL THEN /* no document id and no inline document: build the inline document from the content's own fields */ + local_content.document_inline.space_id := v_space_id; + local_content.document_inline.source_local_id := db_content.source_local_id; + local_content.document_inline.last_modified := db_content.last_modified; + local_content.document_inline.created := db_content.created; + local_content.document_inline.author_id := db_content.author_id; + END IF; + IF source_local_id(document_inline(local_content)) IS NOT NULL THEN + db_document := _local_document_to_db_document(document_inline(local_content)); + INSERT INTO public."Document" ( + space_id, + source_local_id, + url, + created, + metadata, + last_modified, + author_id, + contents + ) VALUES ( + COALESCE(db_document.space_id, v_space_id), + db_document.source_local_id, + db_document.url, + db_document.created, + COALESCE(db_document.metadata, '{}'::jsonb), + db_document.last_modified, + db_document.author_id, + 
db_document.contents + ) + ON CONFLICT (space_id, source_local_id) DO UPDATE SET /* NOTE(review): EXCLUDED is the row proposed for insertion, i.e. the same db_document values, so these COALESCEs do not fall back to the stored row - confirm that overwriting with the new values is intended */ + url = COALESCE(db_document.url, EXCLUDED.url), + created = COALESCE(db_document.created, EXCLUDED.created), + metadata = COALESCE(db_document.metadata, EXCLUDED.metadata), + last_modified = COALESCE(db_document.last_modified, EXCLUDED.last_modified), + author_id = COALESCE(db_document.author_id, EXCLUDED.author_id), + contents = COALESCE(db_document.contents, EXCLUDED.contents) + RETURNING id INTO STRICT document_id; + db_content.document_id := document_id; + END IF; + INSERT INTO public."Content" ( + document_id, + source_local_id, + variant, + author_id, + creator_id, + created, + text, + metadata, + scale, + space_id, + last_modified, + part_of_id + ) VALUES ( + db_content.document_id, + db_content.source_local_id, + COALESCE(db_content.variant, 'direct'::public."ContentVariant"), + db_content.author_id, + db_content.creator_id, + db_content.created, + db_content.text, + COALESCE(db_content.metadata, '{}'::jsonb), + db_content.scale, + db_content.space_id, + db_content.last_modified, + db_content.part_of_id + ) + ON CONFLICT (space_id, source_local_id, variant) DO UPDATE SET /* conflict target is the column list of the unique index content_space_local_id_variant_idx created above */ + document_id = COALESCE(db_content.document_id, EXCLUDED.document_id), + author_id = COALESCE(db_content.author_id, EXCLUDED.author_id), + creator_id = COALESCE(db_content.creator_id, EXCLUDED.creator_id), + created = COALESCE(db_content.created, EXCLUDED.created), + text = COALESCE(db_content.text, EXCLUDED.text), + metadata = COALESCE(db_content.metadata, EXCLUDED.metadata), + scale = COALESCE(db_content.scale, EXCLUDED.scale), + last_modified = COALESCE(db_content.last_modified, EXCLUDED.last_modified), + part_of_id = COALESCE(db_content.part_of_id, EXCLUDED.part_of_id) + RETURNING id INTO STRICT upsert_id; + IF model(embedding_inline(local_content)) IS NOT NULL THEN + PERFORM public.upsert_content_embedding(upsert_id, model(embedding_inline(local_content)), vector(embedding_inline(local_content))); + END 
IF; + RETURN NEXT upsert_id; + END LOOP; +END; +$$; diff --git a/packages/database/supabase/schemas/content.sql b/packages/database/supabase/schemas/content.sql index bbe79b796..1021c2508 100644 --- a/packages/database/supabase/schemas/content.sql +++ b/packages/database/supabase/schemas/content.sql @@ -13,6 +13,14 @@ CREATE TYPE public."Scale" AS ENUM ( ALTER TYPE public."Scale" OWNER TO postgres; +CREATE TYPE public."ContentVariant" AS ENUM ( + 'direct', + 'direct_and_children', + 'direct_and_description' +); + +ALTER TYPE public."ContentVariant" OWNER TO postgres; + CREATE TABLE IF NOT EXISTS public."Document" ( id bigint DEFAULT nextval( 'public.entity_id_seq'::regclass @@ -68,6 +76,7 @@ CREATE TABLE IF NOT EXISTS public."Content" ( ) NOT NULL, document_id bigint NOT NULL, source_local_id character varying, + variant public."ContentVariant" NOT NULL DEFAULT 'direct', author_id bigint, creator_id bigint, created timestamp without time zone NOT NULL, @@ -119,8 +128,8 @@ CREATE INDEX "Content_part_of" ON public."Content" USING btree ( CREATE INDEX "Content_space" ON public."Content" USING btree (space_id); -CREATE UNIQUE INDEX content_space_and_local_id_idx ON public."Content" USING btree ( - space_id, source_local_id +CREATE UNIQUE INDEX content_space_local_id_variant_idx ON public."Content" USING btree ( + space_id, source_local_id, variant ) NULLS DISTINCT; CREATE INDEX "Content_text" ON public."Content" USING pgroonga (text); @@ -178,6 +187,7 @@ CREATE TYPE public.content_local_input AS ( -- content columns document_id bigint, source_local_id character varying, + variant public."ContentVariant", author_id bigint, creator_id bigint, created timestamp without time zone, @@ -406,7 +416,6 @@ BEGIN local_content.document_inline.last_modified := db_content.last_modified; local_content.document_inline.created := db_content.created; local_content.document_inline.author_id := db_content.author_id; - local_content.document_inline.metadata := '{}'; END IF; IF 
source_local_id(document_inline(local_content)) IS NOT NULL THEN db_document := _local_document_to_db_document(document_inline(local_content)); @@ -424,7 +433,7 @@ BEGIN db_document.source_local_id, db_document.url, db_document.created, - db_document.metadata, + COALESCE(db_document.metadata, '{}'::jsonb), /* empty JSON object when the document carries no metadata */ db_document.last_modified, db_document.author_id, db_document.contents @@ -442,6 +451,7 @@ BEGIN INSERT INTO public."Content" ( document_id, source_local_id, + variant, author_id, creator_id, created, @@ -454,17 +464,18 @@ BEGIN ) VALUES ( db_content.document_id, db_content.source_local_id, + COALESCE(db_content.variant, 'direct'::public."ContentVariant"), /* an unspecified variant is stored as 'direct' */ db_content.author_id, db_content.creator_id, db_content.created, db_content.text, - db_content.metadata, + COALESCE(db_content.metadata, '{}'::jsonb), /* empty JSON object when the content carries no metadata */ db_content.scale, db_content.space_id, db_content.last_modified, db_content.part_of_id ) - ON CONFLICT (space_id, source_local_id) DO UPDATE SET + ON CONFLICT (space_id, source_local_id, variant) DO UPDATE SET /* conflict target is the column list of the unique index content_space_local_id_variant_idx */ document_id = COALESCE(db_content.document_id, EXCLUDED.document_id), author_id = COALESCE(db_content.author_id, EXCLUDED.author_id), creator_id = COALESCE(db_content.creator_id, EXCLUDED.creator_id), @@ -508,9 +519,9 @@ COMMENT ON FUNCTION public.document_in_space IS 'security utility: does current ALTER TABLE public."Document" ENABLE ROW LEVEL SECURITY; DROP POLICY IF EXISTS document_policy ON public."Document"; -CREATE POLICY document_policy ON public."Document" FOR ALL USING (public.in_space (space_id)); +CREATE POLICY document_policy ON public."Document" FOR ALL USING (public.in_space(space_id)); ALTER TABLE public."Content" ENABLE ROW LEVEL SECURITY; DROP POLICY IF EXISTS content_policy ON public."Content"; -CREATE POLICY content_policy ON public."Content" FOR ALL USING (public.in_space (space_id)); +CREATE POLICY content_policy ON public."Content" FOR ALL USING (public.in_space(space_id)); diff --git a/packages/database/types.gen.ts 
b/packages/database/types.gen.ts index c9a0c8650..823501ee3 100644 --- a/packages/database/types.gen.ts +++ b/packages/database/types.gen.ts @@ -197,6 +197,7 @@ export type Database = { source_local_id: string | null space_id: number | null text: string + variant: Database["public"]["Enums"]["ContentVariant"] } Insert: { author_id?: number | null @@ -211,6 +212,7 @@ export type Database = { source_local_id?: string | null space_id?: number | null text: string + variant?: Database["public"]["Enums"]["ContentVariant"] } Update: { author_id?: number | null @@ -225,6 +227,7 @@ export type Database = { source_local_id?: string | null space_id?: number | null text?: string + variant?: Database["public"]["Enums"]["ContentVariant"] } Relationships: [ { @@ -547,6 +550,7 @@ export type Database = { source_local_id: string | null space_id: number | null text: string + variant: Database["public"]["Enums"]["ContentVariant"] } } _local_document_to_db_document: { @@ -602,11 +606,11 @@ export type Database = { } create_account_in_space: { Args: { - email_trusted?: boolean space_id_: number account_local_id_: string name_: string email_?: string + email_trusted?: boolean editor_?: boolean } Returns: number @@ -630,8 +634,8 @@ export type Database = { } generic_entity_access: { Args: { - target_id: number target_type: Database["public"]["Enums"]["EntityType"] + target_id: number } Returns: boolean } @@ -643,8 +647,8 @@ export type Database = { } get_space_anonymous_email: { Args: { - space_id: number platform: Database["public"]["Enums"]["Platform"] + space_id: number } Returns: string } @@ -681,11 +685,11 @@ export type Database = { } propose_sync_task: { Args: { + s_function: string s_target: number - s_worker: string - timeout: unknown task_interval: unknown - s_function: string + timeout: unknown + s_worker: string } Returns: string } @@ -694,20 +698,20 @@ export type Database = { Returns: boolean } upsert_concepts: { - Args: { v_space_id: number; data: Json } + Args: { data: 
Json; v_space_id: number } Returns: number[] } upsert_content: { Args: { v_space_id: number - content_as_document?: boolean v_creator_id: number + content_as_document?: boolean data: Json } Returns: number[] } upsert_content_embedding: { - Args: { model: string; embedding_array: number[]; content_id: number } + Args: { content_id: number; embedding_array: number[]; model: string } Returns: undefined } upsert_discourse_nodes: { @@ -731,13 +735,17 @@ export type Database = { }[] } upsert_documents: { - Args: { data: Json; v_space_id: number } + Args: { v_space_id: number; data: Json } Returns: number[] } } Enums: { AgentIdentifierType: "email" | "orcid" AgentType: "person" | "organization" | "automated_agent" | "anonymous" + ContentVariant: + | "direct" + | "direct_and_children" + | "direct_and_description" EmbeddingName: | "openai_text_embedding_ada2_1536" | "openai_text_embedding_3_small_512" @@ -830,6 +838,7 @@ export type Database = { embedding_inline: | Database["public"]["CompositeTypes"]["inline_embedding_input"] | null + variant: Database["public"]["Enums"]["ContentVariant"] | null } document_local_input: { space_id: number | null @@ -964,6 +973,11 @@ export const Constants = { Enums: { AgentIdentifierType: ["email", "orcid"], AgentType: ["person", "organization", "automated_agent", "anonymous"], + ContentVariant: [ + "direct", + "direct_and_children", + "direct_and_description", + ], EmbeddingName: [ "openai_text_embedding_ada2_1536", "openai_text_embedding_3_small_512",