From 75af65f5d6162d386f0094778852cb08796f026b Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Tue, 6 May 2025 11:37:35 -0400 Subject: [PATCH 01/40] Database structure --- packages/supabase/config.toml | 308 +++++++++++++ .../20250504193643_remote_schema.sql | 0 .../20250504195841_remote_schema.sql | 9 + .../20250504202930_content_tables.sql | 211 +++++++++ packages/supabase/schema.yaml | 419 ++++++++++++++++++ 5 files changed, 947 insertions(+) create mode 100644 packages/supabase/config.toml create mode 100644 packages/supabase/migrations/20250504193643_remote_schema.sql create mode 100644 packages/supabase/migrations/20250504195841_remote_schema.sql create mode 100644 packages/supabase/migrations/20250504202930_content_tables.sql create mode 100644 packages/supabase/schema.yaml diff --git a/packages/supabase/config.toml b/packages/supabase/config.toml new file mode 100644 index 000000000..d52366c38 --- /dev/null +++ b/packages/supabase/config.toml @@ -0,0 +1,308 @@ +# For detailed configuration reference documentation, visit: +# https://supabase.com/docs/guides/local-development/cli/config +# A string used to distinguish different Supabase projects on the same host. Defaults to the +# working directory name when running `supabase init`. +project_id = "apps" + +[api] +enabled = true +# Port to use for the API URL. +port = 54321 +# Schemas to expose in your API. Tables, views and stored procedures in this schema will get API +# endpoints. `public` and `graphql_public` schemas are included by default. +schemas = ["public", "graphql_public"] +# Extra schemas to add to the search_path of every request. +extra_search_path = ["public", "extensions"] +# The maximum number of rows returns from a view, table, or stored procedure. Limits payload size +# for accidental or malicious requests. +max_rows = 1000 + +[api.tls] +# Enable HTTPS endpoints locally using a self-signed certificate. +enabled = false + +[db] +# Port to use for the local database URL. 
+port = 54322 +# Port used by db diff command to initialize the shadow database. +shadow_port = 54320 +# The database major version to use. This has to be the same as your remote database's. Run `SHOW +# server_version;` on the remote database to check. +major_version = 15 + +[db.pooler] +enabled = false +# Port to use for the local connection pooler. +port = 54329 +# Specifies when a server connection can be reused by other clients. +# Configure one of the supported pooler modes: `transaction`, `session`. +pool_mode = "transaction" +# How many server connections to allow per user/database pair. +default_pool_size = 20 +# Maximum number of client connections allowed. +max_client_conn = 100 + +# [db.vault] +# secret_key = "env(SECRET_VALUE)" + +[db.migrations] +# Specifies an ordered list of schema files that describe your database. +# Supports glob patterns relative to supabase directory: "./schemas/*.sql" +schema_paths = [] + +[db.seed] +# If enabled, seeds the database after migrations during a db reset. +enabled = true +# Specifies an ordered list of seed files to load during db reset. +# Supports glob patterns relative to supabase directory: "./seeds/*.sql" +sql_paths = ["./seed.sql"] + +[realtime] +enabled = true +# Bind realtime via either IPv4 or IPv6. (default: IPv4) +# ip_version = "IPv6" +# The maximum length in bytes of HTTP request headers. (default: 4096) +# max_header_length = 4096 + +[studio] +enabled = true +# Port to use for Supabase Studio. +port = 54323 +# External URL of the API server that frontend connects to. +api_url = "http://127.0.0.1" +# OpenAI API Key to use for Supabase AI in the Supabase Studio. +openai_api_key = "env(OPENAI_API_KEY)" + +# Email testing server. Emails sent with the local dev setup are not actually sent - rather, they +# are monitored, and you can view the emails that would have been sent from the web interface. +[inbucket] +enabled = true +# Port to use for the email testing server web interface. 
+port = 54324 +# Uncomment to expose additional ports for testing user applications that send emails. +# smtp_port = 54325 +# pop3_port = 54326 +# admin_email = "admin@email.com" +# sender_name = "Admin" + +[storage] +enabled = true +# The maximum file size allowed (e.g. "5MB", "500KB"). +file_size_limit = "50MiB" + +# Image transformation API is available to Supabase Pro plan. +# [storage.image_transformation] +# enabled = true + +# Uncomment to configure local storage buckets +# [storage.buckets.images] +# public = false +# file_size_limit = "50MiB" +# allowed_mime_types = ["image/png", "image/jpeg"] +# objects_path = "./images" + +[auth] +enabled = true +# The base URL of your website. Used as an allow-list for redirects and for constructing URLs used +# in emails. +site_url = "http://127.0.0.1:3000" +# A list of *exact* URLs that auth providers are permitted to redirect to post authentication. +additional_redirect_urls = ["https://127.0.0.1:3000"] +# How long tokens are valid for, in seconds. Defaults to 3600 (1 hour), maximum 604,800 (1 week). +jwt_expiry = 3600 +# If disabled, the refresh token will never expire. +enable_refresh_token_rotation = true +# Allows refresh tokens to be reused after expiry, up to the specified interval in seconds. +# Requires enable_refresh_token_rotation = true. +refresh_token_reuse_interval = 10 +# Allow/disallow new user signups to your project. +enable_signup = true +# Allow/disallow anonymous sign-ins to your project. +enable_anonymous_sign_ins = false +# Allow/disallow testing manual linking of accounts +enable_manual_linking = false +# Passwords shorter than this value will be rejected as weak. Minimum 6, recommended 8 or more. +minimum_password_length = 6 +# Passwords that do not meet the following requirements will be rejected as weak. 
Supported values +# are: `letters_digits`, `lower_upper_letters_digits`, `lower_upper_letters_digits_symbols` +password_requirements = "" + +[auth.rate_limit] +# Number of emails that can be sent per hour. Requires auth.email.smtp to be enabled. +email_sent = 2 +# Number of SMS messages that can be sent per hour. Requires auth.sms to be enabled. +sms_sent = 30 +# Number of anonymous sign-ins that can be made per hour per IP address. Requires enable_anonymous_sign_ins = true. +anonymous_users = 30 +# Number of sessions that can be refreshed in a 5 minute interval per IP address. +token_refresh = 150 +# Number of sign up and sign-in requests that can be made in a 5 minute interval per IP address (excludes anonymous users). +sign_in_sign_ups = 30 +# Number of OTP / Magic link verifications that can be made in a 5 minute interval per IP address. +token_verifications = 30 + +# Configure one of the supported captcha providers: `hcaptcha`, `turnstile`. +# [auth.captcha] +# enabled = true +# provider = "hcaptcha" +# secret = "" + +[auth.email] +# Allow/disallow new user signups via email to your project. +enable_signup = true +# If enabled, a user will be required to confirm any email change on both the old, and new email +# addresses. If disabled, only the new email is required to confirm. +double_confirm_changes = true +# If enabled, users need to confirm their email address before signing in. +enable_confirmations = false +# If enabled, users will need to reauthenticate or have logged in recently to change their password. +secure_password_change = false +# Controls the minimum amount of time that must pass before sending another signup confirmation or password reset email. +max_frequency = "1s" +# Number of characters used in the email OTP. +otp_length = 6 +# Number of seconds before the email OTP expires (defaults to 1 hour). 
+otp_expiry = 3600 + +# Use a production-ready SMTP server +# [auth.email.smtp] +# enabled = true +# host = "smtp.sendgrid.net" +# port = 587 +# user = "apikey" +# pass = "env(SENDGRID_API_KEY)" +# admin_email = "admin@email.com" +# sender_name = "Admin" + +# Uncomment to customize email template +# [auth.email.template.invite] +# subject = "You have been invited" +# content_path = "./supabase/templates/invite.html" + +[auth.sms] +# Allow/disallow new user signups via SMS to your project. +enable_signup = false +# If enabled, users need to confirm their phone number before signing in. +enable_confirmations = false +# Template for sending OTP to users +template = "Your code is {{ .Code }}" +# Controls the minimum amount of time that must pass before sending another sms otp. +max_frequency = "5s" + +# Use pre-defined map of phone number to OTP for testing. +# [auth.sms.test_otp] +# 4152127777 = "123456" + +# Configure logged in session timeouts. +# [auth.sessions] +# Force log out after the specified duration. +# timebox = "24h" +# Force log out if the user has been inactive longer than the specified duration. +# inactivity_timeout = "8h" + +# This hook runs before a token is issued and allows you to add additional claims based on the authentication method used. +# [auth.hook.custom_access_token] +# enabled = true +# uri = "pg-functions:////" + +# Configure one of the supported SMS providers: `twilio`, `twilio_verify`, `messagebird`, `textlocal`, `vonage`. +[auth.sms.twilio] +enabled = false +account_sid = "" +message_service_sid = "" +# DO NOT commit your Twilio auth token to git. Use environment variable substitution instead: +auth_token = "env(SUPABASE_AUTH_SMS_TWILIO_AUTH_TOKEN)" + +# Multi-factor-authentication is available to Supabase Pro plan. +[auth.mfa] +# Control how many MFA factors can be enrolled at once per user. 
+max_enrolled_factors = 10 + +# Control MFA via App Authenticator (TOTP) +[auth.mfa.totp] +enroll_enabled = false +verify_enabled = false + +# Configure MFA via Phone Messaging +[auth.mfa.phone] +enroll_enabled = false +verify_enabled = false +otp_length = 6 +template = "Your code is {{ .Code }}" +max_frequency = "5s" + +# Configure MFA via WebAuthn +# [auth.mfa.web_authn] +# enroll_enabled = true +# verify_enabled = true + +# Use an external OAuth provider. The full list of providers are: `apple`, `azure`, `bitbucket`, +# `discord`, `facebook`, `github`, `gitlab`, `google`, `keycloak`, `linkedin_oidc`, `notion`, `twitch`, +# `twitter`, `slack`, `spotify`, `workos`, `zoom`. +[auth.external.apple] +enabled = false +client_id = "" +# DO NOT commit your OAuth provider secret to git. Use environment variable substitution instead: +secret = "env(SUPABASE_AUTH_EXTERNAL_APPLE_SECRET)" +# Overrides the default auth redirectUrl. +redirect_uri = "" +# Overrides the default auth provider URL. Used to support self-hosted gitlab, single-tenant Azure, +# or any other third-party OIDC providers. +url = "" +# If enabled, the nonce check will be skipped. Required for local sign in with Google auth. +skip_nonce_check = false + +# Use Firebase Auth as a third-party provider alongside Supabase Auth. +[auth.third_party.firebase] +enabled = false +# project_id = "my-firebase-project" + +# Use Auth0 as a third-party provider alongside Supabase Auth. +[auth.third_party.auth0] +enabled = false +# tenant = "my-auth0-tenant" +# tenant_region = "us" + +# Use AWS Cognito (Amplify) as a third-party provider alongside Supabase Auth. +[auth.third_party.aws_cognito] +enabled = false +# user_pool_id = "my-user-pool-id" +# user_pool_region = "us-east-1" + +# Use Clerk as a third-party provider alongside Supabase Auth. 
+[auth.third_party.clerk] +enabled = false +# Obtain from https://clerk.com/setup/supabase +# domain = "example.clerk.accounts.dev" + +[edge_runtime] +enabled = true +# Configure one of the supported request policies: `oneshot`, `per_worker`. +# Use `oneshot` for hot reload, or `per_worker` for load testing. +policy = "oneshot" +# Port to attach the Chrome inspector for debugging edge functions. +inspector_port = 8083 +# The Deno major version to use. +deno_version = 1 + +# [edge_runtime.secrets] +# secret_key = "env(SECRET_VALUE)" + +[analytics] +enabled = true +port = 54327 +# Configure one of the supported backends: `postgres`, `bigquery`. +backend = "postgres" + +# Experimental features may be deprecated any time +[experimental] +# Configures Postgres storage engine to use OrioleDB (S3) +orioledb_version = "" +# Configures S3 bucket URL, eg. .s3-.amazonaws.com +s3_host = "env(S3_HOST)" +# Configures S3 bucket region, eg. us-east-1 +s3_region = "env(S3_REGION)" +# Configures AWS_ACCESS_KEY_ID for S3 bucket +s3_access_key = "env(S3_ACCESS_KEY)" +# Configures AWS_SECRET_ACCESS_KEY for S3 bucket +s3_secret_key = "env(S3_SECRET_KEY)" diff --git a/packages/supabase/migrations/20250504193643_remote_schema.sql b/packages/supabase/migrations/20250504193643_remote_schema.sql new file mode 100644 index 000000000..e69de29bb diff --git a/packages/supabase/migrations/20250504195841_remote_schema.sql b/packages/supabase/migrations/20250504195841_remote_schema.sql new file mode 100644 index 000000000..89b3d4e4c --- /dev/null +++ b/packages/supabase/migrations/20250504195841_remote_schema.sql @@ -0,0 +1,9 @@ +create extension if not exists "pg_jsonschema" with schema "extensions"; + +create extension if not exists "pg_stat_monitor" with schema "extensions"; + +create extension if not exists "pgroonga" with schema "extensions"; + +create extension if not exists "vector" with schema "extensions"; + + diff --git a/packages/supabase/migrations/20250504202930_content_tables.sql 
b/packages/supabase/migrations/20250504202930_content_tables.sql new file mode 100644 index 000000000..52f3cda7c --- /dev/null +++ b/packages/supabase/migrations/20250504202930_content_tables.sql @@ -0,0 +1,211 @@ + +CREATE SEQUENCE IF NOT EXISTS public.entity_id_seq + AS BIGINT + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +CREATE TYPE "EntityType" AS ENUM ('Platform', 'Space', 'Account', 'Person', 'AutomatedAgent', 'Document', 'Content', 'Concept', 'ConceptSchema', 'ContentLink', 'Occurrence'); + +CREATE TYPE "Scale" AS ENUM ('document', 'post', 'chunk_unit', 'section', 'block', 'field', 'paragraph', 'quote', 'sentence', 'phrase'); + +CREATE TYPE "EmbeddingName" AS ENUM ('openai_text_embedding_ada2_1536', 'openai_text_embedding_3_small_512', 'openai_text_embedding_3_small_1536', 'openai_text_embedding_3_large_256', 'openai_text_embedding_3_large_1024', 'openai_text_embedding_3_large_3072'); + +CREATE TYPE "EpistemicStatus" AS ENUM ('certainly_not', 'strong_evidence_against', 'could_be_false', 'unknown', 'uncertain', 'contentious', 'could_be_true', 'strong_evidence_for', 'certain'); + +CREATE TABLE "Agent" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + type "EntityType" NOT NULL +); +COMMENT ON TABLE "Agent" IS 'An agent that acts in the system'; + + +CREATE TABLE "Person" ( + id BIGINT NOT NULL PRIMARY KEY, + name VARCHAR NOT NULL, + orcid VARCHAR(20), + email VARCHAR NOT NULL, + CONSTRAINT person_id_fkey FOREIGN KEY (id) + REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE +); +COMMENT ON TABLE "Person" IS 'A person using the system'; + + +CREATE TABLE "AutomatedAgent" ( + id BIGINT NOT NULL PRIMARY KEY, + name VARCHAR NOT NULL, + metadata JSONB NOT NULL DEFAULT '{}', + deterministic BOOLEAN DEFAULT FALSE, + version VARCHAR, + CONSTRAINT automated_agent_id_fkey FOREIGN KEY (id) + REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE +); +COMMENT ON TABLE "AutomatedAgent" IS 'An automated 
agent'; + + +CREATE TABLE "DiscoursePlatform" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + name VARCHAR NOT NULL, + url VARCHAR NOT NULL +); +COMMENT ON TABLE "DiscoursePlatform" IS 'A data platform where discourse happens'; + + +CREATE TABLE "Account" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + platform_id BIGINT NOT NULL, + person_id BIGINT NOT NULL, + write_permission BOOLEAN NOT NULL, + active BOOLEAN NOT NULL DEFAULT TRUE, + FOREIGN KEY(platform_id) REFERENCES "DiscoursePlatform" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(person_id) REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE +); +COMMENT ON TABLE "Account" IS 'A user account on a discourse platform'; + + +CREATE TABLE "DiscourseSpace" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + url VARCHAR, + name VARCHAR NOT NULL, + discourse_platform_id BIGINT NOT NULL, + FOREIGN KEY(discourse_platform_id) REFERENCES "DiscoursePlatform" (id) ON DELETE CASCADE ON UPDATE CASCADE +); +COMMENT ON TABLE "DiscourseSpace" IS 'A space on a discourse platform representing a community engaged in a conversation'; + + +CREATE TABLE "SpaceAccess" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + space_id BIGINT, + account_id BIGINT NOT NULL, + editor BOOLEAN NOT NULL, + UNIQUE (account_id, space_id), + FOREIGN KEY(space_id) REFERENCES "DiscourseSpace" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(account_id) REFERENCES "Account" (id) ON DELETE CASCADE ON UPDATE CASCADE +); +COMMENT ON TABLE "SpaceAccess" IS 'An access control entry for a space'; +COMMENT ON COLUMN "SpaceAccess".space_id IS 'The space in which the content is located'; + + +CREATE TABLE "Document" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + space_id BIGINT, + source_local_id VARCHAR, + url VARCHAR, + last_synced TIMESTAMP 
WITHOUT TIME ZONE NOT NULL, + created TIMESTAMP WITHOUT TIME ZONE NOT NULL, + metadata JSONB NOT NULL DEFAULT '{}', + last_modified TIMESTAMP WITHOUT TIME ZONE NOT NULL, + author_id BIGINT NOT NULL, + contents OID, + FOREIGN KEY(space_id) REFERENCES "DiscourseSpace" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(author_id) REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE +); +COMMENT ON COLUMN "Document".space_id IS 'The space in which the content is located'; +COMMENT ON COLUMN "Document".source_local_id IS 'The unique identifier of the content in the remote source'; +COMMENT ON COLUMN "Document".last_synced IS 'The last time the content was synced with the remote source'; +COMMENT ON COLUMN "Document".created IS 'The time when the content was created in the remote source'; +COMMENT ON COLUMN "Document".last_modified IS 'The last time the content was modified in the remote source'; +COMMENT ON COLUMN "Document".author_id IS 'The author of content'; +COMMENT ON COLUMN "Document".contents IS 'A large object OID for the downloaded raw content'; + +CREATE TABLE "Concept" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + epistemic_status "EpistemicStatus" NOT NULL DEFAULT 'unknown', + name VARCHAR NOT NULL, + description TEXT, + author_id BIGINT, + created TIMESTAMP WITHOUT TIME ZONE NOT NULL, + last_modified TIMESTAMP WITHOUT TIME ZONE NOT NULL, + last_synced TIMESTAMP WITHOUT TIME ZONE NOT NULL, + space_id BIGINT, + arity SMALLINT NOT NULL DEFAULT 0, + schema_id BIGINT, + content JSONB NOT NULL DEFAULT '{}', + is_schema BOOLEAN NOT NULL DEFAULT FALSE, + FOREIGN KEY(author_id) REFERENCES "Agent" (id) ON DELETE SET NULL ON UPDATE CASCADE, + FOREIGN KEY(space_id) REFERENCES "DiscourseSpace" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(schema_id) REFERENCES "Concept" (id) ON DELETE SET NULL ON UPDATE CASCADE +); +CREATE INDEX "Concept_space" ON "Concept" (space_id); +CREATE INDEX "Concept_schema" ON 
"Concept" (schema_id); +CREATE INDEX "Concept_content" ON "Concept" USING GIN (content jsonb_path_ops); + + +COMMENT ON TABLE "Concept" IS 'An abstract concept, claim or relation'; +COMMENT ON COLUMN "Concept".author_id IS 'The author of content'; +COMMENT ON COLUMN "Concept".created IS 'The time when the content was created in the remote source'; +COMMENT ON COLUMN "Concept".last_modified IS 'The last time the content was modified in the remote source'; +COMMENT ON COLUMN "Concept".last_synced IS 'The last time the content was synced with the remote source'; +COMMENT ON COLUMN "Concept".space_id IS 'The space in which the content is located'; + + +CREATE TABLE "Content" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + document_id BIGINT NOT NULL, + source_local_id VARCHAR, + author_id BIGINT, + creator_id BIGINT, + created TIMESTAMP WITHOUT TIME ZONE NOT NULL, + text TEXT NOT NULL, + metadata JSONB NOT NULL DEFAULT '{}', + scale "Scale" NOT NULL, + space_id BIGINT, + last_modified TIMESTAMP WITHOUT TIME ZONE NOT NULL, + last_synced TIMESTAMP WITHOUT TIME ZONE NOT NULL, + part_of_id BIGINT, + represents_id BIGINT, + FOREIGN KEY(document_id) REFERENCES "Document" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(author_id) REFERENCES "Agent" (id) ON DELETE SET NULL ON UPDATE CASCADE, + FOREIGN KEY(creator_id) REFERENCES "Agent" (id) ON DELETE SET NULL ON UPDATE CASCADE, + FOREIGN KEY(space_id) REFERENCES "DiscourseSpace" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(part_of_id) REFERENCES "Content" (id) ON DELETE SET NULL ON UPDATE CASCADE, + FOREIGN KEY(represents_id) REFERENCES "Concept" (id) ON DELETE SET NULL ON UPDATE CASCADE +); + +CREATE INDEX "Content_text" ON "Content" USING pgroonga (text); +CREATE INDEX "Content_space" ON "Content" (space_id); +CREATE INDEX "Content_document" ON "Content" (document_id); +CREATE INDEX "Content_part_of" ON "Content" (part_of_id); +CREATE INDEX "Content_represents" ON 
"Content" (represents_id); + +COMMENT ON TABLE "Content" IS 'A unit of content'; +COMMENT ON COLUMN "Content".source_local_id IS 'The unique identifier of the content in the remote source'; +COMMENT ON COLUMN "Content".author_id IS 'The author of content'; +COMMENT ON COLUMN "Content".creator_id IS 'The creator of a logical structure, such as a content subdivision'; +COMMENT ON COLUMN "Content".created IS 'The time when the content was created in the remote source'; +COMMENT ON COLUMN "Content".space_id IS 'The space in which the content is located'; +COMMENT ON COLUMN "Content".last_modified IS 'The last time the content was modified in the remote source'; +COMMENT ON COLUMN "Content".last_synced IS 'The last time the content was synced with the remote source'; +COMMENT ON COLUMN "Content".part_of_id IS 'This content is part of a larger content unit'; +COMMENT ON COLUMN "Content".represents_id IS 'This content explicitly represents a concept'; + + +CREATE TABLE concept_contributors ( + concept_id BIGINT, + contributor_id BIGINT, + PRIMARY KEY (concept_id, contributor_id), + FOREIGN KEY(concept_id) REFERENCES "Concept" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(contributor_id) REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE +); + + +CREATE TABLE "ContentEmbedding_openai_text_embedding_3_small_1536" ( + target_id BIGINT NOT NULL, + model "EmbeddingName" NOT NULL DEFAULT 'openai_text_embedding_3_small_1536', + vector extensions.vector(1536) NOT NULL, + obsolete BOOLEAN DEFAULT FALSE, + PRIMARY KEY (target_id), + FOREIGN KEY(target_id) REFERENCES "Content" (id) ON DELETE CASCADE ON UPDATE CASCADE +); + + +CREATE TABLE content_contributors ( + content_id BIGINT, + contributor_id BIGINT, + PRIMARY KEY (content_id, contributor_id), + FOREIGN KEY(content_id) REFERENCES "Content" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(contributor_id) REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE +); diff --git 
a/packages/supabase/schema.yaml b/packages/supabase/schema.yaml new file mode 100644 index 000000000..9047a437b --- /dev/null +++ b/packages/supabase/schema.yaml @@ -0,0 +1,419 @@ +id: https://discoursegraphs.com/schemas/v0# +name: discoursegraphs +prefixes: + linkml: https://w3id.org/linkml/ + dg: https://discoursegraphs.com/schemas/v0# +default_prefix: dg +default_range: string +imports: + - linkml:types +enums: + Scale: + description: scale value of a Content + permissible_values: + document: + post: + chunk_unit: + section: + block: + description: A block of content in an outline system, such as a Roam node + field: + paragraph: + quote: + sentence: + phrase: + Validation: + description: Whether a given value was given by a person, or suggested by an automated agent (and then possibly infirmed.) + permissible_values: + infirmed: + description: Infirmed by a trusted agent + suggested: + description: Suggested by a fallible agent (probabilistic, heuristic, person in training...) + confirmed: + description: Confirmed by a trusted agent + intrinsic: + description: No validation needed, this is intrinsic to the data + EpistemicStatus: + description: The epistemic status of a claim + permissible_values: + certainly_not: + strong_evidence_against: + could_be_false: + unknown: + uncertain: + contentious: + could_be_true: + strong_evidence_for: + certain: + EntityType: + description: The type of an entity + permissible_values: + Platform: + Space: + Account: + Person: + AutomatedAgent: + Document: + Content: + Concept: + ConceptSchema: + ContentLink: + Occurrence: + EmbeddingName: + description: an embedding name + permissible_values: + openai_text_embedding_ada2_1536: + openai_text_embedding_3_small_512: + openai_text_embedding_3_small_1536: + openai_text_embedding_3_large_256: + openai_text_embedding_3_large_1024: + openai_text_embedding_3_large_3072: + DerivedTextVariant: + description: Is the text taken as-is, or is it a computed variant? 
+ permissible_values: + as_is: + neighbourhood_parent_and_children: + neighbourhood_parent_and_level_2_descendants: + neighbourhood_children: + neighbourhood_level_2_descendants: + summary: +types: + JSON: + uri: xsd:string + # base: dict + base: str + description: JSON data + # annotations: + # sql_type: sqlalchemy.dialects.postgresql.JSONB + JSONSchema: + uri: xsd:string + base: dict + description: A Json schema + # annotations: + # sql_type: sqlalchemy.dialects.postgresql.JSONB + vector: + uri: xsd:float + base: float + description: A vector of floats + annotations: + sql_type: pgvector.sqlalchemy.Vector + blob: + uri: xsd:base64Binary + base: bytes + annotations: + sql_type: sqlalchemy.dialects.postgresql.BLOB + description: A binary large object +classes: + Agent: + description: An agent that acts in the system + abstract: true + slots: + - id + - type + Person: + description: A person using the system + is_a: Agent + slots: + - name + - orcid + attributes: + email: + required: true + # TODO: known skills, i.e. what processes can they confirm. 
+ AutomatedAgent: + description: An automated agent + is_a: Agent + slots: + - metadata + attributes: + deterministic: + range: boolean + ifabsent: false + version: + range: string + + DiscoursePlatform: + description: A data platform where discourse happens + slots: + - id + - name + attributes: + url: + required: true + Account: + description: A user account on a discourse platform + slots: + - id + - platform + attributes: + person: + range: Agent + required: true + write_permission: + range: boolean + required: true + active: + range: boolean + required: true + ifabsent: true + DiscourseSpace: + description: A space on a discourse platform representing a community engaged in a conversation + slots: + - id + - url + - name + attributes: + discourse_platform: + range: DiscoursePlatform + required: true + SpaceAccess: + description: An access control entry for a space + slots: + - space + attributes: + account: + range: Account + required: true + editor: + range: boolean + required: true + unique_keys: + main: + description: Primary key for space access + unique_key_slots: + - account + - space + Content: + description: A unit of content + slots: + - id + - document + - source_local_id + - author + - creator + - created + - text + - metadata + - scale + # - position + # - char_position + - space + - contributors + - last_modified + - last_synced + attributes: + part_of: + description: This content is part of a larger content unit + range: Content + represents: + description: This content explicitly represents a concept + range: Concept + # ContentDerivation: + # description: A derivation relation between content units + # attributes: + # derived_content: + # description: The derived content unit + # range: Content + # required: true + # identifier: true + # derived_from: + # description: The content unit that this variant was derived from + # range: Content + # required: true + # derived_variant: + # description: This content is a variant derived from another 
content unit + # range: DerivedTextVariant + # required: true + Document: + slots: + - id + - space + - source_local_id + - url + - last_synced + - created + - metadata + - last_modified + - author + - contributors + attributes: + contents: + range: blob + # Article: + # description: an article + # is_a: Document + # slots: + # - issn + # - abstract + ContentEmbedding: + # abstract: true + attributes: + target: + range: Content + required: true + identifier: true + model: + range: EmbeddingName + required: true + vector: + range: vector + array: + minimum_number_dimensions: 1 + maximum_number_dimensions: 1 + required: true + obsolete: + description: Whether this embedding is obsolete (because the Content was modified) + range: boolean + ifabsent: false + Concept: + description: An abstract concept, claim or relation + slots: + - id + - epistemic_status + - name + - description + - author + - contributors + - created + - last_modified + - last_synced + - space + attributes: + arity: + range: integer + required: true + ifabsent: 0 + description: The number of roles in this relation; nodes have zero, binary relations have 2, etc. + schema: + range: ConceptSchema + required: true + content: + range: JSON + required: true + is_schema: + range: boolean + required: true + ifabsent: false + # update status + # concept has occurrences and possibly a representation in a space. + # Are concepts space-specific? Tending to yes. So the point of convergence should be distinct. + # Can a concept have multiple representations? One case is a representation + # of an equivalent concept in another space. + # do non-claim concepts have epistemic status? + # The other big deal is who has authority on concept definition. + # Finally... concept schema. Yeah. Is it per-space? Likely. + # Damn, concept schema is a concept, is it not? + # Now, if a concept has a complex structure based on a complex content... + # AH, it should be based on occurrences. 
+ + ConceptSchema: + is_a: Concept + + # Reference: + # abstract: true + # description: A link from a content fragment to something else + # attributes: + # source: + # range: Content + # required: true + # slots: + # - creator + # - created + # - validation + # - type + # # This is an aggregate of validation events + # # Q: What is the relationship between occurrences and links? Links to Concepts in particular? + # # What if the concept has been materialized as content? + # ContentLink: + # description: An explicit link from a content fragment to another content. + # is_a: Reference + # attributes: + # target: + # range: Content + # required: true + # Occurrence: + # description: A link from a content fragment to a Concept. May be an interpretation. + # is_a: Reference + # attributes: + # target: + # range: Concept + # required: true +slots: + id: + range: integer + identifier: true + required: true + type: + range: EntityType + required: true + designates_type: true + name: + required: true + author: + range: Agent + description: The author of content + required: true + creator: + range: Agent + description: The creator of a logical structure, such as a content subdivision + required: true + contributors: + multivalued: true + range: Agent + text: + required: true + description: + created: + range: datetime + required: true + description: The time when the content was created in the remote source + last_modified: + range: datetime + required: true + description: The last time the content was modified in the remote source + last_synced: + range: datetime + required: true + description: The last time the content was synced with the remote source + metadata: + range: JSON + orcid: + range: string + url: + range: string + platform: + range: DiscoursePlatform + required: true + issn: + abstract: + scale: + range: Scale + required: true + position: + description: The ordinal position of the content within its parent, wrt other content units of the same scale + range: 
integer + ifabsent: 0 + required: true + char_position: + description: The character position of the content within its parent. + # Does not apply to outline sub-elements + range: integer + ifabsent: 0 + validation: + range: Validation + required: true + ifabsent: Validation(suggested) + epistemic_status: + range: EpistemicStatus + required: true + ifabsent: EpistemicStatus(unknown) + space: + range: DiscourseSpace + description: The space in which the content is located + document: + range: Document + required: true + source_local_id: + range: string + description: The unique identifier of the content in the remote source + # In the case of a document, could it be the URL? From 9d334a07479c6c069e961d75e059f100f3d2f0b5 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Tue, 6 May 2025 13:32:02 -0400 Subject: [PATCH 02/40] add an example --- packages/supabase/example.md | 57 ++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 packages/supabase/example.md diff --git a/packages/supabase/example.md b/packages/supabase/example.md new file mode 100644 index 000000000..4907a08c2 --- /dev/null +++ b/packages/supabase/example.md @@ -0,0 +1,57 @@ +# example... 
+ +Content: + +* (nt1pgid) discourse-graphs/nodes/Claim +* (nt2pgid) discourse-graphs/nodes/Hypothesis +* (et1pgid) discourse-graphs/edges/OpposedBy + * (anyid1) roles + * (et1r1bkid) source + * (et1r2bkid) target + * (anyid2) arity: 2 +* (somepgid) Some page + * (hyp1bkid) [HYP] Some hypothesis + * (opp1bkid) OpposedBy + * (clm1bkid) [CLM] Some Claim + +Documents: + ++----+-----------------+ +| id | source_local_id | ++----+-----------------+ +| 1 | nt1pgid | +| 2 | nt2pgid | +| 3 | et1pgid | +| 4 | somepgid | ++----+-----------------+ + +Content: + ++----+-------------+-------------+----------+---------------+-----------------------------------+ +| id | source_local_id | page_id | scale | represents_id | text | ++----+-------------+-------------+----------+---------------+-----------------------------------+ +| 5 | nt1pgid | 1 | document | 16 | discourse-graphs/nodes/Claim | +| 6 | nt2pgid | 2 | document | 17 | discourse-graphs/nodes/Hypothesis | +| 7 | et1pgid | 3 | document | 18 | discourse-graphs/edges/OpposedBy | +| 8 | somepgid | 4 | document | | Some page | +| 9 | hyp1bkid | 4 | block | 20 | [HYP] Some hypothesis | +| 10 | opp1bkid | 4 | block | 21 | OpposedBy | +| 11 | clm1bkid | 4 | block | 19 | [CLM] Some claim | +| 12 | anyid1 | 3 | block | | roles | +| 13 | et1r1bkid | 3 | block | | source | +| 14 | et1r2bkid | 3 | block | | target | +| 15 | anyid2 | 3 | block | | arity: 2 | ++----+-------------+-------------+----------+---------------+-----------------------------------+ + +Concept: + ++----+-----------+-------+-----------------------+-----------... +| id | is_schema | arity | name | content ++----+-----------+-------+-----------------------+-----------... 
+| 16 | true | 0 | Claim | {} +| 17 | true | 0 | Hypothesis | {} +| 18 | true | 2 | Opposed-by | { "roles": ["source", "target"] } +| 19 | false | 0 | [CLM] Some Claim | {} +| 20 | false | 0 | [HYP] Some Hypothesis | {} +| 21 | false | 2 | OpposedBy | { "concepts": {"source": 19, "target": 21}, "occurences": [{"source": 11, target: 9 }] } ++----+-----------+-------+-----------------------+-----------... From 67c362f0c4da78c43f70e4acfc616333ee75e22b Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Tue, 6 May 2025 13:46:27 -0400 Subject: [PATCH 03/40] unique source_id --- packages/supabase/migrations/20250506174523_content_idx_id.sql | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 packages/supabase/migrations/20250506174523_content_idx_id.sql diff --git a/packages/supabase/migrations/20250506174523_content_idx_id.sql b/packages/supabase/migrations/20250506174523_content_idx_id.sql new file mode 100644 index 000000000..8e9ddfc55 --- /dev/null +++ b/packages/supabase/migrations/20250506174523_content_idx_id.sql @@ -0,0 +1,2 @@ +CREATE UNIQUE INDEX "Content_space_and_id" ON "Content" (space_id, source_local_id) WHERE +source_local_id IS NOT NULL; From 00cccac5f1099167e05f26cddc8945f201bb866d Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Tue, 6 May 2025 14:03:49 -0400 Subject: [PATCH 04/40] add svg/puml --- packages/supabase/schema.puml | 113 ++++++++++++++++++++++++++++++++++ packages/supabase/schema.svg | 1 + 2 files changed, 114 insertions(+) create mode 100644 packages/supabase/schema.puml create mode 100644 packages/supabase/schema.svg diff --git a/packages/supabase/schema.puml b/packages/supabase/schema.puml new file mode 100644 index 000000000..9a7d0c610 --- /dev/null +++ b/packages/supabase/schema.puml @@ -0,0 +1,113 @@ +@startuml +skinparam nodesep 10 +hide circle +hide empty members +class "SpaceAccess" [[{An access control entry for a space}]] { + {field} editor : boolean +} +class "Account" [[{A user account on a discourse 
platform}]] { + {field} id : integer + {field} write_permission : boolean + {field} active : boolean +} +class "DiscourseSpace" [[{A space on a discourse platform representing a community engaged in a conversation}]] { + {field} id : integer + {field} url : string + {field} name : string +} +"SpaceAccess" --> "1" "Account" : "account" +"SpaceAccess" --> "0..1" "DiscourseSpace" : "space" +class "Document" [[{None}]] { + {field} id : integer + {field} source_local_id : string + {field} url : string + {field} last_synced : datetime + {field} created : datetime + {field} metadata : JSON + {field} last_modified : datetime + {field} contents : blob +} +abstract "Agent" [[{An agent that acts in the system}]] { + {field} id : integer + {field} type : EntityType +} +class "Content" [[{A unit of content}]] { + {field} id : integer + {field} source_local_id : string + {field} created : datetime + {field} text : string + {field} metadata : JSON + {field} scale : Scale + {field} last_modified : datetime + {field} last_synced : datetime +} +"Document" --> "0..*" "Agent" : "contributors" +"Document" --> "1" "Agent" : "author" +"Document" --> "0..1" "DiscourseSpace" : "space" +"Content" --> "1" "Document" : "document" +class "DiscoursePlatform" [[{A data platform where discourse happens}]] { + {field} id : integer + {field} name : string + {field} url : string +} +class "Concept" [[{An abstract concept, claim or relation}]] { + {field} id : integer + {field} epistemic_status : EpistemicStatus + {field} name : string + {field} description : string + {field} created : datetime + {field} last_modified : datetime + {field} last_synced : datetime + {field} arity : integer + {field} content : JSON + {field} is_schema : boolean +} +"DiscourseSpace" --> "1" "DiscoursePlatform" : "discourse_platform" +"Content" --> "0..1" "DiscourseSpace" : "space" +"Concept" --> "0..1" "DiscourseSpace" : "space" +"Account" --> "1" "DiscoursePlatform" : "platform" +class "ContentEmbedding" [[{None}]] { + 
{field} model : EmbeddingName + {field} vector : vector + {field} obsolete : boolean +} +"ContentEmbedding" --> "1" "Content" : "target" +"Content" --> "0..1" "Concept" : "represents" +"Content" --> "0..1" "Content" : "part_of" +"Content" --> "0..*" "Agent" : "contributors" +"Content" --> "1" "Agent" : "creator" +"Content" --> "1" "Agent" : "author" +class "ConceptSchema" [[{None}]] { + {field} id(i) : integer + {field} epistemic_status(i) : EpistemicStatus + {field} name(i) : string + {field} description(i) : string + {field} created(i) : datetime + {field} last_modified(i) : datetime + {field} last_synced(i) : datetime + {field} arity(i) : integer + {field} content(i) : JSON + {field} is_schema(i) : boolean +} +"Concept" --> "1" "ConceptSchema" : "schema" +"Concept" --> "0..*" "Agent" : "contributors" +"Concept" --> "1" "Agent" : "author" +"Concept" ^-- "ConceptSchema" +class "Person" [[{A person using the system}]] { + {field} name : string + {field} orcid : string + {field} email : string + {field} id(i) : integer + {field} type(i) : EntityType +} +class "AutomatedAgent" [[{An automated agent}]] { + {field} metadata : JSON + {field} deterministic : boolean + {field} version : string + {field} id(i) : integer + {field} type(i) : EntityType +} +"Account" --> "1" "Agent" : "person" +"Agent" ^-- "Person" +"Agent" ^-- "AutomatedAgent" +@enduml diff --git a/packages/supabase/schema.svg b/packages/supabase/schema.svg new file mode 100644 index 000000000..4d9b1aefb --- /dev/null +++ b/packages/supabase/schema.svg @@ -0,0 +1 @@ +SpaceAccesseditor : booleanAccountid : integerwrite_permission : booleanactive : booleanDiscourseSpaceid : integerurl : stringname : stringDocumentid : integersource_local_id : stringurl : stringlast_synced : datetimecreated : datetimemetadata : JSONlast_modified : datetimecontents : blobAgentid : integertype : EntityTypeContentid : integersource_local_id : stringcreated : datetimetext : stringmetadata : JSONscale : Scalelast_modified : 
datetimelast_synced : datetimeDiscoursePlatformid : integername : stringurl : stringConceptid : integerepistemic_status : EpistemicStatusname : stringdescription : stringcreated : datetimelast_modified : datetimelast_synced : datetimearity : integercontent : JSONis_schema : booleanContentEmbeddingmodel : EmbeddingNamevector : vectorobsolete : booleanConceptSchemaid(i) : integerepistemic_status(i) : EpistemicStatusname(i) : stringdescription(i) : stringcreated(i) : datetimelast_modified(i) : datetimelast_synced(i) : datetimearity(i) : integercontent(i) : JSONis_schema(i) : booleanPersonname : stringorcid : stringemail : stringid(i) : integertype(i) : EntityTypeAutomatedAgentmetadata : JSONdeterministic : booleanversion : stringid(i) : integertype(i) : EntityTypeaccount1space0..1contributors0..*author1space0..1document1discourse_platform1space0..1space0..1platform1target1represents0..1part_of0..1contributors0..*creator1author1schema1contributors0..*author1person1 \ No newline at end of file From f0783c1df407b4e8010e95f0955dbaeaec0d8b7c Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Thu, 8 May 2025 12:21:13 -0400 Subject: [PATCH 05/40] changes to example --- packages/supabase/example.md | 78 +++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 27 deletions(-) diff --git a/packages/supabase/example.md b/packages/supabase/example.md index 4907a08c2..a1eeb89fc 100644 --- a/packages/supabase/example.md +++ b/packages/supabase/example.md @@ -9,10 +9,12 @@ Content: * (et1r1bkid) source * (et1r2bkid) target * (anyid2) arity: 2 +* (hyp1pgid) [HYP] Some hypothesis +* (clm1pgid) [CLM] Some claim * (somepgid) Some page - * (hyp1bkid) [HYP] Some hypothesis + * (hyp1refbkid) a block referring to [[HYP] Some hypothesis] * (opp1bkid) OpposedBy - * (clm1bkid) [CLM] Some Claim + * (clm1refbkid) a block referring to [[CLM] Some Claim] Documents: @@ -22,36 +24,58 @@ Documents: | 1 | nt1pgid | | 2 | nt2pgid | | 3 | et1pgid | +| 22 | hyp1pgid | +| 23 | clm1pgid 
| | 4 | somepgid | +----+-----------------+ Content: -+----+-------------+-------------+----------+---------------+-----------------------------------+ -| id | source_local_id | page_id | scale | represents_id | text | -+----+-------------+-------------+----------+---------------+-----------------------------------+ -| 5 | nt1pgid | 1 | document | 16 | discourse-graphs/nodes/Claim | -| 6 | nt2pgid | 2 | document | 17 | discourse-graphs/nodes/Hypothesis | -| 7 | et1pgid | 3 | document | 18 | discourse-graphs/edges/OpposedBy | -| 8 | somepgid | 4 | document | | Some page | -| 9 | hyp1bkid | 4 | block | 20 | [HYP] Some hypothesis | -| 10 | opp1bkid | 4 | block | 21 | OpposedBy | -| 11 | clm1bkid | 4 | block | 19 | [CLM] Some claim | -| 12 | anyid1 | 3 | block | | roles | -| 13 | et1r1bkid | 3 | block | | source | -| 14 | et1r2bkid | 3 | block | | target | -| 15 | anyid2 | 3 | block | | arity: 2 | -+----+-------------+-------------+----------+---------------+-----------------------------------+ ++----+-------------+-------------+----------+---------------+----------------------------------------------+ +| id | source_local_id | page_id | scale | represents_id | text | ++----+-------------+-------------+----------+---------------+----------------------------------------------+ +| 5 | nt1pgid | 1 | document | 16 | discourse-graphs/nodes/Claim | +| 6 | nt2pgid | 2 | document | 17 | discourse-graphs/nodes/Hypothesis | +| 7 | et1pgid | 3 | document | 18 | discourse-graphs/edges/OpposedBy | +| 8 | somepgid | 4 | document | | Some page | +| 24 | hyp1pgid | 22 | document | 20 | [HYP] Some hypothesis | +| 25 | clm1pgid | 23 | document | 19 | [HYP] Some claim | +| 9 | hyp1refbkid | 4 | block | | a block referring to [[HYP] Some hypothesis] | +| 10 | opp1bkid | 4 | block | 21 | OpposedBy | +| 11 | clm1refbkid | 4 | block | | a block referring to [[CLM] Some claim] | +| 12 | anyid1 | 3 | block | | roles | +| 13 | et1r1bkid | 3 | block | | source | +| 14 | et1r2bkid | 3 | block | | 
target | +| 15 | anyid2 | 3 | block | | arity: 2 | ++----+-------------+-------------+----------+---------------+----------------------------------------------+ Concept: -+----+-----------+-------+-----------------------+-----------... -| id | is_schema | arity | name | content -+----+-----------+-------+-----------------------+-----------... -| 16 | true | 0 | Claim | {} -| 17 | true | 0 | Hypothesis | {} -| 18 | true | 2 | Opposed-by | { "roles": ["source", "target"] } -| 19 | false | 0 | [CLM] Some Claim | {} -| 20 | false | 0 | [HYP] Some Hypothesis | {} -| 21 | false | 2 | OpposedBy | { "concepts": {"source": 19, "target": 21}, "occurences": [{"source": 11, target: 9 }] } -+----+-----------+-------+-----------------------+-----------... ++----+-----------+-------+--------+-----------------------+-----------... +| id | is_schema | arity | schema | name | content ++----+-----------+-------+--------+-----------------------+-----------... +| 16 | true | 0 | | Claim | {} +| 17 | true | 0 | | Hypothesis | {} +| 18 | true | 2 | | Opposed-by | + { "roles": ["source", "target"], "representation": ["source", "sourceref", "target", "targetref", "predicate"] } +| 19 | false | 0 | | [CLM] Some claim | {} +| 20 | false | 0 | | [HYP] Some hypothesis | {} +| 21 | false | 2 | 18 | OpposedBy | + { "concepts": {"source": 19, "target": 20}, "occurences": + [{"sourceref": 11, "targetref": 9, "source": 25, "target": 24, "predicate": 10 }] } ++----+-----------+-------+--------+-----------------------+-----------... + +Note: Open question whether the occurence structure matters, and whether it should be materialized in another table. +(I would tend to say yes to both.) + +ContentLink + ++--------+--------+ +| source | target | ++--------+--------+ +| 9 | 24 | +| 11 | 25 | ++--------+--------+ + +Note: I would probably create a sub-Content for the link text and use this as source. +OR use a char_start, char_end. 
From 34bb91e8e0526393ee2b9516a3c4ebfb00abc95d Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Fri, 9 May 2025 10:02:49 -0400 Subject: [PATCH 06/40] some corrections to example --- packages/supabase/example.md | 38 +++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/packages/supabase/example.md b/packages/supabase/example.md index a1eeb89fc..306ea0623 100644 --- a/packages/supabase/example.md +++ b/packages/supabase/example.md @@ -4,11 +4,17 @@ Content: * (nt1pgid) discourse-graphs/nodes/Claim * (nt2pgid) discourse-graphs/nodes/Hypothesis -* (et1pgid) discourse-graphs/edges/OpposedBy - * (anyid1) roles - * (et1r1bkid) source - * (et1r2bkid) target - * (anyid2) arity: 2 +* (dgpgid) roam/js/discourse-graph + * (et1bkid) Opposes + * (et1r1bkid) source + * (et1r2bkid) destination + * (anyid1) If + * (et1sr1bkid) Page + * (et1sr2bkid) Block + * (et1sr3bkid) ParentPage + * (et1sr4bkid) PBlock + * (et1sr5bkid) SPage + * (et1sr6bkid) SBlock * (hyp1pgid) [HYP] Some hypothesis * (clm1pgid) [CLM] Some claim * (somepgid) Some page @@ -23,7 +29,7 @@ Documents: +----+-----------------+ | 1 | nt1pgid | | 2 | nt2pgid | -| 3 | et1pgid | +| 3 | dgpgid | | 22 | hyp1pgid | | 23 | clm1pgid | | 4 | somepgid | @@ -36,17 +42,15 @@ Content: +----+-------------+-------------+----------+---------------+----------------------------------------------+ | 5 | nt1pgid | 1 | document | 16 | discourse-graphs/nodes/Claim | | 6 | nt2pgid | 2 | document | 17 | discourse-graphs/nodes/Hypothesis | -| 7 | et1pgid | 3 | document | 18 | discourse-graphs/edges/OpposedBy | +| 7 | et1bkid | 3 | document | 18 | discourse-graphs/edges/OpposedBy | | 8 | somepgid | 4 | document | | Some page | | 24 | hyp1pgid | 22 | document | 20 | [HYP] Some hypothesis | | 25 | clm1pgid | 23 | document | 19 | [HYP] Some claim | | 9 | hyp1refbkid | 4 | block | | a block referring to [[HYP] Some hypothesis] | | 10 | opp1bkid | 4 | block | 21 | OpposedBy | | 11 | clm1refbkid | 
4 | block | | a block referring to [[CLM] Some claim] | -| 12 | anyid1 | 3 | block | | roles | | 13 | et1r1bkid | 3 | block | | source | -| 14 | et1r2bkid | 3 | block | | target | -| 15 | anyid2 | 3 | block | | arity: 2 | +| 14 | et1r2bkid | 3 | block | | destination | +----+-------------+-------------+----------+---------------+----------------------------------------------+ Concept: @@ -57,12 +61,12 @@ Concept: | 16 | true | 0 | | Claim | {} | 17 | true | 0 | | Hypothesis | {} | 18 | true | 2 | | Opposed-by | - { "roles": ["source", "target"], "representation": ["source", "sourceref", "target", "targetref", "predicate"] } -| 19 | false | 0 | | [CLM] Some claim | {} -| 20 | false | 0 | | [HYP] Some hypothesis | {} + { "roles": ["source", "destination"], "representation": ["source", "sourceref", "destination", "destinationref", "predicate"] } +| 19 | false | 0 | 16 | [CLM] Some claim | {} +| 20 | false | 0 | 17 | [HYP] Some hypothesis | {} | 21 | false | 2 | 18 | OpposedBy | - { "concepts": {"source": 19, "target": 20}, "occurences": - [{"sourceref": 11, "targetref": 9, "source": 25, "target": 24, "predicate": 10 }] } + { "concepts": {"source": 19, "destination": 20}, "occurences": + [{"sourceref": 11, "destinationref": 9, "source": 25, "destination": 24, "predicate": 10 }] } +----+-----------+-------+--------+-----------------------+-----------... Note: Open question whether the occurence structure matters, and whether it should be materialized in another table. @@ -71,7 +75,7 @@ Note: Open question whether the occurence structure matters, and whether it shou ContentLink +--------+--------+ -| source | target | +| source | destination | +--------+--------+ | 9 | 24 | | 11 | 25 | @@ -79,3 +83,5 @@ ContentLink Note: I would probably create a sub-Content for the link text and use this as source. OR use a char_start, char_end. 
+ +Missing: Ontology From d75b1a06b014385ccf815a8593260ddc634c8735 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Fri, 9 May 2025 13:59:09 -0400 Subject: [PATCH 07/40] github mardown tables --- packages/supabase/example.md | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/packages/supabase/example.md b/packages/supabase/example.md index 306ea0623..3e7922d03 100644 --- a/packages/supabase/example.md +++ b/packages/supabase/example.md @@ -24,22 +24,19 @@ Content: Documents: -+----+-----------------+ | id | source_local_id | -+----+-----------------+ +|----|-----------------| | 1 | nt1pgid | | 2 | nt2pgid | | 3 | dgpgid | | 22 | hyp1pgid | | 23 | clm1pgid | | 4 | somepgid | -+----+-----------------+ Content: -+----+-------------+-------------+----------+---------------+----------------------------------------------+ | id | source_local_id | page_id | scale | represents_id | text | -+----+-------------+-------------+----------+---------------+----------------------------------------------+ +|----|-------------|-------------|----------|---------------|----------------------------------------------| | 5 | nt1pgid | 1 | document | 16 | discourse-graphs/nodes/Claim | | 6 | nt2pgid | 2 | document | 17 | discourse-graphs/nodes/Hypothesis | | 7 | et1bkid | 3 | document | 18 | discourse-graphs/edges/OpposedBy | @@ -51,35 +48,27 @@ Content: | 11 | clm1refbkid | 4 | block | | a block referring to [[CLM] Some claim] | | 13 | et1r1bkid | 3 | block | | source | | 14 | et1r2bkid | 3 | block | | destination | -+----+-------------+-------------+----------+---------------+----------------------------------------------+ Concept: -+----+-----------+-------+--------+-----------------------+-----------... | id | is_schema | arity | schema | name | content -+----+-----------+-------+--------+-----------------------+-----------... +|----|-----------|-------|--------|-----------------------|-----------... 
| 16 | true | 0 | | Claim | {} | 17 | true | 0 | | Hypothesis | {} -| 18 | true | 2 | | Opposed-by | - { "roles": ["source", "destination"], "representation": ["source", "sourceref", "destination", "destinationref", "predicate"] } +| 18 | true | 2 | | Opposed-by | { "roles": ["source", "destination"], "representation": ["source", "sourceref", "destination", "destinationref", "predicate"] } | 19 | false | 0 | 16 | [CLM] Some claim | {} | 20 | false | 0 | 17 | [HYP] Some hypothesis | {} -| 21 | false | 2 | 18 | OpposedBy | - { "concepts": {"source": 19, "destination": 20}, "occurences": - [{"sourceref": 11, "destinationref": 9, "source": 25, "destination": 24, "predicate": 10 }] } -+----+-----------+-------+--------+-----------------------+-----------... +| 21 | false | 2 | 18 | OpposedBy | { "concepts": {"source": 19, "destination": 20}, "occurences": [{"sourceref": 11, "destinationref": 9, "source": 25, "destination": 24, "predicate": 10 }] } Note: Open question whether the occurence structure matters, and whether it should be materialized in another table. (I would tend to say yes to both.) ContentLink -+--------+--------+ | source | destination | -+--------+--------+ -| 9 | 24 | -| 11 | 25 | -+--------+--------+ +|--------|-------------| +| 9 | 24 | +| 11 | 25 | Note: I would probably create a sub-Content for the link text and use this as source. OR use a char_start, char_end. 
From 8014a87dead0b93fe5c033386529a63c19fea6ca Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Fri, 9 May 2025 14:00:17 -0400 Subject: [PATCH 08/40] github mardown tables --- packages/supabase/example.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/supabase/example.md b/packages/supabase/example.md index 3e7922d03..f7a4166e4 100644 --- a/packages/supabase/example.md +++ b/packages/supabase/example.md @@ -51,14 +51,14 @@ Content: Concept: -| id | is_schema | arity | schema | name | content -|----|-----------|-------|--------|-----------------------|-----------... -| 16 | true | 0 | | Claim | {} -| 17 | true | 0 | | Hypothesis | {} -| 18 | true | 2 | | Opposed-by | { "roles": ["source", "destination"], "representation": ["source", "sourceref", "destination", "destinationref", "predicate"] } -| 19 | false | 0 | 16 | [CLM] Some claim | {} -| 20 | false | 0 | 17 | [HYP] Some hypothesis | {} -| 21 | false | 2 | 18 | OpposedBy | { "concepts": {"source": 19, "destination": 20}, "occurences": [{"sourceref": 11, "destinationref": 9, "source": 25, "destination": 24, "predicate": 10 }] } +| id | is_schema | arity | schema | name | content | +|----|-----------|-------|--------|-----------------------|-----------| +| 16 | true | 0 | | Claim | {} | +| 17 | true | 0 | | Hypothesis | {} | +| 18 | true | 2 | | Opposed-by | { "roles": ["source", "destination"], "representation": ["source", "sourceref", "destination", "destinationref", "predicate"] } | +| 19 | false | 0 | 16 | [CLM] Some claim | {} | +| 20 | false | 0 | 17 | [HYP] Some hypothesis | {} | +| 21 | false | 2 | 18 | OpposedBy | { "concepts": {"source": 19, "destination": 20}, "occurences": [{"sourceref": 11, "destinationref": 9, "source": 25, "destination": 24, "predicate": 10 }] } | Note: Open question whether the occurence structure matters, and whether it should be materialized in another table. (I would tend to say yes to both.) 
From 247e7dcb9c82ccc3f5b06564856fe06bcbfddafc Mon Sep 17 00:00:00 2001
From: Marc-Antoine Parent
Date: Mon, 12 May 2025 23:06:28 -0400
Subject: [PATCH 09/40] sync table

---
 .../migrations/20250512142307_sync_table.sql | 122 ++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 packages/supabase/migrations/20250512142307_sync_table.sql

diff --git a/packages/supabase/migrations/20250512142307_sync_table.sql b/packages/supabase/migrations/20250512142307_sync_table.sql
new file mode 100644
index 000000000..51a7b7a14
--- /dev/null
+++ b/packages/supabase/migrations/20250512142307_sync_table.sql
@@ -0,0 +1,122 @@
+CREATE TYPE task_status AS ENUM ('active', 'timeout', 'complete', 'failed');
+
+-- One row per (sync_target, sync_function) pair, recording which worker last
+-- claimed the task and when.
+CREATE TABLE sync_info (
+    id SERIAL PRIMARY KEY,
+    sync_target BIGINT,
+    sync_function VARCHAR(20),
+    status task_status DEFAULT 'active',
+    worker varchar(100) NOT NULL,
+    failure_count SMALLINT DEFAULT 0,
+    last_task_start TIMESTAMP WITH TIME ZONE,
+    last_task_end TIMESTAMP WITH TIME ZONE,
+    task_times_out_at TIMESTAMP WITH TIME ZONE
+);
+
+CREATE UNIQUE INDEX sync_info_u_idx on sync_info (sync_target, sync_function);
+
+-- propose_sync_task: a worker asks to run s_function on s_target.
+-- Returns NULL when the caller has been given the task (the row is marked
+-- 'active' for s_worker, timing out after `timeout`). Otherwise returns the
+-- remaining time before the task may be attempted again.
+-- The effective interval between runs is task_interval * (1 + failure_count).
+CREATE OR REPLACE FUNCTION propose_sync_task(s_target BIGINT, s_function VARCHAR(20), s_worker varchar(100), timeout INTERVAL, task_interval INTERVAL)
+    RETURNS INTERVAL AS $$
+DECLARE s_id INTEGER;
+DECLARE start_time TIMESTAMP WITH TIME ZONE;
+DECLARE t_worker VARCHAR;
+DECLARE t_status task_status;
+DECLARE t_failure_count SMALLINT;
+DECLARE t_last_task_start TIMESTAMP WITH TIME ZONE;
+DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE;
+DECLARE t_times_out_at TIMESTAMP WITH TIME ZONE;
+DECLARE result INTERVAL = NULL;
+BEGIN
+    ASSERT timeout * 2 < task_interval;
+    ASSERT timeout >= '1s'::interval;
+    ASSERT task_interval >= '5s'::interval;
+    start_time := now();
+    INSERT INTO sync_info (sync_target, sync_function, status, worker, last_task_start, task_times_out_at)
+    VALUES (s_target, s_function, 'active', s_worker, start_time, start_time+timeout)
+    ON CONFLICT DO NOTHING
+    RETURNING id INTO s_id;
+    -- On conflict nothing is inserted, so RETURNING leaves s_id NULL.
+    IF s_id IS NOT NULL THEN
+        -- totally new row, I'm on the task
+        RETURN NULL;
+    END IF;
+    -- now we know it pre-existed. Maybe already active.
+    SELECT id INTO STRICT s_id FROM sync_info WHERE sync_target = s_target AND sync_function = s_function;
+    -- NOTE(review): this is a session-level advisory lock; it is only released by
+    -- the pg_advisory_unlock below, so an error raised in between would leave it
+    -- held. Consider pg_advisory_xact_lock.
+    PERFORM pg_advisory_lock(s_id);
+    SELECT worker, status, failure_count, last_task_start, last_task_end, task_times_out_at
+    INTO t_worker, t_status, t_failure_count, t_last_task_start, t_last_task_end, t_times_out_at
+    FROM sync_info
+    WHERE id = s_id;
+
+    -- An 'active' task that has not ended since its last start and is past its deadline counts as a timeout failure.
+    IF t_status = 'active' AND t_last_task_start >= coalesce(t_last_task_end, t_last_task_start) AND start_time > t_times_out_at THEN
+        t_status := 'timeout';
+        t_failure_count := t_failure_count + 1;
+    END IF;
+    -- basic backoff
+    task_interval := task_interval * (1+t_failure_count);
+    IF coalesce(t_last_task_end, t_last_task_start) + task_interval < now() THEN
+        -- we are ready to take on the task
+        UPDATE sync_info
+        SET worker=s_worker, status='active', task_times_out_at = now() + timeout, last_task_start = now(), failure_count=t_failure_count
+        WHERE id=s_id;
+    ELSE
+        -- the task has been tried recently enough
+        IF t_status = 'timeout' THEN
+            UPDATE sync_info
+            SET status=t_status, failure_count=t_failure_count
+            WHERE id=s_id;
+        END IF;
+        result := coalesce(t_last_task_end, t_last_task_start) + task_interval - now();
+    END IF;
+
+    PERFORM pg_advisory_unlock(s_id);
+    RETURN result;
+END;
+$$ LANGUAGE plpgsql;
+
+-- end_sync_task: the worker holding a task reports its final status
+-- ('timeout', 'complete' or 'failed'). 'complete' records the end time and
+-- resets failure_count; any other status that differs from the stored one
+-- increments failure_count.
+CREATE OR REPLACE FUNCTION end_sync_task(s_target BIGINT, s_function VARCHAR(20), s_worker varchar(100), s_status task_status) RETURNS VOID AS $$
+DECLARE t_id INTEGER;
+DECLARE t_worker varchar;
+DECLARE t_status task_status;
+DECLARE t_failure_count SMALLINT;
+DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE;
+BEGIN
+    SELECT id, worker, status, failure_count, last_task_end
+    INTO STRICT t_id, t_worker, t_status, t_failure_count, t_last_task_end
+    FROM sync_info WHERE sync_target = s_target AND sync_function = s_function;
+    ASSERT s_status > 'active';
+    -- ASSERT messages are string expressions: they need single quotes
+    -- (double quotes would denote an identifier).
+    ASSERT t_worker = s_worker, 'Wrong worker';
+    ASSERT s_status >= t_status, 'do not go back in status';
+    IF s_status = 'complete' THEN
+        t_last_task_end := now();
+        t_failure_count := 0;
+    ELSE
+        IF t_status != s_status THEN
+            t_failure_count := t_failure_count + 1;
+        END IF;
+    END IF;
+
+    UPDATE sync_info
+    SET status = s_status,
+        task_times_out_at=null,
+        last_task_end=t_last_task_end,
+        failure_count=t_failure_count
+    WHERE id=t_id;
+END;
+$$ LANGUAGE plpgsql;
From d0860e6c54d3209dbbf10a2f58a54b6948a0053b Mon Sep 17 00:00:00 2001
From: Marc-Antoine Parent
Date: Thu, 15 May 2025 11:19:32 -0400
Subject: [PATCH 10/40] Supabase with declarative schemas. Migrations for switching representations from Content to Concept

---
 packages/{supabase => database}/example.md | 0
 packages/{supabase => database}/schema.puml | 0
 packages/{supabase => database}/schema.svg | 0
 packages/{supabase => database}/schema.yaml | 9 +-
 packages/{ => database}/supabase/config.toml | 35 +++-
 .../20250504195841_remote_schema.sql | 0
 .../20250504202930_content_tables.sql | 0
 .../20250506174523_content_idx_id.sql | 0
 .../migrations/20250512142307_sync_table.sql | 0
 .../20250513173724_content_concept_key.sql | 91 ++++++++++
 packages/database/supabase/schemas/account.sql | 62 +++++++
 packages/database/supabase/schemas/agent.sql | 60 +++++++
 packages/database/supabase/schemas/base.sql | 67 +++++++
 packages/database/supabase/schemas/concept.sql | 86 +++++++++
 packages/database/supabase/schemas/content.sql | 120 +++++++++++++
 .../database/supabase/schemas/contributor.sql | 40 +++++
 .../database/supabase/schemas/embedding.sql | 77 +++++++++
 .../database/supabase/schemas/extensions.sql | 11 ++
 packages/database/supabase/schemas/space.sql | 37 ++++
 packages/database/supabase/schemas/sync.sql | 163 ++++++++++++++++++
 .../20250504193643_remote_schema.sql | 0
21 files changed, 843 insertions(+), 15 deletions(-) rename packages/{supabase => database}/example.md (100%) rename packages/{supabase => database}/schema.puml (100%) rename packages/{supabase => database}/schema.svg (100%) rename packages/{supabase => database}/schema.yaml (98%) rename packages/{ => database}/supabase/config.toml (92%) rename packages/{ => database}/supabase/migrations/20250504195841_remote_schema.sql (100%) rename packages/{ => database}/supabase/migrations/20250504202930_content_tables.sql (100%) rename packages/{ => database}/supabase/migrations/20250506174523_content_idx_id.sql (100%) rename packages/{ => database}/supabase/migrations/20250512142307_sync_table.sql (100%) create mode 100644 packages/database/supabase/migrations/20250513173724_content_concept_key.sql create mode 100644 packages/database/supabase/schemas/account.sql create mode 100644 packages/database/supabase/schemas/agent.sql create mode 100644 packages/database/supabase/schemas/base.sql create mode 100644 packages/database/supabase/schemas/concept.sql create mode 100644 packages/database/supabase/schemas/content.sql create mode 100644 packages/database/supabase/schemas/contributor.sql create mode 100644 packages/database/supabase/schemas/embedding.sql create mode 100644 packages/database/supabase/schemas/extensions.sql create mode 100644 packages/database/supabase/schemas/space.sql create mode 100644 packages/database/supabase/schemas/sync.sql delete mode 100644 packages/supabase/migrations/20250504193643_remote_schema.sql diff --git a/packages/supabase/example.md b/packages/database/example.md similarity index 100% rename from packages/supabase/example.md rename to packages/database/example.md diff --git a/packages/supabase/schema.puml b/packages/database/schema.puml similarity index 100% rename from packages/supabase/schema.puml rename to packages/database/schema.puml diff --git a/packages/supabase/schema.svg b/packages/database/schema.svg similarity index 100% rename from 
packages/supabase/schema.svg rename to packages/database/schema.svg diff --git a/packages/supabase/schema.yaml b/packages/database/schema.yaml similarity index 98% rename from packages/supabase/schema.yaml rename to packages/database/schema.yaml index 9047a437b..1efec9b0a 100644 --- a/packages/supabase/schema.yaml +++ b/packages/database/schema.yaml @@ -200,14 +200,10 @@ classes: - space - contributors - last_modified - - last_synced attributes: part_of: description: This content is part of a larger content unit range: Content - represents: - description: This content explicitly represents a concept - range: Concept # ContentDerivation: # description: A derivation relation between content units # attributes: @@ -230,7 +226,6 @@ classes: - space - source_local_id - url - - last_synced - created - metadata - last_modified @@ -276,7 +271,6 @@ classes: - contributors - created - last_modified - - last_synced - space attributes: arity: @@ -294,6 +288,9 @@ classes: range: boolean required: true ifabsent: false + represented_by: + description: This concept is explicitly represented by a given content unit + range: Content # update status # concept has occurences and possibly a representation in a space. # Are concepts space-specific? Tending to yes. So the point of convergence should be distinct. diff --git a/packages/supabase/config.toml b/packages/database/supabase/config.toml similarity index 92% rename from packages/supabase/config.toml rename to packages/database/supabase/config.toml index d52366c38..c443a628a 100644 --- a/packages/supabase/config.toml +++ b/packages/database/supabase/config.toml @@ -2,7 +2,7 @@ # https://supabase.com/docs/guides/local-development/cli/config # A string used to distinguish different Supabase projects on the same host. Defaults to the # working directory name when running `supabase init`. 
-project_id = "apps" +project_id = "discourse-graphs" [api] enabled = true @@ -48,7 +48,18 @@ max_client_conn = 100 [db.migrations] # Specifies an ordered list of schema files that describe your database. # Supports glob patterns relative to supabase directory: "./schemas/*.sql" -schema_paths = [] +schema_paths = [ + './schemas/base.sql', + './schemas/extensions.sql', + './schemas/agent.sql', + './schemas/space.sql', + './schemas/account.sql', + './schemas/content.sql', + './schemas/embedding.sql', + './schemas/concept.sql', + './schemas/contributor.sql', + './schemas/sync.sql', +] [db.seed] # If enabled, seeds the database after migrations during a db reset. @@ -105,9 +116,15 @@ file_size_limit = "50MiB" enabled = true # The base URL of your website. Used as an allow-list for redirects and for constructing URLs used # in emails. -site_url = "http://127.0.0.1:3000" +# site_url = "http://127.0.0.1:3000" +site_url = "https://discourse-graph-discourse-graphs.vercel.app/" # A list of *exact* URLs that auth providers are permitted to redirect to post authentication. -additional_redirect_urls = ["https://127.0.0.1:3000"] +additional_redirect_urls = [ + "https://discourse-graph-discourse-graphs.vercel.app/", + "https://discourse-graph-discourse-graphs.vercel.app/**", + "https://discourse-*-graph-discourse-graphs.vercel.app", + "https://discourse-*-graph-discourse-graphs.vercel.app/**", +] # How long tokens are valid for, in seconds. Defaults to 3600 (1 hour), maximum 604,800 (1 week). jwt_expiry = 3600 # If disabled, the refresh token will never expire. @@ -154,15 +171,15 @@ enable_signup = true # addresses. If disabled, only the new email is required to confirm. double_confirm_changes = true # If enabled, users need to confirm their email address before signing in. -enable_confirmations = false +enable_confirmations = true # If enabled, users will need to reauthenticate or have logged in recently to change their password. 
secure_password_change = false # Controls the minimum amount of time that must pass before sending another signup confirmation or password reset email. -max_frequency = "1s" +max_frequency = "1m0s" # Number of characters used in the email OTP. otp_length = 6 # Number of seconds before the email OTP expires (defaults to 1 hour). -otp_expiry = 3600 +otp_expiry = 86400 # Use a production-ready SMTP server # [auth.email.smtp] @@ -220,8 +237,8 @@ max_enrolled_factors = 10 # Control MFA via App Authenticator (TOTP) [auth.mfa.totp] -enroll_enabled = false -verify_enabled = false +enroll_enabled = true +verify_enabled = true # Configure MFA via Phone Messaging [auth.mfa.phone] diff --git a/packages/supabase/migrations/20250504195841_remote_schema.sql b/packages/database/supabase/migrations/20250504195841_remote_schema.sql similarity index 100% rename from packages/supabase/migrations/20250504195841_remote_schema.sql rename to packages/database/supabase/migrations/20250504195841_remote_schema.sql diff --git a/packages/supabase/migrations/20250504202930_content_tables.sql b/packages/database/supabase/migrations/20250504202930_content_tables.sql similarity index 100% rename from packages/supabase/migrations/20250504202930_content_tables.sql rename to packages/database/supabase/migrations/20250504202930_content_tables.sql diff --git a/packages/supabase/migrations/20250506174523_content_idx_id.sql b/packages/database/supabase/migrations/20250506174523_content_idx_id.sql similarity index 100% rename from packages/supabase/migrations/20250506174523_content_idx_id.sql rename to packages/database/supabase/migrations/20250506174523_content_idx_id.sql diff --git a/packages/supabase/migrations/20250512142307_sync_table.sql b/packages/database/supabase/migrations/20250512142307_sync_table.sql similarity index 100% rename from packages/supabase/migrations/20250512142307_sync_table.sql rename to packages/database/supabase/migrations/20250512142307_sync_table.sql diff --git 
a/packages/database/supabase/migrations/20250513173724_content_concept_key.sql b/packages/database/supabase/migrations/20250513173724_content_concept_key.sql new file mode 100644 index 000000000..2891c3ac2 --- /dev/null +++ b/packages/database/supabase/migrations/20250513173724_content_concept_key.sql @@ -0,0 +1,91 @@ +-- rename constraint + +alter table "public"."AutomatedAgent" drop constraint "person_id_fkey"; + +alter table "public"."AutomatedAgent" add constraint "automated_agent_id_fkey" FOREIGN KEY (id) REFERENCES "Agent"(id) ON UPDATE CASCADE ON DELETE CASCADE; + +-- now handled by sync_table + +alter table "public"."Concept" drop column "last_synced"; + +alter table "public"."Content" drop column "last_synced"; + +alter table "public"."Document" drop column "last_synced"; + +-- transfer of column + +alter table "public"."Concept" add column "represented_by_id" bigint; + +alter table "public"."Concept" add constraint "Concept_represented_by_id_fkey" FOREIGN KEY (represented_by_id) REFERENCES "Content"(id) ON UPDATE CASCADE ON DELETE SET NULL; + +CREATE UNIQUE INDEX "Concept_represented_by" ON public."Concept" (represented_by_id); + +-- transfer data + +UPDATE public."Concept" SET represented_by_id = public."Content".id + FROM public."Content" + WHERE public."Concept".id=represents_id; + +-- drop the Content column + +alter table "public"."Content" drop constraint "Content_represents_id_fkey"; + +drop index if exists "public"."Content_represents"; + +alter table "public"."Content" drop column "represents_id"; + +-- Content embedding functions + +set check_function_bodies = off; + +-- strangely the check fails to interpret <=>, despite the vector extension being installed. 
+ +CREATE OR REPLACE FUNCTION public.match_content_embeddings(query_embedding vector, match_threshold double precision, match_count integer, current_document_id integer DEFAULT NULL::integer) + RETURNS TABLE(content_id bigint, roam_uid text, text_content text, similarity double precision) + LANGUAGE sql + STABLE +AS $function$ +SELECT + c.id AS content_id, + c.source_local_id AS roam_uid, + c.text AS text_content, + 1 - (ce.vector <=> query_embedding) AS similarity +FROM "public"."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce +JOIN "public"."Content" AS c ON ce.target_id = c.id +WHERE 1 - (ce.vector <=> query_embedding) > match_threshold + AND ce.obsolete = FALSE +ORDER BY + ce.vector <=> query_embedding ASC +LIMIT match_count; +$function$ +; + +CREATE OR REPLACE FUNCTION public.match_embeddings_for_subset_nodes(p_query_embedding vector, p_subset_roam_uids text[]) + RETURNS TABLE(content_id bigint, roam_uid text, text_content text, similarity double precision) + LANGUAGE sql + STABLE +AS $function$ +WITH subset_content_with_embeddings AS ( + -- Step 1: Identify content and fetch embeddings ONLY for the nodes in the provided Roam UID subset + SELECT + c.id AS content_id, + c.source_local_id AS roam_uid, + c.text AS text_content, + ce.vector AS embedding_vector + FROM "public"."Content" AS c + JOIN "public"."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce ON c.id = ce.target_id + WHERE + c.source_local_id = ANY(p_subset_roam_uids) -- Filter Content by the provided Roam UIDs + AND ce.obsolete = FALSE +) +SELECT + ss_ce.content_id, + ss_ce.roam_uid, + ss_ce.text_content, + 1 - (ss_ce.embedding_vector <=> p_query_embedding) AS similarity +FROM subset_content_with_embeddings AS ss_ce +ORDER BY similarity DESC; -- Order by calculated similarity, highest first +$function$ +; + +set check_function_bodies = on; diff --git a/packages/database/supabase/schemas/account.sql b/packages/database/supabase/schemas/account.sql new file mode 100644 index 
000000000..2c911c39b --- /dev/null +++ b/packages/database/supabase/schemas/account.sql @@ -0,0 +1,62 @@ + + + +CREATE TABLE IF NOT EXISTS "public"."Account" ( + "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, + "platform_id" bigint NOT NULL, + "person_id" bigint NOT NULL, + "write_permission" boolean NOT NULL, + "active" boolean DEFAULT true NOT NULL +); + +ALTER TABLE "public"."Account" OWNER TO "postgres"; + +COMMENT ON TABLE "public"."Account" IS 'A user account on a discourse platform'; + + +ALTER TABLE ONLY "public"."Account" + ADD CONSTRAINT "Account_person_id_fkey" FOREIGN KEY ("person_id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY "public"."Account" + ADD CONSTRAINT "Account_platform_id_fkey" FOREIGN KEY ("platform_id") REFERENCES "public"."DiscoursePlatform"("id") ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY "public"."Account" + ADD CONSTRAINT "Account_pkey" PRIMARY KEY ("id"); + + +CREATE TABLE IF NOT EXISTS "public"."SpaceAccess" ( + "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, + "space_id" bigint, + "account_id" bigint NOT NULL, + "editor" boolean NOT NULL +); + +ALTER TABLE ONLY "public"."SpaceAccess" + ADD CONSTRAINT "SpaceAccess_account_id_space_id_key" UNIQUE ("account_id", "space_id"); + +ALTER TABLE ONLY "public"."SpaceAccess" + ADD CONSTRAINT "SpaceAccess_pkey" PRIMARY KEY ("id"); + + +ALTER TABLE "public"."SpaceAccess" OWNER TO "postgres"; + +COMMENT ON TABLE "public"."SpaceAccess" IS 'An access control entry for a space'; + +COMMENT ON COLUMN "public"."SpaceAccess"."space_id" IS 'The space in which the content is located'; + + + +ALTER TABLE ONLY "public"."SpaceAccess" + ADD CONSTRAINT "SpaceAccess_account_id_fkey" FOREIGN KEY ("account_id") REFERENCES "public"."Account"("id") ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY "public"."SpaceAccess" + ADD CONSTRAINT "SpaceAccess_space_id_fkey" FOREIGN KEY 
("space_id") REFERENCES "public"."DiscourseSpace"("id") ON UPDATE CASCADE ON DELETE CASCADE; + +GRANT ALL ON TABLE "public"."SpaceAccess" TO "anon"; +GRANT ALL ON TABLE "public"."SpaceAccess" TO "authenticated"; +GRANT ALL ON TABLE "public"."SpaceAccess" TO "service_role"; + + +GRANT ALL ON TABLE "public"."Account" TO "anon"; +GRANT ALL ON TABLE "public"."Account" TO "authenticated"; +GRANT ALL ON TABLE "public"."Account" TO "service_role"; diff --git a/packages/database/supabase/schemas/agent.sql b/packages/database/supabase/schemas/agent.sql new file mode 100644 index 000000000..f4f9154c7 --- /dev/null +++ b/packages/database/supabase/schemas/agent.sql @@ -0,0 +1,60 @@ + +CREATE TABLE IF NOT EXISTS "public"."Agent" ( + "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, + "type" "public"."EntityType" NOT NULL +); + + +ALTER TABLE ONLY "public"."Agent" + ADD CONSTRAINT "Agent_pkey" PRIMARY KEY ("id"); + +ALTER TABLE "public"."Agent" OWNER TO "postgres"; + +COMMENT ON TABLE "public"."Agent" IS 'An agent that acts in the system'; + +CREATE TABLE IF NOT EXISTS "public"."AutomatedAgent" ( + "id" bigint NOT NULL, + "name" character varying NOT NULL, + "metadata" "jsonb" DEFAULT '{}'::"jsonb" NOT NULL, + "deterministic" boolean DEFAULT false, + "version" character varying +); + +ALTER TABLE ONLY "public"."AutomatedAgent" + ADD CONSTRAINT "AutomatedAgent_pkey" PRIMARY KEY ("id"); + +ALTER TABLE ONLY "public"."AutomatedAgent" + ADD CONSTRAINT "automated_agent_id_fkey" FOREIGN KEY ("id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE CASCADE; + + +ALTER TABLE "public"."AutomatedAgent" OWNER TO "postgres"; + +COMMENT ON TABLE "public"."AutomatedAgent" IS 'An automated agent'; + +CREATE TABLE IF NOT EXISTS "public"."Person" ( + "id" bigint NOT NULL, + "name" character varying NOT NULL, + "orcid" character varying(20), + "email" character varying NOT NULL +); + +ALTER TABLE ONLY "public"."Person" + ADD CONSTRAINT "person_id_fkey" 
FOREIGN KEY ("id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE CASCADE; + + +ALTER TABLE "public"."Person" OWNER TO "postgres"; + +COMMENT ON TABLE "public"."Person" IS 'A person using the system'; + + +GRANT ALL ON TABLE "public"."Agent" TO "anon"; +GRANT ALL ON TABLE "public"."Agent" TO "authenticated"; +GRANT ALL ON TABLE "public"."Agent" TO "service_role"; + +GRANT ALL ON TABLE "public"."AutomatedAgent" TO "anon"; +GRANT ALL ON TABLE "public"."AutomatedAgent" TO "authenticated"; +GRANT ALL ON TABLE "public"."AutomatedAgent" TO "service_role"; + +GRANT ALL ON TABLE "public"."Person" TO "anon"; +GRANT ALL ON TABLE "public"."Person" TO "authenticated"; +GRANT ALL ON TABLE "public"."Person" TO "service_role"; diff --git a/packages/database/supabase/schemas/base.sql b/packages/database/supabase/schemas/base.sql new file mode 100644 index 000000000..b363f380a --- /dev/null +++ b/packages/database/supabase/schemas/base.sql @@ -0,0 +1,67 @@ + + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = on; +SET default_tablespace = ''; +SET default_table_access_method = "heap"; + +COMMENT ON SCHEMA "public" IS 'standard public schema'; + + +ALTER PUBLICATION "supabase_realtime" OWNER TO "postgres"; + +GRANT USAGE ON SCHEMA "public" TO "postgres"; +GRANT USAGE ON SCHEMA "public" TO "anon"; +GRANT USAGE ON SCHEMA "public" TO "authenticated"; +GRANT USAGE ON SCHEMA "public" TO "service_role"; + +CREATE TYPE "public"."EntityType" AS ENUM ( + 'Platform', + 'Space', + 'Account', + 'Person', + 'AutomatedAgent', + 'Document', + 'Content', + 'Concept', + 'ConceptSchema', + 'ContentLink', + 'Occurrence' +); + +ALTER TYPE "public"."EntityType" OWNER TO "postgres"; + +CREATE SEQUENCE 
IF NOT EXISTS "public"."entity_id_seq" + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER TABLE "public"."entity_id_seq" OWNER TO "postgres"; + + +ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON SEQUENCES TO "postgres"; +ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON SEQUENCES TO "anon"; +ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON SEQUENCES TO "authenticated"; +ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON SEQUENCES TO "service_role"; +ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON FUNCTIONS TO "postgres"; +ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON FUNCTIONS TO "anon"; +ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON FUNCTIONS TO "authenticated"; +ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON FUNCTIONS TO "service_role"; +ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON TABLES TO "postgres"; +ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON TABLES TO "anon"; +ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON TABLES TO "authenticated"; +ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON TABLES TO "service_role"; + +GRANT ALL ON SEQUENCE "public"."entity_id_seq" TO "anon"; +GRANT ALL ON SEQUENCE "public"."entity_id_seq" TO "authenticated"; +GRANT ALL ON SEQUENCE "public"."entity_id_seq" TO "service_role"; diff --git a/packages/database/supabase/schemas/concept.sql b/packages/database/supabase/schemas/concept.sql new file mode 100644 index 000000000..99b87b87f --- /dev/null +++ b/packages/database/supabase/schemas/concept.sql @@ -0,0 +1,86 @@ + + + +CREATE TYPE "public"."EpistemicStatus" AS ENUM ( + 'certainly_not', + 'strong_evidence_against', + 'could_be_false', + 'unknown', + 
'uncertain', + 'contentious', + 'could_be_true', + 'strong_evidence_for', + 'certain' +); + +ALTER TYPE "public"."EpistemicStatus" OWNER TO "postgres"; + + + +CREATE TABLE IF NOT EXISTS "public"."Concept" ( + "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, + "epistemic_status" "public"."EpistemicStatus" DEFAULT 'unknown'::"public"."EpistemicStatus" NOT NULL, + "name" character varying NOT NULL, + "description" "text", + "author_id" bigint, + "created" timestamp without time zone NOT NULL, + "last_modified" timestamp without time zone NOT NULL, + "space_id" bigint, + "arity" smallint DEFAULT 0 NOT NULL, + "schema_id" bigint, + "content" "jsonb" DEFAULT '{}'::"jsonb" NOT NULL, + "is_schema" boolean DEFAULT false NOT NULL, + "represented_by_id" BIGINT +); + +ALTER TABLE "public"."Concept" OWNER TO "postgres"; + +COMMENT ON TABLE "public"."Concept" IS 'An abstract concept, claim or relation'; + +COMMENT ON COLUMN "public"."Concept"."author_id" IS 'The author of content'; + +COMMENT ON COLUMN "public"."Concept"."created" IS 'The time when the content was created in the remote source'; + +COMMENT ON COLUMN "public"."Concept"."last_modified" IS 'The last time the content was modified in the remote source'; + +COMMENT ON COLUMN "public"."Concept"."space_id" IS 'The space in which the content is located'; + + + + +ALTER TABLE ONLY "public"."Concept" + ADD CONSTRAINT "Concept_pkey" PRIMARY KEY ("id"); + +ALTER TABLE ONLY "public"."Concept" + ADD FOREIGN KEY (represented_by_id) REFERENCES "public"."Content" (id) ON DELETE SET NULL ON UPDATE CASCADE; + +ALTER TABLE ONLY "public"."Person" + ADD CONSTRAINT "Person_pkey" PRIMARY KEY ("id"); + +CREATE INDEX "Concept_content" ON "public"."Concept" USING "gin" ("content" "jsonb_path_ops"); + +CREATE INDEX "Concept_schema" ON "public"."Concept" USING "btree" ("schema_id"); + +CREATE INDEX "Concept_space" ON "public"."Concept" USING "btree" ("space_id"); + +CREATE UNIQUE INDEX "Concept_represented_by" 
ON "public"."Concept" ("represented_by_id"); + + +ALTER TABLE ONLY "public"."Concept" + ADD CONSTRAINT "Concept_author_id_fkey" FOREIGN KEY ("author_id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE SET NULL; + +ALTER TABLE ONLY "public"."Concept" + ADD CONSTRAINT "Concept_schema_id_fkey" FOREIGN KEY ("schema_id") REFERENCES "public"."Concept"("id") ON UPDATE CASCADE ON DELETE SET NULL; + +ALTER TABLE ONLY "public"."Concept" + ADD CONSTRAINT "Concept_space_id_fkey" FOREIGN KEY ("space_id") REFERENCES "public"."DiscourseSpace"("id") ON UPDATE CASCADE ON DELETE CASCADE; + + + +GRANT ALL ON TABLE "public"."Concept" TO "anon"; +GRANT ALL ON TABLE "public"."Concept" TO "authenticated"; +GRANT ALL ON TABLE "public"."Concept" TO "service_role"; + + + +RESET ALL; diff --git a/packages/database/supabase/schemas/content.sql b/packages/database/supabase/schemas/content.sql new file mode 100644 index 000000000..f433f574c --- /dev/null +++ b/packages/database/supabase/schemas/content.sql @@ -0,0 +1,120 @@ +CREATE TYPE "public"."Scale" AS ENUM ( + 'document', + 'post', + 'chunk_unit', + 'section', + 'block', + 'field', + 'paragraph', + 'quote', + 'sentence', + 'phrase' +); + +ALTER TYPE "public"."Scale" OWNER TO "postgres"; + +CREATE TABLE IF NOT EXISTS "public"."Document" ( + "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, + "space_id" bigint, + "source_local_id" character varying, + "url" character varying, + "created" timestamp without time zone NOT NULL, + "metadata" "jsonb" DEFAULT '{}'::"jsonb" NOT NULL, + "last_modified" timestamp without time zone NOT NULL, + "author_id" bigint NOT NULL, + "contents" "oid" +); + +ALTER TABLE ONLY "public"."Document" + ADD CONSTRAINT "Document_pkey" PRIMARY KEY ("id"); + +ALTER TABLE ONLY "public"."Document" + ADD CONSTRAINT "Document_author_id_fkey" FOREIGN KEY ("author_id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY "public"."Document" + ADD 
CONSTRAINT "Document_space_id_fkey" FOREIGN KEY ("space_id") REFERENCES "public"."DiscourseSpace"("id") ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE "public"."Document" OWNER TO "postgres"; + +COMMENT ON COLUMN "public"."Document"."space_id" IS 'The space in which the content is located'; + +COMMENT ON COLUMN "public"."Document"."source_local_id" IS 'The unique identifier of the content in the remote source'; + +COMMENT ON COLUMN "public"."Document"."created" IS 'The time when the content was created in the remote source'; + +COMMENT ON COLUMN "public"."Document"."last_modified" IS 'The last time the content was modified in the remote source'; + +COMMENT ON COLUMN "public"."Document"."author_id" IS 'The author of content'; + +COMMENT ON COLUMN "public"."Document"."contents" IS 'A large object OID for the downloaded raw content'; + + +CREATE TABLE IF NOT EXISTS "public"."Content" ( + "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, + "document_id" bigint NOT NULL, + "source_local_id" character varying, + "author_id" bigint, + "creator_id" bigint, + "created" timestamp without time zone NOT NULL, + "text" "text" NOT NULL, + "metadata" "jsonb" DEFAULT '{}'::"jsonb" NOT NULL, + "scale" "public"."Scale" NOT NULL, + "space_id" bigint, + "last_modified" timestamp without time zone NOT NULL, + "part_of_id" bigint +); + +ALTER TABLE ONLY "public"."Content" + ADD CONSTRAINT "Content_pkey" PRIMARY KEY ("id"); + +ALTER TABLE ONLY "public"."Content" + ADD CONSTRAINT "Content_author_id_fkey" FOREIGN KEY ("author_id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE SET NULL; + +ALTER TABLE ONLY "public"."Content" + ADD CONSTRAINT "Content_creator_id_fkey" FOREIGN KEY ("creator_id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE SET NULL; + +ALTER TABLE ONLY "public"."Content" + ADD CONSTRAINT "Content_document_id_fkey" FOREIGN KEY ("document_id") REFERENCES "public"."Document"("id") ON UPDATE CASCADE ON DELETE 
CASCADE; + +ALTER TABLE ONLY "public"."Content" + ADD CONSTRAINT "Content_part_of_id_fkey" FOREIGN KEY ("part_of_id") REFERENCES "public"."Content"("id") ON UPDATE CASCADE ON DELETE SET NULL; + +ALTER TABLE ONLY "public"."Content" + ADD CONSTRAINT "Content_space_id_fkey" FOREIGN KEY ("space_id") REFERENCES "public"."DiscourseSpace"("id") ON UPDATE CASCADE ON DELETE CASCADE; + +CREATE INDEX "Content_document" ON "public"."Content" USING "btree" ("document_id"); + +CREATE INDEX "Content_part_of" ON "public"."Content" USING "btree" ("part_of_id"); + +CREATE INDEX "Content_space" ON "public"."Content" USING "btree" ("space_id"); + +CREATE UNIQUE INDEX "Content_space_and_id" ON "public"."Content" USING "btree" ("space_id", "source_local_id") WHERE ("source_local_id" IS NOT NULL); + +CREATE INDEX "Content_text" ON "public"."Content" USING "pgroonga" ("text"); + +ALTER TABLE "public"."Content" OWNER TO "postgres"; + +COMMENT ON TABLE "public"."Content" IS 'A unit of content'; + +COMMENT ON COLUMN "public"."Content"."source_local_id" IS 'The unique identifier of the content in the remote source'; + +COMMENT ON COLUMN "public"."Content"."author_id" IS 'The author of content'; + +COMMENT ON COLUMN "public"."Content"."creator_id" IS 'The creator of a logical structure, such as a content subdivision'; + +COMMENT ON COLUMN "public"."Content"."created" IS 'The time when the content was created in the remote source'; + +COMMENT ON COLUMN "public"."Content"."space_id" IS 'The space in which the content is located'; + +COMMENT ON COLUMN "public"."Content"."last_modified" IS 'The last time the content was modified in the remote source'; + +COMMENT ON COLUMN "public"."Content"."part_of_id" IS 'This content is part of a larger content unit'; + + +GRANT ALL ON TABLE "public"."Document" TO "anon"; +GRANT ALL ON TABLE "public"."Document" TO "authenticated"; +GRANT ALL ON TABLE "public"."Document" TO "service_role"; + +GRANT ALL ON TABLE "public"."Content" TO "anon"; +GRANT ALL ON TABLE 
"public"."Content" TO "authenticated"; +GRANT ALL ON TABLE "public"."Content" TO "service_role"; diff --git a/packages/database/supabase/schemas/contributor.sql b/packages/database/supabase/schemas/contributor.sql new file mode 100644 index 000000000..8105f65ab --- /dev/null +++ b/packages/database/supabase/schemas/contributor.sql @@ -0,0 +1,40 @@ +CREATE TABLE IF NOT EXISTS "public"."content_contributors" ( + "content_id" bigint NOT NULL, + "contributor_id" bigint NOT NULL +); + +ALTER TABLE ONLY "public"."content_contributors" + ADD CONSTRAINT "content_contributors_pkey" PRIMARY KEY ("content_id", "contributor_id"); + +ALTER TABLE ONLY "public"."content_contributors" + ADD CONSTRAINT "content_contributors_content_id_fkey" FOREIGN KEY ("content_id") REFERENCES "public"."Content"("id") ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY "public"."content_contributors" + ADD CONSTRAINT "content_contributors_contributor_id_fkey" FOREIGN KEY ("contributor_id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE "public"."content_contributors" OWNER TO "postgres"; + + +CREATE TABLE IF NOT EXISTS "public"."concept_contributors" ( + "concept_id" bigint NOT NULL, + "contributor_id" bigint NOT NULL +); + +ALTER TABLE "public"."concept_contributors" OWNER TO "postgres"; + +ALTER TABLE ONLY "public"."concept_contributors" + ADD CONSTRAINT "concept_contributors_concept_id_fkey" FOREIGN KEY ("concept_id") REFERENCES "public"."Concept"("id") ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY "public"."concept_contributors" + ADD CONSTRAINT "concept_contributors_contributor_id_fkey" FOREIGN KEY ("contributor_id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY "public"."concept_contributors" + ADD CONSTRAINT "concept_contributors_pkey" PRIMARY KEY ("concept_id", "contributor_id"); + +GRANT ALL ON TABLE "public"."concept_contributors" TO "anon"; +GRANT ALL ON TABLE 
"public"."concept_contributors" TO "authenticated"; +GRANT ALL ON TABLE "public"."concept_contributors" TO "service_role"; + +GRANT ALL ON TABLE "public"."content_contributors" TO "anon"; +GRANT ALL ON TABLE "public"."content_contributors" TO "authenticated"; +GRANT ALL ON TABLE "public"."content_contributors" TO "service_role"; diff --git a/packages/database/supabase/schemas/embedding.sql b/packages/database/supabase/schemas/embedding.sql new file mode 100644 index 000000000..8b3051416 --- /dev/null +++ b/packages/database/supabase/schemas/embedding.sql @@ -0,0 +1,77 @@ +CREATE TYPE "public"."EmbeddingName" AS ENUM ( + 'openai_text_embedding_ada2_1536', + 'openai_text_embedding_3_small_512', + 'openai_text_embedding_3_small_1536', + 'openai_text_embedding_3_large_256', + 'openai_text_embedding_3_large_1024', + 'openai_text_embedding_3_large_3072' +); + +ALTER TYPE "public"."EmbeddingName" OWNER TO "postgres"; + +CREATE TABLE IF NOT EXISTS "public"."ContentEmbedding_openai_text_embedding_3_small_1536" ( + "target_id" bigint NOT NULL, + "model" "public"."EmbeddingName" DEFAULT 'openai_text_embedding_3_small_1536'::"public"."EmbeddingName" NOT NULL, + "vector" "extensions"."vector"(1536) NOT NULL, + "obsolete" boolean DEFAULT false +); + +ALTER TABLE "public"."ContentEmbedding_openai_text_embedding_3_small_1536" OWNER TO "postgres"; + +ALTER TABLE ONLY "public"."ContentEmbedding_openai_text_embedding_3_small_1536" + ADD CONSTRAINT "ContentEmbedding_openai_text_embedding_3_small_1536_pkey" PRIMARY KEY ("target_id"); + +ALTER TABLE ONLY "public"."ContentEmbedding_openai_text_embedding_3_small_1536" + ADD CONSTRAINT "ContentEmbedding_openai_text_embedding_3_small_1_target_id_fkey" FOREIGN KEY ("target_id") REFERENCES "public"."Content"("id") ON UPDATE CASCADE ON DELETE CASCADE; + +GRANT ALL ON TABLE "public"."ContentEmbedding_openai_text_embedding_3_small_1536" TO "anon"; +GRANT ALL ON TABLE "public"."ContentEmbedding_openai_text_embedding_3_small_1536" TO 
"authenticated"; +GRANT ALL ON TABLE "public"."ContentEmbedding_openai_text_embedding_3_small_1536" TO "service_role"; + + + +CREATE OR REPLACE FUNCTION "public"."match_content_embeddings"("query_embedding" "extensions"."vector", "match_threshold" double precision, "match_count" integer, "current_document_id" integer DEFAULT NULL::integer) RETURNS TABLE("content_id" bigint, "roam_uid" "text", "text_content" "text", "similarity" double precision) + LANGUAGE "sql" STABLE + AS $$ +SELECT + c.id AS content_id, + c.source_local_id AS roam_uid, + c.text AS text_content, + 1 - (ce.vector <=> query_embedding) AS similarity +FROM "public"."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce +JOIN "public"."Content" AS c ON ce.target_id = c.id +WHERE 1 - (ce.vector <=> query_embedding) > match_threshold + AND ce.obsolete = FALSE +ORDER BY + ce.vector <=> query_embedding ASC +LIMIT match_count; +$$; + +ALTER FUNCTION "public"."match_content_embeddings"("query_embedding" "extensions"."vector", "match_threshold" double precision, "match_count" integer, "current_document_id" integer) OWNER TO "postgres"; + +CREATE OR REPLACE FUNCTION "public"."match_embeddings_for_subset_nodes"("p_query_embedding" "extensions"."vector", "p_subset_roam_uids" "text"[]) RETURNS TABLE("content_id" bigint, "roam_uid" "text", "text_content" "text", "similarity" double precision) + LANGUAGE "sql" STABLE + AS $$ +WITH subset_content_with_embeddings AS ( + -- Step 1: Identify content and fetch embeddings ONLY for the nodes in the provided Roam UID subset + SELECT + c.id AS content_id, + c.source_local_id AS roam_uid, + c.text AS text_content, + ce.vector AS embedding_vector + FROM "public"."Content" AS c + JOIN "public"."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce ON c.id = ce.target_id + WHERE + c.source_local_id = ANY(p_subset_roam_uids) -- Filter Content by the provided Roam UIDs + AND ce.obsolete = FALSE +) +SELECT + ss_ce.content_id, + ss_ce.roam_uid, + ss_ce.text_content, + 1 
- (ss_ce.embedding_vector <=> p_query_embedding) AS similarity +FROM subset_content_with_embeddings AS ss_ce +ORDER BY similarity DESC; -- Order by calculated similarity, highest first +$$; + +ALTER FUNCTION "public"."match_embeddings_for_subset_nodes"("p_query_embedding" "extensions"."vector", "p_subset_roam_uids" "text"[]) OWNER TO "postgres"; diff --git a/packages/database/supabase/schemas/extensions.sql b/packages/database/supabase/schemas/extensions.sql new file mode 100644 index 000000000..7d7eb678f --- /dev/null +++ b/packages/database/supabase/schemas/extensions.sql @@ -0,0 +1,11 @@ +CREATE EXTENSION IF NOT EXISTS "pg_cron" WITH SCHEMA "pg_catalog"; +CREATE EXTENSION IF NOT EXISTS "pgroonga" WITH SCHEMA "extensions"; +CREATE EXTENSION IF NOT EXISTS "pg_graphql" WITH SCHEMA "graphql"; +CREATE EXTENSION IF NOT EXISTS "pg_jsonschema" WITH SCHEMA "extensions"; +CREATE EXTENSION IF NOT EXISTS "pg_stat_monitor" WITH SCHEMA "extensions"; +CREATE EXTENSION IF NOT EXISTS "pg_stat_statements" WITH SCHEMA "extensions"; +CREATE EXTENSION IF NOT EXISTS "pgcrypto" WITH SCHEMA "extensions"; +CREATE EXTENSION IF NOT EXISTS "pgjwt" WITH SCHEMA "extensions"; +CREATE EXTENSION IF NOT EXISTS "supabase_vault" WITH SCHEMA "vault"; +CREATE EXTENSION IF NOT EXISTS "uuid-ossp" WITH SCHEMA "extensions"; +CREATE EXTENSION IF NOT EXISTS "vector" WITH SCHEMA "extensions"; diff --git a/packages/database/supabase/schemas/space.sql b/packages/database/supabase/schemas/space.sql new file mode 100644 index 000000000..9d6003bb3 --- /dev/null +++ b/packages/database/supabase/schemas/space.sql @@ -0,0 +1,37 @@ +CREATE TABLE IF NOT EXISTS "public"."DiscoursePlatform" ( + "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, + "name" character varying NOT NULL, + "url" character varying NOT NULL +); + +ALTER TABLE "public"."DiscoursePlatform" OWNER TO "postgres"; + +COMMENT ON TABLE "public"."DiscoursePlatform" IS 'A data platform where discourse happens'; + +CREATE 
TABLE IF NOT EXISTS "public"."DiscourseSpace" ( + "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, + "url" character varying, + "name" character varying NOT NULL, + "discourse_platform_id" bigint NOT NULL +); + +ALTER TABLE "public"."DiscourseSpace" OWNER TO "postgres"; + +COMMENT ON TABLE "public"."DiscourseSpace" IS 'A space on a discourse platform representing a community engaged in a conversation'; + +ALTER TABLE ONLY "public"."DiscoursePlatform" + ADD CONSTRAINT "DiscoursePlatform_pkey" PRIMARY KEY ("id"); + +ALTER TABLE ONLY "public"."DiscourseSpace" + ADD CONSTRAINT "DiscourseSpace_pkey" PRIMARY KEY ("id"); + +ALTER TABLE ONLY "public"."DiscourseSpace" + ADD CONSTRAINT "DiscourseSpace_discourse_platform_id_fkey" FOREIGN KEY ("discourse_platform_id") REFERENCES "public"."DiscoursePlatform"("id") ON UPDATE CASCADE ON DELETE CASCADE; + +GRANT ALL ON TABLE "public"."DiscoursePlatform" TO "anon"; +GRANT ALL ON TABLE "public"."DiscoursePlatform" TO "authenticated"; +GRANT ALL ON TABLE "public"."DiscoursePlatform" TO "service_role"; + +GRANT ALL ON TABLE "public"."DiscourseSpace" TO "anon"; +GRANT ALL ON TABLE "public"."DiscourseSpace" TO "authenticated"; +GRANT ALL ON TABLE "public"."DiscourseSpace" TO "service_role"; diff --git a/packages/database/supabase/schemas/sync.sql b/packages/database/supabase/schemas/sync.sql new file mode 100644 index 000000000..4fcd5cc35 --- /dev/null +++ b/packages/database/supabase/schemas/sync.sql @@ -0,0 +1,163 @@ + +CREATE TYPE "public"."task_status" AS ENUM ( + 'active', + 'timeout', + 'complete', + 'failed' +); + +ALTER TYPE "public"."task_status" OWNER TO "postgres"; + +CREATE TABLE IF NOT EXISTS "public"."sync_info" ( + "id" integer NOT NULL, + "sync_target" bigint, + "sync_function" character varying(20), + "status" "public"."task_status" DEFAULT 'active'::"public"."task_status", + "worker" character varying(100) NOT NULL, + "failure_count" smallint DEFAULT 0, + "last_task_start" timestamp with 
time zone,
+    "last_task_end" timestamp with time zone,
+    "task_times_out_at" timestamp with time zone
+);
+
+ALTER TABLE "public"."sync_info" OWNER TO "postgres";
+
+CREATE SEQUENCE IF NOT EXISTS "public"."sync_info_id_seq"
+    AS integer
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+
+ALTER TABLE "public"."sync_info_id_seq" OWNER TO "postgres";
+
+ALTER SEQUENCE "public"."sync_info_id_seq" OWNED BY "public"."sync_info"."id";
+
+ALTER TABLE ONLY "public"."sync_info" ALTER COLUMN "id" SET DEFAULT "nextval"('"public"."sync_info_id_seq"'::"regclass");
+
+ALTER TABLE ONLY "public"."sync_info"
+    ADD CONSTRAINT "sync_info_pkey" PRIMARY KEY ("id");
+
+CREATE UNIQUE INDEX "sync_info_u_idx" ON "public"."sync_info" USING "btree" ("sync_target", "sync_function");
+
+-- end_sync_task: records the end of the sync task identified by (s_target, s_function) on behalf of s_worker.
+-- 'complete' stamps last_task_end and resets failure_count; any other status increments failure_count when it differs from the stored status.
+CREATE OR REPLACE FUNCTION "public"."end_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "s_status" "public"."task_status") RETURNS "void"
+    LANGUAGE "plpgsql"
+    AS $$
+DECLARE t_id INTEGER;
+DECLARE t_worker varchar;
+DECLARE t_status task_status;
+DECLARE t_failure_count SMALLINT;
+DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE;
+BEGIN
+    -- INTO STRICT raises unless exactly one sync_info row matches the target/function pair
+    SELECT id, worker, status, failure_count, last_task_end
+    INTO STRICT t_id, t_worker, t_status, t_failure_count, t_last_task_end
+    FROM sync_info WHERE sync_target = s_target AND sync_function = s_function;
+    ASSERT s_status > 'active', 'a task cannot be ended with status active'; -- enum order: active < timeout < complete < failed
+    ASSERT t_worker = s_worker, 'Wrong worker'; -- message must be a single-quoted string; double quotes denote an identifier
+    ASSERT s_status >= t_status, 'do not go back in status';
+    IF s_status = 'complete' THEN
+        t_last_task_end := now();
+        t_failure_count := 0;
+    ELSE
+        IF t_status != s_status THEN
+            t_failure_count := t_failure_count + 1;
+        END IF;
+    END IF;
+
+    UPDATE sync_info
+    SET status = s_status,
+        task_times_out_at=null,
+        last_task_end=t_last_task_end,
+        failure_count=t_failure_count
+    WHERE id=t_id;
+END;
+$$;
+
+ALTER FUNCTION "public"."end_sync_task"("s_target" bigint, "s_function" character varying,
"s_worker" character varying, "s_status" "public"."task_status") OWNER TO "postgres"; + + +CREATE OR REPLACE FUNCTION "public"."propose_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "timeout" interval, "task_interval" interval) RETURNS interval + LANGUAGE "plpgsql" + AS $$ +DECLARE s_id INTEGER; +DECLARE timeout_as TIMESTAMP WITH TIME ZONE; +DECLARE start_time TIMESTAMP WITH TIME ZONE; +DECLARE t_worker VARCHAR; +DECLARE t_status task_status; +DECLARE t_failure_count SMALLINT; +DECLARE t_last_task_start TIMESTAMP WITH TIME ZONE; +DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE; +DECLARE t_times_out_at TIMESTAMP WITH TIME ZONE; +DECLARE result INTERVAL = NULL; +BEGIN + ASSERT timeout * 2 < task_interval; + ASSERT timeout >= '1s'::interval; + ASSERT task_interval >= '5s'::interval; + start_time := now(); + INSERT INTO sync_info (sync_target, sync_function, status, worker, last_task_start, task_times_out_at) + VALUES (s_target, s_function, 'active', s_worker, start_time, start_time+timeout) + ON CONFLICT DO NOTHING + RETURNING id INTO s_id; + -- zut il renvoie null... + IF s_id IS NOT NULL THEN + -- totally new_row, I'm on the task + RETURN NULL; + END IF; + -- now we know it pre-existed. Maybe already active. 
+ SELECT id INTO STRICT s_id FROM sync_info WHERE sync_target = s_target AND sync_function = s_function; + PERFORM pg_advisory_lock(s_id); + SELECT worker, status, failure_count, last_task_start, last_task_end, task_times_out_at + INTO t_worker, t_status, t_failure_count, t_last_task_start, t_last_task_end, t_times_out_at + FROM sync_info + WHERE id = s_id; + + IF t_status = 'active' AND t_last_task_start >= coalesce(t_last_task_end, t_last_task_start) AND start_time > t_times_out_at THEN + t_status := 'timeout'; + t_failure_count := t_failure_count + 1; + END IF; + -- basic backoff + task_interval := task_interval * (1+t_failure_count); + IF coalesce(t_last_task_end, t_last_task_start) + task_interval < now() THEN + -- we are ready to take on the task + UPDATE sync_info + SET worker=s_worker, status='active', task_times_out_at = now() + timeout, last_task_start = now(), failure_count=t_failure_count + WHERE id=s_id; + ELSE + -- the task has been tried recently enough + IF t_status = 'timeout' THEN + UPDATE sync_info + SET status=t_status, failure_count=t_failure_count + WHERE id=s_id; + END IF; + result := coalesce(t_last_task_end, t_last_task_start) + task_interval - now(); + END IF; + + PERFORM pg_advisory_unlock(s_id); + RETURN result; +END; +$$; + +ALTER FUNCTION "public"."propose_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "timeout" interval, "task_interval" interval) OWNER TO "postgres"; + +GRANT ALL ON TABLE "public"."sync_info" TO "anon"; +GRANT ALL ON TABLE "public"."sync_info" TO "authenticated"; +GRANT ALL ON TABLE "public"."sync_info" TO "service_role"; + +GRANT ALL ON SEQUENCE "public"."sync_info_id_seq" TO "anon"; +GRANT ALL ON SEQUENCE "public"."sync_info_id_seq" TO "authenticated"; +GRANT ALL ON SEQUENCE "public"."sync_info_id_seq" TO "service_role"; + +GRANT ALL ON FUNCTION "public"."end_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "s_status" 
"public"."task_status") TO "anon"; +GRANT ALL ON FUNCTION "public"."end_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "s_status" "public"."task_status") TO "authenticated"; +GRANT ALL ON FUNCTION "public"."end_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "s_status" "public"."task_status") TO "service_role"; + +GRANT ALL ON FUNCTION "public"."propose_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "timeout" interval, "task_interval" interval) TO "anon"; +GRANT ALL ON FUNCTION "public"."propose_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "timeout" interval, "task_interval" interval) TO "authenticated"; +GRANT ALL ON FUNCTION "public"."propose_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "timeout" interval, "task_interval" interval) TO "service_role"; + + +RESET ALL; diff --git a/packages/supabase/migrations/20250504193643_remote_schema.sql b/packages/supabase/migrations/20250504193643_remote_schema.sql deleted file mode 100644 index e69de29bb..000000000 From 308bcce3adbf065f72785ed927f16bd7ffe59f8a Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Thu, 15 May 2025 12:46:02 -0400 Subject: [PATCH 11/40] provisional README --- packages/database/README.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 packages/database/README.md diff --git a/packages/database/README.md b/packages/database/README.md new file mode 100644 index 000000000..9c492d867 --- /dev/null +++ b/packages/database/README.md @@ -0,0 +1,4 @@ +This contains the database schema for vector embeddings and concepts. +We should follow the supabase [Declarative Database Schema](https://supabase.com/docs/guides/local-development/declarative-database-schemas) process. +Run supabase at this level. 
+ From d87b5ef9a2519eb6c0b4b02f63ed1657064a3a3d Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Fri, 16 May 2025 09:27:11 -0400 Subject: [PATCH 12/40] add supabase generated types --- packages/database/types.gen.ts | 811 +++++++++++++++++++++++++++++++++ 1 file changed, 811 insertions(+) create mode 100644 packages/database/types.gen.ts diff --git a/packages/database/types.gen.ts b/packages/database/types.gen.ts new file mode 100644 index 000000000..5af17828f --- /dev/null +++ b/packages/database/types.gen.ts @@ -0,0 +1,811 @@ +export type Json = + | string + | number + | boolean + | null + | { [key: string]: Json | undefined } + | Json[] + +export type Database = { + graphql_public: { + Tables: { + [_ in never]: never + } + Views: { + [_ in never]: never + } + Functions: { + graphql: { + Args: { + operationName?: string + query?: string + variables?: Json + extensions?: Json + } + Returns: Json + } + } + Enums: { + [_ in never]: never + } + CompositeTypes: { + [_ in never]: never + } + } + public: { + Tables: { + Account: { + Row: { + active: boolean + id: number + person_id: number + platform_id: number + write_permission: boolean + } + Insert: { + active?: boolean + id?: number + person_id: number + platform_id: number + write_permission: boolean + } + Update: { + active?: boolean + id?: number + person_id?: number + platform_id?: number + write_permission?: boolean + } + Relationships: [ + { + foreignKeyName: "Account_person_id_fkey" + columns: ["person_id"] + isOneToOne: false + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Account_platform_id_fkey" + columns: ["platform_id"] + isOneToOne: false + referencedRelation: "DiscoursePlatform" + referencedColumns: ["id"] + }, + ] + } + Agent: { + Row: { + id: number + type: Database["public"]["Enums"]["EntityType"] + } + Insert: { + id?: number + type: Database["public"]["Enums"]["EntityType"] + } + Update: { + id?: number + type?: 
Database["public"]["Enums"]["EntityType"] + } + Relationships: [] + } + AutomatedAgent: { + Row: { + deterministic: boolean | null + id: number + metadata: Json + name: string + version: string | null + } + Insert: { + deterministic?: boolean | null + id: number + metadata?: Json + name: string + version?: string | null + } + Update: { + deterministic?: boolean | null + id?: number + metadata?: Json + name?: string + version?: string | null + } + Relationships: [ + { + foreignKeyName: "person_id_fkey" + columns: ["id"] + isOneToOne: true + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + ] + } + Concept: { + Row: { + arity: number + author_id: number | null + content: Json + created: string + description: string | null + epistemic_status: Database["public"]["Enums"]["EpistemicStatus"] + id: number + is_schema: boolean + last_modified: string + last_synced: string + name: string + schema_id: number | null + space_id: number | null + } + Insert: { + arity?: number + author_id?: number | null + content?: Json + created: string + description?: string | null + epistemic_status?: Database["public"]["Enums"]["EpistemicStatus"] + id?: number + is_schema?: boolean + last_modified: string + last_synced: string + name: string + schema_id?: number | null + space_id?: number | null + } + Update: { + arity?: number + author_id?: number | null + content?: Json + created?: string + description?: string | null + epistemic_status?: Database["public"]["Enums"]["EpistemicStatus"] + id?: number + is_schema?: boolean + last_modified?: string + last_synced?: string + name?: string + schema_id?: number | null + space_id?: number | null + } + Relationships: [ + { + foreignKeyName: "Concept_author_id_fkey" + columns: ["author_id"] + isOneToOne: false + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Concept_schema_id_fkey" + columns: ["schema_id"] + isOneToOne: false + referencedRelation: "Concept" + referencedColumns: ["id"] + }, + { + 
foreignKeyName: "Concept_space_id_fkey" + columns: ["space_id"] + isOneToOne: false + referencedRelation: "DiscourseSpace" + referencedColumns: ["id"] + }, + ] + } + concept_contributors: { + Row: { + concept_id: number + contributor_id: number + } + Insert: { + concept_id: number + contributor_id: number + } + Update: { + concept_id?: number + contributor_id?: number + } + Relationships: [ + { + foreignKeyName: "concept_contributors_concept_id_fkey" + columns: ["concept_id"] + isOneToOne: false + referencedRelation: "Concept" + referencedColumns: ["id"] + }, + { + foreignKeyName: "concept_contributors_contributor_id_fkey" + columns: ["contributor_id"] + isOneToOne: false + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + ] + } + Content: { + Row: { + author_id: number | null + created: string + creator_id: number | null + document_id: number + id: number + last_modified: string + last_synced: string + metadata: Json + part_of_id: number | null + represents_id: number | null + scale: Database["public"]["Enums"]["Scale"] + source_local_id: string | null + space_id: number | null + text: string + } + Insert: { + author_id?: number | null + created: string + creator_id?: number | null + document_id: number + id?: number + last_modified: string + last_synced: string + metadata?: Json + part_of_id?: number | null + represents_id?: number | null + scale: Database["public"]["Enums"]["Scale"] + source_local_id?: string | null + space_id?: number | null + text: string + } + Update: { + author_id?: number | null + created?: string + creator_id?: number | null + document_id?: number + id?: number + last_modified?: string + last_synced?: string + metadata?: Json + part_of_id?: number | null + represents_id?: number | null + scale?: Database["public"]["Enums"]["Scale"] + source_local_id?: string | null + space_id?: number | null + text?: string + } + Relationships: [ + { + foreignKeyName: "Content_author_id_fkey" + columns: ["author_id"] + isOneToOne: false + 
referencedRelation: "Agent" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Content_creator_id_fkey" + columns: ["creator_id"] + isOneToOne: false + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Content_document_id_fkey" + columns: ["document_id"] + isOneToOne: false + referencedRelation: "Document" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Content_part_of_id_fkey" + columns: ["part_of_id"] + isOneToOne: false + referencedRelation: "Content" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Content_represents_id_fkey" + columns: ["represents_id"] + isOneToOne: false + referencedRelation: "Concept" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Content_space_id_fkey" + columns: ["space_id"] + isOneToOne: false + referencedRelation: "DiscourseSpace" + referencedColumns: ["id"] + }, + ] + } + content_contributors: { + Row: { + content_id: number + contributor_id: number + } + Insert: { + content_id: number + contributor_id: number + } + Update: { + content_id?: number + contributor_id?: number + } + Relationships: [ + { + foreignKeyName: "content_contributors_content_id_fkey" + columns: ["content_id"] + isOneToOne: false + referencedRelation: "Content" + referencedColumns: ["id"] + }, + { + foreignKeyName: "content_contributors_contributor_id_fkey" + columns: ["contributor_id"] + isOneToOne: false + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + ] + } + ContentEmbedding_openai_text_embedding_3_small_1536: { + Row: { + model: Database["public"]["Enums"]["EmbeddingName"] + obsolete: boolean | null + target_id: number + vector: string + } + Insert: { + model?: Database["public"]["Enums"]["EmbeddingName"] + obsolete?: boolean | null + target_id: number + vector: string + } + Update: { + model?: Database["public"]["Enums"]["EmbeddingName"] + obsolete?: boolean | null + target_id?: number + vector?: string + } + Relationships: [ + { + foreignKeyName: 
"ContentEmbedding_openai_text_embedding_3_small_1_target_id_fkey" + columns: ["target_id"] + isOneToOne: true + referencedRelation: "Content" + referencedColumns: ["id"] + }, + ] + } + DiscoursePlatform: { + Row: { + id: number + name: string + url: string + } + Insert: { + id?: number + name: string + url: string + } + Update: { + id?: number + name?: string + url?: string + } + Relationships: [] + } + DiscourseSpace: { + Row: { + discourse_platform_id: number + id: number + name: string + url: string | null + } + Insert: { + discourse_platform_id: number + id?: number + name: string + url?: string | null + } + Update: { + discourse_platform_id?: number + id?: number + name?: string + url?: string | null + } + Relationships: [ + { + foreignKeyName: "DiscourseSpace_discourse_platform_id_fkey" + columns: ["discourse_platform_id"] + isOneToOne: false + referencedRelation: "DiscoursePlatform" + referencedColumns: ["id"] + }, + ] + } + Document: { + Row: { + author_id: number + contents: unknown | null + created: string + id: number + last_modified: string + last_synced: string + metadata: Json + source_local_id: string | null + space_id: number | null + url: string | null + } + Insert: { + author_id: number + contents?: unknown | null + created: string + id?: number + last_modified: string + last_synced: string + metadata?: Json + source_local_id?: string | null + space_id?: number | null + url?: string | null + } + Update: { + author_id?: number + contents?: unknown | null + created?: string + id?: number + last_modified?: string + last_synced?: string + metadata?: Json + source_local_id?: string | null + space_id?: number | null + url?: string | null + } + Relationships: [ + { + foreignKeyName: "Document_author_id_fkey" + columns: ["author_id"] + isOneToOne: false + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Document_space_id_fkey" + columns: ["space_id"] + isOneToOne: false + referencedRelation: "DiscourseSpace" + 
referencedColumns: ["id"] + }, + ] + } + Person: { + Row: { + email: string + id: number + name: string + orcid: string | null + } + Insert: { + email: string + id: number + name: string + orcid?: string | null + } + Update: { + email?: string + id?: number + name?: string + orcid?: string | null + } + Relationships: [ + { + foreignKeyName: "person_id_fkey" + columns: ["id"] + isOneToOne: true + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + ] + } + SpaceAccess: { + Row: { + account_id: number + editor: boolean + id: number + space_id: number | null + } + Insert: { + account_id: number + editor: boolean + id?: number + space_id?: number | null + } + Update: { + account_id?: number + editor?: boolean + id?: number + space_id?: number | null + } + Relationships: [ + { + foreignKeyName: "SpaceAccess_account_id_fkey" + columns: ["account_id"] + isOneToOne: false + referencedRelation: "Account" + referencedColumns: ["id"] + }, + { + foreignKeyName: "SpaceAccess_space_id_fkey" + columns: ["space_id"] + isOneToOne: false + referencedRelation: "DiscourseSpace" + referencedColumns: ["id"] + }, + ] + } + sync_info: { + Row: { + failure_count: number | null + id: number + last_task_end: string | null + last_task_start: string | null + status: Database["public"]["Enums"]["task_status"] | null + sync_function: string | null + sync_target: number | null + task_times_out_at: string | null + worker: string + } + Insert: { + failure_count?: number | null + id?: number + last_task_end?: string | null + last_task_start?: string | null + status?: Database["public"]["Enums"]["task_status"] | null + sync_function?: string | null + sync_target?: number | null + task_times_out_at?: string | null + worker: string + } + Update: { + failure_count?: number | null + id?: number + last_task_end?: string | null + last_task_start?: string | null + status?: Database["public"]["Enums"]["task_status"] | null + sync_function?: string | null + sync_target?: number | null + 
task_times_out_at?: string | null
+          worker?: string
+        }
+        Relationships: []
+      }
+    }
+    Views: {
+      [_ in never]: never
+    }
+    Functions: {
+      end_sync_task: {
+        Args: {
+          s_target: number
+          s_function: string
+          s_worker: string
+          s_status: Database["public"]["Enums"]["task_status"]
+        }
+        Returns: undefined
+      }
+      propose_sync_task: {
+        Args: {
+          s_target: number
+          s_function: string
+          s_worker: string
+          timeout: unknown
+          task_interval: unknown
+        }
+        Returns: unknown
+      }
+    }
+    Enums: {
+      EmbeddingName:
+        | "openai_text_embedding_ada2_1536"
+        | "openai_text_embedding_3_small_512"
+        | "openai_text_embedding_3_small_1536"
+        | "openai_text_embedding_3_large_256"
+        | "openai_text_embedding_3_large_1024"
+        | "openai_text_embedding_3_large_3072"
+      EntityType:
+        | "Platform"
+        | "Space"
+        | "Account"
+        | "Person"
+        | "AutomatedAgent"
+        | "Document"
+        | "Content"
+        | "Concept"
+        | "ConceptSchema"
+        | "ContentLink"
+        | "Occurrence"
+      EpistemicStatus:
+        | "certainly_not"
+        | "strong_evidence_against"
+        | "could_be_false"
+        | "unknown"
+        | "uncertain"
+        | "contentious"
+        | "could_be_true"
+        | "strong_evidence_for"
+        | "certain"
+      Scale:
+        | "document"
+        | "post"
+        | "chunk_unit"
+        | "section"
+        | "block"
+        | "field"
+        | "paragraph"
+        | "quote"
+        | "sentence"
+        | "phrase"
+      task_status: "active" | "timeout" | "complete" | "failed"
+    }
+    CompositeTypes: {
+      [_ in never]: never
+    }
+  }
+}
+
+type DefaultSchema = Database[Extract<keyof Database, "public">] // the "public" schema is the default for the helper types below
+
+export type Tables<
+  DefaultSchemaTableNameOrOptions extends
+    | keyof (DefaultSchema["Tables"] & DefaultSchema["Views"])
+    | { schema: keyof Database },
+  TableName extends DefaultSchemaTableNameOrOptions extends {
+    schema: keyof Database
+  }
+    ? keyof (Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"] &
+        Database[DefaultSchemaTableNameOrOptions["schema"]]["Views"])
+    : never = never,
+> = DefaultSchemaTableNameOrOptions extends { schema: keyof Database }
+  ?
(Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"] & + Database[DefaultSchemaTableNameOrOptions["schema"]]["Views"])[TableName] extends { + Row: infer R + } + ? R + : never + : DefaultSchemaTableNameOrOptions extends keyof (DefaultSchema["Tables"] & + DefaultSchema["Views"]) + ? (DefaultSchema["Tables"] & + DefaultSchema["Views"])[DefaultSchemaTableNameOrOptions] extends { + Row: infer R + } + ? R + : never + : never + +export type TablesInsert< + DefaultSchemaTableNameOrOptions extends + | keyof DefaultSchema["Tables"] + | { schema: keyof Database }, + TableName extends DefaultSchemaTableNameOrOptions extends { + schema: keyof Database + } + ? keyof Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"] + : never = never, +> = DefaultSchemaTableNameOrOptions extends { schema: keyof Database } + ? Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"][TableName] extends { + Insert: infer I + } + ? I + : never + : DefaultSchemaTableNameOrOptions extends keyof DefaultSchema["Tables"] + ? DefaultSchema["Tables"][DefaultSchemaTableNameOrOptions] extends { + Insert: infer I + } + ? I + : never + : never + +export type TablesUpdate< + DefaultSchemaTableNameOrOptions extends + | keyof DefaultSchema["Tables"] + | { schema: keyof Database }, + TableName extends DefaultSchemaTableNameOrOptions extends { + schema: keyof Database + } + ? keyof Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"] + : never = never, +> = DefaultSchemaTableNameOrOptions extends { schema: keyof Database } + ? Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"][TableName] extends { + Update: infer U + } + ? U + : never + : DefaultSchemaTableNameOrOptions extends keyof DefaultSchema["Tables"] + ? DefaultSchema["Tables"][DefaultSchemaTableNameOrOptions] extends { + Update: infer U + } + ? 
U + : never + : never + +export type Enums< + DefaultSchemaEnumNameOrOptions extends + | keyof DefaultSchema["Enums"] + | { schema: keyof Database }, + EnumName extends DefaultSchemaEnumNameOrOptions extends { + schema: keyof Database + } + ? keyof Database[DefaultSchemaEnumNameOrOptions["schema"]]["Enums"] + : never = never, +> = DefaultSchemaEnumNameOrOptions extends { schema: keyof Database } + ? Database[DefaultSchemaEnumNameOrOptions["schema"]]["Enums"][EnumName] + : DefaultSchemaEnumNameOrOptions extends keyof DefaultSchema["Enums"] + ? DefaultSchema["Enums"][DefaultSchemaEnumNameOrOptions] + : never + +export type CompositeTypes< + PublicCompositeTypeNameOrOptions extends + | keyof DefaultSchema["CompositeTypes"] + | { schema: keyof Database }, + CompositeTypeName extends PublicCompositeTypeNameOrOptions extends { + schema: keyof Database + } + ? keyof Database[PublicCompositeTypeNameOrOptions["schema"]]["CompositeTypes"] + : never = never, +> = PublicCompositeTypeNameOrOptions extends { schema: keyof Database } + ? Database[PublicCompositeTypeNameOrOptions["schema"]]["CompositeTypes"][CompositeTypeName] + : PublicCompositeTypeNameOrOptions extends keyof DefaultSchema["CompositeTypes"] + ? 
DefaultSchema["CompositeTypes"][PublicCompositeTypeNameOrOptions] + : never + +export const Constants = { + graphql_public: { + Enums: {}, + }, + public: { + Enums: { + EmbeddingName: [ + "openai_text_embedding_ada2_1536", + "openai_text_embedding_3_small_512", + "openai_text_embedding_3_small_1536", + "openai_text_embedding_3_large_256", + "openai_text_embedding_3_large_1024", + "openai_text_embedding_3_large_3072", + ], + EntityType: [ + "Platform", + "Space", + "Account", + "Person", + "AutomatedAgent", + "Document", + "Content", + "Concept", + "ConceptSchema", + "ContentLink", + "Occurrence", + ], + EpistemicStatus: [ + "certainly_not", + "strong_evidence_against", + "could_be_false", + "unknown", + "uncertain", + "contentious", + "could_be_true", + "strong_evidence_for", + "certain", + ], + Scale: [ + "document", + "post", + "chunk_unit", + "section", + "block", + "field", + "paragraph", + "quote", + "sentence", + "phrase", + ], + task_status: ["active", "timeout", "complete", "failed"], + }, + }, +} as const + From efcddb4783f93e86b56cc08a42830f94e9852e32 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Fri, 16 May 2025 10:26:37 -0400 Subject: [PATCH 13/40] Document supabase process --- packages/database/README.md | 23 +++++++++++++++++++++-- packages/database/supabase/.gitignore | 8 ++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 packages/database/supabase/.gitignore diff --git a/packages/database/README.md b/packages/database/README.md index 9c492d867..0a882a473 100644 --- a/packages/database/README.md +++ b/packages/database/README.md @@ -1,4 +1,23 @@ This contains the database schema for vector embeddings and concepts. -We should follow the supabase [Declarative Database Schema](https://supabase.com/docs/guides/local-development/declarative-database-schemas) process. -Run supabase at this level. +All CLI commands below should be run in this directory (`packages/database`.) +1. Setup + 1. 
Install [Docker](https://www.docker.com) + 2. Install the [supabase CLI](https://supabase.com/docs/guides/local-development). (There is a brew version) + 3. `supabase login` with your (account-specific) supabase access token. (TODO: Create a group access token.) + 4. `supabase link`. It will ask you for a project name, use `discourse-graphs`. (Production for now.) It will also ask you for the database password (See 1password.) +2. Usage: + 1. Use `supabase start` before you use your local database. URLs will be given for your local supabase database, api endpoint, etc. + 2. You may need to `supabase db pull` if changes are deployed while you work. + 3. End you work session with `supabase end` to free docker resources. +3. Development: We follow the supabase [Declarative Database Schema](https://supabase.com/docs/guides/local-development/declarative-database-schemas) process. + 1. Assuming you're working on a feature branch. + 2. `supabase stop` if it's running. + 3. Make changes to the schema, by editing files in `project/database/supabase/schemas` + 4. If you created a new schema file, make sure to add it to `[db.migrations] schema_paths` in `packages/database/supabase/config.toml`. Schema files are applied in that order, you may need to be strategic in placing your file. + 4. `supabase db diff -f some_meaningful_migration_name` + 5. If applying the new schema fails, repeat steps 2 and 3 + 6. If all goes well, there should be a new file named `supbase/migration/2..._some_meaningful_migration_name.sql` which you should `git add`. + 7. Regenerate the types file with `supabase gen types typescript --local > types.gen.ts` + 8. You can start using your changes `supabase start` + 9. When your PR gets merged to main, deploy your changes to production with `supabase db push`. (URGENT TODO: make that a CI/CD step.) 
diff --git a/packages/database/supabase/.gitignore b/packages/database/supabase/.gitignore new file mode 100644 index 000000000..ad9264f0b --- /dev/null +++ b/packages/database/supabase/.gitignore @@ -0,0 +1,8 @@ +# Supabase +.branches +.temp + +# dotenvx +.env.keys +.env.local +.env.*.local From 9a4807208c5bd20f84b81624751d34720e8eb54a Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Fri, 16 May 2025 16:00:56 -0400 Subject: [PATCH 14/40] github markdown is finicky about nesting --- packages/database/README.md | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/packages/database/README.md b/packages/database/README.md index 0a882a473..39ea08f5c 100644 --- a/packages/database/README.md +++ b/packages/database/README.md @@ -2,22 +2,22 @@ This contains the database schema for vector embeddings and concepts. All CLI commands below should be run in this directory (`packages/database`.) 1. Setup - 1. Install [Docker](https://www.docker.com) - 2. Install the [supabase CLI](https://supabase.com/docs/guides/local-development). (There is a brew version) - 3. `supabase login` with your (account-specific) supabase access token. (TODO: Create a group access token.) - 4. `supabase link`. It will ask you for a project name, use `discourse-graphs`. (Production for now.) It will also ask you for the database password (See 1password.) + 1. Install [Docker](https://www.docker.com) + 2. Install the [supabase CLI](https://supabase.com/docs/guides/local-development). (There is a brew version) + 3. `supabase login` with your (account-specific) supabase access token. (TODO: Create a group access token.) + 4. `supabase link`. It will ask you for a project name, use `discourse-graphs`. (Production for now.) It will also ask you for the database password (See 1password.) 2. Usage: - 1. Use `supabase start` before you use your local database. URLs will be given for your local supabase database, api endpoint, etc. - 2. 
You may need to `supabase db pull` if changes are deployed while you work. - 3. End you work session with `supabase end` to free docker resources. + 1. Use `supabase start` before you use your local database. URLs will be given for your local supabase database, api endpoint, etc. + 2. You may need to `supabase db pull` if changes are deployed while you work. + 3. End your work session with `supabase stop` to free docker resources. 3. Development: We follow the supabase [Declarative Database Schema](https://supabase.com/docs/guides/local-development/declarative-database-schemas) process. - 1. Assuming you're working on a feature branch. - 2. `supabase stop` if it's running. - 3. Make changes to the schema, by editing files in `project/database/supabase/schemas` - 4. If you created a new schema file, make sure to add it to `[db.migrations] schema_paths` in `packages/database/supabase/config.toml`. Schema files are applied in that order, you may need to be strategic in placing your file. - 4.
`supabase db diff -f some_meaningful_migration_name` + 5. If applying the new schema fails, repeat steps 2 and 3 + 6. If all goes well, there should be a new file named `supbase/migration/2..._some_meaningful_migration_name.sql` which you should `git add`. + 7. Regenerate the types file with `supabase gen types typescript --local > types.gen.ts` + 8. You can start using your changes `supabase start` + 9. When your PR gets merged to main, deploy your changes to production with `supabase db push`. (URGENT TODO: make that a CI/CD step.) From 5e99a78c146d6d37eb7450d611f8ff7c10f500ca Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Fri, 16 May 2025 17:19:39 -0400 Subject: [PATCH 15/40] correction to README and types --- packages/database/README.md | 4 +-- packages/database/types.gen.ts | 54 +++++++++++++++++++++------------- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/packages/database/README.md b/packages/database/README.md index 39ea08f5c..2f8ac8d88 100644 --- a/packages/database/README.md +++ b/packages/database/README.md @@ -18,6 +18,6 @@ All CLI commands below should be run in this directory (`packages/database`.) 4. `supabase db diff -f some_meaningful_migration_name` 5. If applying the new schema fails, repeat steps 2 and 3 6. If all goes well, there should be a new file named `supbase/migration/2..._some_meaningful_migration_name.sql` which you should `git add`. - 7. Regenerate the types file with `supabase gen types typescript --local > types.gen.ts` - 8. You can start using your changes `supabase start` + 7. You can start using your changes `supabase start` + 8. Regenerate the types file with `supabase gen types typescript --local > types.gen.ts` 9. When your PR gets merged to main, deploy your changes to production with `supabase db push`. (URGENT TODO: make that a CI/CD step.) 
diff --git a/packages/database/types.gen.ts b/packages/database/types.gen.ts index 5af17828f..57212ef87 100644 --- a/packages/database/types.gen.ts +++ b/packages/database/types.gen.ts @@ -112,7 +112,7 @@ export type Database = { } Relationships: [ { - foreignKeyName: "person_id_fkey" + foreignKeyName: "automated_agent_id_fkey" columns: ["id"] isOneToOne: true referencedRelation: "Agent" @@ -131,8 +131,8 @@ export type Database = { id: number is_schema: boolean last_modified: string - last_synced: string name: string + represented_by_id: number | null schema_id: number | null space_id: number | null } @@ -146,8 +146,8 @@ export type Database = { id?: number is_schema?: boolean last_modified: string - last_synced: string name: string + represented_by_id?: number | null schema_id?: number | null space_id?: number | null } @@ -161,8 +161,8 @@ export type Database = { id?: number is_schema?: boolean last_modified?: string - last_synced?: string name?: string + represented_by_id?: number | null schema_id?: number | null space_id?: number | null } @@ -174,6 +174,13 @@ export type Database = { referencedRelation: "Agent" referencedColumns: ["id"] }, + { + foreignKeyName: "Concept_represented_by_id_fkey" + columns: ["represented_by_id"] + isOneToOne: false + referencedRelation: "Content" + referencedColumns: ["id"] + }, { foreignKeyName: "Concept_schema_id_fkey" columns: ["schema_id"] @@ -228,10 +235,8 @@ export type Database = { document_id: number id: number last_modified: string - last_synced: string metadata: Json part_of_id: number | null - represents_id: number | null scale: Database["public"]["Enums"]["Scale"] source_local_id: string | null space_id: number | null @@ -244,10 +249,8 @@ export type Database = { document_id: number id?: number last_modified: string - last_synced: string metadata?: Json part_of_id?: number | null - represents_id?: number | null scale: Database["public"]["Enums"]["Scale"] source_local_id?: string | null space_id?: number | null @@ 
-260,10 +263,8 @@ export type Database = { document_id?: number id?: number last_modified?: string - last_synced?: string metadata?: Json part_of_id?: number | null - represents_id?: number | null scale?: Database["public"]["Enums"]["Scale"] source_local_id?: string | null space_id?: number | null @@ -298,13 +299,6 @@ export type Database = { referencedRelation: "Content" referencedColumns: ["id"] }, - { - foreignKeyName: "Content_represents_id_fkey" - columns: ["represents_id"] - isOneToOne: false - referencedRelation: "Concept" - referencedColumns: ["id"] - }, { foreignKeyName: "Content_space_id_fkey" columns: ["space_id"] @@ -427,7 +421,6 @@ export type Database = { created: string id: number last_modified: string - last_synced: string metadata: Json source_local_id: string | null space_id: number | null @@ -439,7 +432,6 @@ export type Database = { created: string id?: number last_modified: string - last_synced: string metadata?: Json source_local_id?: string | null space_id?: number | null @@ -451,7 +443,6 @@ export type Database = { created?: string id?: number last_modified?: string - last_synced?: string metadata?: Json source_local_id?: string | null space_id?: number | null @@ -589,6 +580,29 @@ export type Database = { } Returns: undefined } + match_content_embeddings: { + Args: { + query_embedding: string + match_threshold: number + match_count: number + current_document_id?: number + } + Returns: { + content_id: number + roam_uid: string + text_content: string + similarity: number + }[] + } + match_embeddings_for_subset_nodes: { + Args: { p_query_embedding: string; p_subset_roam_uids: string[] } + Returns: { + content_id: number + roam_uid: string + text_content: string + similarity: number + }[] + } propose_sync_task: { Args: { s_target: number From ba5fdf9321df1fed13b30b623223ad995a481ea2 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Fri, 16 May 2025 18:03:31 -0400 Subject: [PATCH 16/40] only public namespace --- 
packages/database/types.gen.ts | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/packages/database/types.gen.ts b/packages/database/types.gen.ts index 57212ef87..4c32dc33e 100644 --- a/packages/database/types.gen.ts +++ b/packages/database/types.gen.ts @@ -7,31 +7,6 @@ export type Json = | Json[] export type Database = { - graphql_public: { - Tables: { - [_ in never]: never - } - Views: { - [_ in never]: never - } - Functions: { - graphql: { - Args: { - operationName?: string - query?: string - variables?: Json - extensions?: Json - } - Returns: Json - } - } - Enums: { - [_ in never]: never - } - CompositeTypes: { - [_ in never]: never - } - } public: { Tables: { Account: { @@ -769,9 +744,6 @@ export type CompositeTypes< : never export const Constants = { - graphql_public: { - Enums: {}, - }, public: { Enums: { EmbeddingName: [ From d1cdc92cf6c6f6ad552b623248cb53d5ec0cd10a Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Fri, 16 May 2025 18:08:12 -0400 Subject: [PATCH 17/40] WIP: supabase via npm --- packages/database/package.json | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 packages/database/package.json diff --git a/packages/database/package.json b/packages/database/package.json new file mode 100644 index 000000000..df0f8c3f4 --- /dev/null +++ b/packages/database/package.json @@ -0,0 +1,24 @@ +{ + "name": "@repo/database", + "version": "0.0.0", + "private": true, + "license": "Apache-2.0", + "type": "module", + "exports": { + "./types.gen.ts": "./types.gen.ts" + }, + "scripts": { + "init": "supabase login", + "dev": "supabase start", + "stop": "supabase stop", + "local-types": "supabase start && supabase gen types typescript --local --schema public > types.gen.ts", + "active-types": "supabase start && supabase gen types typescript --project-id \"$PRODUCTION_PROJECT_ID\" --schema public > types.gen.ts", + "new-migration": "supabase stop && supabase db diff -f ", + "deploy": "supabase db push 
--project-id \"$PRODUCTION_PROJECT_ID\"" + }, + "devDependencies": { + "supabase": "^2.22.12", + "tsx": "^4.19.2" + }, + "dependencies": {} +} From 183000bdf915e514331d21bcd2bc025393ecb759 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Sat, 17 May 2025 11:43:31 -0400 Subject: [PATCH 18/40] locallly applied plpgsql_check to plpgsql functions --- .../20250517154122_plpgsql_linting.sql | 95 +++++++++++++++++++ packages/database/supabase/schemas/base.sql | 2 +- .../database/supabase/schemas/embedding.sql | 4 +- packages/database/supabase/schemas/sync.sql | 11 +-- 4 files changed, 103 insertions(+), 9 deletions(-) create mode 100644 packages/database/supabase/migrations/20250517154122_plpgsql_linting.sql diff --git a/packages/database/supabase/migrations/20250517154122_plpgsql_linting.sql b/packages/database/supabase/migrations/20250517154122_plpgsql_linting.sql new file mode 100644 index 000000000..18b0b242e --- /dev/null +++ b/packages/database/supabase/migrations/20250517154122_plpgsql_linting.sql @@ -0,0 +1,95 @@ +CREATE OR REPLACE FUNCTION public.end_sync_task(s_target bigint, s_function character varying, s_worker character varying, s_status task_status) + RETURNS void + LANGUAGE plpgsql +AS $function$ +DECLARE t_id INTEGER; +DECLARE t_worker varchar; +DECLARE t_status task_status; +DECLARE t_failure_count SMALLINT; +DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE; +BEGIN + SELECT id, worker, status, failure_count, last_task_end + INTO STRICT t_id, t_worker, t_status, t_failure_count, t_last_task_end + FROM sync_info WHERE sync_target = s_target AND sync_function = s_function; + ASSERT s_status > 'active'; + ASSERT t_worker = s_worker, 'Wrong worker'; + ASSERT s_status >= t_status, 'do not go back in status'; + IF s_status = 'complete' THEN + t_last_task_end := now(); + t_failure_count := 0; + ELSE + IF t_status != s_status THEN + t_failure_count := t_failure_count + 1; + END IF; + END IF; + + UPDATE sync_info + SET status = s_status, + 
task_times_out_at=null, + last_task_end=t_last_task_end, + failure_count=t_failure_count + WHERE id=t_id; +END; +$function$ +; + +CREATE OR REPLACE FUNCTION public.propose_sync_task(s_target bigint, s_function character varying, s_worker character varying, timeout interval, task_interval interval) + RETURNS interval + LANGUAGE plpgsql +AS $function$ +DECLARE s_id INTEGER; +DECLARE start_time TIMESTAMP WITH TIME ZONE; +DECLARE t_status task_status; +DECLARE t_failure_count SMALLINT; +DECLARE t_last_task_start TIMESTAMP WITH TIME ZONE; +DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE; +DECLARE t_times_out_at TIMESTAMP WITH TIME ZONE; +DECLARE result INTERVAL = NULL; +BEGIN + ASSERT timeout * 2 < task_interval; + ASSERT timeout >= '1s'::interval; + ASSERT task_interval >= '5s'::interval; + start_time := now(); + INSERT INTO sync_info (sync_target, sync_function, status, worker, last_task_start, task_times_out_at) + VALUES (s_target, s_function, 'active', s_worker, start_time, start_time+timeout) + ON CONFLICT DO NOTHING + RETURNING id INTO s_id; + -- zut il renvoie null... + IF s_id IS NOT NULL THEN + -- totally new_row, I'm on the task + RETURN NULL; + END IF; + -- now we know it pre-existed. Maybe already active. 
+ SELECT id INTO STRICT s_id FROM sync_info WHERE sync_target = s_target AND sync_function = s_function; + PERFORM pg_advisory_lock(s_id); + SELECT status, failure_count, last_task_start, last_task_end, task_times_out_at + INTO t_status, t_failure_count, t_last_task_start, t_last_task_end, t_times_out_at + FROM sync_info + WHERE id = s_id; + + IF t_status = 'active' AND t_last_task_start >= coalesce(t_last_task_end, t_last_task_start) AND start_time > t_times_out_at THEN + t_status := 'timeout'; + t_failure_count := t_failure_count + 1; + END IF; + -- basic backoff + task_interval := task_interval * (1+t_failure_count); + IF coalesce(t_last_task_end, t_last_task_start) + task_interval < now() THEN + -- we are ready to take on the task + UPDATE sync_info + SET worker=s_worker, status='active', task_times_out_at = now() + timeout, last_task_start = now(), failure_count=t_failure_count + WHERE id=s_id; + ELSE + -- the task has been tried recently enough + IF t_status = 'timeout' THEN + UPDATE sync_info + SET status=t_status, failure_count=t_failure_count + WHERE id=s_id; + END IF; + result := coalesce(t_last_task_end, t_last_task_start) + task_interval - now(); + END IF; + + PERFORM pg_advisory_unlock(s_id); + RETURN result; +END; +$function$ +; diff --git a/packages/database/supabase/schemas/base.sql b/packages/database/supabase/schemas/base.sql index b363f380a..d4f4a71e6 100644 --- a/packages/database/supabase/schemas/base.sql +++ b/packages/database/supabase/schemas/base.sql @@ -6,7 +6,7 @@ SET idle_in_transaction_session_timeout = 0; SET client_encoding = 'UTF8'; SET standard_conforming_strings = on; SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; +SET check_function_bodies = true; SET xmloption = content; SET client_min_messages = warning; SET row_security = on; diff --git a/packages/database/supabase/schemas/embedding.sql b/packages/database/supabase/schemas/embedding.sql index 8b3051416..56fc0bd2d 100644 --- 
a/packages/database/supabase/schemas/embedding.sql +++ b/packages/database/supabase/schemas/embedding.sql @@ -28,7 +28,7 @@ GRANT ALL ON TABLE "public"."ContentEmbedding_openai_text_embedding_3_small_1536 GRANT ALL ON TABLE "public"."ContentEmbedding_openai_text_embedding_3_small_1536" TO "authenticated"; GRANT ALL ON TABLE "public"."ContentEmbedding_openai_text_embedding_3_small_1536" TO "service_role"; - +set search_path to public,extensions; CREATE OR REPLACE FUNCTION "public"."match_content_embeddings"("query_embedding" "extensions"."vector", "match_threshold" double precision, "match_count" integer, "current_document_id" integer DEFAULT NULL::integer) RETURNS TABLE("content_id" bigint, "roam_uid" "text", "text_content" "text", "similarity" double precision) LANGUAGE "sql" STABLE @@ -75,3 +75,5 @@ ORDER BY similarity DESC; -- Order by calculated similarity, highest first $$; ALTER FUNCTION "public"."match_embeddings_for_subset_nodes"("p_query_embedding" "extensions"."vector", "p_subset_roam_uids" "text"[]) OWNER TO "postgres"; + +set search_path to ''; diff --git a/packages/database/supabase/schemas/sync.sql b/packages/database/supabase/schemas/sync.sql index 4fcd5cc35..a967f4d09 100644 --- a/packages/database/supabase/schemas/sync.sql +++ b/packages/database/supabase/schemas/sync.sql @@ -47,7 +47,6 @@ CREATE OR REPLACE FUNCTION "public"."end_sync_task"("s_target" bigint, "s_functi LANGUAGE "plpgsql" AS $$ DECLARE t_id INTEGER; -DECLARE t_target varchar; DECLARE t_worker varchar; DECLARE t_status task_status; DECLARE t_failure_count SMALLINT; @@ -57,8 +56,8 @@ BEGIN INTO STRICT t_id, t_worker, t_status, t_failure_count, t_last_task_end FROM sync_info WHERE sync_target = s_target AND sync_function = s_function; ASSERT s_status > 'active'; - ASSERT t_worker = s_worker, "Wrong worker"; - ASSERT s_status >= t_status, "do not go back in status"; + ASSERT t_worker = s_worker, 'Wrong worker'; + ASSERT s_status >= t_status, 'do not go back in status'; IF s_status = 
'complete' THEN t_last_task_end := now(); t_failure_count := 0; @@ -84,9 +83,7 @@ CREATE OR REPLACE FUNCTION "public"."propose_sync_task"("s_target" bigint, "s_fu LANGUAGE "plpgsql" AS $$ DECLARE s_id INTEGER; -DECLARE timeout_as TIMESTAMP WITH TIME ZONE; DECLARE start_time TIMESTAMP WITH TIME ZONE; -DECLARE t_worker VARCHAR; DECLARE t_status task_status; DECLARE t_failure_count SMALLINT; DECLARE t_last_task_start TIMESTAMP WITH TIME ZONE; @@ -110,8 +107,8 @@ BEGIN -- now we know it pre-existed. Maybe already active. SELECT id INTO STRICT s_id FROM sync_info WHERE sync_target = s_target AND sync_function = s_function; PERFORM pg_advisory_lock(s_id); - SELECT worker, status, failure_count, last_task_start, last_task_end, task_times_out_at - INTO t_worker, t_status, t_failure_count, t_last_task_start, t_last_task_end, t_times_out_at + SELECT status, failure_count, last_task_start, last_task_end, task_times_out_at + INTO t_status, t_failure_count, t_last_task_start, t_last_task_end, t_times_out_at FROM sync_info WHERE id = s_id; From a08484887ad24f4720e766f8c78509440945b79a Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Sat, 17 May 2025 12:37:45 -0400 Subject: [PATCH 19/40] Apply sqruff and some sqlfluff to schemas --- packages/database/.sqruff | 8 + packages/database/README.md | 14 +- packages/database/package.json | 2 + .../database/supabase/schemas/account.sql | 94 +++++---- packages/database/supabase/schemas/agent.sql | 81 ++++---- packages/database/supabase/schemas/base.sql | 64 +++---- .../database/supabase/schemas/concept.sql | 110 ++++++----- .../database/supabase/schemas/content.sql | 178 ++++++++++-------- .../database/supabase/schemas/contributor.sql | 64 ++++--- .../database/supabase/schemas/embedding.sql | 85 +++++---- .../database/supabase/schemas/extensions.sql | 22 +-- packages/database/supabase/schemas/space.sql | 60 +++--- packages/database/supabase/schemas/sync.sql | 166 ++++++++++------ 13 files changed, 557 insertions(+), 391 
deletions(-) create mode 100644 packages/database/.sqruff diff --git a/packages/database/.sqruff b/packages/database/.sqruff new file mode 100644 index 000000000..3effbf331 --- /dev/null +++ b/packages/database/.sqruff @@ -0,0 +1,8 @@ +[sqruff] +dialect = postgres +exclude_rules = CP05,LT05 + +[sqruff:indentation] +indent_unit = space +tab_space_size = 4 +indented_joins = True diff --git a/packages/database/README.md b/packages/database/README.md index 2f8ac8d88..20a9e2c86 100644 --- a/packages/database/README.md +++ b/packages/database/README.md @@ -6,6 +6,7 @@ All CLI commands below should be run in this directory (`packages/database`.) 2. Install the [supabase CLI](https://supabase.com/docs/guides/local-development). (There is a brew version) 3. `supabase login` with your (account-specific) supabase access token. (TODO: Create a group access token.) 4. `supabase link`. It will ask you for a project name, use `discourse-graphs`. (Production for now.) It will also ask you for the database password (See 1password.) + 5. Install [sqruff](https://github.com/quarylabs/sqruff) 2. Usage: 1. Use `supabase start` before you use your local database. URLs will be given for your local supabase database, api endpoint, etc. 2. You may need to `supabase db pull` if changes are deployed while you work. @@ -15,9 +16,10 @@ All CLI commands below should be run in this directory (`packages/database`.) 2. `supabase stop` if it's running. 3. Make changes to the schema, by editing files in `project/database/supabase/schemas` 4. If you created a new schema file, make sure to add it to `[db.migrations] schema_paths` in `packages/database/supabase/config.toml`. Schema files are applied in that order, you may need to be strategic in placing your file. - 4. `supabase db diff -f some_meaningful_migration_name` - 5. If applying the new schema fails, repeat steps 2 and 3 - 6. 
If all goes well, there should be a new file named `supbase/migration/2..._some_meaningful_migration_name.sql` which you should `git add`. - 7. You can start using your changes `supabase start` - 8. Regenerate the types file with `supabase gen types typescript --local > types.gen.ts` - 9. When your PR gets merged to main, deploy your changes to production with `supabase db push`. (URGENT TODO: make that a CI/CD step.) + 5. Check your logic with `sqruff lint supabase/schemas`, and eventually `sqruff fix supabase/schemas` + 6. Regenerate the types file with `supabase gen types typescript --local > types.gen.ts` + 7. `supabase db diff -f some_meaningful_migration_name` + 8. If applying the new schema fails, repeat steps 2 to 7 + 9. If all goes well, there should be a new file named `supbase/migration/2..._some_meaningful_migration_name.sql` which you should `git add`. + 10. You can start using your changes `supabase start` + 11. When your PR gets merged to main, deploy your changes to production with `supabase db push`. (URGENT TODO: make that a CI/CD step.) 
diff --git a/packages/database/package.json b/packages/database/package.json index df0f8c3f4..5f62f4fb4 100644 --- a/packages/database/package.json +++ b/packages/database/package.json @@ -11,6 +11,8 @@ "init": "supabase login", "dev": "supabase start", "stop": "supabase stop", + "lint": "sqruff lint supabase/schemas", + "lint:fix": "sqruff fix supabase/schemas", "local-types": "supabase start && supabase gen types typescript --local --schema public > types.gen.ts", "active-types": "supabase start && supabase gen types typescript --project-id \"$PRODUCTION_PROJECT_ID\" --schema public > types.gen.ts", "new-migration": "supabase stop && supabase db diff -f ", diff --git a/packages/database/supabase/schemas/account.sql b/packages/database/supabase/schemas/account.sql index 2c911c39b..5059d12cb 100644 --- a/packages/database/supabase/schemas/account.sql +++ b/packages/database/supabase/schemas/account.sql @@ -1,62 +1,76 @@ - - - -CREATE TABLE IF NOT EXISTS "public"."Account" ( - "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, - "platform_id" bigint NOT NULL, - "person_id" bigint NOT NULL, - "write_permission" boolean NOT NULL, - "active" boolean DEFAULT true NOT NULL +CREATE TABLE IF NOT EXISTS public."Account" ( + id bigint DEFAULT nextval( + 'public.entity_id_seq'::regclass + ) NOT NULL, + platform_id bigint NOT NULL, + person_id bigint NOT NULL, + write_permission boolean NOT NULL, + active boolean DEFAULT true NOT NULL ); -ALTER TABLE "public"."Account" OWNER TO "postgres"; +ALTER TABLE public."Account" OWNER TO "postgres"; -COMMENT ON TABLE "public"."Account" IS 'A user account on a discourse platform'; +COMMENT ON TABLE public."Account" IS 'A user account on a discourse platform'; -ALTER TABLE ONLY "public"."Account" - ADD CONSTRAINT "Account_person_id_fkey" FOREIGN KEY ("person_id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY public."Account" +ADD CONSTRAINT "Account_person_id_fkey" 
FOREIGN KEY ( + person_id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE; -ALTER TABLE ONLY "public"."Account" - ADD CONSTRAINT "Account_platform_id_fkey" FOREIGN KEY ("platform_id") REFERENCES "public"."DiscoursePlatform"("id") ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY public."Account" +ADD CONSTRAINT "Account_platform_id_fkey" FOREIGN KEY ( + platform_id +) REFERENCES public."DiscoursePlatform" ( + id +) ON UPDATE CASCADE ON DELETE CASCADE; -ALTER TABLE ONLY "public"."Account" - ADD CONSTRAINT "Account_pkey" PRIMARY KEY ("id"); +ALTER TABLE ONLY public."Account" +ADD CONSTRAINT "Account_pkey" PRIMARY KEY (id); -CREATE TABLE IF NOT EXISTS "public"."SpaceAccess" ( - "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, - "space_id" bigint, - "account_id" bigint NOT NULL, - "editor" boolean NOT NULL +CREATE TABLE IF NOT EXISTS public."SpaceAccess" ( + id bigint DEFAULT nextval( + 'public.entity_id_seq'::regclass + ) NOT NULL, + space_id bigint, + account_id bigint NOT NULL, + editor boolean NOT NULL ); -ALTER TABLE ONLY "public"."SpaceAccess" - ADD CONSTRAINT "SpaceAccess_account_id_space_id_key" UNIQUE ("account_id", "space_id"); - -ALTER TABLE ONLY "public"."SpaceAccess" - ADD CONSTRAINT "SpaceAccess_pkey" PRIMARY KEY ("id"); +ALTER TABLE ONLY public."SpaceAccess" +ADD CONSTRAINT "SpaceAccess_account_id_space_id_key" UNIQUE ( + account_id, space_id +); +ALTER TABLE ONLY public."SpaceAccess" +ADD CONSTRAINT "SpaceAccess_pkey" PRIMARY KEY (id); -ALTER TABLE "public"."SpaceAccess" OWNER TO "postgres"; -COMMENT ON TABLE "public"."SpaceAccess" IS 'An access control entry for a space'; +ALTER TABLE public."SpaceAccess" OWNER TO "postgres"; -COMMENT ON COLUMN "public"."SpaceAccess"."space_id" IS 'The space in which the content is located'; +COMMENT ON TABLE public."SpaceAccess" IS 'An access control entry for a space'; +COMMENT ON COLUMN public."SpaceAccess".space_id IS 'The space in which the content is 
located'; -ALTER TABLE ONLY "public"."SpaceAccess" - ADD CONSTRAINT "SpaceAccess_account_id_fkey" FOREIGN KEY ("account_id") REFERENCES "public"."Account"("id") ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY public."SpaceAccess" +ADD CONSTRAINT "SpaceAccess_account_id_fkey" FOREIGN KEY ( + account_id +) REFERENCES public."Account" (id) ON UPDATE CASCADE ON DELETE CASCADE; -ALTER TABLE ONLY "public"."SpaceAccess" - ADD CONSTRAINT "SpaceAccess_space_id_fkey" FOREIGN KEY ("space_id") REFERENCES "public"."DiscourseSpace"("id") ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY public."SpaceAccess" +ADD CONSTRAINT "SpaceAccess_space_id_fkey" FOREIGN KEY ( + space_id +) REFERENCES public."DiscourseSpace" ( + id +) ON UPDATE CASCADE ON DELETE CASCADE; -GRANT ALL ON TABLE "public"."SpaceAccess" TO "anon"; -GRANT ALL ON TABLE "public"."SpaceAccess" TO "authenticated"; -GRANT ALL ON TABLE "public"."SpaceAccess" TO "service_role"; +GRANT ALL ON TABLE public."SpaceAccess" TO anon; +GRANT ALL ON TABLE public."SpaceAccess" TO authenticated; +GRANT ALL ON TABLE public."SpaceAccess" TO service_role; -GRANT ALL ON TABLE "public"."Account" TO "anon"; -GRANT ALL ON TABLE "public"."Account" TO "authenticated"; -GRANT ALL ON TABLE "public"."Account" TO "service_role"; +GRANT ALL ON TABLE public."Account" TO anon; +GRANT ALL ON TABLE public."Account" TO authenticated; +GRANT ALL ON TABLE public."Account" TO service_role; diff --git a/packages/database/supabase/schemas/agent.sql b/packages/database/supabase/schemas/agent.sql index f4f9154c7..465900ac8 100644 --- a/packages/database/supabase/schemas/agent.sql +++ b/packages/database/supabase/schemas/agent.sql @@ -1,60 +1,65 @@ - -CREATE TABLE IF NOT EXISTS "public"."Agent" ( - "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, - "type" "public"."EntityType" NOT NULL +CREATE TABLE IF NOT EXISTS public."Agent" ( + id bigint DEFAULT nextval( + 'public.entity_id_seq'::regclass + ) NOT NULL, + type 
public."EntityType" NOT NULL ); -ALTER TABLE ONLY "public"."Agent" - ADD CONSTRAINT "Agent_pkey" PRIMARY KEY ("id"); +ALTER TABLE ONLY public."Agent" +ADD CONSTRAINT "Agent_pkey" PRIMARY KEY (id); -ALTER TABLE "public"."Agent" OWNER TO "postgres"; +ALTER TABLE public."Agent" OWNER TO "postgres"; -COMMENT ON TABLE "public"."Agent" IS 'An agent that acts in the system'; +COMMENT ON TABLE public."Agent" IS 'An agent that acts in the system'; -CREATE TABLE IF NOT EXISTS "public"."AutomatedAgent" ( - "id" bigint NOT NULL, - "name" character varying NOT NULL, - "metadata" "jsonb" DEFAULT '{}'::"jsonb" NOT NULL, - "deterministic" boolean DEFAULT false, - "version" character varying +CREATE TABLE IF NOT EXISTS public."AutomatedAgent" ( + id bigint NOT NULL, + name character varying NOT NULL, + metadata jsonb DEFAULT '{}'::jsonb NOT NULL, + deterministic boolean DEFAULT false, + version character varying ); -ALTER TABLE ONLY "public"."AutomatedAgent" - ADD CONSTRAINT "AutomatedAgent_pkey" PRIMARY KEY ("id"); +ALTER TABLE ONLY public."AutomatedAgent" +ADD CONSTRAINT "AutomatedAgent_pkey" PRIMARY KEY (id); -ALTER TABLE ONLY "public"."AutomatedAgent" - ADD CONSTRAINT "automated_agent_id_fkey" FOREIGN KEY ("id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY public."AutomatedAgent" +ADD CONSTRAINT automated_agent_id_fkey FOREIGN KEY ( + id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE; -ALTER TABLE "public"."AutomatedAgent" OWNER TO "postgres"; +ALTER TABLE public."AutomatedAgent" OWNER TO "postgres"; -COMMENT ON TABLE "public"."AutomatedAgent" IS 'An automated agent'; +COMMENT ON TABLE public."AutomatedAgent" IS 'An automated agent'; -CREATE TABLE IF NOT EXISTS "public"."Person" ( - "id" bigint NOT NULL, - "name" character varying NOT NULL, - "orcid" character varying(20), - "email" character varying NOT NULL +CREATE TABLE IF NOT EXISTS public."Person" ( + id bigint NOT NULL, + name character varying NOT NULL, + 
orcid character varying(20), + email character varying NOT NULL ); -ALTER TABLE ONLY "public"."Person" - ADD CONSTRAINT "person_id_fkey" FOREIGN KEY ("id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY public."Person" +ADD CONSTRAINT person_id_fkey FOREIGN KEY ( + id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE; -ALTER TABLE "public"."Person" OWNER TO "postgres"; +ALTER TABLE public."Person" OWNER TO "postgres"; -COMMENT ON TABLE "public"."Person" IS 'A person using the system'; +COMMENT ON TABLE public."Person" IS 'A person using the system'; -GRANT ALL ON TABLE "public"."Agent" TO "anon"; -GRANT ALL ON TABLE "public"."Agent" TO "authenticated"; -GRANT ALL ON TABLE "public"."Agent" TO "service_role"; +GRANT ALL ON TABLE public."Agent" TO anon; +GRANT ALL ON TABLE public."Agent" TO authenticated; +GRANT ALL ON TABLE public."Agent" TO service_role; -GRANT ALL ON TABLE "public"."AutomatedAgent" TO "anon"; -GRANT ALL ON TABLE "public"."AutomatedAgent" TO "authenticated"; -GRANT ALL ON TABLE "public"."AutomatedAgent" TO "service_role"; +GRANT ALL ON TABLE public."AutomatedAgent" TO anon; +GRANT ALL ON TABLE public."AutomatedAgent" TO authenticated; +GRANT ALL ON TABLE public."AutomatedAgent" TO service_role; -GRANT ALL ON TABLE "public"."Person" TO "anon"; -GRANT ALL ON TABLE "public"."Person" TO "authenticated"; -GRANT ALL ON TABLE "public"."Person" TO "service_role"; +GRANT ALL ON TABLE public."Person" TO anon; +GRANT ALL ON TABLE public."Person" TO authenticated; +GRANT ALL ON TABLE public."Person" TO service_role; diff --git a/packages/database/supabase/schemas/base.sql b/packages/database/supabase/schemas/base.sql index d4f4a71e6..e66946bdd 100644 --- a/packages/database/supabase/schemas/base.sql +++ b/packages/database/supabase/schemas/base.sql @@ -1,5 +1,3 @@ - - SET statement_timeout = 0; SET lock_timeout = 0; SET idle_in_transaction_session_timeout = 0; @@ -11,19 +9,19 @@ SET xmloption = 
content; SET client_min_messages = warning; SET row_security = on; SET default_tablespace = ''; -SET default_table_access_method = "heap"; +SET default_table_access_method = heap; -COMMENT ON SCHEMA "public" IS 'standard public schema'; +COMMENT ON SCHEMA public IS 'standard public schema'; -ALTER PUBLICATION "supabase_realtime" OWNER TO "postgres"; +ALTER PUBLICATION supabase_realtime OWNER TO postgres; -GRANT USAGE ON SCHEMA "public" TO "postgres"; -GRANT USAGE ON SCHEMA "public" TO "anon"; -GRANT USAGE ON SCHEMA "public" TO "authenticated"; -GRANT USAGE ON SCHEMA "public" TO "service_role"; +GRANT USAGE ON SCHEMA public TO postgres; +GRANT USAGE ON SCHEMA public TO anon; +GRANT USAGE ON SCHEMA public TO authenticated; +GRANT USAGE ON SCHEMA public TO service_role; -CREATE TYPE "public"."EntityType" AS ENUM ( +CREATE TYPE public."EntityType" AS ENUM ( 'Platform', 'Space', 'Account', @@ -37,31 +35,31 @@ CREATE TYPE "public"."EntityType" AS ENUM ( 'Occurrence' ); -ALTER TYPE "public"."EntityType" OWNER TO "postgres"; +ALTER TYPE public."EntityType" OWNER TO postgres; -CREATE SEQUENCE IF NOT EXISTS "public"."entity_id_seq" - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; +CREATE SEQUENCE IF NOT EXISTS public.entity_id_seq +START WITH 1 +INCREMENT BY 1 +NO MINVALUE +NO MAXVALUE +CACHE 1; -ALTER TABLE "public"."entity_id_seq" OWNER TO "postgres"; +ALTER TABLE public.entity_id_seq OWNER TO "postgres"; -ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON SEQUENCES TO "postgres"; -ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON SEQUENCES TO "anon"; -ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON SEQUENCES TO "authenticated"; -ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON SEQUENCES TO "service_role"; -ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON FUNCTIONS TO "postgres"; -ALTER DEFAULT PRIVILEGES FOR ROLE 
"postgres" IN SCHEMA "public" GRANT ALL ON FUNCTIONS TO "anon"; -ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON FUNCTIONS TO "authenticated"; -ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON FUNCTIONS TO "service_role"; -ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON TABLES TO "postgres"; -ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON TABLES TO "anon"; -ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON TABLES TO "authenticated"; -ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON TABLES TO "service_role"; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON SEQUENCES TO postgres; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON SEQUENCES TO anon; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON SEQUENCES TO authenticated; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON SEQUENCES TO service_role; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON FUNCTIONS TO postgres; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON FUNCTIONS TO anon; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON FUNCTIONS TO authenticated; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON FUNCTIONS TO service_role; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON TABLES TO postgres; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON TABLES TO anon; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON TABLES TO authenticated; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON TABLES TO service_role; -GRANT ALL ON SEQUENCE "public"."entity_id_seq" TO "anon"; -GRANT ALL ON SEQUENCE "public"."entity_id_seq" TO "authenticated"; -GRANT ALL ON 
SEQUENCE "public"."entity_id_seq" TO "service_role"; +GRANT ALL ON SEQUENCE public.entity_id_seq TO anon; +GRANT ALL ON SEQUENCE public.entity_id_seq TO authenticated; +GRANT ALL ON SEQUENCE public.entity_id_seq TO service_role; diff --git a/packages/database/supabase/schemas/concept.sql b/packages/database/supabase/schemas/concept.sql index 99b87b87f..8ec68744d 100644 --- a/packages/database/supabase/schemas/concept.sql +++ b/packages/database/supabase/schemas/concept.sql @@ -1,7 +1,4 @@ - - - -CREATE TYPE "public"."EpistemicStatus" AS ENUM ( +CREATE TYPE public."EpistemicStatus" AS ENUM ( 'certainly_not', 'strong_evidence_against', 'could_be_false', @@ -13,74 +10,85 @@ CREATE TYPE "public"."EpistemicStatus" AS ENUM ( 'certain' ); -ALTER TYPE "public"."EpistemicStatus" OWNER TO "postgres"; - - - -CREATE TABLE IF NOT EXISTS "public"."Concept" ( - "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, - "epistemic_status" "public"."EpistemicStatus" DEFAULT 'unknown'::"public"."EpistemicStatus" NOT NULL, - "name" character varying NOT NULL, - "description" "text", - "author_id" bigint, - "created" timestamp without time zone NOT NULL, - "last_modified" timestamp without time zone NOT NULL, - "space_id" bigint, - "arity" smallint DEFAULT 0 NOT NULL, - "schema_id" bigint, - "content" "jsonb" DEFAULT '{}'::"jsonb" NOT NULL, - "is_schema" boolean DEFAULT false NOT NULL, - "represented_by_id" BIGINT +ALTER TYPE public."EpistemicStatus" OWNER TO postgres; + + +CREATE TABLE IF NOT EXISTS public."Concept" ( + id bigint DEFAULT nextval( + 'public.entity_id_seq'::regclass + ) NOT NULL, + epistemic_status public."EpistemicStatus" DEFAULT 'unknown'::public."EpistemicStatus" NOT NULL, + name character varying NOT NULL, + description text, + author_id bigint, + created timestamp without time zone NOT NULL, + last_modified timestamp without time zone NOT NULL, + space_id bigint, + arity smallint DEFAULT 0 NOT NULL, + schema_id bigint, + content jsonb 
DEFAULT '{}'::jsonb NOT NULL, + is_schema boolean DEFAULT false NOT NULL, + represented_by_id bigint ); -ALTER TABLE "public"."Concept" OWNER TO "postgres"; +ALTER TABLE public."Concept" OWNER TO "postgres"; -COMMENT ON TABLE "public"."Concept" IS 'An abstract concept, claim or relation'; +COMMENT ON TABLE public."Concept" IS 'An abstract concept, claim or relation'; -COMMENT ON COLUMN "public"."Concept"."author_id" IS 'The author of content'; +COMMENT ON COLUMN public."Concept".author_id IS 'The author of content'; -COMMENT ON COLUMN "public"."Concept"."created" IS 'The time when the content was created in the remote source'; +COMMENT ON COLUMN public."Concept".created IS 'The time when the content was created in the remote source'; -COMMENT ON COLUMN "public"."Concept"."last_modified" IS 'The last time the content was modified in the remote source'; +COMMENT ON COLUMN public."Concept".last_modified IS 'The last time the content was modified in the remote source'; -COMMENT ON COLUMN "public"."Concept"."space_id" IS 'The space in which the content is located'; +COMMENT ON COLUMN public."Concept".space_id IS 'The space in which the content is located'; +ALTER TABLE ONLY public."Concept" +ADD CONSTRAINT "Concept_pkey" PRIMARY KEY (id); +ALTER TABLE ONLY public."Concept" +ADD FOREIGN KEY (represented_by_id) REFERENCES public."Content" ( + id +) ON DELETE SET NULL ON UPDATE CASCADE; -ALTER TABLE ONLY "public"."Concept" - ADD CONSTRAINT "Concept_pkey" PRIMARY KEY ("id"); +ALTER TABLE ONLY public."Person" +ADD CONSTRAINT "Person_pkey" PRIMARY KEY (id); -ALTER TABLE ONLY "public"."Concept" - ADD FOREIGN KEY (represented_by_id) REFERENCES "public"."Content" (id) ON DELETE SET NULL ON UPDATE CASCADE; - -ALTER TABLE ONLY "public"."Person" - ADD CONSTRAINT "Person_pkey" PRIMARY KEY ("id"); - -CREATE INDEX "Concept_content" ON "public"."Concept" USING "gin" ("content" "jsonb_path_ops"); - -CREATE INDEX "Concept_schema" ON "public"."Concept" USING "btree" ("schema_id"); - 
-CREATE INDEX "Concept_space" ON "public"."Concept" USING "btree" ("space_id"); +CREATE INDEX "Concept_content" ON public."Concept" USING gin ( + content jsonb_path_ops +); -CREATE UNIQUE INDEX "Concept_represented_by" ON "public"."Concept" ("represented_by_id"); +CREATE INDEX "Concept_schema" ON public."Concept" USING btree (schema_id); +CREATE INDEX "Concept_space" ON public."Concept" USING btree (space_id); -ALTER TABLE ONLY "public"."Concept" - ADD CONSTRAINT "Concept_author_id_fkey" FOREIGN KEY ("author_id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE SET NULL; +CREATE UNIQUE INDEX "Concept_represented_by" ON public."Concept" ( + represented_by_id +); -ALTER TABLE ONLY "public"."Concept" - ADD CONSTRAINT "Concept_schema_id_fkey" FOREIGN KEY ("schema_id") REFERENCES "public"."Concept"("id") ON UPDATE CASCADE ON DELETE SET NULL; -ALTER TABLE ONLY "public"."Concept" - ADD CONSTRAINT "Concept_space_id_fkey" FOREIGN KEY ("space_id") REFERENCES "public"."DiscourseSpace"("id") ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY public."Concept" +ADD CONSTRAINT "Concept_author_id_fkey" FOREIGN KEY ( + author_id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE SET NULL; +ALTER TABLE ONLY public."Concept" +ADD CONSTRAINT "Concept_schema_id_fkey" FOREIGN KEY ( + schema_id +) REFERENCES public."Concept" (id) ON UPDATE CASCADE ON DELETE SET NULL; +ALTER TABLE ONLY public."Concept" +ADD CONSTRAINT "Concept_space_id_fkey" FOREIGN KEY ( + space_id +) REFERENCES public."DiscourseSpace" ( + id +) ON UPDATE CASCADE ON DELETE CASCADE; -GRANT ALL ON TABLE "public"."Concept" TO "anon"; -GRANT ALL ON TABLE "public"."Concept" TO "authenticated"; -GRANT ALL ON TABLE "public"."Concept" TO "service_role"; +GRANT ALL ON TABLE public."Concept" TO anon; +GRANT ALL ON TABLE public."Concept" TO authenticated; +GRANT ALL ON TABLE public."Concept" TO service_role; RESET ALL; diff --git a/packages/database/supabase/schemas/content.sql 
b/packages/database/supabase/schemas/content.sql index f433f574c..d9bced1b6 100644 --- a/packages/database/supabase/schemas/content.sql +++ b/packages/database/supabase/schemas/content.sql @@ -1,4 +1,4 @@ -CREATE TYPE "public"."Scale" AS ENUM ( +CREATE TYPE public."Scale" AS ENUM ( 'document', 'post', 'chunk_unit', @@ -11,110 +11,138 @@ CREATE TYPE "public"."Scale" AS ENUM ( 'phrase' ); -ALTER TYPE "public"."Scale" OWNER TO "postgres"; +ALTER TYPE public."Scale" OWNER TO postgres; -CREATE TABLE IF NOT EXISTS "public"."Document" ( - "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, - "space_id" bigint, - "source_local_id" character varying, - "url" character varying, +CREATE TABLE IF NOT EXISTS public."Document" ( + id bigint DEFAULT nextval( + 'public.entity_id_seq'::regclass + ) NOT NULL, + space_id bigint, + source_local_id character varying, + url character varying, "created" timestamp without time zone NOT NULL, - "metadata" "jsonb" DEFAULT '{}'::"jsonb" NOT NULL, - "last_modified" timestamp without time zone NOT NULL, - "author_id" bigint NOT NULL, - "contents" "oid" + metadata jsonb DEFAULT '{}'::jsonb NOT NULL, + last_modified timestamp without time zone NOT NULL, + author_id bigint NOT NULL, + contents oid ); -ALTER TABLE ONLY "public"."Document" - ADD CONSTRAINT "Document_pkey" PRIMARY KEY ("id"); +ALTER TABLE ONLY public."Document" +ADD CONSTRAINT "Document_pkey" PRIMARY KEY (id); -ALTER TABLE ONLY "public"."Document" - ADD CONSTRAINT "Document_author_id_fkey" FOREIGN KEY ("author_id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY public."Document" +ADD CONSTRAINT "Document_author_id_fkey" FOREIGN KEY ( + author_id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE; -ALTER TABLE ONLY "public"."Document" - ADD CONSTRAINT "Document_space_id_fkey" FOREIGN KEY ("space_id") REFERENCES "public"."DiscourseSpace"("id") ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY 
public."Document" +ADD CONSTRAINT "Document_space_id_fkey" FOREIGN KEY ( + space_id +) REFERENCES public."DiscourseSpace" ( + id +) ON UPDATE CASCADE ON DELETE CASCADE; -ALTER TABLE "public"."Document" OWNER TO "postgres"; +ALTER TABLE public."Document" OWNER TO "postgres"; -COMMENT ON COLUMN "public"."Document"."space_id" IS 'The space in which the content is located'; +COMMENT ON COLUMN public."Document".space_id IS 'The space in which the content is located'; -COMMENT ON COLUMN "public"."Document"."source_local_id" IS 'The unique identifier of the content in the remote source'; +COMMENT ON COLUMN public."Document".source_local_id IS 'The unique identifier of the content in the remote source'; -COMMENT ON COLUMN "public"."Document"."created" IS 'The time when the content was created in the remote source'; +COMMENT ON COLUMN public."Document".created IS 'The time when the content was created in the remote source'; -COMMENT ON COLUMN "public"."Document"."last_modified" IS 'The last time the content was modified in the remote source'; +COMMENT ON COLUMN public."Document".last_modified IS 'The last time the content was modified in the remote source'; -COMMENT ON COLUMN "public"."Document"."author_id" IS 'The author of content'; +COMMENT ON COLUMN public."Document".author_id IS 'The author of content'; -COMMENT ON COLUMN "public"."Document"."contents" IS 'A large object OID for the downloaded raw content'; +COMMENT ON COLUMN public."Document".contents IS 'A large object OID for the downloaded raw content'; -CREATE TABLE IF NOT EXISTS "public"."Content" ( - "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, - "document_id" bigint NOT NULL, - "source_local_id" character varying, - "author_id" bigint, - "creator_id" bigint, - "created" timestamp without time zone NOT NULL, - "text" "text" NOT NULL, - "metadata" "jsonb" DEFAULT '{}'::"jsonb" NOT NULL, - "scale" "public"."Scale" NOT NULL, - "space_id" bigint, - "last_modified" timestamp without 
time zone NOT NULL, - "part_of_id" bigint +CREATE TABLE IF NOT EXISTS public."Content" ( + id bigint DEFAULT nextval( + 'public.entity_id_seq'::regclass + ) NOT NULL, + document_id bigint NOT NULL, + source_local_id character varying, + author_id bigint, + creator_id bigint, + created timestamp without time zone NOT NULL, + text text NOT NULL, + metadata jsonb DEFAULT '{}'::jsonb NOT NULL, + scale public."Scale" NOT NULL, + space_id bigint, + last_modified timestamp without time zone NOT NULL, + part_of_id bigint ); -ALTER TABLE ONLY "public"."Content" - ADD CONSTRAINT "Content_pkey" PRIMARY KEY ("id"); - -ALTER TABLE ONLY "public"."Content" - ADD CONSTRAINT "Content_author_id_fkey" FOREIGN KEY ("author_id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE SET NULL; - -ALTER TABLE ONLY "public"."Content" - ADD CONSTRAINT "Content_creator_id_fkey" FOREIGN KEY ("creator_id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE SET NULL; - -ALTER TABLE ONLY "public"."Content" - ADD CONSTRAINT "Content_document_id_fkey" FOREIGN KEY ("document_id") REFERENCES "public"."Document"("id") ON UPDATE CASCADE ON DELETE CASCADE; - -ALTER TABLE ONLY "public"."Content" - ADD CONSTRAINT "Content_part_of_id_fkey" FOREIGN KEY ("part_of_id") REFERENCES "public"."Content"("id") ON UPDATE CASCADE ON DELETE SET NULL; - -ALTER TABLE ONLY "public"."Content" - ADD CONSTRAINT "Content_space_id_fkey" FOREIGN KEY ("space_id") REFERENCES "public"."DiscourseSpace"("id") ON UPDATE CASCADE ON DELETE CASCADE; - -CREATE INDEX "Content_document" ON "public"."Content" USING "btree" ("document_id"); +ALTER TABLE ONLY public."Content" +ADD CONSTRAINT "Content_pkey" PRIMARY KEY (id); + +ALTER TABLE ONLY public."Content" +ADD CONSTRAINT "Content_author_id_fkey" FOREIGN KEY ( + author_id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE SET NULL; + +ALTER TABLE ONLY public."Content" +ADD CONSTRAINT "Content_creator_id_fkey" FOREIGN KEY ( + creator_id +) REFERENCES 
public."Agent" (id) ON UPDATE CASCADE ON DELETE SET NULL; + +ALTER TABLE ONLY public."Content" +ADD CONSTRAINT "Content_document_id_fkey" FOREIGN KEY ( + document_id +) REFERENCES public."Document" (id) ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY public."Content" +ADD CONSTRAINT "Content_part_of_id_fkey" FOREIGN KEY ( + part_of_id +) REFERENCES public."Content" (id) ON UPDATE CASCADE ON DELETE SET NULL; + +ALTER TABLE ONLY public."Content" +ADD CONSTRAINT "Content_space_id_fkey" FOREIGN KEY ( + space_id +) REFERENCES public."DiscourseSpace" ( + id +) ON UPDATE CASCADE ON DELETE CASCADE; + +CREATE INDEX "Content_document" ON public."Content" USING btree ( + document_id +); -CREATE INDEX "Content_part_of" ON "public"."Content" USING "btree" ("part_of_id"); +CREATE INDEX "Content_part_of" ON public."Content" USING btree ( + part_of_id +); -CREATE INDEX "Content_space" ON "public"."Content" USING "btree" ("space_id"); +CREATE INDEX "Content_space" ON public."Content" USING btree (space_id); -CREATE UNIQUE INDEX "Content_space_and_id" ON "public"."Content" USING "btree" ("space_id", "source_local_id") WHERE ("source_local_id" IS NOT NULL); +CREATE UNIQUE INDEX "Content_space_and_id" ON public."Content" USING btree ( + space_id, source_local_id +) WHERE (source_local_id IS NOT NULL); -CREATE INDEX "Content_text" ON "public"."Content" USING "pgroonga" ("text"); +CREATE INDEX "Content_text" ON public."Content" USING pgroonga (text); -ALTER TABLE "public"."Content" OWNER TO "postgres"; +ALTER TABLE public."Content" OWNER TO "postgres"; -COMMENT ON TABLE "public"."Content" IS 'A unit of content'; +COMMENT ON TABLE public."Content" IS 'A unit of content'; -COMMENT ON COLUMN "public"."Content"."source_local_id" IS 'The unique identifier of the content in the remote source'; +COMMENT ON COLUMN public."Content".source_local_id IS 'The unique identifier of the content in the remote source'; -COMMENT ON COLUMN "public"."Content"."author_id" IS 'The author of content'; 
+COMMENT ON COLUMN public."Content".author_id IS 'The author of content'; -COMMENT ON COLUMN "public"."Content"."creator_id" IS 'The creator of a logical structure, such as a content subdivision'; +COMMENT ON COLUMN public."Content".creator_id IS 'The creator of a logical structure, such as a content subdivision'; -COMMENT ON COLUMN "public"."Content"."created" IS 'The time when the content was created in the remote source'; +COMMENT ON COLUMN public."Content".created IS 'The time when the content was created in the remote source'; -COMMENT ON COLUMN "public"."Content"."space_id" IS 'The space in which the content is located'; +COMMENT ON COLUMN public."Content".space_id IS 'The space in which the content is located'; -COMMENT ON COLUMN "public"."Content"."last_modified" IS 'The last time the content was modified in the remote source'; +COMMENT ON COLUMN public."Content".last_modified IS 'The last time the content was modified in the remote source'; -COMMENT ON COLUMN "public"."Content"."part_of_id" IS 'This content is part of a larger content unit'; +COMMENT ON COLUMN public."Content".part_of_id IS 'This content is part of a larger content unit'; -GRANT ALL ON TABLE "public"."Document" TO "anon"; -GRANT ALL ON TABLE "public"."Document" TO "authenticated"; -GRANT ALL ON TABLE "public"."Document" TO "service_role"; +GRANT ALL ON TABLE public."Document" TO anon; +GRANT ALL ON TABLE public."Document" TO authenticated; +GRANT ALL ON TABLE public."Document" TO service_role; -GRANT ALL ON TABLE "public"."Content" TO "anon"; -GRANT ALL ON TABLE "public"."Content" TO "authenticated"; -GRANT ALL ON TABLE "public"."Content" TO "service_role"; +GRANT ALL ON TABLE public."Content" TO anon; +GRANT ALL ON TABLE public."Content" TO authenticated; +GRANT ALL ON TABLE public."Content" TO service_role; diff --git a/packages/database/supabase/schemas/contributor.sql b/packages/database/supabase/schemas/contributor.sql index 8105f65ab..f83b9a40a 100644 --- 
a/packages/database/supabase/schemas/contributor.sql +++ b/packages/database/supabase/schemas/contributor.sql @@ -1,40 +1,52 @@ -CREATE TABLE IF NOT EXISTS "public"."content_contributors" ( - "content_id" bigint NOT NULL, - "contributor_id" bigint NOT NULL +CREATE TABLE IF NOT EXISTS public.content_contributors ( + content_id bigint NOT NULL, + contributor_id bigint NOT NULL ); -ALTER TABLE ONLY "public"."content_contributors" - ADD CONSTRAINT "content_contributors_pkey" PRIMARY KEY ("content_id", "contributor_id"); +ALTER TABLE ONLY public.content_contributors +ADD CONSTRAINT content_contributors_pkey PRIMARY KEY ( + content_id, contributor_id +); -ALTER TABLE ONLY "public"."content_contributors" - ADD CONSTRAINT "content_contributors_content_id_fkey" FOREIGN KEY ("content_id") REFERENCES "public"."Content"("id") ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY public.content_contributors +ADD CONSTRAINT content_contributors_content_id_fkey FOREIGN KEY ( + content_id +) REFERENCES public."Content" (id) ON UPDATE CASCADE ON DELETE CASCADE; -ALTER TABLE ONLY "public"."content_contributors" - ADD CONSTRAINT "content_contributors_contributor_id_fkey" FOREIGN KEY ("contributor_id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY public.content_contributors +ADD CONSTRAINT content_contributors_contributor_id_fkey FOREIGN KEY ( + contributor_id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE; -ALTER TABLE "public"."content_contributors" OWNER TO "postgres"; +ALTER TABLE public.content_contributors OWNER TO "postgres"; -CREATE TABLE IF NOT EXISTS "public"."concept_contributors" ( - "concept_id" bigint NOT NULL, - "contributor_id" bigint NOT NULL +CREATE TABLE IF NOT EXISTS public.concept_contributors ( + concept_id bigint NOT NULL, + contributor_id bigint NOT NULL ); -ALTER TABLE "public"."concept_contributors" OWNER TO "postgres"; +ALTER TABLE public.concept_contributors OWNER TO "postgres"; -ALTER 
TABLE ONLY "public"."concept_contributors" - ADD CONSTRAINT "concept_contributors_concept_id_fkey" FOREIGN KEY ("concept_id") REFERENCES "public"."Concept"("id") ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY public.concept_contributors +ADD CONSTRAINT concept_contributors_concept_id_fkey FOREIGN KEY ( + concept_id +) REFERENCES public."Concept" (id) ON UPDATE CASCADE ON DELETE CASCADE; -ALTER TABLE ONLY "public"."concept_contributors" - ADD CONSTRAINT "concept_contributors_contributor_id_fkey" FOREIGN KEY ("contributor_id") REFERENCES "public"."Agent"("id") ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY public.concept_contributors +ADD CONSTRAINT concept_contributors_contributor_id_fkey FOREIGN KEY ( + contributor_id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE; -ALTER TABLE ONLY "public"."concept_contributors" - ADD CONSTRAINT "concept_contributors_pkey" PRIMARY KEY ("concept_id", "contributor_id"); +ALTER TABLE ONLY public.concept_contributors +ADD CONSTRAINT concept_contributors_pkey PRIMARY KEY ( + concept_id, contributor_id +); -GRANT ALL ON TABLE "public"."concept_contributors" TO "anon"; -GRANT ALL ON TABLE "public"."concept_contributors" TO "authenticated"; -GRANT ALL ON TABLE "public"."concept_contributors" TO "service_role"; +GRANT ALL ON TABLE public.concept_contributors TO anon; +GRANT ALL ON TABLE public.concept_contributors TO authenticated; +GRANT ALL ON TABLE public.concept_contributors TO service_role; -GRANT ALL ON TABLE "public"."content_contributors" TO "anon"; -GRANT ALL ON TABLE "public"."content_contributors" TO "authenticated"; -GRANT ALL ON TABLE "public"."content_contributors" TO "service_role"; +GRANT ALL ON TABLE public.content_contributors TO anon; +GRANT ALL ON TABLE public.content_contributors TO authenticated; +GRANT ALL ON TABLE public.content_contributors TO service_role; diff --git a/packages/database/supabase/schemas/embedding.sql b/packages/database/supabase/schemas/embedding.sql index 
56fc0bd2d..bd5b4451d 100644 --- a/packages/database/supabase/schemas/embedding.sql +++ b/packages/database/supabase/schemas/embedding.sql @@ -1,4 +1,4 @@ -CREATE TYPE "public"."EmbeddingName" AS ENUM ( +CREATE TYPE public."EmbeddingName" AS ENUM ( 'openai_text_embedding_ada2_1536', 'openai_text_embedding_3_small_512', 'openai_text_embedding_3_small_1536', @@ -7,51 +7,70 @@ CREATE TYPE "public"."EmbeddingName" AS ENUM ( 'openai_text_embedding_3_large_3072' ); -ALTER TYPE "public"."EmbeddingName" OWNER TO "postgres"; +ALTER TYPE public."EmbeddingName" OWNER TO "postgres"; -CREATE TABLE IF NOT EXISTS "public"."ContentEmbedding_openai_text_embedding_3_small_1536" ( - "target_id" bigint NOT NULL, - "model" "public"."EmbeddingName" DEFAULT 'openai_text_embedding_3_small_1536'::"public"."EmbeddingName" NOT NULL, - "vector" "extensions"."vector"(1536) NOT NULL, - "obsolete" boolean DEFAULT false -); +CREATE TABLE IF NOT EXISTS public."ContentEmbedding_openai_text_embedding_3_small_1536" ( +target_id bigint NOT NULL, +"model" public."EmbeddingName" DEFAULT 'openai_text_embedding_3_small_1536'::public."EmbeddingName" NOT NULL, +"vector" extensions.vector (1536) NOT NULL, +obsolete boolean DEFAULT false +) ; -ALTER TABLE "public"."ContentEmbedding_openai_text_embedding_3_small_1536" OWNER TO "postgres"; +ALTER TABLE public."ContentEmbedding_openai_text_embedding_3_small_1536" OWNER TO "postgres" ; -ALTER TABLE ONLY "public"."ContentEmbedding_openai_text_embedding_3_small_1536" - ADD CONSTRAINT "ContentEmbedding_openai_text_embedding_3_small_1536_pkey" PRIMARY KEY ("target_id"); +ALTER TABLE ONLY public."ContentEmbedding_openai_text_embedding_3_small_1536" +ADD CONSTRAINT "ContentEmbedding_openai_text_embedding_3_small_1536_pkey" PRIMARY KEY (target_id) ; -ALTER TABLE ONLY "public"."ContentEmbedding_openai_text_embedding_3_small_1536" - ADD CONSTRAINT "ContentEmbedding_openai_text_embedding_3_small_1_target_id_fkey" FOREIGN KEY ("target_id") REFERENCES "public"."Content"("id") 
ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY public."ContentEmbedding_openai_text_embedding_3_small_1536" +ADD CONSTRAINT "ContentEmbedding_openai_text_embedding_3_small_1_target_id_fkey" FOREIGN KEY (target_id) REFERENCES public."Content" (id) ON UPDATE CASCADE ON DELETE CASCADE ; -GRANT ALL ON TABLE "public"."ContentEmbedding_openai_text_embedding_3_small_1536" TO "anon"; -GRANT ALL ON TABLE "public"."ContentEmbedding_openai_text_embedding_3_small_1536" TO "authenticated"; -GRANT ALL ON TABLE "public"."ContentEmbedding_openai_text_embedding_3_small_1536" TO "service_role"; +GRANT ALL ON TABLE public."ContentEmbedding_openai_text_embedding_3_small_1536" TO "anon" ; +GRANT ALL ON TABLE public."ContentEmbedding_openai_text_embedding_3_small_1536" TO "authenticated" ; +GRANT ALL ON TABLE public."ContentEmbedding_openai_text_embedding_3_small_1536" TO "service_role" ; -set search_path to public,extensions; +set search_path to public, extensions ; -CREATE OR REPLACE FUNCTION "public"."match_content_embeddings"("query_embedding" "extensions"."vector", "match_threshold" double precision, "match_count" integer, "current_document_id" integer DEFAULT NULL::integer) RETURNS TABLE("content_id" bigint, "roam_uid" "text", "text_content" "text", "similarity" double precision) - LANGUAGE "sql" STABLE - AS $$ +CREATE OR REPLACE FUNCTION public.match_content_embeddings ( +query_embedding extensions.vector, +match_threshold double precision, +match_count integer, +current_document_id integer DEFAULT NULL::integer) +RETURNS TABLE ( +content_id bigint, +roam_uid Text, +text_content Text, +similarity double precision) +LANGUAGE sql STABLE +AS $$ SELECT c.id AS content_id, c.source_local_id AS roam_uid, c.text AS text_content, 1 - (ce.vector <=> query_embedding) AS similarity -FROM "public"."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce -JOIN "public"."Content" AS c ON ce.target_id = c.id +FROM public."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce 
+JOIN public."Content" AS c ON ce.target_id = c.id WHERE 1 - (ce.vector <=> query_embedding) > match_threshold AND ce.obsolete = FALSE ORDER BY ce.vector <=> query_embedding ASC LIMIT match_count; -$$; +$$ ; -ALTER FUNCTION "public"."match_content_embeddings"("query_embedding" "extensions"."vector", "match_threshold" double precision, "match_count" integer, "current_document_id" integer) OWNER TO "postgres"; +ALTER FUNCTION public.match_content_embeddings ( +query_embedding extensions.vector, +match_threshold double precision, +match_count integer, +current_document_id integer) OWNER TO "postgres" ; -CREATE OR REPLACE FUNCTION "public"."match_embeddings_for_subset_nodes"("p_query_embedding" "extensions"."vector", "p_subset_roam_uids" "text"[]) RETURNS TABLE("content_id" bigint, "roam_uid" "text", "text_content" "text", "similarity" double precision) - LANGUAGE "sql" STABLE - AS $$ +CREATE OR REPLACE FUNCTION public.match_embeddings_for_subset_nodes ( +"p_query_embedding" extensions.vector, +"p_subset_roam_uids" Text []) +RETURNS TABLE (content_id bigint, +roam_uid Text, +text_content Text, +similarity double precision) +LANGUAGE sql STABLE +AS $$ WITH subset_content_with_embeddings AS ( -- Step 1: Identify content and fetch embeddings ONLY for the nodes in the provided Roam UID subset SELECT @@ -59,8 +78,8 @@ WITH subset_content_with_embeddings AS ( c.source_local_id AS roam_uid, c.text AS text_content, ce.vector AS embedding_vector - FROM "public"."Content" AS c - JOIN "public"."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce ON c.id = ce.target_id + FROM public."Content" AS c + JOIN public."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce ON c.id = ce.target_id WHERE c.source_local_id = ANY(p_subset_roam_uids) -- Filter Content by the provided Roam UIDs AND ce.obsolete = FALSE @@ -72,8 +91,10 @@ SELECT 1 - (ss_ce.embedding_vector <=> p_query_embedding) AS similarity FROM subset_content_with_embeddings AS ss_ce ORDER BY similarity DESC; -- 
Order by calculated similarity, highest first -$$; +$$ ; -ALTER FUNCTION "public"."match_embeddings_for_subset_nodes"("p_query_embedding" "extensions"."vector", "p_subset_roam_uids" "text"[]) OWNER TO "postgres"; +ALTER FUNCTION public.match_embeddings_for_subset_nodes ( +"p_query_embedding" extensions.vector, "p_subset_roam_uids" Text []) +OWNER TO "postgres" ; -set search_path to ''; +set search_path to '' ; diff --git a/packages/database/supabase/schemas/extensions.sql b/packages/database/supabase/schemas/extensions.sql index 7d7eb678f..60450e1e2 100644 --- a/packages/database/supabase/schemas/extensions.sql +++ b/packages/database/supabase/schemas/extensions.sql @@ -1,11 +1,11 @@ -CREATE EXTENSION IF NOT EXISTS "pg_cron" WITH SCHEMA "pg_catalog"; -CREATE EXTENSION IF NOT EXISTS "pgroonga" WITH SCHEMA "extensions"; -CREATE EXTENSION IF NOT EXISTS "pg_graphql" WITH SCHEMA "graphql"; -CREATE EXTENSION IF NOT EXISTS "pg_jsonschema" WITH SCHEMA "extensions"; -CREATE EXTENSION IF NOT EXISTS "pg_stat_monitor" WITH SCHEMA "extensions"; -CREATE EXTENSION IF NOT EXISTS "pg_stat_statements" WITH SCHEMA "extensions"; -CREATE EXTENSION IF NOT EXISTS "pgcrypto" WITH SCHEMA "extensions"; -CREATE EXTENSION IF NOT EXISTS "pgjwt" WITH SCHEMA "extensions"; -CREATE EXTENSION IF NOT EXISTS "supabase_vault" WITH SCHEMA "vault"; -CREATE EXTENSION IF NOT EXISTS "uuid-ossp" WITH SCHEMA "extensions"; -CREATE EXTENSION IF NOT EXISTS "vector" WITH SCHEMA "extensions"; +CREATE EXTENSION IF NOT EXISTS pg_cron WITH SCHEMA pg_catalog; +CREATE EXTENSION IF NOT EXISTS pgroonga WITH SCHEMA extensions; +CREATE EXTENSION IF NOT EXISTS pg_graphql WITH SCHEMA graphql; +CREATE EXTENSION IF NOT EXISTS pg_jsonschema WITH SCHEMA extensions; +CREATE EXTENSION IF NOT EXISTS pg_stat_monitor WITH SCHEMA extensions; +CREATE EXTENSION IF NOT EXISTS pg_stat_statements WITH SCHEMA extensions; +CREATE EXTENSION IF NOT EXISTS pgcrypto WITH SCHEMA extensions; +CREATE EXTENSION IF NOT EXISTS pgjwt WITH SCHEMA 
extensions; +CREATE EXTENSION IF NOT EXISTS supabase_vault WITH SCHEMA vault; +CREATE EXTENSION IF NOT EXISTS "uuid-ossp" WITH SCHEMA extensions; +CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA extensions; diff --git a/packages/database/supabase/schemas/space.sql b/packages/database/supabase/schemas/space.sql index 9d6003bb3..72f228a4e 100644 --- a/packages/database/supabase/schemas/space.sql +++ b/packages/database/supabase/schemas/space.sql @@ -1,37 +1,47 @@ -CREATE TABLE IF NOT EXISTS "public"."DiscoursePlatform" ( - "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, - "name" character varying NOT NULL, - "url" character varying NOT NULL +CREATE TABLE IF NOT EXISTS public."DiscoursePlatform" ( + id bigint DEFAULT nextval( + 'public."entity_id_seq"'::regclass + ) NOT NULL, + name character varying NOT NULL, + url character varying NOT NULL ); -ALTER TABLE "public"."DiscoursePlatform" OWNER TO "postgres"; +ALTER TABLE public."DiscoursePlatform" OWNER TO "postgres"; -COMMENT ON TABLE "public"."DiscoursePlatform" IS 'A data platform where discourse happens'; +COMMENT ON TABLE public."DiscoursePlatform" IS +'A data platform where discourse happens'; -CREATE TABLE IF NOT EXISTS "public"."DiscourseSpace" ( - "id" bigint DEFAULT "nextval"('"public"."entity_id_seq"'::"regclass") NOT NULL, - "url" character varying, - "name" character varying NOT NULL, - "discourse_platform_id" bigint NOT NULL +CREATE TABLE IF NOT EXISTS public."DiscourseSpace" ( + id bigint DEFAULT nextval( + 'public."entity_id_seq"'::regclass + ) NOT NULL, + url character varying, + name character varying NOT NULL, + discourse_platform_id bigint NOT NULL ); -ALTER TABLE "public"."DiscourseSpace" OWNER TO "postgres"; +ALTER TABLE public."DiscourseSpace" OWNER TO "postgres"; -COMMENT ON TABLE "public"."DiscourseSpace" IS 'A space on a discourse platform representing a community engaged in a conversation'; +COMMENT ON TABLE public."DiscourseSpace" IS +'A space on a discourse 
platform representing a community engaged in a conversation'; -ALTER TABLE ONLY "public"."DiscoursePlatform" - ADD CONSTRAINT "DiscoursePlatform_pkey" PRIMARY KEY ("id"); +ALTER TABLE ONLY public."DiscoursePlatform" +ADD CONSTRAINT "DiscoursePlatform_pkey" PRIMARY KEY (id); -ALTER TABLE ONLY "public"."DiscourseSpace" - ADD CONSTRAINT "DiscourseSpace_pkey" PRIMARY KEY ("id"); +ALTER TABLE ONLY public."DiscourseSpace" +ADD CONSTRAINT "DiscourseSpace_pkey" PRIMARY KEY (id); -ALTER TABLE ONLY "public"."DiscourseSpace" - ADD CONSTRAINT "DiscourseSpace_discourse_platform_id_fkey" FOREIGN KEY ("discourse_platform_id") REFERENCES "public"."DiscoursePlatform"("id") ON UPDATE CASCADE ON DELETE CASCADE; +ALTER TABLE ONLY public."DiscourseSpace" +ADD CONSTRAINT "DiscourseSpace_discourse_platform_id_fkey" FOREIGN KEY ( + discourse_platform_id +) REFERENCES public."DiscoursePlatform" ( + id +) ON UPDATE CASCADE ON DELETE CASCADE; -GRANT ALL ON TABLE "public"."DiscoursePlatform" TO "anon"; -GRANT ALL ON TABLE "public"."DiscoursePlatform" TO "authenticated"; -GRANT ALL ON TABLE "public"."DiscoursePlatform" TO "service_role"; +GRANT ALL ON TABLE public."DiscoursePlatform" TO anon; +GRANT ALL ON TABLE public."DiscoursePlatform" TO authenticated; +GRANT ALL ON TABLE public."DiscoursePlatform" TO service_role; -GRANT ALL ON TABLE "public"."DiscourseSpace" TO "anon"; -GRANT ALL ON TABLE "public"."DiscourseSpace" TO "authenticated"; -GRANT ALL ON TABLE "public"."DiscourseSpace" TO "service_role"; +GRANT ALL ON TABLE public."DiscourseSpace" TO anon; +GRANT ALL ON TABLE public."DiscourseSpace" TO authenticated; +GRANT ALL ON TABLE public."DiscourseSpace" TO service_role; diff --git a/packages/database/supabase/schemas/sync.sql b/packages/database/supabase/schemas/sync.sql index a967f4d09..7dbf96c58 100644 --- a/packages/database/supabase/schemas/sync.sql +++ b/packages/database/supabase/schemas/sync.sql @@ -1,51 +1,59 @@ - -CREATE TYPE "public"."task_status" AS ENUM ( +CREATE TYPE 
public.task_status AS ENUM ( 'active', 'timeout', 'complete', 'failed' ); -ALTER TYPE "public"."task_status" OWNER TO "postgres"; - -CREATE TABLE IF NOT EXISTS "public"."sync_info" ( - "id" integer NOT NULL, - "sync_target" bigint, - "sync_function" character varying(20), - "status" "public"."task_status" DEFAULT 'active'::"public"."task_status", - "worker" character varying(100) NOT NULL, - "failure_count" smallint DEFAULT 0, - "last_task_start" timestamp with time zone, - "last_task_end" timestamp with time zone, - "task_times_out_at" timestamp with time zone +ALTER TYPE public.task_status OWNER TO "postgres"; + +CREATE TABLE IF NOT EXISTS public.sync_info ( + id integer NOT NULL, + sync_target bigint, + sync_function character varying(20), + status public.task_status DEFAULT 'active'::public.task_status, + worker character varying(100) NOT NULL, + failure_count smallint DEFAULT 0, + last_task_start timestamp with time zone, + last_task_end timestamp with time zone, + task_times_out_at timestamp with time zone ); -ALTER TABLE "public"."sync_info" OWNER TO "postgres"; +ALTER TABLE public.sync_info OWNER TO "postgres"; -CREATE SEQUENCE IF NOT EXISTS "public"."sync_info_id_seq" - AS integer - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; +CREATE SEQUENCE IF NOT EXISTS public.sync_info_id_seq +AS integer +START WITH 1 +INCREMENT BY 1 +NO MINVALUE +NO MAXVALUE +CACHE 1; -ALTER TABLE "public"."sync_info_id_seq" OWNER TO "postgres"; +ALTER TABLE public.sync_info_id_seq OWNER TO "postgres"; -ALTER SEQUENCE "public"."sync_info_id_seq" OWNED BY "public"."sync_info"."id"; +ALTER SEQUENCE public.sync_info_id_seq OWNED BY public.sync_info.id; -ALTER TABLE ONLY "public"."sync_info" ALTER COLUMN "id" SET DEFAULT "nextval"('"public"."sync_info_id_seq"'::"regclass"); +ALTER TABLE ONLY public.sync_info ALTER COLUMN id SET DEFAULT nextval( + 'public.sync_info_id_seq'::regclass +); -ALTER TABLE ONLY "public"."sync_info" - ADD CONSTRAINT "sync_info_pkey" 
PRIMARY KEY ("id"); +ALTER TABLE ONLY public.sync_info +ADD CONSTRAINT sync_info_pkey PRIMARY KEY (id); -CREATE UNIQUE INDEX "sync_info_u_idx" ON "public"."sync_info" USING "btree" ("sync_target", "sync_function"); +CREATE UNIQUE INDEX sync_info_u_idx ON public.sync_info USING btree ( + "sync_target", sync_function +); -CREATE OR REPLACE FUNCTION "public"."end_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "s_status" "public"."task_status") RETURNS "void" - LANGUAGE "plpgsql" - AS $$ +CREATE OR REPLACE FUNCTION public.end_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + s_status public.task_status +) RETURNS void +LANGUAGE plpgsql +AS $$ DECLARE t_id INTEGER; DECLARE t_worker varchar; DECLARE t_status task_status; @@ -76,12 +84,23 @@ BEGIN END; $$; -ALTER FUNCTION "public"."end_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "s_status" "public"."task_status") OWNER TO "postgres"; - - -CREATE OR REPLACE FUNCTION "public"."propose_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "timeout" interval, "task_interval" interval) RETURNS interval - LANGUAGE "plpgsql" - AS $$ +ALTER FUNCTION public.end_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + s_status public.task_status +) OWNER TO "postgres"; + + +CREATE OR REPLACE FUNCTION public.propose_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + "timeout" interval, + "task_interval" interval +) RETURNS interval +LANGUAGE plpgsql +AS $$ DECLARE s_id INTEGER; DECLARE start_time TIMESTAMP WITH TIME ZONE; DECLARE t_status task_status; @@ -138,23 +157,62 @@ BEGIN END; $$; -ALTER FUNCTION "public"."propose_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "timeout" interval, "task_interval" interval) OWNER TO "postgres"; - 
-GRANT ALL ON TABLE "public"."sync_info" TO "anon"; -GRANT ALL ON TABLE "public"."sync_info" TO "authenticated"; -GRANT ALL ON TABLE "public"."sync_info" TO "service_role"; - -GRANT ALL ON SEQUENCE "public"."sync_info_id_seq" TO "anon"; -GRANT ALL ON SEQUENCE "public"."sync_info_id_seq" TO "authenticated"; -GRANT ALL ON SEQUENCE "public"."sync_info_id_seq" TO "service_role"; - -GRANT ALL ON FUNCTION "public"."end_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "s_status" "public"."task_status") TO "anon"; -GRANT ALL ON FUNCTION "public"."end_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "s_status" "public"."task_status") TO "authenticated"; -GRANT ALL ON FUNCTION "public"."end_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "s_status" "public"."task_status") TO "service_role"; - -GRANT ALL ON FUNCTION "public"."propose_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "timeout" interval, "task_interval" interval) TO "anon"; -GRANT ALL ON FUNCTION "public"."propose_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "timeout" interval, "task_interval" interval) TO "authenticated"; -GRANT ALL ON FUNCTION "public"."propose_sync_task"("s_target" bigint, "s_function" character varying, "s_worker" character varying, "timeout" interval, "task_interval" interval) TO "service_role"; +ALTER FUNCTION public.propose_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + "timeout" interval, + "task_interval" interval +) OWNER TO "postgres"; + +GRANT ALL ON TABLE public.sync_info TO "anon"; +GRANT ALL ON TABLE public.sync_info TO "authenticated"; +GRANT ALL ON TABLE public.sync_info TO "service_role"; + +GRANT ALL ON SEQUENCE public.sync_info_id_seq TO "anon"; +GRANT ALL ON SEQUENCE public.sync_info_id_seq TO "authenticated"; 
+GRANT ALL ON SEQUENCE public.sync_info_id_seq TO "service_role"; + +GRANT ALL ON FUNCTION public.end_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + s_status public.task_status +) TO "anon"; +GRANT ALL ON FUNCTION public.end_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + s_status public.task_status +) TO "authenticated"; +GRANT ALL ON FUNCTION public.end_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + s_status public.task_status +) TO "service_role"; + +GRANT ALL ON FUNCTION public.propose_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + "timeout" interval, + "task_interval" interval +) TO "anon"; +GRANT ALL ON FUNCTION public.propose_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + "timeout" interval, + "task_interval" interval +) TO "authenticated"; +GRANT ALL ON FUNCTION public.propose_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + "timeout" interval, + "task_interval" interval +) TO "service_role"; RESET ALL; From 06a5a2b055819c56e764b4d50c39fe412d8f4b86 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Sat, 17 May 2025 14:01:59 -0400 Subject: [PATCH 20/40] build step --- packages/database/package.json | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/database/package.json b/packages/database/package.json index 5f62f4fb4..c52efc6cd 100644 --- a/packages/database/package.json +++ b/packages/database/package.json @@ -11,11 +11,13 @@ "init": "supabase login", "dev": "supabase start", "stop": "supabase stop", + "build": "npm run lint && npm run gentypes:local && npm run dbdiff", "lint": "sqruff lint supabase/schemas", "lint:fix": "sqruff fix supabase/schemas", - "local-types": "supabase start && supabase gen types typescript --local --schema public > 
types.gen.ts", - "active-types": "supabase start && supabase gen types typescript --project-id \"$PRODUCTION_PROJECT_ID\" --schema public > types.gen.ts", - "new-migration": "supabase stop && supabase db diff -f ", + "gentypes:local": "supabase start && supabase gen types typescript --local --schema public > types.gen.ts", + "gentypes:production": "supabase start && supabase gen types typescript --project-id \"$PRODUCTION_PROJECT_ID\" --schema public > types.gen.ts", + "dbdiff": "supabase stop && supabase db diff", + "dbdiff:save": "supabase stop && supabase db diff -f ", "deploy": "supabase db push --project-id \"$PRODUCTION_PROJECT_ID\"" }, "devDependencies": { From 3ba9b11da6dea992d3a65154b13fef452a0419bc Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Sat, 17 May 2025 14:08:45 -0400 Subject: [PATCH 21/40] use combined step in README --- packages/database/README.md | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/packages/database/README.md b/packages/database/README.md index 20a9e2c86..416c24a4b 100644 --- a/packages/database/README.md +++ b/packages/database/README.md @@ -8,18 +8,19 @@ All CLI commands below should be run in this directory (`packages/database`.) 4. `supabase link`. It will ask you for a project name, use `discourse-graphs`. (Production for now.) It will also ask you for the database password (See 1password.) 5. Install [sqruff](https://github.com/quarylabs/sqruff) 2. Usage: - 1. Use `supabase start` before you use your local database. URLs will be given for your local supabase database, api endpoint, etc. + 1. Use `turbo dev`, (alias for `supabase start`) before you use your local database. URLs will be given for your local supabase database, api endpoint, etc. 2. You may need to `supabase db pull` if changes are deployed while you work. - 3. End you work session with `supabase end` to free docker resources. + 3. 
End you work session with `npm run stop` (alias for `supabase end`) to free docker resources. 3. Development: We follow the supabase [Declarative Database Schema](https://supabase.com/docs/guides/local-development/declarative-database-schemas) process. 1. Assuming you're working on a feature branch. - 2. `supabase stop` if it's running. - 3. Make changes to the schema, by editing files in `project/database/supabase/schemas` - 4. If you created a new schema file, make sure to add it to `[db.migrations] schema_paths` in `packages/database/supabase/config.toml`. Schema files are applied in that order, you may need to be strategic in placing your file. - 5. Check your logic with `sqruff lint supabase/schemas`, and eventually `sqruff fix supabase/schemas` - 6. Regenerate the types file with `supabase gen types typescript --local > types.gen.ts` - 7. `supabase db diff -f some_meaningful_migration_name` - 8. If applying the new schema fails, repeat steps 2 to 7 - 9. If all goes well, there should be a new file named `supbase/migration/2..._some_meaningful_migration_name.sql` which you should `git add`. - 10. You can start using your changes `supabase start` + 2. Make changes to the schema, by editing files in `project/database/supabase/schemas` + 3. If you created a new schema file, make sure to add it to `[db.migrations] schema_paths` in `packages/database/supabase/config.toml`. Schema files are applied in that order, you may need to be strategic in placing your file. + 4. `turbo build`, which will do the following: + 1. Check your logic with `sqruff lint supabase/schemas`, and eventually `sqruff fix supabase/schemas` + 2. Regenerate the types file with `supabase gen types typescript --local > types.gen.ts` + 3. See if there would be a migration to apply with `supabase db diff` + 5. If applying the new schema fails, repeat step 4 + 6. If you are satisfied with the migration, create a migration file with `npm run dbdiff:save some_meaningful_migration_name` + 1. 
If all goes well, there should be a new file named `supabase/migrations/2..._some_meaningful_migration_name.sql` which you should `git add`. + 10. You can start using your changes again with `turbo dev` 11. When your PR gets merged to main, deploy your changes to production with `supabase db push`. (URGENT TODO: make that a CI/CD step.) From 572c24da3b602cd6a3503fb28d733d69448f4bfc Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Sat, 17 May 2025 14:16:01 -0400 Subject: [PATCH 22/40] gh workflow wip --- .github/workflows/database-deploy.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .github/workflows/database-deploy.yaml diff --git a/.github/workflows/database-deploy.yaml b/.github/workflows/database-deploy.yaml new file mode 100644 index 000000000..9c1f2fa60 --- /dev/null +++ b/.github/workflows/database-deploy.yaml @@ -0,0 +1,22 @@ +name: Supabase deploy Function +on: + push: + branches: + - main + workflow_dispatch: +jobs: + deploy: + if: false + runs-on: ubuntu-latest + env: + SUPABASE_ACCESS_TOKEN: ${{ secrets.SUPABASE_ACCESS_TOKEN }} + PROJECT_ID: plzkmulvmuhdpcmnyrhm + steps: + - uses: actions/checkout@v4 + - uses: supabase/setup-cli@v1 + with: + version: latest + - run: | + cd projects/database + supabase functions deploy --project-ref ${{ secrets.PRODUCTION_PROJECT_ID }} + supabase push --project-ref ${{ secrets.PRODUCTION_PROJECT_ID }} From 1f1fea074c4cf398bfe25cdd1221a49f956d1b3f Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Sat, 17 May 2025 21:13:09 -0400 Subject: [PATCH 23/40] copy generated types where they are needed --- packages/database/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/database/package.json b/packages/database/package.json index c52efc6cd..b2e8cbe9f 100644 --- a/packages/database/package.json +++ b/packages/database/package.json @@ -11,7 +11,7 @@ "init": "supabase login", "dev": "supabase start", "stop": "supabase stop", - "build": "npm run lint 
&& npm run gentypes:local && npm run dbdiff", + "build": "npm run lint && npm run gentypes:local && cp ./types.gen.ts ../../apps/website/app/utils/supabase && npm run dbdiff", "lint": "sqruff lint supabase/schemas", "lint:fix": "sqruff fix supabase/schemas", "gentypes:local": "supabase start && supabase gen types typescript --local --schema public > types.gen.ts", From 497bdf5a2c1c9dc00590fe0662c74ded4105a0a2 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Mon, 19 May 2025 16:13:41 -0400 Subject: [PATCH 24/40] test database-deploy --- .github/workflows/database-deploy.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/database-deploy.yaml b/.github/workflows/database-deploy.yaml index 9c1f2fa60..3e6bbee10 100644 --- a/.github/workflows/database-deploy.yaml +++ b/.github/workflows/database-deploy.yaml @@ -6,7 +6,6 @@ on: workflow_dispatch: jobs: deploy: - if: false runs-on: ubuntu-latest env: SUPABASE_ACCESS_TOKEN: ${{ secrets.SUPABASE_ACCESS_TOKEN }} @@ -18,5 +17,5 @@ jobs: version: latest - run: | cd projects/database - supabase functions deploy --project-ref ${{ secrets.PRODUCTION_PROJECT_ID }} - supabase push --project-ref ${{ secrets.PRODUCTION_PROJECT_ID }} + supabase functions deploy --project-ref ${{ secrets.SUPABASE_PROJECT_ID_PROD }} + supabase push --project-ref ${{ secrets.SUPABASE_PROJECT_ID_PROD }} From 6438e50e55013963140892b7c8e6f1c362f89906 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Mon, 19 May 2025 16:22:23 -0400 Subject: [PATCH 25/40] test database-deploy before merge --- .github/workflows/database-deploy.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/database-deploy.yaml b/.github/workflows/database-deploy.yaml index 3e6bbee10..4fd32bb24 100644 --- a/.github/workflows/database-deploy.yaml +++ b/.github/workflows/database-deploy.yaml @@ -2,8 +2,7 @@ name: Supabase deploy Function on: push: branches: - - main - workflow_dispatch: + - 
feature/supabase jobs: deploy: runs-on: ubuntu-latest From db445e25b03620c657c4d7259ead425e79c166e8 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Mon, 19 May 2025 16:25:29 -0400 Subject: [PATCH 26/40] test database-deploy 2 --- .github/workflows/database-deploy.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/database-deploy.yaml b/.github/workflows/database-deploy.yaml index 4fd32bb24..5bb80787e 100644 --- a/.github/workflows/database-deploy.yaml +++ b/.github/workflows/database-deploy.yaml @@ -1,8 +1,11 @@ name: Supabase deploy Function on: + workflow_dispatch: push: branches: - feature/supabase + paths: + - "projects/database" jobs: deploy: runs-on: ubuntu-latest @@ -15,6 +18,5 @@ jobs: with: version: latest - run: | - cd projects/database supabase functions deploy --project-ref ${{ secrets.SUPABASE_PROJECT_ID_PROD }} supabase push --project-ref ${{ secrets.SUPABASE_PROJECT_ID_PROD }} From 664469de657a18354b1759fa2cdb395207976928 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Mon, 19 May 2025 17:00:23 -0400 Subject: [PATCH 27/40] repair workflow --- .github/workflows/database-deploy.yaml | 9 +++------ packages/database/package.json | 5 +++-- turbo.json | 5 ++++- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/database-deploy.yaml b/.github/workflows/database-deploy.yaml index 5bb80787e..f03721297 100644 --- a/.github/workflows/database-deploy.yaml +++ b/.github/workflows/database-deploy.yaml @@ -4,19 +4,16 @@ on: push: branches: - feature/supabase - paths: - - "projects/database" jobs: deploy: runs-on: ubuntu-latest env: SUPABASE_ACCESS_TOKEN: ${{ secrets.SUPABASE_ACCESS_TOKEN }} - PROJECT_ID: plzkmulvmuhdpcmnyrhm + SUPABASE_PROJECT_ID: ${{ secrets.SUPABASE_PROJECT_ID_PROD }} + SUPABASE_DB_PASSWORD: ${{ secrets.SUPABASE_DB_PASSWORD_PROD }} steps: - uses: actions/checkout@v4 - uses: supabase/setup-cli@v1 with: version: latest - - run: | - supabase functions deploy 
--project-ref ${{ secrets.SUPABASE_PROJECT_ID_PROD }} - supabase push --project-ref ${{ secrets.SUPABASE_PROJECT_ID_PROD }} + - run: turbo deploy -F @repo/database diff --git a/packages/database/package.json b/packages/database/package.json index b2e8cbe9f..f91951824 100644 --- a/packages/database/package.json +++ b/packages/database/package.json @@ -15,10 +15,11 @@ "lint": "sqruff lint supabase/schemas", "lint:fix": "sqruff fix supabase/schemas", "gentypes:local": "supabase start && supabase gen types typescript --local --schema public > types.gen.ts", - "gentypes:production": "supabase start && supabase gen types typescript --project-id \"$PRODUCTION_PROJECT_ID\" --schema public > types.gen.ts", + "gentypes:production": "supabase start && supabase gen types typescript --project-id \"$SUPABASE_PROJECT_ID\" --schema public > types.gen.ts", "dbdiff": "supabase stop && supabase db diff", "dbdiff:save": "supabase stop && supabase db diff -f ", - "deploy": "supabase db push --project-id \"$PRODUCTION_PROJECT_ID\"" + "deploy": "echo \"$SUPABASE_PROJECT_ID\" && supabase link --project-ref \"$SUPABASE_PROJECT_ID\" --password \"$SUPABASE_DB_PASSWORD\" && supabase db push", + "deploy:functions": "supabase functions deploy --project-ref \"$SUPABASE_PROJECT_ID\"" }, "devDependencies": { "supabase": "^2.22.12", diff --git a/turbo.json b/turbo.json index 45f57fbdd..c52dabc35 100644 --- a/turbo.json +++ b/turbo.json @@ -32,7 +32,10 @@ "passThroughEnv": [ "BLOB_READ_WRITE_TOKEN", "GITHUB_REF_NAME", - "GITHUB_HEAD_REF" + "GITHUB_HEAD_REF", + "SUPABASE_PROJECT_ID", + "SUPABASE_DB_PASSWORD", + "SUPABASE_ACCESS_TOKEN" ] }, "publish": { From e0c21889644b2570e59e57e7cd6436c98b62aad9 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Mon, 19 May 2025 20:08:13 -0400 Subject: [PATCH 28/40] repair workflow 2 --- .github/workflows/database-deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/database-deploy.yaml 
b/.github/workflows/database-deploy.yaml index f03721297..8008e9d7b 100644 --- a/.github/workflows/database-deploy.yaml +++ b/.github/workflows/database-deploy.yaml @@ -16,4 +16,4 @@ jobs: - uses: supabase/setup-cli@v1 with: version: latest - - run: turbo deploy -F @repo/database + - run: npx turbo deploy -F @repo/database From 6a1ad6b46d3b0fa3a2473e59fff7bac013d601dc Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Mon, 19 May 2025 20:10:15 -0400 Subject: [PATCH 29/40] workflow works, now applies to main --- .github/workflows/database-deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/database-deploy.yaml b/.github/workflows/database-deploy.yaml index 8008e9d7b..7e6a994cc 100644 --- a/.github/workflows/database-deploy.yaml +++ b/.github/workflows/database-deploy.yaml @@ -3,7 +3,7 @@ on: workflow_dispatch: push: branches: - - feature/supabase + - main jobs: deploy: runs-on: ubuntu-latest From 139b3d91c9e399ac2eeab115a4acfe36b9a1cc9a Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Mon, 19 May 2025 23:34:08 -0400 Subject: [PATCH 30/40] resolve some coderabbit issues --- .github/workflows/database-deploy.yaml | 4 ++++ packages/database/README.md | 2 +- packages/database/schema.puml | 6 +----- packages/database/supabase/schemas/agent.sql | 3 +++ packages/database/supabase/schemas/base.sql | 2 +- packages/database/supabase/schemas/concept.sql | 3 --- packages/database/supabase/schemas/extensions.sql | 4 ++++ 7 files changed, 14 insertions(+), 10 deletions(-) diff --git a/.github/workflows/database-deploy.yaml b/.github/workflows/database-deploy.yaml index 7e6a994cc..e9f7faa2b 100644 --- a/.github/workflows/database-deploy.yaml +++ b/.github/workflows/database-deploy.yaml @@ -13,6 +13,10 @@ jobs: SUPABASE_DB_PASSWORD: ${{ secrets.SUPABASE_DB_PASSWORD_PROD }} steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v3 + with: + node-version: "20" + - run: npm ci - uses: supabase/setup-cli@v1 with: 
version: latest diff --git a/packages/database/README.md b/packages/database/README.md index 416c24a4b..0b5b489e0 100644 --- a/packages/database/README.md +++ b/packages/database/README.md @@ -13,7 +13,7 @@ All CLI commands below should be run in this directory (`packages/database`.) 3. End you work session with `npm run stop` (alias for `supabase end`) to free docker resources. 3. Development: We follow the supabase [Declarative Database Schema](https://supabase.com/docs/guides/local-development/declarative-database-schemas) process. 1. Assuming you're working on a feature branch. - 2. Make changes to the schema, by editing files in `project/database/supabase/schemas` + 2. Make changes to the schema, by editing files in `packages/database/supabase/schemas` 3. If you created a new schema file, make sure to add it to `[db.migrations] schema_paths` in `packages/database/supabase/config.toml`. Schema files are applied in that order, you may need to be strategic in placing your file. 4. `turbo build`, which will do the following: 1. 
Check your logic with `sqruff lint supabase/schemas`, and eventually `sqruff fix supabase/schemas` diff --git a/packages/database/schema.puml b/packages/database/schema.puml index 9a7d0c610..2d84ca665 100644 --- a/packages/database/schema.puml +++ b/packages/database/schema.puml @@ -21,7 +21,6 @@ class "Document" [[{None}]] { {field} id : integer {field} source_local_id : string {field} url : string - {field} last_synced : datetime {field} created : datetime {field} metadata : JSON {field} last_modified : datetime @@ -39,7 +38,6 @@ class "Content" [[{A unit of content}]] { {field} metadata : JSON {field} scale : Scale {field} last_modified : datetime - {field} last_synced : datetime } "Document" --> "0..*" "Agent" : "contributors" "Document" --> "1" "Agent" : "author" @@ -57,7 +55,6 @@ class "Concept" [[{An abstract concept, claim or relation}]] { {field} description : string {field} created : datetime {field} last_modified : datetime - {field} last_synced : datetime {field} arity : integer {field} content : JSON {field} is_schema : boolean @@ -72,11 +69,11 @@ class "ContentEmbedding" [[{None}]] { {field} obsolete : boolean } "ContentEmbedding" --> "1" "Content" : "target" -"Content" --> "0..1" "Concept" : "represents" "Content" --> "0..1" "Content" : "part_of" "Content" --> "0..*" "Agent" : "contributors" "Content" --> "1" "Agent" : "creator" "Content" --> "1" "Agent" : "author" +"Concept" --> "0..1" "Content" : "represented_by" class "ConceptSchema" [[{None}]] { {field} id(i) : integer {field} epistemic_status(i) : EpistemicStatus @@ -84,7 +81,6 @@ class "ConceptSchema" [[{None}]] { {field} description(i) : string {field} created(i) : datetime {field} last_modified(i) : datetime - {field} last_synced(i) : datetime {field} arity(i) : integer {field} content(i) : JSON {field} is_schema(i) : boolean diff --git a/packages/database/supabase/schemas/agent.sql b/packages/database/supabase/schemas/agent.sql index 465900ac8..ecbf3f511 100644 --- 
a/packages/database/supabase/schemas/agent.sql +++ b/packages/database/supabase/schemas/agent.sql @@ -41,6 +41,9 @@ CREATE TABLE IF NOT EXISTS public."Person" ( email character varying NOT NULL ); +ALTER TABLE ONLY public."Person" +ADD CONSTRAINT "Person_pkey" PRIMARY KEY (id); + ALTER TABLE ONLY public."Person" ADD CONSTRAINT person_id_fkey FOREIGN KEY ( id diff --git a/packages/database/supabase/schemas/base.sql b/packages/database/supabase/schemas/base.sql index e66946bdd..64947f9e2 100644 --- a/packages/database/supabase/schemas/base.sql +++ b/packages/database/supabase/schemas/base.sql @@ -44,7 +44,7 @@ NO MINVALUE NO MAXVALUE CACHE 1; -ALTER TABLE public.entity_id_seq OWNER TO "postgres"; +ALTER SEQUENCE public.entity_id_seq OWNER TO "postgres"; ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON SEQUENCES TO postgres; diff --git a/packages/database/supabase/schemas/concept.sql b/packages/database/supabase/schemas/concept.sql index 8ec68744d..425208b65 100644 --- a/packages/database/supabase/schemas/concept.sql +++ b/packages/database/supabase/schemas/concept.sql @@ -52,9 +52,6 @@ ADD FOREIGN KEY (represented_by_id) REFERENCES public."Content" ( id ) ON DELETE SET NULL ON UPDATE CASCADE; -ALTER TABLE ONLY public."Person" -ADD CONSTRAINT "Person_pkey" PRIMARY KEY (id); - CREATE INDEX "Concept_content" ON public."Concept" USING gin ( content jsonb_path_ops ); diff --git a/packages/database/supabase/schemas/extensions.sql b/packages/database/supabase/schemas/extensions.sql index 60450e1e2..83ccd84bb 100644 --- a/packages/database/supabase/schemas/extensions.sql +++ b/packages/database/supabase/schemas/extensions.sql @@ -1,3 +1,7 @@ +CREATE SCHEMA IF NOT EXISTS extensions; +CREATE SCHEMA IF NOT EXISTS graphql; +CREATE SCHEMA IF NOT EXISTS vault; + CREATE EXTENSION IF NOT EXISTS pg_cron WITH SCHEMA pg_catalog; CREATE EXTENSION IF NOT EXISTS pgroonga WITH SCHEMA extensions; CREATE EXTENSION IF NOT EXISTS pg_graphql WITH SCHEMA graphql; From 
608f7efc632a057e6c3aff3fcd1c80f78eabcc4e Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Tue, 20 May 2025 10:01:53 -0400 Subject: [PATCH 31/40] db pull; correct a bug pointed to by coderabbit; comply to supabase hallucinated migration. --- ...0520132747_restrict_search_by_document.sql | 48 +++++++++++++++++++ .../20250520133551_nodes_needing_sync.sql | 38 +++++++++++++++ .../database/supabase/schemas/embedding.sql | 1 + packages/database/supabase/schemas/sync.sql | 42 ++++++++++++++++ 4 files changed, 129 insertions(+) create mode 100644 packages/database/supabase/migrations/20250520132747_restrict_search_by_document.sql create mode 100644 packages/database/supabase/migrations/20250520133551_nodes_needing_sync.sql diff --git a/packages/database/supabase/migrations/20250520132747_restrict_search_by_document.sql b/packages/database/supabase/migrations/20250520132747_restrict_search_by_document.sql new file mode 100644 index 000000000..1e9b32a95 --- /dev/null +++ b/packages/database/supabase/migrations/20250520132747_restrict_search_by_document.sql @@ -0,0 +1,48 @@ +CREATE OR REPLACE FUNCTION public.match_content_embeddings(query_embedding vector, match_threshold double precision, match_count integer, current_document_id integer DEFAULT NULL::integer) + RETURNS TABLE(content_id bigint, roam_uid text, text_content text, similarity double precision) + LANGUAGE sql + STABLE +AS $function$ +SELECT + c.id AS content_id, + c.source_local_id AS roam_uid, + c.text AS text_content, + 1 - (ce.vector <=> query_embedding) AS similarity +FROM public."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce +JOIN public."Content" AS c ON ce.target_id = c.id +WHERE 1 - (ce.vector <=> query_embedding) > match_threshold + AND ce.obsolete = FALSE + AND (current_document_id IS NULL OR c.document_id = current_document_id) +ORDER BY + ce.vector <=> query_embedding ASC +LIMIT match_count; +$function$; + +-- Supabase wants to replace this function for no obvious reason. 
Letting it. + +CREATE OR REPLACE FUNCTION public.match_embeddings_for_subset_nodes(p_query_embedding vector, p_subset_roam_uids text[]) + RETURNS TABLE(content_id bigint, roam_uid text, text_content text, similarity double precision) + LANGUAGE sql + STABLE +AS $function$ +WITH subset_content_with_embeddings AS ( + -- Step 1: Identify content and fetch embeddings ONLY for the nodes in the provided Roam UID subset + SELECT + c.id AS content_id, + c.source_local_id AS roam_uid, + c.text AS text_content, + ce.vector AS embedding_vector + FROM public."Content" AS c + JOIN public."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce ON c.id = ce.target_id + WHERE + c.source_local_id = ANY(p_subset_roam_uids) -- Filter Content by the provided Roam UIDs + AND ce.obsolete = FALSE +) +SELECT + ss_ce.content_id, + ss_ce.roam_uid, + ss_ce.text_content, + 1 - (ss_ce.embedding_vector <=> p_query_embedding) AS similarity +FROM subset_content_with_embeddings AS ss_ce +ORDER BY similarity DESC; -- Order by calculated similarity, highest first +$function$; diff --git a/packages/database/supabase/migrations/20250520133551_nodes_needing_sync.sql b/packages/database/supabase/migrations/20250520133551_nodes_needing_sync.sql new file mode 100644 index 000000000..ec23fd887 --- /dev/null +++ b/packages/database/supabase/migrations/20250520133551_nodes_needing_sync.sql @@ -0,0 +1,38 @@ +CREATE OR REPLACE FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb) + RETURNS TABLE(uid_to_sync text) + LANGUAGE plpgsql +AS $function$ + DECLARE + node_info jsonb; + roam_node_uid TEXT; + roam_node_edit_epoch_ms BIGINT; + content_db_last_modified_epoch_ms BIGINT; + BEGIN + FOR node_info IN SELECT * FROM jsonb_array_elements(nodes_from_roam) + LOOP + roam_node_uid := (node_info->>'uid')::text; + roam_node_edit_epoch_ms := (node_info->>'roam_edit_time')::bigint; + + -- Get the last_modified time from your Content table for the current node, converting it to epoch milliseconds + -- Assumes 
your 'last_modified' column in 'Content' is a timestamp type + SELECT EXTRACT(EPOCH FROM c.last_modified) * 1000 + INTO content_db_last_modified_epoch_ms + FROM public."Content" c -- Ensure "Content" matches your table name exactly (case-sensitive if quoted) + WHERE c.source_local_id = roam_node_uid; + + IF NOT FOUND THEN + -- Node does not exist in Supabase Content table, so it needs sync + uid_to_sync := roam_node_uid; + RETURN NEXT; + ELSE + -- Node exists, compare timestamps + IF roam_node_edit_epoch_ms > content_db_last_modified_epoch_ms THEN + uid_to_sync := roam_node_uid; + RETURN NEXT; + END IF; + END IF; + END LOOP; + RETURN; + END; + $function$ +; diff --git a/packages/database/supabase/schemas/embedding.sql b/packages/database/supabase/schemas/embedding.sql index bd5b4451d..dfc15e626 100644 --- a/packages/database/supabase/schemas/embedding.sql +++ b/packages/database/supabase/schemas/embedding.sql @@ -51,6 +51,7 @@ FROM public."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce JOIN public."Content" AS c ON ce.target_id = c.id WHERE 1 - (ce.vector <=> query_embedding) > match_threshold AND ce.obsolete = FALSE + AND (current_document_id IS NULL OR c.document_id = current_document_id) ORDER BY ce.vector <=> query_embedding ASC LIMIT match_count; diff --git a/packages/database/supabase/schemas/sync.sql b/packages/database/supabase/schemas/sync.sql index 7dbf96c58..edebe7a06 100644 --- a/packages/database/supabase/schemas/sync.sql +++ b/packages/database/supabase/schemas/sync.sql @@ -165,6 +165,45 @@ ALTER FUNCTION public.propose_sync_task( "task_interval" interval ) OWNER TO "postgres"; +CREATE OR REPLACE FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb) +RETURNS TABLE (uid_to_sync text) +LANGUAGE plpgsql +AS $function$ + DECLARE + node_info jsonb; + roam_node_uid TEXT; + roam_node_edit_epoch_ms BIGINT; + content_db_last_modified_epoch_ms BIGINT; + BEGIN + FOR node_info IN SELECT * FROM jsonb_array_elements(nodes_from_roam) + LOOP + 
roam_node_uid := (node_info->>'uid')::text; + roam_node_edit_epoch_ms := (node_info->>'roam_edit_time')::bigint; + + -- Get the last_modified time from your Content table for the current node, converting it to epoch milliseconds + -- Assumes your 'last_modified' column in 'Content' is a timestamp type + SELECT EXTRACT(EPOCH FROM c.last_modified) * 1000 + INTO content_db_last_modified_epoch_ms + FROM public."Content" c -- Ensure "Content" matches your table name exactly (case-sensitive if quoted) + WHERE c.source_local_id = roam_node_uid; + + IF NOT FOUND THEN + -- Node does not exist in Supabase Content table, so it needs sync + uid_to_sync := roam_node_uid; + RETURN NEXT; + ELSE + -- Node exists, compare timestamps + IF roam_node_edit_epoch_ms > content_db_last_modified_epoch_ms THEN + uid_to_sync := roam_node_uid; + RETURN NEXT; + END IF; + END IF; + END LOOP; + RETURN; + END; + $function$ +; + GRANT ALL ON TABLE public.sync_info TO "anon"; GRANT ALL ON TABLE public.sync_info TO "authenticated"; GRANT ALL ON TABLE public.sync_info TO "service_role"; @@ -214,5 +253,8 @@ GRANT ALL ON FUNCTION public.propose_sync_task( "task_interval" interval ) TO "service_role"; +GRANT ALL ON FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb) TO "anon"; +GRANT ALL ON FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb) TO "authenticated"; +GRANT ALL ON FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb) TO "service_role"; RESET ALL; From 4718d5c18abcdc9e6a55b32fafb037f3386442d9 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Tue, 20 May 2025 10:07:00 -0400 Subject: [PATCH 32/40] Forgot AutomatedAgent name --- packages/database/schema.puml | 1 + packages/database/schema.svg | 2 +- packages/database/schema.yaml | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/database/schema.puml b/packages/database/schema.puml index 2d84ca665..cf284d078 100644 --- a/packages/database/schema.puml +++ b/packages/database/schema.puml @@ 
-98,6 +98,7 @@ class "Person" [[{A person using the system}]] { } class "AutomatedAgent" [[{An automated agent}]] { {field} metadata : JSON + {field} name : string {field} deterministic : boolean {field} version : string {field} id(i) : integer diff --git a/packages/database/schema.svg b/packages/database/schema.svg index 4d9b1aefb..70740d73f 100644 --- a/packages/database/schema.svg +++ b/packages/database/schema.svg @@ -1 +1 @@ -SpaceAccesseditor : booleanAccountid : integerwrite_permission : booleanactive : booleanDiscourseSpaceid : integerurl : stringname : stringDocumentid : integersource_local_id : stringurl : stringlast_synced : datetimecreated : datetimemetadata : JSONlast_modified : datetimecontents : blobAgentid : integertype : EntityTypeContentid : integersource_local_id : stringcreated : datetimetext : stringmetadata : JSONscale : Scalelast_modified : datetimelast_synced : datetimeDiscoursePlatformid : integername : stringurl : stringConceptid : integerepistemic_status : EpistemicStatusname : stringdescription : stringcreated : datetimelast_modified : datetimelast_synced : datetimearity : integercontent : JSONis_schema : booleanContentEmbeddingmodel : EmbeddingNamevector : vectorobsolete : booleanConceptSchemaid(i) : integerepistemic_status(i) : EpistemicStatusname(i) : stringdescription(i) : stringcreated(i) : datetimelast_modified(i) : datetimelast_synced(i) : datetimearity(i) : integercontent(i) : JSONis_schema(i) : booleanPersonname : stringorcid : stringemail : stringid(i) : integertype(i) : EntityTypeAutomatedAgentmetadata : JSONdeterministic : booleanversion : stringid(i) : integertype(i) : EntityTypeaccount1space0..1contributors0..*author1space0..1document1discourse_platform1space0..1space0..1platform1target1represents0..1part_of0..1contributors0..*creator1author1schema1contributors0..*author1person1 \ No newline at end of file +SpaceAccesseditor : booleanAccountid : integerwrite_permission : booleanactive : booleanDiscourseSpaceid : integerurl 
: stringname : stringDocumentid : integersource_local_id : stringurl : stringcreated : datetimemetadata : JSONlast_modified : datetimecontents : blobAgentid : integertype : EntityTypeContentid : integersource_local_id : stringcreated : datetimetext : stringmetadata : JSONscale : Scalelast_modified : datetimeDiscoursePlatformid : integername : stringurl : stringConceptid : integerepistemic_status : EpistemicStatusname : stringdescription : stringcreated : datetimelast_modified : datetimearity : integercontent : JSONis_schema : booleanContentEmbeddingmodel : EmbeddingNamevector : vectorobsolete : booleanConceptSchemaid(i) : integerepistemic_status(i) : EpistemicStatusname(i) : stringdescription(i) : stringcreated(i) : datetimelast_modified(i) : datetimearity(i) : integercontent(i) : JSONis_schema(i) : booleanPersonname : stringorcid : stringemail : stringid(i) : integertype(i) : EntityTypeAutomatedAgentmetadata : JSONname : stringdeterministic : booleanversion : stringid(i) : integertype(i) : EntityTypeaccount1space0..1contributors0..*author1space0..1document1discourse_platform1space0..1space0..1platform1target1part_of0..1contributors0..*creator1author1represented_by0..1schema1contributors0..*author1person1 \ No newline at end of file diff --git a/packages/database/schema.yaml b/packages/database/schema.yaml index 1efec9b0a..ed5cd3bc5 100644 --- a/packages/database/schema.yaml +++ b/packages/database/schema.yaml @@ -125,6 +125,7 @@ classes: is_a: Agent slots: - metadata + - name attributes: deterministic: range: boolean From bbc709958958b34e033924a3f6107190fbd5d037 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Tue, 20 May 2025 11:49:15 -0400 Subject: [PATCH 33/40] clean redirect urls --- packages/database/supabase/config.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/database/supabase/config.toml b/packages/database/supabase/config.toml index c443a628a..ca2dc47aa 100644 --- a/packages/database/supabase/config.toml +++ 
b/packages/database/supabase/config.toml @@ -120,10 +120,8 @@ enabled = true site_url = "https://discourse-graph-discourse-graphs.vercel.app/" # A list of *exact* URLs that auth providers are permitted to redirect to post authentication. additional_redirect_urls = [ - "https://discourse-graph-discourse-graphs.vercel.app/", "https://discourse-graph-discourse-graphs.vercel.app/**", - "https://discourse-*-graph-discourse-graphs.vercel.app", - "https://discourse-*-graph-discourse-graphs.vercel.app/**", + "https://discourse-graph-*-discourse-graphs.vercel.app/**", ] # How long tokens are valid for, in seconds. Defaults to 3600 (1 hour), maximum 604,800 (1 week). jwt_expiry = 3600 From 3ea3b170f9d31b3ffaf20c629b9311dfd1799b01 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Thu, 22 May 2025 15:59:01 -0400 Subject: [PATCH 34/40] Rename DiscoursePlatform and DiscourseSpace without Discourse --- packages/database/schema.puml | 46 +++++++++---------- packages/database/schema.svg | 2 +- packages/database/schema.yaml | 16 +++---- .../20250522193823_rename_discourse_space.sql | 11 +++++ .../database/supabase/schemas/account.sql | 6 +-- .../database/supabase/schemas/concept.sql | 2 +- .../database/supabase/schemas/content.sql | 4 +- packages/database/supabase/schemas/space.sql | 44 +++++++++--------- 8 files changed, 71 insertions(+), 60 deletions(-) create mode 100644 packages/database/supabase/migrations/20250522193823_rename_discourse_space.sql diff --git a/packages/database/schema.puml b/packages/database/schema.puml index cf284d078..914fc6e49 100644 --- a/packages/database/schema.puml +++ b/packages/database/schema.puml @@ -5,30 +5,22 @@ hide empty members class "SpaceAccess" [[{An access control entry for a space}]] { {field} editor : boolean } -class "Account" [[{A user account on a discourse platform}]] { +class "Account" [[{A user account on a platform}]] { {field} id : integer {field} write_permission : boolean {field} active : boolean } -class "DiscourseSpace" 
[[{A space on a discourse platform representing a community engaged in a conversation}]] { +class "Space" [[{A space on a platform representing a community engaged in a conversation}]] { {field} id : integer {field} url : string {field} name : string } "SpaceAccess" --> "1" "Account" : "account" -"SpaceAccess" --> "0..1" "DiscourseSpace" : "space" -class "Document" [[{None}]] { +"SpaceAccess" --> "0..1" "Space" : "space" +class "Platform" [[{A data platform where discourse happens}]] { {field} id : integer - {field} source_local_id : string + {field} name : string {field} url : string - {field} created : datetime - {field} metadata : JSON - {field} last_modified : datetime - {field} contents : blob -} -abstract "Agent" [[{An agent that acts in the system}]] { - {field} id : integer - {field} type : EntityType } class "Content" [[{A unit of content}]] { {field} id : integer @@ -39,14 +31,14 @@ class "Content" [[{A unit of content}]] { {field} scale : Scale {field} last_modified : datetime } -"Document" --> "0..*" "Agent" : "contributors" -"Document" --> "1" "Agent" : "author" -"Document" --> "0..1" "DiscourseSpace" : "space" -"Content" --> "1" "Document" : "document" -class "DiscoursePlatform" [[{A data platform where discourse happens}]] { +class "Document" [[{None}]] { {field} id : integer - {field} name : string + {field} source_local_id : string {field} url : string + {field} created : datetime + {field} metadata : JSON + {field} last_modified : datetime + {field} contents : blob } class "Concept" [[{An abstract concept, claim or relation}]] { {field} id : integer @@ -59,10 +51,18 @@ class "Concept" [[{An abstract concept, claim or relation}]] { {field} content : JSON {field} is_schema : boolean } -"DiscourseSpace" --> "1" "DiscoursePlatform" : "discourse_platform" -"Content" --> "0..1" "DiscourseSpace" : "space" -"Concept" --> "0..1" "DiscourseSpace" : "space" -"Account" --> "1" "DiscoursePlatform" : "platform" +"Space" --> "1" "Platform" : "platform" 
+"Content" --> "0..1" "Space" : "space" +"Document" --> "0..1" "Space" : "space" +"Concept" --> "0..1" "Space" : "space" +"Account" --> "1" "Platform" : "platform" +abstract "Agent" [[{An agent that acts in the system}]] { + {field} id : integer + {field} type : EntityType +} +"Document" --> "0..*" "Agent" : "contributors" +"Document" --> "1" "Agent" : "author" +"Content" --> "1" "Document" : "document" class "ContentEmbedding" [[{None}]] { {field} model : EmbeddingName {field} vector : vector diff --git a/packages/database/schema.svg b/packages/database/schema.svg index 70740d73f..ab1d233e5 100644 --- a/packages/database/schema.svg +++ b/packages/database/schema.svg @@ -1 +1 @@ -SpaceAccesseditor : booleanAccountid : integerwrite_permission : booleanactive : booleanDiscourseSpaceid : integerurl : stringname : stringDocumentid : integersource_local_id : stringurl : stringcreated : datetimemetadata : JSONlast_modified : datetimecontents : blobAgentid : integertype : EntityTypeContentid : integersource_local_id : stringcreated : datetimetext : stringmetadata : JSONscale : Scalelast_modified : datetimeDiscoursePlatformid : integername : stringurl : stringConceptid : integerepistemic_status : EpistemicStatusname : stringdescription : stringcreated : datetimelast_modified : datetimearity : integercontent : JSONis_schema : booleanContentEmbeddingmodel : EmbeddingNamevector : vectorobsolete : booleanConceptSchemaid(i) : integerepistemic_status(i) : EpistemicStatusname(i) : stringdescription(i) : stringcreated(i) : datetimelast_modified(i) : datetimearity(i) : integercontent(i) : JSONis_schema(i) : booleanPersonname : stringorcid : stringemail : stringid(i) : integertype(i) : EntityTypeAutomatedAgentmetadata : JSONname : stringdeterministic : booleanversion : stringid(i) : integertype(i) : 
EntityTypeaccount1space0..1contributors0..*author1space0..1document1discourse_platform1space0..1space0..1platform1target1part_of0..1contributors0..*creator1author1represented_by0..1schema1contributors0..*author1person1 \ No newline at end of file +SpaceAccesseditor : booleanAccountid : integerwrite_permission : booleanactive : booleanSpaceid : integerurl : stringname : stringPlatformid : integername : stringurl : stringContentid : integersource_local_id : stringcreated : datetimetext : stringmetadata : JSONscale : Scalelast_modified : datetimeDocumentid : integersource_local_id : stringurl : stringcreated : datetimemetadata : JSONlast_modified : datetimecontents : blobConceptid : integerepistemic_status : EpistemicStatusname : stringdescription : stringcreated : datetimelast_modified : datetimearity : integercontent : JSONis_schema : booleanAgentid : integertype : EntityTypeContentEmbeddingmodel : EmbeddingNamevector : vectorobsolete : booleanConceptSchemaid(i) : integerepistemic_status(i) : EpistemicStatusname(i) : stringdescription(i) : stringcreated(i) : datetimelast_modified(i) : datetimearity(i) : integercontent(i) : JSONis_schema(i) : booleanPersonname : stringorcid : stringemail : stringid(i) : integertype(i) : EntityTypeAutomatedAgentmetadata : JSONname : stringdeterministic : booleanversion : stringid(i) : integertype(i) : EntityTypeaccount1space0..1platform1space0..1space0..1space0..1platform1contributors0..*author1document1target1part_of0..1contributors0..*creator1author1represented_by0..1schema1contributors0..*author1person1 \ No newline at end of file diff --git a/packages/database/schema.yaml b/packages/database/schema.yaml index ed5cd3bc5..363f9ff8b 100644 --- a/packages/database/schema.yaml +++ b/packages/database/schema.yaml @@ -133,7 +133,7 @@ classes: version: range: string - DiscoursePlatform: + Platform: description: A data platform where discourse happens slots: - id @@ -142,7 +142,7 @@ classes: url: required: true Account: - description: A 
user account on a discourse platform + description: A user account on a platform slots: - id - platform @@ -157,15 +157,15 @@ classes: range: boolean required: true ifabsent: true - DiscourseSpace: - description: A space on a discourse platform representing a community engaged in a conversation + Space: + description: A space on a platform representing a community engaged in a conversation slots: - id - url - name attributes: - discourse_platform: - range: DiscoursePlatform + platform: + range: Platform required: true SpaceAccess: description: An access control entry for a space @@ -380,7 +380,7 @@ slots: url: range: string platform: - range: DiscoursePlatform + range: Platform required: true issn: abstract: @@ -406,7 +406,7 @@ slots: required: true ifabsent: EpistemicStatus(unknown) space: - range: DiscourseSpace + range: Space description: The space in which the content is located document: range: Document diff --git a/packages/database/supabase/migrations/20250522193823_rename_discourse_space.sql b/packages/database/supabase/migrations/20250522193823_rename_discourse_space.sql new file mode 100644 index 000000000..dcc2e7851 --- /dev/null +++ b/packages/database/supabase/migrations/20250522193823_rename_discourse_space.sql @@ -0,0 +1,11 @@ +ALTER TABLE public."DiscoursePlatform" RENAME TO "Platform"; +ALTER TABLE public."Platform" RENAME CONSTRAINT "DiscoursePlatform_pkey" TO "Platform_pkey"; + +ALTER TABLE public."DiscourseSpace" RENAME TO "Space"; +ALTER TABLE public."Space" RENAME CONSTRAINT "DiscourseSpace_pkey" TO "Space_pkey"; +ALTER TABLE public."Space" RENAME COLUMN discourse_platform_id TO platform_id; +ALTER TABLE PUBLIC."Space" RENAME CONSTRAINT "DiscourseSpace_discourse_platform_id_fkey" TO "Space_platform_id_fkey"; + +COMMENT ON TABLE public."Space" IS +'A space on a platform representing a community engaged in a conversation'; +COMMENT ON TABLE public."Account" IS 'A user account on a platform'; diff --git 
a/packages/database/supabase/schemas/account.sql b/packages/database/supabase/schemas/account.sql index 5059d12cb..9914188d9 100644 --- a/packages/database/supabase/schemas/account.sql +++ b/packages/database/supabase/schemas/account.sql @@ -10,7 +10,7 @@ CREATE TABLE IF NOT EXISTS public."Account" ( ALTER TABLE public."Account" OWNER TO "postgres"; -COMMENT ON TABLE public."Account" IS 'A user account on a discourse platform'; +COMMENT ON TABLE public."Account" IS 'A user account on a platform'; ALTER TABLE ONLY public."Account" @@ -21,7 +21,7 @@ ADD CONSTRAINT "Account_person_id_fkey" FOREIGN KEY ( ALTER TABLE ONLY public."Account" ADD CONSTRAINT "Account_platform_id_fkey" FOREIGN KEY ( platform_id -) REFERENCES public."DiscoursePlatform" ( +) REFERENCES public."Platform" ( id ) ON UPDATE CASCADE ON DELETE CASCADE; @@ -62,7 +62,7 @@ ADD CONSTRAINT "SpaceAccess_account_id_fkey" FOREIGN KEY ( ALTER TABLE ONLY public."SpaceAccess" ADD CONSTRAINT "SpaceAccess_space_id_fkey" FOREIGN KEY ( space_id -) REFERENCES public."DiscourseSpace" ( +) REFERENCES public."Space" ( id ) ON UPDATE CASCADE ON DELETE CASCADE; diff --git a/packages/database/supabase/schemas/concept.sql b/packages/database/supabase/schemas/concept.sql index 425208b65..96afdfd82 100644 --- a/packages/database/supabase/schemas/concept.sql +++ b/packages/database/supabase/schemas/concept.sql @@ -78,7 +78,7 @@ ADD CONSTRAINT "Concept_schema_id_fkey" FOREIGN KEY ( ALTER TABLE ONLY public."Concept" ADD CONSTRAINT "Concept_space_id_fkey" FOREIGN KEY ( space_id -) REFERENCES public."DiscourseSpace" ( +) REFERENCES public."Space" ( id ) ON UPDATE CASCADE ON DELETE CASCADE; diff --git a/packages/database/supabase/schemas/content.sql b/packages/database/supabase/schemas/content.sql index d9bced1b6..13478f5f8 100644 --- a/packages/database/supabase/schemas/content.sql +++ b/packages/database/supabase/schemas/content.sql @@ -38,7 +38,7 @@ ADD CONSTRAINT "Document_author_id_fkey" FOREIGN KEY ( ALTER TABLE ONLY 
public."Document" ADD CONSTRAINT "Document_space_id_fkey" FOREIGN KEY ( space_id -) REFERENCES public."DiscourseSpace" ( +) REFERENCES public."Space" ( id ) ON UPDATE CASCADE ON DELETE CASCADE; @@ -100,7 +100,7 @@ ADD CONSTRAINT "Content_part_of_id_fkey" FOREIGN KEY ( ALTER TABLE ONLY public."Content" ADD CONSTRAINT "Content_space_id_fkey" FOREIGN KEY ( space_id -) REFERENCES public."DiscourseSpace" ( +) REFERENCES public."Space" ( id ) ON UPDATE CASCADE ON DELETE CASCADE; diff --git a/packages/database/supabase/schemas/space.sql b/packages/database/supabase/schemas/space.sql index 72f228a4e..122cdd023 100644 --- a/packages/database/supabase/schemas/space.sql +++ b/packages/database/supabase/schemas/space.sql @@ -1,4 +1,4 @@ -CREATE TABLE IF NOT EXISTS public."DiscoursePlatform" ( +CREATE TABLE IF NOT EXISTS public."Platform" ( id bigint DEFAULT nextval( 'public."entity_id_seq"'::regclass ) NOT NULL, @@ -6,42 +6,42 @@ CREATE TABLE IF NOT EXISTS public."DiscoursePlatform" ( url character varying NOT NULL ); -ALTER TABLE public."DiscoursePlatform" OWNER TO "postgres"; +ALTER TABLE public."Platform" OWNER TO "postgres"; -COMMENT ON TABLE public."DiscoursePlatform" IS +COMMENT ON TABLE public."Platform" IS 'A data platform where discourse happens'; -CREATE TABLE IF NOT EXISTS public."DiscourseSpace" ( +CREATE TABLE IF NOT EXISTS public."Space" ( id bigint DEFAULT nextval( 'public."entity_id_seq"'::regclass ) NOT NULL, url character varying, name character varying NOT NULL, - discourse_platform_id bigint NOT NULL + platform_id bigint NOT NULL ); -ALTER TABLE public."DiscourseSpace" OWNER TO "postgres"; +ALTER TABLE public."Space" OWNER TO "postgres"; -COMMENT ON TABLE public."DiscourseSpace" IS -'A space on a discourse platform representing a community engaged in a conversation'; +COMMENT ON TABLE public."Space" IS +'A space on a platform representing a community engaged in a conversation'; -ALTER TABLE ONLY public."DiscoursePlatform" -ADD CONSTRAINT 
"DiscoursePlatform_pkey" PRIMARY KEY (id); +ALTER TABLE ONLY public."Platform" +ADD CONSTRAINT "Platform_pkey" PRIMARY KEY (id); -ALTER TABLE ONLY public."DiscourseSpace" -ADD CONSTRAINT "DiscourseSpace_pkey" PRIMARY KEY (id); +ALTER TABLE ONLY public."Space" +ADD CONSTRAINT "Space_pkey" PRIMARY KEY (id); -ALTER TABLE ONLY public."DiscourseSpace" -ADD CONSTRAINT "DiscourseSpace_discourse_platform_id_fkey" FOREIGN KEY ( - discourse_platform_id -) REFERENCES public."DiscoursePlatform" ( +ALTER TABLE ONLY public."Space" +ADD CONSTRAINT "Space_platform_id_fkey" FOREIGN KEY ( + platform_id +) REFERENCES public."Platform" ( id ) ON UPDATE CASCADE ON DELETE CASCADE; -GRANT ALL ON TABLE public."DiscoursePlatform" TO anon; -GRANT ALL ON TABLE public."DiscoursePlatform" TO authenticated; -GRANT ALL ON TABLE public."DiscoursePlatform" TO service_role; +GRANT ALL ON TABLE public."Platform" TO anon; +GRANT ALL ON TABLE public."Platform" TO authenticated; +GRANT ALL ON TABLE public."Platform" TO service_role; -GRANT ALL ON TABLE public."DiscourseSpace" TO anon; -GRANT ALL ON TABLE public."DiscourseSpace" TO authenticated; -GRANT ALL ON TABLE public."DiscourseSpace" TO service_role; +GRANT ALL ON TABLE public."Space" TO anon; +GRANT ALL ON TABLE public."Space" TO authenticated; +GRANT ALL ON TABLE public."Space" TO service_role; From 9a441658eb4af4f19fcb6b01185d63396085c8dc Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Thu, 22 May 2025 18:05:18 -0400 Subject: [PATCH 35/40] tolerate absence of sqruff --- packages/database/package.json | 4 ++-- packages/database/scripts/lint.ts | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 packages/database/scripts/lint.ts diff --git a/packages/database/package.json b/packages/database/package.json index f91951824..309cbb411 100644 --- a/packages/database/package.json +++ b/packages/database/package.json @@ -12,8 +12,8 @@ "dev": "supabase start", "stop": "supabase stop", "build": "npm 
run lint && npm run gentypes:local && cp ./types.gen.ts ../../apps/website/app/utils/supabase && npm run dbdiff", - "lint": "sqruff lint supabase/schemas", - "lint:fix": "sqruff fix supabase/schemas", + "lint": "tsx scripts/lint.ts", + "lint:fix": "tsx scripts/lint.ts -f", "gentypes:local": "supabase start && supabase gen types typescript --local --schema public > types.gen.ts", "gentypes:production": "supabase start && supabase gen types typescript --project-id \"$SUPABASE_PROJECT_ID\" --schema public > types.gen.ts", "dbdiff": "supabase stop && supabase db diff", diff --git a/packages/database/scripts/lint.ts b/packages/database/scripts/lint.ts new file mode 100644 index 000000000..0d8005806 --- /dev/null +++ b/packages/database/scripts/lint.ts @@ -0,0 +1,24 @@ +import { exec } from "node:child_process"; + +const main = () => { + try { + exec("which sqruff", (err, stdout, stderr) => { + if (err) { + console.error("Could not find sqruff, you may want to install it."); + // Fail gracefully + process.exit(0); + } + const command = + process.argv.length == 3 && process.argv[2] == "-f" ? "fix" : "lint"; + exec(`sqruff ${command} supabase/schemas`, {}, (err, stdout, stderr) => { + console.log(`${stdout}`); + console.log(`${stderr}`); + process.exit(err ? 
err.code : 0); + }); + }); + } catch (error) { + console.error("error:", error); + process.exit(1); + } +}; +if (import.meta.url === `file://${process.argv[1]}`) main(); From 489a1d5a3526435580a053e5e00e99e5bad472c8 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Thu, 22 May 2025 18:42:16 -0400 Subject: [PATCH 36/40] Make deploy work only on main branch --- packages/database/package.json | 6 +-- packages/database/scripts/deploy.ts | 63 +++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 packages/database/scripts/deploy.ts diff --git a/packages/database/package.json b/packages/database/package.json index 309cbb411..1c857ebf6 100644 --- a/packages/database/package.json +++ b/packages/database/package.json @@ -17,9 +17,9 @@ "gentypes:local": "supabase start && supabase gen types typescript --local --schema public > types.gen.ts", "gentypes:production": "supabase start && supabase gen types typescript --project-id \"$SUPABASE_PROJECT_ID\" --schema public > types.gen.ts", "dbdiff": "supabase stop && supabase db diff", - "dbdiff:save": "supabase stop && supabase db diff -f ", - "deploy": "echo \"$SUPABASE_PROJECT_ID\" && supabase link --project-ref \"$SUPABASE_PROJECT_ID\" --password \"$SUPABASE_DB_PASSWORD\" && supabase db push", - "deploy:functions": "supabase functions deploy --project-ref \"$SUPABASE_PROJECT_ID\"" + "dbdiff:save": "supabase stop && supabase db diff -f", + "deploy": "tsx scripts/deploy.ts", + "deploy:functions": "tsx scripts/lint.ts -f" }, "devDependencies": { "supabase": "^2.22.12", diff --git a/packages/database/scripts/deploy.ts b/packages/database/scripts/deploy.ts new file mode 100644 index 000000000..6fd6430f1 --- /dev/null +++ b/packages/database/scripts/deploy.ts @@ -0,0 +1,63 @@ +import { exec } from "node:child_process"; +import dotenv from "dotenv"; + +dotenv.config(); + +const main = () => { + try { + exec("git status -s -b", (err, stdout, stderr) => { + if (err) { + console.error("Is 
git installed?"); + process.exit(1); + } + const lines = stdout.split("\n"); + if (lines[0] != "## main...main") { + console.log("Not on main branch, not deploying database"); + process.exit(0); + } + const { SUPABASE_PROJECT_ID, SUPABASE_DB_PASSWORD } = process.env; + if (!SUPABASE_PROJECT_ID) { + console.log("Please define SUPABASE_PROJECT_ID"); + process.exit(1); + } + if (!SUPABASE_DB_PASSWORD) { + console.log("Please define SUPABASE_DB_PASSWORD"); + process.exit(1); + } + exec( + `supabase link --project-ref ${SUPABASE_PROJECT_ID} --password ${SUPABASE_DB_PASSWORD}`, + (err, stdout, stderr) => { + console.log(`${stdout}`); + console.error(`${stderr}`); + if (err) { + process.exit(err.code); + } + exec("supabase db push", (err, stdout, stderr) => { + console.log(`${stdout}`); + console.error(`${stderr}`); + if (err) { + process.exit(err.code); + } + if (process.argv.length == 3 && process.argv[2] == "-f") { + // Also push functions + exec( + `supabase functions deploy --project-ref ${SUPABASE_PROJECT_ID}`, + (err, stdout, stderr) => { + console.log(`${stdout}`); + console.error(`${stderr}`); + if (err) { + process.exit(err.code); + } + }, + ); + } + }); + }, + ); + }); + } catch (error) { + console.error("error:", error); + process.exit(1); + } +}; +if (import.meta.url === `file://${process.argv[1]}`) main(); From 079b89aba74066901368a0682121c9f3ac20f1e6 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Thu, 22 May 2025 19:41:39 -0400 Subject: [PATCH 37/40] Avoid putting passwords in logs. 
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- packages/database/scripts/deploy.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/database/scripts/deploy.ts b/packages/database/scripts/deploy.ts index 6fd6430f1..24b63bee3 100644 --- a/packages/database/scripts/deploy.ts +++ b/packages/database/scripts/deploy.ts @@ -24,8 +24,10 @@ const main = () => { console.log("Please define SUPABASE_DB_PASSWORD"); process.exit(1); } + // Use environment variables that are already set instead of passing as arguments exec( - `supabase link --project-ref ${SUPABASE_PROJECT_ID} --password ${SUPABASE_DB_PASSWORD}`, + `supabase link --project-ref ${SUPABASE_PROJECT_ID}`, + { env: { ...process.env, SUPABASE_DB_PASSWORD } }, (err, stdout, stderr) => { console.log(`${stdout}`); console.error(`${stderr}`); From 84a185c311fae2f83d890b6fcf3a649363778139 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Thu, 22 May 2025 19:46:40 -0400 Subject: [PATCH 38/40] minor changes to README --- packages/database/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/database/README.md b/packages/database/README.md index 0b5b489e0..fad141a43 100644 --- a/packages/database/README.md +++ b/packages/database/README.md @@ -10,7 +10,7 @@ All CLI commands below should be run in this directory (`packages/database`.) 2. Usage: 1. Use `turbo dev`, (alias for `supabase start`) before you use your local database. URLs will be given for your local supabase database, api endpoint, etc. 2. You may need to `supabase db pull` if changes are deployed while you work. - 3. End you work session with `npm run stop` (alias for `supabase end`) to free docker resources. + 3. End your work session with `supabase stop` to free docker resources. 3. Development: We follow the supabase [Declarative Database Schema](https://supabase.com/docs/guides/local-development/declarative-database-schemas) process. 1. 
Assuming you're working on a feature branch. 2. Make changes to the schema, by editing files in `packages/database/supabase/schemas` @@ -23,4 +23,3 @@ All CLI commands below should be run in this directory (`packages/database`.) 6. If you are satisfied with the migration, create a migration file with `npm run dbdiff:save some_meaningful_migration_name` 1. If all goes well, there should be a new file named `supbase/migration/2..._some_meaningful_migration_name.sql` which you should `git add`. 10. You can start using your changes again `turbo dev` - 11. When your PR gets merged to main, deploy your changes to production with `supabase db push`. (URGENT TODO: make that a CI/CD step.) From 0d7ccda2200ebdcf8489f4e9a3ea48e7c2f25101 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Thu, 22 May 2025 19:47:55 -0400 Subject: [PATCH 39/40] Also check for undeployed changes --- packages/database/scripts/deploy.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/packages/database/scripts/deploy.ts b/packages/database/scripts/deploy.ts index 24b63bee3..a4af380be 100644 --- a/packages/database/scripts/deploy.ts +++ b/packages/database/scripts/deploy.ts @@ -5,7 +5,7 @@ dotenv.config(); const main = () => { try { - exec("git status -s -b", (err, stdout, stderr) => { + exec("git status -s -b -uno", (err, stdout, stderr) => { if (err) { console.error("Is git installed?"); process.exit(1); @@ -15,6 +15,12 @@ const main = () => { console.log("Not on main branch, not deploying database"); process.exit(0); } + if (lines.length > 1) { + console.log( + "You seem to have uncommitted changes, not deploying database", + ); + process.exit(0); + } const { SUPABASE_PROJECT_ID, SUPABASE_DB_PASSWORD } = process.env; if (!SUPABASE_PROJECT_ID) { console.log("Please define SUPABASE_PROJECT_ID"); From c71198f6cc02e25146000802f8046ebbf2605051 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Thu, 22 May 2025 19:57:41 -0400 Subject: [PATCH 40/40] Use RESET ALL where 
needed, not otherwise --- packages/database/supabase/schemas/concept.sql | 3 --- packages/database/supabase/schemas/embedding.sql | 2 +- packages/database/supabase/schemas/sync.sql | 1 + 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/packages/database/supabase/schemas/concept.sql b/packages/database/supabase/schemas/concept.sql index 96afdfd82..dbbc3686e 100644 --- a/packages/database/supabase/schemas/concept.sql +++ b/packages/database/supabase/schemas/concept.sql @@ -86,6 +86,3 @@ ADD CONSTRAINT "Concept_space_id_fkey" FOREIGN KEY ( GRANT ALL ON TABLE public."Concept" TO anon; GRANT ALL ON TABLE public."Concept" TO authenticated; GRANT ALL ON TABLE public."Concept" TO service_role; - - -RESET ALL; diff --git a/packages/database/supabase/schemas/embedding.sql b/packages/database/supabase/schemas/embedding.sql index dfc15e626..47a51c371 100644 --- a/packages/database/supabase/schemas/embedding.sql +++ b/packages/database/supabase/schemas/embedding.sql @@ -98,4 +98,4 @@ ALTER FUNCTION public.match_embeddings_for_subset_nodes ( "p_query_embedding" extensions.vector, "p_subset_roam_uids" Text []) OWNER TO "postgres" ; -set search_path to '' ; +RESET ALL; diff --git a/packages/database/supabase/schemas/sync.sql b/packages/database/supabase/schemas/sync.sql index edebe7a06..f5bbba07a 100644 --- a/packages/database/supabase/schemas/sync.sql +++ b/packages/database/supabase/schemas/sync.sql @@ -44,6 +44,7 @@ CREATE UNIQUE INDEX sync_info_u_idx ON public.sync_info USING btree ( "sync_target", sync_function ); +set search_path to public, extensions ; CREATE OR REPLACE FUNCTION public.end_sync_task(