diff --git a/.github/workflows/database-deploy.yaml b/.github/workflows/database-deploy.yaml
new file mode 100644
index 000000000..e9f7faa2b
--- /dev/null
+++ b/.github/workflows/database-deploy.yaml
@@ -0,0 +1,23 @@
+name: Supabase database deploy
+on:
+ workflow_dispatch:
+ push:
+ branches:
+ - main
+jobs:
+ deploy:
+ runs-on: ubuntu-latest
+ env:
+ SUPABASE_ACCESS_TOKEN: ${{ secrets.SUPABASE_ACCESS_TOKEN }}
+ SUPABASE_PROJECT_ID: ${{ secrets.SUPABASE_PROJECT_ID_PROD }}
+ SUPABASE_DB_PASSWORD: ${{ secrets.SUPABASE_DB_PASSWORD_PROD }}
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-node@v3
+ with:
+ node-version: "20"
+ - run: npm ci
+ - uses: supabase/setup-cli@v1
+ with:
+ version: latest
+ - run: npx turbo deploy -F @repo/database
diff --git a/packages/database/.sqruff b/packages/database/.sqruff
new file mode 100644
index 000000000..3effbf331
--- /dev/null
+++ b/packages/database/.sqruff
@@ -0,0 +1,8 @@
+[sqruff]
+dialect = postgres
+exclude_rules = CP05,LT05
+
+[sqruff:indentation]
+indent_unit = space
+tab_space_size = 4
+indented_joins = True
diff --git a/packages/database/README.md b/packages/database/README.md
new file mode 100644
index 000000000..fad141a43
--- /dev/null
+++ b/packages/database/README.md
@@ -0,0 +1,25 @@
+This contains the database schema for vector embeddings and concepts.
+All CLI commands below should be run in this directory (`packages/database`).
+
+1. Setup
+ 1. Install [Docker](https://www.docker.com)
+ 2. Install the [supabase CLI](https://supabase.com/docs/guides/local-development). (There is a brew version)
+ 3. `supabase login` with your (account-specific) supabase access token. (TODO: Create a group access token.)
+ 4. `supabase link`. It will ask you for a project name, use `discourse-graphs`. (Production for now.) It will also ask you for the database password (See 1password.)
+ 5. Install [sqruff](https://github.com/quarylabs/sqruff)
+2. Usage:
+ 1. Use `turbo dev` (an alias for `supabase start`) before you use your local database. URLs will be given for your local supabase database, api endpoint, etc.
+ 2. You may need to `supabase db pull` if changes are deployed while you work.
+ 3. End your work session with `supabase stop` to free docker resources.
+3. Development: We follow the supabase [Declarative Database Schema](https://supabase.com/docs/guides/local-development/declarative-database-schemas) process.
+ 1. Assuming you're working on a feature branch.
+ 2. Make changes to the schema, by editing files in `packages/database/supabase/schemas`
+ 3. If you created a new schema file, make sure to add it to `[db.migrations] schema_paths` in `packages/database/supabase/config.toml`. Schema files are applied in that order, you may need to be strategic in placing your file.
+ 4. `turbo build`, which will do the following:
+ 1. Check your logic with `sqruff lint supabase/schemas`, and eventually `sqruff fix supabase/schemas`
+ 2. Regenerate the types file with `supabase gen types typescript --local > types.gen.ts`
+ 3. See if there would be a migration to apply with `supabase db diff`
+ 5. If applying the new schema fails, repeat step 4
+ 6. If you are satisfied with the migration, create a migration file with `npm run dbdiff:save some_meaningful_migration_name`
+ 1. If all goes well, there should be a new file named `supabase/migrations/2..._some_meaningful_migration_name.sql` which you should `git add`.
+ 7. You can start using your changes again with `turbo dev`
diff --git a/packages/database/example.md b/packages/database/example.md
new file mode 100644
index 000000000..f7a4166e4
--- /dev/null
+++ b/packages/database/example.md
@@ -0,0 +1,76 @@
+# example...
+
+Content:
+
+* (nt1pgid) discourse-graphs/nodes/Claim
+* (nt2pgid) discourse-graphs/nodes/Hypothesis
+* (dgpgid) roam/js/discourse-graph
+ * (et1bkid) Opposes
+ * (et1r1bkid) source
+ * (et1r2bkid) destination
+ * (anyid1) If
+ * (et1sr1bkid) Page
+ * (et1sr2bkid) Block
+ * (et1sr3bkid) ParentPage
+ * (et1sr4bkid) PBlock
+ * (et1sr5bkid) SPage
+ * (et1sr6bkid) SBlock
+* (hyp1pgid) [HYP] Some hypothesis
+* (clm1pgid) [CLM] Some claim
+* (somepgid) Some page
+ * (hyp1refbkid) a block referring to [[HYP] Some hypothesis]
+ * (opp1bkid) OpposedBy
+ * (clm1refbkid) a block referring to [[CLM] Some Claim]
+
+Documents:
+
+| id | source_local_id |
+|----|-----------------|
+| 1 | nt1pgid |
+| 2 | nt2pgid |
+| 3 | dgpgid |
+| 22 | hyp1pgid |
+| 23 | clm1pgid |
+| 4 | somepgid |
+
+Content:
+
+| id | source_local_id | page_id | scale | represents_id | text |
+|----|-------------|-------------|----------|---------------|----------------------------------------------|
+| 5 | nt1pgid | 1 | document | 16 | discourse-graphs/nodes/Claim |
+| 6 | nt2pgid | 2 | document | 17 | discourse-graphs/nodes/Hypothesis |
+| 7 | et1bkid | 3 | document | 18 | discourse-graphs/edges/OpposedBy |
+| 8 | somepgid | 4 | document | | Some page |
+| 24 | hyp1pgid | 22 | document | 20 | [HYP] Some hypothesis |
+| 25 | clm1pgid    | 23          | document | 19            | [CLM] Some claim                             |
+| 9 | hyp1refbkid | 4 | block | | a block referring to [[HYP] Some hypothesis] |
+| 10 | opp1bkid | 4 | block | 21 | OpposedBy |
+| 11 | clm1refbkid | 4 | block | | a block referring to [[CLM] Some claim] |
+| 13 | et1r1bkid | 3 | block | | source |
+| 14 | et1r2bkid | 3 | block | | destination |
+
+Concept:
+
+| id | is_schema | arity | schema | name | content |
+|----|-----------|-------|--------|-----------------------|-----------|
+| 16 | true | 0 | | Claim | {} |
+| 17 | true | 0 | | Hypothesis | {} |
+| 18 | true | 2 | | Opposed-by | { "roles": ["source", "destination"], "representation": ["source", "sourceref", "destination", "destinationref", "predicate"] } |
+| 19 | false | 0 | 16 | [CLM] Some claim | {} |
+| 20 | false | 0 | 17 | [HYP] Some hypothesis | {} |
+| 21 | false | 2 | 18 | OpposedBy | { "concepts": {"source": 19, "destination": 20}, "occurences": [{"sourceref": 11, "destinationref": 9, "source": 25, "destination": 24, "predicate": 10 }] } |
+
+Note: Open question whether the occurrence structure matters, and whether it should be materialized in another table.
+(I would tend to say yes to both.)
+
+ContentLink
+
+| source | destination |
+|--------|-------------|
+| 9 | 24 |
+| 11 | 25 |
+
+Note: I would probably create a sub-Content for the link text and use this as source.
+OR use a char_start, char_end.
+
+Missing: Ontology
diff --git a/packages/database/package.json b/packages/database/package.json
new file mode 100644
index 000000000..1c857ebf6
--- /dev/null
+++ b/packages/database/package.json
@@ -0,0 +1,29 @@
+{
+ "name": "@repo/database",
+ "version": "0.0.0",
+ "private": true,
+ "license": "Apache-2.0",
+ "type": "module",
+ "exports": {
+ "./types.gen.ts": "./types.gen.ts"
+ },
+ "scripts": {
+ "init": "supabase login",
+ "dev": "supabase start",
+ "stop": "supabase stop",
+ "build": "npm run lint && npm run gentypes:local && cp ./types.gen.ts ../../apps/website/app/utils/supabase && npm run dbdiff",
+ "lint": "tsx scripts/lint.ts",
+ "lint:fix": "tsx scripts/lint.ts -f",
+ "gentypes:local": "supabase start && supabase gen types typescript --local --schema public > types.gen.ts",
+ "gentypes:production": "supabase start && supabase gen types typescript --project-id \"$SUPABASE_PROJECT_ID\" --schema public > types.gen.ts",
+ "dbdiff": "supabase stop && supabase db diff",
+ "dbdiff:save": "supabase stop && supabase db diff -f",
+ "deploy": "tsx scripts/deploy.ts",
+ "deploy:functions": "tsx scripts/deploy.ts -f"
+ },
+ "devDependencies": {
+ "supabase": "^2.22.12",
+ "tsx": "^4.19.2"
+ },
+ "dependencies": {}
+}
diff --git a/packages/database/schema.puml b/packages/database/schema.puml
new file mode 100644
index 000000000..914fc6e49
--- /dev/null
+++ b/packages/database/schema.puml
@@ -0,0 +1,110 @@
+@startuml
+skinparam nodesep 10
+hide circle
+hide empty members
+class "SpaceAccess" [[{An access control entry for a space}]] {
+ {field} editor : boolean
+}
+class "Account" [[{A user account on a platform}]] {
+ {field} id : integer
+ {field} write_permission : boolean
+ {field} active : boolean
+}
+class "Space" [[{A space on a platform representing a community engaged in a conversation}]] {
+ {field} id : integer
+ {field} url : string
+ {field} name : string
+}
+"SpaceAccess" --> "1" "Account" : "account"
+"SpaceAccess" --> "0..1" "Space" : "space"
+class "Platform" [[{A data platform where discourse happens}]] {
+ {field} id : integer
+ {field} name : string
+ {field} url : string
+}
+class "Content" [[{A unit of content}]] {
+ {field} id : integer
+ {field} source_local_id : string
+ {field} created : datetime
+ {field} text : string
+ {field} metadata : JSON
+ {field} scale : Scale
+ {field} last_modified : datetime
+}
+class "Document" [[{None}]] {
+ {field} id : integer
+ {field} source_local_id : string
+ {field} url : string
+ {field} created : datetime
+ {field} metadata : JSON
+ {field} last_modified : datetime
+ {field} contents : blob
+}
+class "Concept" [[{An abstract concept, claim or relation}]] {
+ {field} id : integer
+ {field} epistemic_status : EpistemicStatus
+ {field} name : string
+ {field} description : string
+ {field} created : datetime
+ {field} last_modified : datetime
+ {field} arity : integer
+ {field} content : JSON
+ {field} is_schema : boolean
+}
+"Space" --> "1" "Platform" : "platform"
+"Content" --> "0..1" "Space" : "space"
+"Document" --> "0..1" "Space" : "space"
+"Concept" --> "0..1" "Space" : "space"
+"Account" --> "1" "Platform" : "platform"
+abstract "Agent" [[{An agent that acts in the system}]] {
+ {field} id : integer
+ {field} type : EntityType
+}
+"Document" --> "0..*" "Agent" : "contributors"
+"Document" --> "1" "Agent" : "author"
+"Content" --> "1" "Document" : "document"
+class "ContentEmbedding" [[{None}]] {
+ {field} model : EmbeddingName
+ {field} vector : vector
+ {field} obsolete : boolean
+}
+"ContentEmbedding" --> "1" "Content" : "target"
+"Content" --> "0..1" "Content" : "part_of"
+"Content" --> "0..*" "Agent" : "contributors"
+"Content" --> "1" "Agent" : "creator"
+"Content" --> "1" "Agent" : "author"
+"Concept" --> "0..1" "Content" : "represented_by"
+class "ConceptSchema" [[{None}]] {
+ {field} id(i) : integer
+ {field} epistemic_status(i) : EpistemicStatus
+ {field} name(i) : string
+ {field} description(i) : string
+ {field} created(i) : datetime
+ {field} last_modified(i) : datetime
+ {field} arity(i) : integer
+ {field} content(i) : JSON
+ {field} is_schema(i) : boolean
+}
+"Concept" --> "1" "ConceptSchema" : "schema"
+"Concept" --> "0..*" "Agent" : "contributors"
+"Concept" --> "1" "Agent" : "author"
+"Concept" ^-- "ConceptSchema"
+class "Person" [[{A person using the system}]] {
+ {field} name : string
+ {field} orcid : string
+ {field} email : string
+ {field} id(i) : integer
+ {field} type(i) : EntityType
+}
+class "AutomatedAgent" [[{An automated agent}]] {
+ {field} metadata : JSON
+ {field} name : string
+ {field} deterministic : boolean
+ {field} version : string
+ {field} id(i) : integer
+ {field} type(i) : EntityType
+}
+"Account" --> "1" "Agent" : "person"
+"Agent" ^-- "Person"
+"Agent" ^-- "AutomatedAgent"
+@enduml
diff --git a/packages/database/schema.svg b/packages/database/schema.svg
new file mode 100644
index 000000000..ab1d233e5
--- /dev/null
+++ b/packages/database/schema.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/packages/database/schema.yaml b/packages/database/schema.yaml
new file mode 100644
index 000000000..363f9ff8b
--- /dev/null
+++ b/packages/database/schema.yaml
@@ -0,0 +1,417 @@
+id: https://discoursegraphs.com/schemas/v0#
+name: discoursegraphs
+prefixes:
+ linkml: https://w3id.org/linkml/
+ dg: https://discoursegraphs.com/schemas/v0#
+default_prefix: dg
+default_range: string
+imports:
+ - linkml:types
+enums:
+ Scale:
+ description: scale value of a Content
+ permissible_values:
+ document:
+ post:
+ chunk_unit:
+ section:
+ block:
+ description: A block of content in an outline system, such as a Roam node
+ field:
+ paragraph:
+ quote:
+ sentence:
+ phrase:
+ Validation:
+ description: Whether a given value was given by a person, or suggested by an automated agent (and then possibly infirmed.)
+ permissible_values:
+ infirmed:
+ description: Infirmed by a trusted agent
+ suggested:
+ description: Suggested by a fallible agent (probabilistic, heuristic, person in training...)
+ confirmed:
+ description: Confirmed by a trusted agent
+ intrinsic:
+ description: No validation needed, this is intrinsic to the data
+ EpistemicStatus:
+ description: The epistemic status of a claim
+ permissible_values:
+ certainly_not:
+ strong_evidence_against:
+ could_be_false:
+ unknown:
+ uncertain:
+ contentious:
+ could_be_true:
+ strong_evidence_for:
+ certain:
+ EntityType:
+ description: The type of an entity
+ permissible_values:
+ Platform:
+ Space:
+ Account:
+ Person:
+ AutomatedAgent:
+ Document:
+ Content:
+ Concept:
+ ConceptSchema:
+ ContentLink:
+ Occurrence:
+ EmbeddingName:
+ description: an embedding name
+ permissible_values:
+ openai_text_embedding_ada2_1536:
+ openai_text_embedding_3_small_512:
+ openai_text_embedding_3_small_1536:
+ openai_text_embedding_3_large_256:
+ openai_text_embedding_3_large_1024:
+ openai_text_embedding_3_large_3072:
+ DerivedTextVariant:
+ description: Is the text taken as-is, or is it a computed variant?
+ permissible_values:
+ as_is:
+ neighbourhood_parent_and_children:
+ neighbourhood_parent_and_level_2_descendants:
+ neighbourhood_children:
+ neighbourhood_level_2_descendants:
+ summary:
+types:
+ JSON:
+ uri: xsd:string
+ # base: dict
+ base: str
+ description: JSON data
+ # annotations:
+ # sql_type: sqlalchemy.dialects.postgresql.JSONB
+ JSONSchema:
+ uri: xsd:string
+ base: dict
+ description: A Json schema
+ # annotations:
+ # sql_type: sqlalchemy.dialects.postgresql.JSONB
+ vector:
+ uri: xsd:float
+ base: float
+ description: A vector of floats
+ annotations:
+ sql_type: pgvector.sqlalchemy.Vector
+ blob:
+ uri: xsd:base64Binary
+ base: bytes
+ annotations:
+ sql_type: sqlalchemy.dialects.postgresql.BLOB
+ description: A binary large object
+classes:
+ Agent:
+ description: An agent that acts in the system
+ abstract: true
+ slots:
+ - id
+ - type
+ Person:
+ description: A person using the system
+ is_a: Agent
+ slots:
+ - name
+ - orcid
+ attributes:
+ email:
+ required: true
+ # TODO: known skills, i.e. what processes can they confirm.
+ AutomatedAgent:
+ description: An automated agent
+ is_a: Agent
+ slots:
+ - metadata
+ - name
+ attributes:
+ deterministic:
+ range: boolean
+ ifabsent: false
+ version:
+ range: string
+
+ Platform:
+ description: A data platform where discourse happens
+ slots:
+ - id
+ - name
+ attributes:
+ url:
+ required: true
+ Account:
+ description: A user account on a platform
+ slots:
+ - id
+ - platform
+ attributes:
+ person:
+ range: Agent
+ required: true
+ write_permission:
+ range: boolean
+ required: true
+ active:
+ range: boolean
+ required: true
+ ifabsent: true
+ Space:
+ description: A space on a platform representing a community engaged in a conversation
+ slots:
+ - id
+ - url
+ - name
+ attributes:
+ platform:
+ range: Platform
+ required: true
+ SpaceAccess:
+ description: An access control entry for a space
+ slots:
+ - space
+ attributes:
+ account:
+ range: Account
+ required: true
+ editor:
+ range: boolean
+ required: true
+ unique_keys:
+ main:
+ description: Primary key for space access
+ unique_key_slots:
+ - account
+ - space
+ Content:
+ description: A unit of content
+ slots:
+ - id
+ - document
+ - source_local_id
+ - author
+ - creator
+ - created
+ - text
+ - metadata
+ - scale
+ # - position
+ # - char_position
+ - space
+ - contributors
+ - last_modified
+ attributes:
+ part_of:
+ description: This content is part of a larger content unit
+ range: Content
+ # ContentDerivation:
+ # description: A derivation relation between content units
+ # attributes:
+ # derived_content:
+ # description: The derived content unit
+ # range: Content
+ # required: true
+ # identifier: true
+ # derived_from:
+ # description: The content unit that this variant was derived from
+ # range: Content
+ # required: true
+ # derived_variant:
+ # description: This content is a variant derived from another content unit
+ # range: DerivedTextVariant
+ # required: true
+ Document:
+ slots:
+ - id
+ - space
+ - source_local_id
+ - url
+ - created
+ - metadata
+ - last_modified
+ - author
+ - contributors
+ attributes:
+ contents:
+ range: blob
+ # Article:
+ # description: an article
+ # is_a: Document
+ # slots:
+ # - issn
+ # - abstract
+ ContentEmbedding:
+ # abstract: true
+ attributes:
+ target:
+ range: Content
+ required: true
+ identifier: true
+ model:
+ range: EmbeddingName
+ required: true
+ vector:
+ range: vector
+ array:
+ minimum_number_dimensions: 1
+ maximum_number_dimensions: 1
+ required: true
+ obsolete:
+ description: Whether this embedding is obsolete (because the Content was modified)
+ range: boolean
+ ifabsent: false
+ Concept:
+ description: An abstract concept, claim or relation
+ slots:
+ - id
+ - epistemic_status
+ - name
+ - description
+ - author
+ - contributors
+ - created
+ - last_modified
+ - space
+ attributes:
+ arity:
+ range: integer
+ required: true
+ ifabsent: 0
+ description: The number of roles in this relation; nodes have zero, binary relations have 2, etc.
+ schema:
+ range: ConceptSchema
+ required: true
+ content:
+ range: JSON
+ required: true
+ is_schema:
+ range: boolean
+ required: true
+ ifabsent: false
+ represented_by:
+ description: This concept is explicitly represented by a given content unit
+ range: Content
+ # update status
+ # concept has occurrences and possibly a representation in a space.
+ # Are concepts space-specific? Tending to yes. So the point of convergence should be distinct.
+ # Can a concept have multiple representations? One case is a representation
+ # of an equivalent concept in another space.
+ # do non-claim concepts have epistemic status?
+ # The other big deal is who has authority on concept definition.
+ # Finally... concept schema. Yeah. Is it per-space? Likely.
+ # Damn, concept schema is a concept, is it not?
+ # Now, if a concept has a complex structure based on a complex content...
+ # AH, it should be based on occurrences.
+
+ ConceptSchema:
+ is_a: Concept
+
+ # Reference:
+ # abstract: true
+ # description: A link from a content fragment to something else
+ # attributes:
+ # source:
+ # range: Content
+ # required: true
+ # slots:
+ # - creator
+ # - created
+ # - validation
+ # - type
+ # # This is an aggregate of validation events
+ # # Q: What is the relationship between occurrences and links? Links to Concepts in particular?
+ # # What if the concept has been materialized as content?
+ # ContentLink:
+ # description: An explicit link from a content fragment to another content.
+ # is_a: Reference
+ # attributes:
+ # target:
+ # range: Content
+ # required: true
+ # Occurrence:
+ # description: A link from a content fragment to a Concept. May be an interpretation.
+ # is_a: Reference
+ # attributes:
+ # target:
+ # range: Concept
+ # required: true
+slots:
+ id:
+ range: integer
+ identifier: true
+ required: true
+ type:
+ range: EntityType
+ required: true
+ designates_type: true
+ name:
+ required: true
+ author:
+ range: Agent
+ description: The author of content
+ required: true
+ creator:
+ range: Agent
+ description: The creator of a logical structure, such as a content subdivision
+ required: true
+ contributors:
+ multivalued: true
+ range: Agent
+ text:
+ required: true
+ description:
+ created:
+ range: datetime
+ required: true
+ description: The time when the content was created in the remote source
+ last_modified:
+ range: datetime
+ required: true
+ description: The last time the content was modified in the remote source
+ last_synced:
+ range: datetime
+ required: true
+ description: The last time the content was synced with the remote source
+ metadata:
+ range: JSON
+ orcid:
+ range: string
+ url:
+ range: string
+ platform:
+ range: Platform
+ required: true
+ issn:
+ abstract:
+ scale:
+ range: Scale
+ required: true
+ position:
+ description: The ordinal position of the content within its parent, wrt other content units of the same scale
+ range: integer
+ ifabsent: 0
+ required: true
+ char_position:
+ description: The character position of the content within its parent.
+ # Does not apply to outline sub-elements
+ range: integer
+ ifabsent: 0
+ validation:
+ range: Validation
+ required: true
+ ifabsent: Validation(suggested)
+ epistemic_status:
+ range: EpistemicStatus
+ required: true
+ ifabsent: EpistemicStatus(unknown)
+ space:
+ range: Space
+ description: The space in which the content is located
+ document:
+ range: Document
+ required: true
+ source_local_id:
+ range: string
+ description: The unique identifier of the content in the remote source
+ # In the case of a document, could it be the URL?
diff --git a/packages/database/scripts/deploy.ts b/packages/database/scripts/deploy.ts
new file mode 100644
index 000000000..a4af380be
--- /dev/null
+++ b/packages/database/scripts/deploy.ts
@@ -0,0 +1,71 @@
+import { exec } from "node:child_process";
+import dotenv from "dotenv";
+
+dotenv.config();
+
+const main = () => {
+ try {
+ exec("git status -s -b -uno", (err, stdout, stderr) => {
+ if (err) {
+ console.error("Is git installed?");
+ process.exit(1);
+ }
+ const lines = stdout.split("\n");
+ if (lines[0] != "## main...main") {
+ console.log("Not on main branch, not deploying database");
+ process.exit(0);
+ }
+ if (lines.length > 1) {
+ console.log(
+ "You seem to have uncommitted changes, not deploying database",
+ );
+ process.exit(0);
+ }
+ const { SUPABASE_PROJECT_ID, SUPABASE_DB_PASSWORD } = process.env;
+ if (!SUPABASE_PROJECT_ID) {
+ console.log("Please define SUPABASE_PROJECT_ID");
+ process.exit(1);
+ }
+ if (!SUPABASE_DB_PASSWORD) {
+ console.log("Please define SUPABASE_DB_PASSWORD");
+ process.exit(1);
+ }
+ // Use environment variables that are already set instead of passing as arguments
+ exec(
+ `supabase link --project-ref ${SUPABASE_PROJECT_ID}`,
+ { env: { ...process.env, SUPABASE_DB_PASSWORD } },
+ (err, stdout, stderr) => {
+ console.log(`${stdout}`);
+ console.error(`${stderr}`);
+ if (err) {
+ process.exit(err.code);
+ }
+ exec("supabase db push", (err, stdout, stderr) => {
+ console.log(`${stdout}`);
+ console.error(`${stderr}`);
+ if (err) {
+ process.exit(err.code);
+ }
+ if (process.argv.length == 3 && process.argv[2] == "-f") {
+ // Also push functions
+ exec(
+ `supabase functions deploy --project-ref ${SUPABASE_PROJECT_ID}`,
+ (err, stdout, stderr) => {
+ console.log(`${stdout}`);
+ console.error(`${stderr}`);
+ if (err) {
+ process.exit(err.code);
+ }
+ },
+ );
+ }
+ });
+ },
+ );
+ });
+ } catch (error) {
+ console.error("error:", error);
+ process.exit(1);
+ }
+};
+if (import.meta.url === `file://${process.argv[1]}`) main();
diff --git a/packages/database/scripts/lint.ts b/packages/database/scripts/lint.ts
new file mode 100644
index 000000000..0d8005806
--- /dev/null
+++ b/packages/database/scripts/lint.ts
@@ -0,0 +1,24 @@
+import { exec } from "node:child_process";
+
+const main = () => {
+ try {
+ exec("which sqruff", (err, stdout, stderr) => {
+ if (err) {
+ console.error("Could not find sqruff, you may want to install it.");
+ // Fail gracefully
+ process.exit(0);
+ }
+ const command =
+ process.argv.length == 3 && process.argv[2] == "-f" ? "fix" : "lint";
+ exec(`sqruff ${command} supabase/schemas`, {}, (err, stdout, stderr) => {
+ console.log(`${stdout}`);
+ console.log(`${stderr}`);
+ process.exit(err ? err.code : 0);
+ });
+ });
+ } catch (error) {
+ console.error("error:", error);
+ process.exit(1);
+ }
+};
+if (import.meta.url === `file://${process.argv[1]}`) main();
diff --git a/packages/database/supabase/.gitignore b/packages/database/supabase/.gitignore
new file mode 100644
index 000000000..ad9264f0b
--- /dev/null
+++ b/packages/database/supabase/.gitignore
@@ -0,0 +1,8 @@
+# Supabase
+.branches
+.temp
+
+# dotenvx
+.env.keys
+.env.local
+.env.*.local
diff --git a/packages/database/supabase/config.toml b/packages/database/supabase/config.toml
new file mode 100644
index 000000000..ca2dc47aa
--- /dev/null
+++ b/packages/database/supabase/config.toml
@@ -0,0 +1,323 @@
+# For detailed configuration reference documentation, visit:
+# https://supabase.com/docs/guides/local-development/cli/config
+# A string used to distinguish different Supabase projects on the same host. Defaults to the
+# working directory name when running `supabase init`.
+project_id = "discourse-graphs"
+
+[api]
+enabled = true
+# Port to use for the API URL.
+port = 54321
+# Schemas to expose in your API. Tables, views and stored procedures in this schema will get API
+# endpoints. `public` and `graphql_public` schemas are included by default.
+schemas = ["public", "graphql_public"]
+# Extra schemas to add to the search_path of every request.
+extra_search_path = ["public", "extensions"]
+# The maximum number of rows returns from a view, table, or stored procedure. Limits payload size
+# for accidental or malicious requests.
+max_rows = 1000
+
+[api.tls]
+# Enable HTTPS endpoints locally using a self-signed certificate.
+enabled = false
+
+[db]
+# Port to use for the local database URL.
+port = 54322
+# Port used by db diff command to initialize the shadow database.
+shadow_port = 54320
+# The database major version to use. This has to be the same as your remote database's. Run `SHOW
+# server_version;` on the remote database to check.
+major_version = 15
+
+[db.pooler]
+enabled = false
+# Port to use for the local connection pooler.
+port = 54329
+# Specifies when a server connection can be reused by other clients.
+# Configure one of the supported pooler modes: `transaction`, `session`.
+pool_mode = "transaction"
+# How many server connections to allow per user/database pair.
+default_pool_size = 20
+# Maximum number of client connections allowed.
+max_client_conn = 100
+
+# [db.vault]
+# secret_key = "env(SECRET_VALUE)"
+
+[db.migrations]
+# Specifies an ordered list of schema files that describe your database.
+# Supports glob patterns relative to supabase directory: "./schemas/*.sql"
+schema_paths = [
+ './schemas/base.sql',
+ './schemas/extensions.sql',
+ './schemas/agent.sql',
+ './schemas/space.sql',
+ './schemas/account.sql',
+ './schemas/content.sql',
+ './schemas/embedding.sql',
+ './schemas/concept.sql',
+ './schemas/contributor.sql',
+ './schemas/sync.sql',
+]
+
+[db.seed]
+# If enabled, seeds the database after migrations during a db reset.
+enabled = true
+# Specifies an ordered list of seed files to load during db reset.
+# Supports glob patterns relative to supabase directory: "./seeds/*.sql"
+sql_paths = ["./seed.sql"]
+
+[realtime]
+enabled = true
+# Bind realtime via either IPv4 or IPv6. (default: IPv4)
+# ip_version = "IPv6"
+# The maximum length in bytes of HTTP request headers. (default: 4096)
+# max_header_length = 4096
+
+[studio]
+enabled = true
+# Port to use for Supabase Studio.
+port = 54323
+# External URL of the API server that frontend connects to.
+api_url = "http://127.0.0.1"
+# OpenAI API Key to use for Supabase AI in the Supabase Studio.
+openai_api_key = "env(OPENAI_API_KEY)"
+
+# Email testing server. Emails sent with the local dev setup are not actually sent - rather, they
+# are monitored, and you can view the emails that would have been sent from the web interface.
+[inbucket]
+enabled = true
+# Port to use for the email testing server web interface.
+port = 54324
+# Uncomment to expose additional ports for testing user applications that send emails.
+# smtp_port = 54325
+# pop3_port = 54326
+# admin_email = "admin@email.com"
+# sender_name = "Admin"
+
+[storage]
+enabled = true
+# The maximum file size allowed (e.g. "5MB", "500KB").
+file_size_limit = "50MiB"
+
+# Image transformation API is available to Supabase Pro plan.
+# [storage.image_transformation]
+# enabled = true
+
+# Uncomment to configure local storage buckets
+# [storage.buckets.images]
+# public = false
+# file_size_limit = "50MiB"
+# allowed_mime_types = ["image/png", "image/jpeg"]
+# objects_path = "./images"
+
+[auth]
+enabled = true
+# The base URL of your website. Used as an allow-list for redirects and for constructing URLs used
+# in emails.
+# site_url = "http://127.0.0.1:3000"
+site_url = "https://discourse-graph-discourse-graphs.vercel.app/"
+# A list of *exact* URLs that auth providers are permitted to redirect to post authentication.
+additional_redirect_urls = [
+ "https://discourse-graph-discourse-graphs.vercel.app/**",
+ "https://discourse-graph-*-discourse-graphs.vercel.app/**",
+]
+# How long tokens are valid for, in seconds. Defaults to 3600 (1 hour), maximum 604,800 (1 week).
+jwt_expiry = 3600
+# If disabled, the refresh token will never expire.
+enable_refresh_token_rotation = true
+# Allows refresh tokens to be reused after expiry, up to the specified interval in seconds.
+# Requires enable_refresh_token_rotation = true.
+refresh_token_reuse_interval = 10
+# Allow/disallow new user signups to your project.
+enable_signup = true
+# Allow/disallow anonymous sign-ins to your project.
+enable_anonymous_sign_ins = false
+# Allow/disallow testing manual linking of accounts
+enable_manual_linking = false
+# Passwords shorter than this value will be rejected as weak. Minimum 6, recommended 8 or more.
+minimum_password_length = 6
+# Passwords that do not meet the following requirements will be rejected as weak. Supported values
+# are: `letters_digits`, `lower_upper_letters_digits`, `lower_upper_letters_digits_symbols`
+password_requirements = ""
+
+[auth.rate_limit]
+# Number of emails that can be sent per hour. Requires auth.email.smtp to be enabled.
+email_sent = 2
+# Number of SMS messages that can be sent per hour. Requires auth.sms to be enabled.
+sms_sent = 30
+# Number of anonymous sign-ins that can be made per hour per IP address. Requires enable_anonymous_sign_ins = true.
+anonymous_users = 30
+# Number of sessions that can be refreshed in a 5 minute interval per IP address.
+token_refresh = 150
+# Number of sign up and sign-in requests that can be made in a 5 minute interval per IP address (excludes anonymous users).
+sign_in_sign_ups = 30
+# Number of OTP / Magic link verifications that can be made in a 5 minute interval per IP address.
+token_verifications = 30
+
+# Configure one of the supported captcha providers: `hcaptcha`, `turnstile`.
+# [auth.captcha]
+# enabled = true
+# provider = "hcaptcha"
+# secret = ""
+
+[auth.email]
+# Allow/disallow new user signups via email to your project.
+enable_signup = true
+# If enabled, a user will be required to confirm any email change on both the old, and new email
+# addresses. If disabled, only the new email is required to confirm.
+double_confirm_changes = true
+# If enabled, users need to confirm their email address before signing in.
+enable_confirmations = true
+# If enabled, users will need to reauthenticate or have logged in recently to change their password.
+secure_password_change = false
+# Controls the minimum amount of time that must pass before sending another signup confirmation or password reset email.
+max_frequency = "1m0s"
+# Number of characters used in the email OTP.
+otp_length = 6
+# Number of seconds before the email OTP expires (defaults to 1 hour).
+otp_expiry = 86400
+
+# Use a production-ready SMTP server
+# [auth.email.smtp]
+# enabled = true
+# host = "smtp.sendgrid.net"
+# port = 587
+# user = "apikey"
+# pass = "env(SENDGRID_API_KEY)"
+# admin_email = "admin@email.com"
+# sender_name = "Admin"
+
+# Uncomment to customize email template
+# [auth.email.template.invite]
+# subject = "You have been invited"
+# content_path = "./supabase/templates/invite.html"
+
+[auth.sms]
+# Allow/disallow new user signups via SMS to your project.
+enable_signup = false
+# If enabled, users need to confirm their phone number before signing in.
+enable_confirmations = false
+# Template for sending OTP to users
+template = "Your code is {{ .Code }}"
+# Controls the minimum amount of time that must pass before sending another sms otp.
+max_frequency = "5s"
+
+# Use pre-defined map of phone number to OTP for testing.
+# [auth.sms.test_otp]
+# 4152127777 = "123456"
+
+# Configure logged in session timeouts.
+# [auth.sessions]
+# Force log out after the specified duration.
+# timebox = "24h"
+# Force log out if the user has been inactive longer than the specified duration.
+# inactivity_timeout = "8h"
+
+# This hook runs before a token is issued and allows you to add additional claims based on the authentication method used.
+# [auth.hook.custom_access_token]
+# enabled = true
+# uri = "pg-functions://<database>/<schema>/<function-name>"
+
+# Configure one of the supported SMS providers: `twilio`, `twilio_verify`, `messagebird`, `textlocal`, `vonage`.
+[auth.sms.twilio]
+enabled = false
+account_sid = ""
+message_service_sid = ""
+# DO NOT commit your Twilio auth token to git. Use environment variable substitution instead:
+auth_token = "env(SUPABASE_AUTH_SMS_TWILIO_AUTH_TOKEN)"
+
+# Multi-factor authentication is available on the Supabase Pro plan.
+[auth.mfa]
+# Control how many MFA factors can be enrolled at once per user.
+max_enrolled_factors = 10
+
+# Control MFA via App Authenticator (TOTP)
+[auth.mfa.totp]
+enroll_enabled = true
+verify_enabled = true
+
+# Configure MFA via Phone Messaging
+[auth.mfa.phone]
+enroll_enabled = false
+verify_enabled = false
+otp_length = 6
+template = "Your code is {{ .Code }}"
+max_frequency = "5s"
+
+# Configure MFA via WebAuthn
+# [auth.mfa.web_authn]
+# enroll_enabled = true
+# verify_enabled = true
+
+# Use an external OAuth provider. The full list of providers are: `apple`, `azure`, `bitbucket`,
+# `discord`, `facebook`, `github`, `gitlab`, `google`, `keycloak`, `linkedin_oidc`, `notion`, `twitch`,
+# `twitter`, `slack`, `spotify`, `workos`, `zoom`.
+[auth.external.apple]
+enabled = false
+client_id = ""
+# DO NOT commit your OAuth provider secret to git. Use environment variable substitution instead:
+secret = "env(SUPABASE_AUTH_EXTERNAL_APPLE_SECRET)"
+# Overrides the default auth redirectUrl.
+redirect_uri = ""
+# Overrides the default auth provider URL. Used to support self-hosted gitlab, single-tenant Azure,
+# or any other third-party OIDC providers.
+url = ""
+# If enabled, the nonce check will be skipped. Required for local sign in with Google auth.
+skip_nonce_check = false
+
+# Use Firebase Auth as a third-party provider alongside Supabase Auth.
+[auth.third_party.firebase]
+enabled = false
+# project_id = "my-firebase-project"
+
+# Use Auth0 as a third-party provider alongside Supabase Auth.
+[auth.third_party.auth0]
+enabled = false
+# tenant = "my-auth0-tenant"
+# tenant_region = "us"
+
+# Use AWS Cognito (Amplify) as a third-party provider alongside Supabase Auth.
+[auth.third_party.aws_cognito]
+enabled = false
+# user_pool_id = "my-user-pool-id"
+# user_pool_region = "us-east-1"
+
+# Use Clerk as a third-party provider alongside Supabase Auth.
+[auth.third_party.clerk]
+enabled = false
+# Obtain from https://clerk.com/setup/supabase
+# domain = "example.clerk.accounts.dev"
+
+[edge_runtime]
+enabled = true
+# Configure one of the supported request policies: `oneshot`, `per_worker`.
+# Use `oneshot` for hot reload, or `per_worker` for load testing.
+policy = "oneshot"
+# Port to attach the Chrome inspector for debugging edge functions.
+inspector_port = 8083
+# The Deno major version to use.
+deno_version = 1
+
+# [edge_runtime.secrets]
+# secret_key = "env(SECRET_VALUE)"
+
+[analytics]
+enabled = true
+port = 54327
+# Configure one of the supported backends: `postgres`, `bigquery`.
+backend = "postgres"
+
+# Experimental features may be deprecated any time
+[experimental]
+# Configures Postgres storage engine to use OrioleDB (S3)
+orioledb_version = ""
+# Configures S3 bucket URL, eg. <bucket_name>.s3-<region>.amazonaws.com
+s3_host = "env(S3_HOST)"
+# Configures S3 bucket region, eg. us-east-1
+s3_region = "env(S3_REGION)"
+# Configures AWS_ACCESS_KEY_ID for S3 bucket
+s3_access_key = "env(S3_ACCESS_KEY)"
+# Configures AWS_SECRET_ACCESS_KEY for S3 bucket
+s3_secret_key = "env(S3_SECRET_KEY)"
diff --git a/packages/database/supabase/migrations/20250504195841_remote_schema.sql b/packages/database/supabase/migrations/20250504195841_remote_schema.sql
new file mode 100644
index 000000000..89b3d4e4c
--- /dev/null
+++ b/packages/database/supabase/migrations/20250504195841_remote_schema.sql
@@ -0,0 +1,9 @@
+create extension if not exists "pg_jsonschema" with schema "extensions"; -- JSON Schema validation for jsonb columns
+
+create extension if not exists "pg_stat_monitor" with schema "extensions"; -- query performance statistics
+
+create extension if not exists "pgroonga" with schema "extensions"; -- full-text search, used by the "Content_text" index
+
+create extension if not exists "vector" with schema "extensions"; -- pgvector, used by the embedding tables and <=> operators
+
+
diff --git a/packages/database/supabase/migrations/20250504202930_content_tables.sql b/packages/database/supabase/migrations/20250504202930_content_tables.sql
new file mode 100644
index 000000000..52f3cda7c
--- /dev/null
+++ b/packages/database/supabase/migrations/20250504202930_content_tables.sql
@@ -0,0 +1,211 @@
+
+CREATE SEQUENCE IF NOT EXISTS public.entity_id_seq -- single id keyspace shared by every entity table below
+ AS BIGINT
+ START WITH 1
+ INCREMENT BY 1
+ NO MINVALUE
+ NO MAXVALUE
+ CACHE 1;
+
+
+CREATE TYPE "EntityType" AS ENUM ('Platform', 'Space', 'Account', 'Person', 'AutomatedAgent', 'Document', 'Content', 'Concept', 'ConceptSchema', 'ContentLink', 'Occurrence'); -- kinds of entity drawing ids from entity_id_seq
+
+CREATE TYPE "Scale" AS ENUM ('document', 'post', 'chunk_unit', 'section', 'block', 'field', 'paragraph', 'quote', 'sentence', 'phrase'); -- granularity of a "Content" row
+
+CREATE TYPE "EmbeddingName" AS ENUM ('openai_text_embedding_ada2_1536', 'openai_text_embedding_3_small_512', 'openai_text_embedding_3_small_1536', 'openai_text_embedding_3_large_256', 'openai_text_embedding_3_large_1024', 'openai_text_embedding_3_large_3072'); -- embedding model names (suffix encodes vector dimension)
+
+CREATE TYPE "EpistemicStatus" AS ENUM ('certainly_not', 'strong_evidence_against', 'could_be_false', 'unknown', 'uncertain', 'contentious', 'could_be_true', 'strong_evidence_for', 'certain'); -- ordered confidence scale for a "Concept"
+
+CREATE TABLE "Agent" (
+ id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass),
+ type "EntityType" NOT NULL -- discriminator: the matching "Person"/"AutomatedAgent" row carries the detail
+);
+COMMENT ON TABLE "Agent" IS 'An agent that acts in the system';
+
+
+CREATE TABLE "Person" ( -- subtype of "Agent": shares its id, hence no DEFAULT here
+ id BIGINT NOT NULL PRIMARY KEY,
+ name VARCHAR NOT NULL,
+ orcid VARCHAR(20), -- optional ORCID researcher identifier
+ email VARCHAR NOT NULL,
+ CONSTRAINT person_id_fkey FOREIGN KEY (id)
+ REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE
+);
+COMMENT ON TABLE "Person" IS 'A person using the system';
+
+
+CREATE TABLE "AutomatedAgent" ( -- subtype of "Agent", like "Person"
+ id BIGINT NOT NULL PRIMARY KEY,
+ name VARCHAR NOT NULL,
+ metadata JSONB NOT NULL DEFAULT '{}',
+ deterministic BOOLEAN DEFAULT FALSE,
+ version VARCHAR,
+ CONSTRAINT person_id_fkey FOREIGN KEY (id) -- NOTE: copy-pasted name; renamed to automated_agent_id_fkey in 20250513173724_content_concept_key.sql (do not fix here, that migration drops it by this name)
+ REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE
+);
+COMMENT ON TABLE "AutomatedAgent" IS 'An automated agent';
+
+
+CREATE TABLE "DiscoursePlatform" ( -- renamed to "Platform" in 20250522193823_rename_discourse_space.sql
+ id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass),
+ name VARCHAR NOT NULL,
+ url VARCHAR NOT NULL
+);
+COMMENT ON TABLE "DiscoursePlatform" IS 'A data platform where discourse happens';
+
+
+CREATE TABLE "Account" (
+ id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass),
+ platform_id BIGINT NOT NULL,
+ person_id BIGINT NOT NULL,
+ write_permission BOOLEAN NOT NULL,
+ active BOOLEAN NOT NULL DEFAULT TRUE,
+ FOREIGN KEY(platform_id) REFERENCES "DiscoursePlatform" (id) ON DELETE CASCADE ON UPDATE CASCADE,
+ FOREIGN KEY(person_id) REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE -- NOTE(review): targets "Agent", not "Person" — presumably so automated agents can own accounts; confirm
+);
+COMMENT ON TABLE "Account" IS 'A user account on a discourse platform';
+
+
+CREATE TABLE "DiscourseSpace" ( -- renamed to "Space" in 20250522193823_rename_discourse_space.sql
+ id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass),
+ url VARCHAR,
+ name VARCHAR NOT NULL,
+ discourse_platform_id BIGINT NOT NULL,
+ FOREIGN KEY(discourse_platform_id) REFERENCES "DiscoursePlatform" (id) ON DELETE CASCADE ON UPDATE CASCADE
+);
+COMMENT ON TABLE "DiscourseSpace" IS 'A space on a discourse platform representing a community engaged in a conversation';
+
+
+CREATE TABLE "SpaceAccess" (
+ id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass),
+ space_id BIGINT,
+ account_id BIGINT NOT NULL,
+ editor BOOLEAN NOT NULL,
+ UNIQUE (account_id, space_id), -- at most one access entry per account per space
+ FOREIGN KEY(space_id) REFERENCES "DiscourseSpace" (id) ON DELETE CASCADE ON UPDATE CASCADE,
+ FOREIGN KEY(account_id) REFERENCES "Account" (id) ON DELETE CASCADE ON UPDATE CASCADE
+);
+COMMENT ON TABLE "SpaceAccess" IS 'An access control entry for a space';
+COMMENT ON COLUMN "SpaceAccess".space_id IS 'The space in which the content is located';
+
+
+CREATE TABLE "Document" ( -- an externally-sourced document that "Content" rows belong to
+ id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass),
+ space_id BIGINT,
+ source_local_id VARCHAR,
+ url VARCHAR,
+ last_synced TIMESTAMP WITHOUT TIME ZONE NOT NULL, -- dropped in 20250513173724 (superseded by sync_info)
+ created TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ metadata JSONB NOT NULL DEFAULT '{}',
+ last_modified TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ author_id BIGINT NOT NULL,
+ contents OID, -- large-object reference; see column comment below
+ FOREIGN KEY(space_id) REFERENCES "DiscourseSpace" (id) ON DELETE CASCADE ON UPDATE CASCADE,
+ FOREIGN KEY(author_id) REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE
+);
+COMMENT ON COLUMN "Document".space_id IS 'The space in which the content is located';
+COMMENT ON COLUMN "Document".source_local_id IS 'The unique identifier of the content in the remote source';
+COMMENT ON COLUMN "Document".last_synced IS 'The last time the content was synced with the remote source';
+COMMENT ON COLUMN "Document".created IS 'The time when the content was created in the remote source';
+COMMENT ON COLUMN "Document".last_modified IS 'The last time the content was modified in the remote source';
+COMMENT ON COLUMN "Document".author_id IS 'The author of content';
+COMMENT ON COLUMN "Document".contents IS 'A large object OID for the downloaded raw content';
+
+CREATE TABLE "Concept" (
+ id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass),
+ epistemic_status "EpistemicStatus" NOT NULL DEFAULT 'unknown',
+ name VARCHAR NOT NULL,
+ description TEXT,
+ author_id BIGINT,
+ created TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ last_modified TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ last_synced TIMESTAMP WITHOUT TIME ZONE NOT NULL, -- dropped in 20250513173724 (superseded by sync_info)
+ space_id BIGINT,
+ arity SMALLINT NOT NULL DEFAULT 0,
+ schema_id BIGINT, -- self-reference: the "Concept" row (with is_schema) this one conforms to
+ content JSONB NOT NULL DEFAULT '{}',
+ is_schema BOOLEAN NOT NULL DEFAULT FALSE, -- TRUE when this row acts as a schema for other concepts
+ FOREIGN KEY(author_id) REFERENCES "Agent" (id) ON DELETE SET NULL ON UPDATE CASCADE,
+ FOREIGN KEY(space_id) REFERENCES "DiscourseSpace" (id) ON DELETE CASCADE ON UPDATE CASCADE,
+ FOREIGN KEY(schema_id) REFERENCES "Concept" (id) ON DELETE SET NULL ON UPDATE CASCADE
+);
+CREATE INDEX "Concept_space" ON "Concept" (space_id);
+CREATE INDEX "Concept_schema" ON "Concept" (schema_id);
+CREATE INDEX "Concept_content" ON "Concept" USING GIN (content jsonb_path_ops); -- jsonb containment (@>) queries
+
+
+COMMENT ON TABLE "Concept" IS 'An abstract concept, claim or relation';
+COMMENT ON COLUMN "Concept".author_id IS 'The author of content';
+COMMENT ON COLUMN "Concept".created IS 'The time when the content was created in the remote source';
+COMMENT ON COLUMN "Concept".last_modified IS 'The last time the content was modified in the remote source';
+COMMENT ON COLUMN "Concept".last_synced IS 'The last time the content was synced with the remote source';
+COMMENT ON COLUMN "Concept".space_id IS 'The space in which the content is located';
+
+
+CREATE TABLE "Content" (
+ id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass),
+ document_id BIGINT NOT NULL,
+ source_local_id VARCHAR,
+ author_id BIGINT,
+ creator_id BIGINT,
+ created TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ text TEXT NOT NULL,
+ metadata JSONB NOT NULL DEFAULT '{}',
+ scale "Scale" NOT NULL,
+ space_id BIGINT,
+ last_modified TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ last_synced TIMESTAMP WITHOUT TIME ZONE NOT NULL, -- dropped in 20250513173724 (superseded by sync_info)
+ part_of_id BIGINT, -- self-reference: containment hierarchy of content units
+ represents_id BIGINT, -- moved to "Concept".represented_by_id in 20250513173724
+ FOREIGN KEY(document_id) REFERENCES "Document" (id) ON DELETE CASCADE ON UPDATE CASCADE,
+ FOREIGN KEY(author_id) REFERENCES "Agent" (id) ON DELETE SET NULL ON UPDATE CASCADE,
+ FOREIGN KEY(creator_id) REFERENCES "Agent" (id) ON DELETE SET NULL ON UPDATE CASCADE,
+ FOREIGN KEY(space_id) REFERENCES "DiscourseSpace" (id) ON DELETE CASCADE ON UPDATE CASCADE,
+ FOREIGN KEY(part_of_id) REFERENCES "Content" (id) ON DELETE SET NULL ON UPDATE CASCADE,
+ FOREIGN KEY(represents_id) REFERENCES "Concept" (id) ON DELETE SET NULL ON UPDATE CASCADE
+);
+
+CREATE INDEX "Content_text" ON "Content" USING pgroonga (text); -- full-text search via pgroonga extension
+CREATE INDEX "Content_space" ON "Content" (space_id);
+CREATE INDEX "Content_document" ON "Content" (document_id);
+CREATE INDEX "Content_part_of" ON "Content" (part_of_id);
+CREATE INDEX "Content_represents" ON "Content" (represents_id);
+
+COMMENT ON TABLE "Content" IS 'A unit of content';
+COMMENT ON COLUMN "Content".source_local_id IS 'The unique identifier of the content in the remote source';
+COMMENT ON COLUMN "Content".author_id IS 'The author of content';
+COMMENT ON COLUMN "Content".creator_id IS 'The creator of a logical structure, such as a content subdivision';
+COMMENT ON COLUMN "Content".created IS 'The time when the content was created in the remote source';
+COMMENT ON COLUMN "Content".space_id IS 'The space in which the content is located';
+COMMENT ON COLUMN "Content".last_modified IS 'The last time the content was modified in the remote source';
+COMMENT ON COLUMN "Content".last_synced IS 'The last time the content was synced with the remote source';
+COMMENT ON COLUMN "Content".part_of_id IS 'This content is part of a larger content unit';
+COMMENT ON COLUMN "Content".represents_id IS 'This content explicitly represents a concept';
+
+
+CREATE TABLE concept_contributors ( -- many-to-many: agents who contributed to a concept
+ concept_id BIGINT,
+ contributor_id BIGINT,
+ PRIMARY KEY (concept_id, contributor_id),
+ FOREIGN KEY(concept_id) REFERENCES "Concept" (id) ON DELETE CASCADE ON UPDATE CASCADE,
+ FOREIGN KEY(contributor_id) REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE
+);
+
+
+CREATE TABLE "ContentEmbedding_openai_text_embedding_3_small_1536" ( -- one embedding per "Content" row; table name pins the model
+ target_id BIGINT NOT NULL,
+ model "EmbeddingName" NOT NULL DEFAULT 'openai_text_embedding_3_small_1536',
+ vector extensions.vector(1536) NOT NULL, -- pgvector column, dimension fixed at 1536
+ obsolete BOOLEAN DEFAULT FALSE, -- rows with obsolete = TRUE are excluded from the similarity queries
+ PRIMARY KEY (target_id),
+ FOREIGN KEY(target_id) REFERENCES "Content" (id) ON DELETE CASCADE ON UPDATE CASCADE
+);
+
+
+CREATE TABLE content_contributors ( -- many-to-many: agents who contributed to a content unit
+ content_id BIGINT,
+ contributor_id BIGINT,
+ PRIMARY KEY (content_id, contributor_id),
+ FOREIGN KEY(content_id) REFERENCES "Content" (id) ON DELETE CASCADE ON UPDATE CASCADE,
+ FOREIGN KEY(contributor_id) REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE
+);
diff --git a/packages/database/supabase/migrations/20250506174523_content_idx_id.sql b/packages/database/supabase/migrations/20250506174523_content_idx_id.sql
new file mode 100644
index 000000000..8e9ddfc55
--- /dev/null
+++ b/packages/database/supabase/migrations/20250506174523_content_idx_id.sql
@@ -0,0 +1,2 @@
+CREATE UNIQUE INDEX "Content_space_and_id" ON "Content" (space_id, source_local_id) WHERE -- source ids are only unique within a space
+source_local_id IS NOT NULL; -- partial index: rows without a remote source id are exempt
diff --git a/packages/database/supabase/migrations/20250512142307_sync_table.sql b/packages/database/supabase/migrations/20250512142307_sync_table.sql
new file mode 100644
index 000000000..51a7b7a14
--- /dev/null
+++ b/packages/database/supabase/migrations/20250512142307_sync_table.sql
@@ -0,0 +1,107 @@
+CREATE TYPE task_status AS ENUM ('active', 'timeout', 'complete', 'failed'); -- declaration order matters: functions below compare statuses with > / >=
+
+CREATE TABLE sync_info ( -- one row per (sync_target, sync_function) task
+ id SERIAL PRIMARY KEY,
+ sync_target BIGINT,
+ sync_function VARCHAR(20),
+ status task_status DEFAULT 'active',
+ worker varchar(100) NOT NULL, -- identifier of the worker currently (or last) holding the task
+ failure_count SMALLINT DEFAULT 0, -- drives the backoff multiplier in propose_sync_task
+ last_task_start TIMESTAMP WITH TIME ZONE,
+ last_task_end TIMESTAMP WITH TIME ZONE,
+ task_times_out_at TIMESTAMP WITH TIME ZONE
+);
+
+CREATE UNIQUE INDEX sync_info_u_idx on sync_info (sync_target, sync_function); -- backs ON CONFLICT DO NOTHING in propose_sync_task
+
+CREATE OR REPLACE FUNCTION propose_sync_task(s_target BIGINT, s_function VARCHAR(20), s_worker varchar(100), timeout INTERVAL, task_interval INTERVAL)
+ RETURNS INTERVAL AS $$ -- returns NULL when the caller now owns the task, otherwise how long to wait before retrying
+DECLARE s_id INTEGER;
+DECLARE timeout_as TIMESTAMP WITH TIME ZONE; -- unused; removed in 20250517154122_plpgsql_linting.sql
+DECLARE start_time TIMESTAMP WITH TIME ZONE;
+DECLARE t_worker VARCHAR; -- selected below but never read; removed in 20250517154122_plpgsql_linting.sql
+DECLARE t_status task_status;
+DECLARE t_failure_count SMALLINT;
+DECLARE t_last_task_start TIMESTAMP WITH TIME ZONE;
+DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE;
+DECLARE t_times_out_at TIMESTAMP WITH TIME ZONE;
+DECLARE result INTERVAL = NULL;
+BEGIN
+ ASSERT timeout * 2 < task_interval;
+ ASSERT timeout >= '1s'::interval;
+ ASSERT task_interval >= '5s'::interval;
+ start_time := now();
+ INSERT INTO sync_info (sync_target, sync_function, status, worker, last_task_start, task_times_out_at)
+ VALUES (s_target, s_function, 'active', s_worker, start_time, start_time+timeout)
+ ON CONFLICT DO NOTHING
+ RETURNING id INTO s_id;
+ -- alas, RETURNING yields NULL when ON CONFLICT DO NOTHING skips the insert...
+ IF s_id IS NOT NULL THEN
+ -- brand-new row: the caller owns the task
+ RETURN NULL;
+ END IF;
+ -- now we know it pre-existed. Maybe already active.
+ SELECT id INTO STRICT s_id FROM sync_info WHERE sync_target = s_target AND sync_function = s_function;
+ PERFORM pg_advisory_lock(s_id); -- serialize the read-check-update below across workers
+ SELECT worker, status, failure_count, last_task_start, last_task_end, task_times_out_at
+ INTO t_worker, t_status, t_failure_count, t_last_task_start, t_last_task_end, t_times_out_at
+ FROM sync_info
+ WHERE id = s_id;
+
+ IF t_status = 'active' AND t_last_task_start >= coalesce(t_last_task_end, t_last_task_start) AND start_time > t_times_out_at THEN
+ t_status := 'timeout'; -- current holder overran its deadline: treat as a failure
+ t_failure_count := t_failure_count + 1;
+ END IF;
+ -- basic backoff
+ task_interval := task_interval * (1+t_failure_count);
+ IF coalesce(t_last_task_end, t_last_task_start) + task_interval < now() THEN
+ -- we are ready to take on the task
+ UPDATE sync_info
+ SET worker=s_worker, status='active', task_times_out_at = now() + timeout, last_task_start = now(), failure_count=t_failure_count
+ WHERE id=s_id;
+ ELSE
+ -- the task has been tried recently enough
+ IF t_status = 'timeout' THEN
+ UPDATE sync_info
+ SET status=t_status, failure_count=t_failure_count
+ WHERE id=s_id;
+ END IF;
+ result := coalesce(t_last_task_end, t_last_task_start) + task_interval - now();
+ END IF;
+
+ PERFORM pg_advisory_unlock(s_id);
+ RETURN result;
+END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION end_sync_task(s_target BIGINT, s_function VARCHAR(20), s_worker varchar(100), s_status task_status) RETURNS VOID AS $$ -- record the outcome of a task claimed via propose_sync_task
+DECLARE t_id INTEGER;
+DECLARE t_target varchar; -- unused; dropped in 20250517154122_plpgsql_linting.sql
+DECLARE t_worker varchar;
+DECLARE t_status task_status;
+DECLARE t_failure_count SMALLINT;
+DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE;
+BEGIN
+ SELECT id, worker, status, failure_count, last_task_end
+ INTO STRICT t_id, t_worker, t_status, t_failure_count, t_last_task_end
+ FROM sync_info WHERE sync_target = s_target AND sync_function = s_function;
+ ASSERT s_status > 'active'; -- only terminal statuses may be reported (relies on enum declaration order)
+ ASSERT t_worker = s_worker, 'Wrong worker'; -- FIX: was "Wrong worker" — a double-quoted identifier, not a string literal; the message expression errored when the assert fired
+ ASSERT s_status >= t_status, 'do not go back in status'; -- FIX: same double-quote issue
+ IF s_status = 'complete' THEN
+ t_last_task_end := now();
+ t_failure_count := 0; -- success resets the backoff
+ ELSE
+ IF t_status != s_status THEN
+ t_failure_count := t_failure_count + 1;
+ END IF;
+ END IF;
+
+ UPDATE sync_info
+ SET status = s_status,
+ task_times_out_at=null,
+ last_task_end=t_last_task_end,
+ failure_count=t_failure_count
+ WHERE id=t_id;
+END;
+$$ LANGUAGE plpgsql;
diff --git a/packages/database/supabase/migrations/20250513173724_content_concept_key.sql b/packages/database/supabase/migrations/20250513173724_content_concept_key.sql
new file mode 100644
index 000000000..2891c3ac2
--- /dev/null
+++ b/packages/database/supabase/migrations/20250513173724_content_concept_key.sql
@@ -0,0 +1,91 @@
+-- rename constraint
+
+alter table "public"."AutomatedAgent" drop constraint "person_id_fkey";
+
+alter table "public"."AutomatedAgent" add constraint "automated_agent_id_fkey" FOREIGN KEY (id) REFERENCES "Agent"(id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+-- now handled by sync_table
+
+alter table "public"."Concept" drop column "last_synced";
+
+alter table "public"."Content" drop column "last_synced";
+
+alter table "public"."Document" drop column "last_synced";
+
+-- transfer of column: move the link from Content.represents_id to Concept.represented_by_id
+
+alter table "public"."Concept" add column "represented_by_id" bigint;
+
+alter table "public"."Concept" add constraint "Concept_represented_by_id_fkey" FOREIGN KEY (represented_by_id) REFERENCES "Content"(id) ON UPDATE CASCADE ON DELETE SET NULL;
+
+CREATE UNIQUE INDEX "Concept_represented_by" ON public."Concept" (represented_by_id); -- unique: a content unit represents at most one concept
+
+-- transfer data
+
+UPDATE public."Concept" SET represented_by_id = public."Content".id
+ FROM public."Content"
+ WHERE public."Concept".id=represents_id;
+
+-- drop the Content column
+
+alter table "public"."Content" drop constraint "Content_represents_id_fkey";
+
+drop index if exists "public"."Content_represents";
+
+alter table "public"."Content" drop column "represents_id";
+
+-- Content embedding functions
+
+set check_function_bodies = off;
+
+-- strangely the check fails to interpret <=>, despite the vector extension being installed.
+
+CREATE OR REPLACE FUNCTION public.match_content_embeddings(query_embedding vector, match_threshold double precision, match_count integer, current_document_id integer DEFAULT NULL::integer) -- NOTE: current_document_id is accepted but not used here; the filter is added in 20250520132747_restrict_search_by_document.sql
+ RETURNS TABLE(content_id bigint, roam_uid text, text_content text, similarity double precision)
+ LANGUAGE sql
+ STABLE
+AS $function$
+SELECT
+ c.id AS content_id,
+ c.source_local_id AS roam_uid,
+ c.text AS text_content,
+ 1 - (ce.vector <=> query_embedding) AS similarity -- <=> is pgvector cosine distance; similarity = 1 - distance
+FROM "public"."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce
+JOIN "public"."Content" AS c ON ce.target_id = c.id
+WHERE 1 - (ce.vector <=> query_embedding) > match_threshold
+ AND ce.obsolete = FALSE
+ORDER BY
+ ce.vector <=> query_embedding ASC
+LIMIT match_count;
+$function$
+;
+
+CREATE OR REPLACE FUNCTION public.match_embeddings_for_subset_nodes(p_query_embedding vector, p_subset_roam_uids text[])
+ RETURNS TABLE(content_id bigint, roam_uid text, text_content text, similarity double precision)
+ LANGUAGE sql
+ STABLE
+AS $function$
+WITH subset_content_with_embeddings AS (
+ -- Step 1: Identify content and fetch embeddings ONLY for the nodes in the provided Roam UID subset
+ SELECT
+ c.id AS content_id,
+ c.source_local_id AS roam_uid,
+ c.text AS text_content,
+ ce.vector AS embedding_vector
+ FROM "public"."Content" AS c
+ JOIN "public"."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce ON c.id = ce.target_id
+ WHERE
+ c.source_local_id = ANY(p_subset_roam_uids) -- Filter Content by the provided Roam UIDs
+ AND ce.obsolete = FALSE
+)
+SELECT
+ ss_ce.content_id,
+ ss_ce.roam_uid,
+ ss_ce.text_content,
+ 1 - (ss_ce.embedding_vector <=> p_query_embedding) AS similarity -- cosine similarity from pgvector distance
+FROM subset_content_with_embeddings AS ss_ce
+ORDER BY similarity DESC; -- Order by calculated similarity, highest first
+$function$
+;
+
+set check_function_bodies = on;
diff --git a/packages/database/supabase/migrations/20250517154122_plpgsql_linting.sql b/packages/database/supabase/migrations/20250517154122_plpgsql_linting.sql
new file mode 100644
index 000000000..18b0b242e
--- /dev/null
+++ b/packages/database/supabase/migrations/20250517154122_plpgsql_linting.sql
@@ -0,0 +1,95 @@
+CREATE OR REPLACE FUNCTION public.end_sync_task(s_target bigint, s_function character varying, s_worker character varying, s_status task_status) -- lint pass over the 20250512 version: fixes double-quoted assert messages and drops the unused t_target
+ RETURNS void
+ LANGUAGE plpgsql
+AS $function$
+DECLARE t_id INTEGER;
+DECLARE t_worker varchar;
+DECLARE t_status task_status;
+DECLARE t_failure_count SMALLINT;
+DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE;
+BEGIN
+ SELECT id, worker, status, failure_count, last_task_end
+ INTO STRICT t_id, t_worker, t_status, t_failure_count, t_last_task_end
+ FROM sync_info WHERE sync_target = s_target AND sync_function = s_function;
+ ASSERT s_status > 'active'; -- only terminal statuses may be reported (relies on enum declaration order)
+ ASSERT t_worker = s_worker, 'Wrong worker';
+ ASSERT s_status >= t_status, 'do not go back in status';
+ IF s_status = 'complete' THEN
+ t_last_task_end := now();
+ t_failure_count := 0; -- success resets the backoff
+ ELSE
+ IF t_status != s_status THEN
+ t_failure_count := t_failure_count + 1;
+ END IF;
+ END IF;
+
+ UPDATE sync_info
+ SET status = s_status,
+ task_times_out_at=null,
+ last_task_end=t_last_task_end,
+ failure_count=t_failure_count
+ WHERE id=t_id;
+END;
+$function$
+;
+
+CREATE OR REPLACE FUNCTION public.propose_sync_task(s_target bigint, s_function character varying, s_worker character varying, timeout interval, task_interval interval) -- lint pass over the 20250512 version: unused declarations removed; returns NULL when the caller now owns the task, else how long to wait
+ RETURNS interval
+ LANGUAGE plpgsql
+AS $function$
+DECLARE s_id INTEGER;
+DECLARE start_time TIMESTAMP WITH TIME ZONE;
+DECLARE t_status task_status;
+DECLARE t_failure_count SMALLINT;
+DECLARE t_last_task_start TIMESTAMP WITH TIME ZONE;
+DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE;
+DECLARE t_times_out_at TIMESTAMP WITH TIME ZONE;
+DECLARE result INTERVAL = NULL;
+BEGIN
+ ASSERT timeout * 2 < task_interval;
+ ASSERT timeout >= '1s'::interval;
+ ASSERT task_interval >= '5s'::interval;
+ start_time := now();
+ INSERT INTO sync_info (sync_target, sync_function, status, worker, last_task_start, task_times_out_at)
+ VALUES (s_target, s_function, 'active', s_worker, start_time, start_time+timeout)
+ ON CONFLICT DO NOTHING
+ RETURNING id INTO s_id;
+ -- alas, RETURNING yields NULL when ON CONFLICT DO NOTHING skips the insert...
+ IF s_id IS NOT NULL THEN
+ -- brand-new row: the caller owns the task
+ RETURN NULL;
+ END IF;
+ -- now we know it pre-existed. Maybe already active.
+ SELECT id INTO STRICT s_id FROM sync_info WHERE sync_target = s_target AND sync_function = s_function;
+ PERFORM pg_advisory_lock(s_id); -- serialize the read-check-update below across workers
+ SELECT status, failure_count, last_task_start, last_task_end, task_times_out_at
+ INTO t_status, t_failure_count, t_last_task_start, t_last_task_end, t_times_out_at
+ FROM sync_info
+ WHERE id = s_id;
+
+ IF t_status = 'active' AND t_last_task_start >= coalesce(t_last_task_end, t_last_task_start) AND start_time > t_times_out_at THEN
+ t_status := 'timeout'; -- current holder overran its deadline: treat as a failure
+ t_failure_count := t_failure_count + 1;
+ END IF;
+ -- basic backoff
+ task_interval := task_interval * (1+t_failure_count);
+ IF coalesce(t_last_task_end, t_last_task_start) + task_interval < now() THEN
+ -- we are ready to take on the task
+ UPDATE sync_info
+ SET worker=s_worker, status='active', task_times_out_at = now() + timeout, last_task_start = now(), failure_count=t_failure_count
+ WHERE id=s_id;
+ ELSE
+ -- the task has been tried recently enough
+ IF t_status = 'timeout' THEN
+ UPDATE sync_info
+ SET status=t_status, failure_count=t_failure_count
+ WHERE id=s_id;
+ END IF;
+ result := coalesce(t_last_task_end, t_last_task_start) + task_interval - now();
+ END IF;
+
+ PERFORM pg_advisory_unlock(s_id);
+ RETURN result;
+END;
+$function$
+;
diff --git a/packages/database/supabase/migrations/20250520132747_restrict_search_by_document.sql b/packages/database/supabase/migrations/20250520132747_restrict_search_by_document.sql
new file mode 100644
index 000000000..1e9b32a95
--- /dev/null
+++ b/packages/database/supabase/migrations/20250520132747_restrict_search_by_document.sql
@@ -0,0 +1,48 @@
+CREATE OR REPLACE FUNCTION public.match_content_embeddings(query_embedding vector, match_threshold double precision, match_count integer, current_document_id integer DEFAULT NULL::integer) -- replaces the 20250513 version: current_document_id now actually restricts the search
+ RETURNS TABLE(content_id bigint, roam_uid text, text_content text, similarity double precision)
+ LANGUAGE sql
+ STABLE
+AS $function$
+SELECT
+ c.id AS content_id,
+ c.source_local_id AS roam_uid,
+ c.text AS text_content,
+ 1 - (ce.vector <=> query_embedding) AS similarity -- <=> is pgvector cosine distance
+FROM public."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce
+JOIN public."Content" AS c ON ce.target_id = c.id
+WHERE 1 - (ce.vector <=> query_embedding) > match_threshold
+ AND ce.obsolete = FALSE
+ AND (current_document_id IS NULL OR c.document_id = current_document_id) -- NULL means search across all documents
+ORDER BY
+ ce.vector <=> query_embedding ASC
+LIMIT match_count;
+$function$;
+
+-- Supabase wants to replace this function for no obvious reason. Letting it.
+
+CREATE OR REPLACE FUNCTION public.match_embeddings_for_subset_nodes(p_query_embedding vector, p_subset_roam_uids text[])
+ RETURNS TABLE(content_id bigint, roam_uid text, text_content text, similarity double precision)
+ LANGUAGE sql
+ STABLE
+AS $function$
+WITH subset_content_with_embeddings AS (
+ -- Step 1: Identify content and fetch embeddings ONLY for the nodes in the provided Roam UID subset
+ SELECT
+ c.id AS content_id,
+ c.source_local_id AS roam_uid,
+ c.text AS text_content,
+ ce.vector AS embedding_vector
+ FROM public."Content" AS c
+ JOIN public."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce ON c.id = ce.target_id
+ WHERE
+ c.source_local_id = ANY(p_subset_roam_uids) -- Filter Content by the provided Roam UIDs
+ AND ce.obsolete = FALSE
+)
+SELECT
+ ss_ce.content_id,
+ ss_ce.roam_uid,
+ ss_ce.text_content,
+ 1 - (ss_ce.embedding_vector <=> p_query_embedding) AS similarity -- cosine similarity from pgvector distance
+FROM subset_content_with_embeddings AS ss_ce
+ORDER BY similarity DESC; -- Order by calculated similarity, highest first
+$function$;
diff --git a/packages/database/supabase/migrations/20250520133551_nodes_needing_sync.sql b/packages/database/supabase/migrations/20250520133551_nodes_needing_sync.sql
new file mode 100644
index 000000000..ec23fd887
--- /dev/null
+++ b/packages/database/supabase/migrations/20250520133551_nodes_needing_sync.sql
@@ -0,0 +1,38 @@
+CREATE OR REPLACE FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb) -- input: jsonb array of {"uid": ..., "roam_edit_time": epoch ms}; returns uids that are missing here or newer in Roam
+ RETURNS TABLE(uid_to_sync text)
+ LANGUAGE plpgsql
+AS $function$
+ DECLARE
+ node_info jsonb;
+ roam_node_uid TEXT;
+ roam_node_edit_epoch_ms BIGINT;
+ content_db_last_modified_epoch_ms BIGINT;
+ BEGIN
+ FOR node_info IN SELECT * FROM jsonb_array_elements(nodes_from_roam)
+ LOOP
+ roam_node_uid := (node_info->>'uid')::text;
+ roam_node_edit_epoch_ms := (node_info->>'roam_edit_time')::bigint;
+
+ -- Get the last_modified time from your Content table for the current node, converting it to epoch milliseconds
+ -- Assumes your 'last_modified' column in 'Content' is a timestamp type
+ SELECT EXTRACT(EPOCH FROM c.last_modified) * 1000
+ INTO content_db_last_modified_epoch_ms
+ FROM public."Content" c -- NOTE(review): no space filter; source_local_id is only unique per space ("Content_space_and_id") — confirm the single-space assumption
+ WHERE c.source_local_id = roam_node_uid;
+
+ IF NOT FOUND THEN
+ -- Node does not exist in Supabase Content table, so it needs sync
+ uid_to_sync := roam_node_uid;
+ RETURN NEXT;
+ ELSE
+ -- Node exists, compare timestamps
+ IF roam_node_edit_epoch_ms > content_db_last_modified_epoch_ms THEN
+ uid_to_sync := roam_node_uid;
+ RETURN NEXT;
+ END IF;
+ END IF;
+ END LOOP;
+ RETURN;
+ END;
+ $function$
+;
diff --git a/packages/database/supabase/migrations/20250522193823_rename_discourse_space.sql b/packages/database/supabase/migrations/20250522193823_rename_discourse_space.sql
new file mode 100644
index 000000000..dcc2e7851
--- /dev/null
+++ b/packages/database/supabase/migrations/20250522193823_rename_discourse_space.sql
@@ -0,0 +1,11 @@
+ALTER TABLE public."DiscoursePlatform" RENAME TO "Platform";
+ALTER TABLE public."Platform" RENAME CONSTRAINT "DiscoursePlatform_pkey" TO "Platform_pkey";
+
+ALTER TABLE public."DiscourseSpace" RENAME TO "Space";
+ALTER TABLE public."Space" RENAME CONSTRAINT "DiscourseSpace_pkey" TO "Space_pkey";
+ALTER TABLE public."Space" RENAME COLUMN discourse_platform_id TO platform_id;
+ALTER TABLE public."Space" RENAME CONSTRAINT "DiscourseSpace_discourse_platform_id_fkey" TO "Space_platform_id_fkey"; -- was PUBLIC."Space": lowercased for consistency (unquoted identifiers are case-folded, so no behavior change)
+
+COMMENT ON TABLE public."Space" IS
+'A space on a platform representing a community engaged in a conversation';
+COMMENT ON TABLE public."Account" IS 'A user account on a platform';
diff --git a/packages/database/supabase/schemas/account.sql b/packages/database/supabase/schemas/account.sql
new file mode 100644
index 000000000..9914188d9
--- /dev/null
+++ b/packages/database/supabase/schemas/account.sql
@@ -0,0 +1,76 @@
+CREATE TABLE IF NOT EXISTS public."Account" (
+ id bigint DEFAULT nextval(
+ 'public.entity_id_seq'::regclass
+ ) NOT NULL,
+ platform_id bigint NOT NULL,
+ person_id bigint NOT NULL,
+ write_permission boolean NOT NULL,
+ active boolean DEFAULT true NOT NULL
+);
+
+ALTER TABLE public."Account" OWNER TO "postgres";
+
+COMMENT ON TABLE public."Account" IS 'A user account on a platform';
+
+
+ALTER TABLE ONLY public."Account"
+ADD CONSTRAINT "Account_person_id_fkey" FOREIGN KEY (
+ person_id
+) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+ALTER TABLE ONLY public."Account"
+ADD CONSTRAINT "Account_platform_id_fkey" FOREIGN KEY (
+ platform_id
+) REFERENCES public."Platform" (
+ id
+) ON UPDATE CASCADE ON DELETE CASCADE;
+
+ALTER TABLE ONLY public."Account"
+ADD CONSTRAINT "Account_pkey" PRIMARY KEY (id);
+
+
+CREATE TABLE IF NOT EXISTS public."SpaceAccess" (
+ id bigint DEFAULT nextval(
+ 'public.entity_id_seq'::regclass
+ ) NOT NULL,
+ space_id bigint,
+ account_id bigint NOT NULL,
+ editor boolean NOT NULL
+);
+
+ALTER TABLE ONLY public."SpaceAccess"
+ADD CONSTRAINT "SpaceAccess_account_id_space_id_key" UNIQUE (
+ account_id, space_id
+);
+
+ALTER TABLE ONLY public."SpaceAccess"
+ADD CONSTRAINT "SpaceAccess_pkey" PRIMARY KEY (id);
+
+
+ALTER TABLE public."SpaceAccess" OWNER TO "postgres";
+
+COMMENT ON TABLE public."SpaceAccess" IS 'An access control entry for a space';
+
+COMMENT ON COLUMN public."SpaceAccess".space_id IS 'The space in which the content is located';
+
+
+ALTER TABLE ONLY public."SpaceAccess"
+ADD CONSTRAINT "SpaceAccess_account_id_fkey" FOREIGN KEY (
+ account_id
+) REFERENCES public."Account" (id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+ALTER TABLE ONLY public."SpaceAccess"
+ADD CONSTRAINT "SpaceAccess_space_id_fkey" FOREIGN KEY (
+ space_id
+) REFERENCES public."Space" (
+ id
+) ON UPDATE CASCADE ON DELETE CASCADE;
+
+GRANT ALL ON TABLE public."SpaceAccess" TO anon;
+GRANT ALL ON TABLE public."SpaceAccess" TO authenticated;
+GRANT ALL ON TABLE public."SpaceAccess" TO service_role;
+
+
+GRANT ALL ON TABLE public."Account" TO anon;
+GRANT ALL ON TABLE public."Account" TO authenticated;
+GRANT ALL ON TABLE public."Account" TO service_role;
diff --git a/packages/database/supabase/schemas/agent.sql b/packages/database/supabase/schemas/agent.sql
new file mode 100644
index 000000000..ecbf3f511
--- /dev/null
+++ b/packages/database/supabase/schemas/agent.sql
@@ -0,0 +1,68 @@
+CREATE TABLE IF NOT EXISTS public."Agent" (
+ id bigint DEFAULT nextval(
+ 'public.entity_id_seq'::regclass
+ ) NOT NULL,
+ type public."EntityType" NOT NULL
+);
+
+
+ALTER TABLE ONLY public."Agent"
+ADD CONSTRAINT "Agent_pkey" PRIMARY KEY (id);
+
+ALTER TABLE public."Agent" OWNER TO "postgres";
+
+COMMENT ON TABLE public."Agent" IS 'An agent that acts in the system';
+
+CREATE TABLE IF NOT EXISTS public."AutomatedAgent" (
+ id bigint NOT NULL,
+ name character varying NOT NULL,
+ metadata jsonb DEFAULT '{}'::jsonb NOT NULL,
+ deterministic boolean DEFAULT false,
+ version character varying
+);
+
+ALTER TABLE ONLY public."AutomatedAgent"
+ADD CONSTRAINT "AutomatedAgent_pkey" PRIMARY KEY (id);
+
+ALTER TABLE ONLY public."AutomatedAgent"
+ADD CONSTRAINT automated_agent_id_fkey FOREIGN KEY (
+ id
+) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+
+ALTER TABLE public."AutomatedAgent" OWNER TO "postgres";
+
+COMMENT ON TABLE public."AutomatedAgent" IS 'An automated agent';
+
+CREATE TABLE IF NOT EXISTS public."Person" (
+ id bigint NOT NULL,
+ name character varying NOT NULL,
+ orcid character varying(20),
+ email character varying NOT NULL
+);
+
+ALTER TABLE ONLY public."Person"
+ADD CONSTRAINT "Person_pkey" PRIMARY KEY (id);
+
+ALTER TABLE ONLY public."Person"
+ADD CONSTRAINT person_id_fkey FOREIGN KEY (
+ id
+) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+
+ALTER TABLE public."Person" OWNER TO "postgres";
+
+COMMENT ON TABLE public."Person" IS 'A person using the system';
+
+
+GRANT ALL ON TABLE public."Agent" TO anon;
+GRANT ALL ON TABLE public."Agent" TO authenticated;
+GRANT ALL ON TABLE public."Agent" TO service_role;
+
+GRANT ALL ON TABLE public."AutomatedAgent" TO anon;
+GRANT ALL ON TABLE public."AutomatedAgent" TO authenticated;
+GRANT ALL ON TABLE public."AutomatedAgent" TO service_role;
+
+GRANT ALL ON TABLE public."Person" TO anon;
+GRANT ALL ON TABLE public."Person" TO authenticated;
+GRANT ALL ON TABLE public."Person" TO service_role;
diff --git a/packages/database/supabase/schemas/base.sql b/packages/database/supabase/schemas/base.sql
new file mode 100644
index 000000000..64947f9e2
--- /dev/null
+++ b/packages/database/supabase/schemas/base.sql
@@ -0,0 +1,65 @@
+SET statement_timeout = 0;
+SET lock_timeout = 0;
+SET idle_in_transaction_session_timeout = 0;
+SET client_encoding = 'UTF8';
+SET standard_conforming_strings = on;
+SELECT pg_catalog.set_config('search_path', '', false);
+SET check_function_bodies = true;
+SET xmloption = content;
+SET client_min_messages = warning;
+SET row_security = on;
+SET default_tablespace = '';
+SET default_table_access_method = heap;
+
+COMMENT ON SCHEMA public IS 'standard public schema';
+
+
+ALTER PUBLICATION supabase_realtime OWNER TO postgres;
+
+GRANT USAGE ON SCHEMA public TO postgres;
+GRANT USAGE ON SCHEMA public TO anon;
+GRANT USAGE ON SCHEMA public TO authenticated;
+GRANT USAGE ON SCHEMA public TO service_role;
+
+CREATE TYPE public."EntityType" AS ENUM (
+ 'Platform',
+ 'Space',
+ 'Account',
+ 'Person',
+ 'AutomatedAgent',
+ 'Document',
+ 'Content',
+ 'Concept',
+ 'ConceptSchema',
+ 'ContentLink',
+ 'Occurrence'
+);
+
+ALTER TYPE public."EntityType" OWNER TO postgres;
+
+CREATE SEQUENCE IF NOT EXISTS public.entity_id_seq
+START WITH 1
+INCREMENT BY 1
+NO MINVALUE
+NO MAXVALUE
+CACHE 1;
+
+ALTER SEQUENCE public.entity_id_seq OWNER TO "postgres";
+
+
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON SEQUENCES TO postgres;
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON SEQUENCES TO anon;
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON SEQUENCES TO authenticated;
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON SEQUENCES TO service_role;
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON FUNCTIONS TO postgres;
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON FUNCTIONS TO anon;
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON FUNCTIONS TO authenticated;
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON FUNCTIONS TO service_role;
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON TABLES TO postgres;
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON TABLES TO anon;
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON TABLES TO authenticated;
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON TABLES TO service_role;
+
+GRANT ALL ON SEQUENCE public.entity_id_seq TO anon;
+GRANT ALL ON SEQUENCE public.entity_id_seq TO authenticated;
+GRANT ALL ON SEQUENCE public.entity_id_seq TO service_role;
diff --git a/packages/database/supabase/schemas/concept.sql b/packages/database/supabase/schemas/concept.sql
new file mode 100644
index 000000000..dbbc3686e
--- /dev/null
+++ b/packages/database/supabase/schemas/concept.sql
@@ -0,0 +1,88 @@
+CREATE TYPE public."EpistemicStatus" AS ENUM (
+ 'certainly_not',
+ 'strong_evidence_against',
+ 'could_be_false',
+ 'unknown',
+ 'uncertain',
+ 'contentious',
+ 'could_be_true',
+ 'strong_evidence_for',
+ 'certain'
+);
+
+ALTER TYPE public."EpistemicStatus" OWNER TO postgres;
+
+
+CREATE TABLE IF NOT EXISTS public."Concept" (
+ id bigint DEFAULT nextval(
+ 'public.entity_id_seq'::regclass
+ ) NOT NULL,
+ epistemic_status public."EpistemicStatus" DEFAULT 'unknown'::public."EpistemicStatus" NOT NULL,
+ name character varying NOT NULL,
+ description text,
+ author_id bigint,
+ created timestamp without time zone NOT NULL,
+ last_modified timestamp without time zone NOT NULL,
+ space_id bigint,
+ arity smallint DEFAULT 0 NOT NULL,
+ schema_id bigint,
+ content jsonb DEFAULT '{}'::jsonb NOT NULL,
+ is_schema boolean DEFAULT false NOT NULL,
+ represented_by_id bigint
+);
+
+ALTER TABLE public."Concept" OWNER TO "postgres";
+
+COMMENT ON TABLE public."Concept" IS 'An abstract concept, claim or relation';
+
+COMMENT ON COLUMN public."Concept".author_id IS 'The author of content';
+
+COMMENT ON COLUMN public."Concept".created IS 'The time when the content was created in the remote source';
+
+COMMENT ON COLUMN public."Concept".last_modified IS 'The last time the content was modified in the remote source';
+
+COMMENT ON COLUMN public."Concept".space_id IS 'The space in which the content is located';
+
+
+ALTER TABLE ONLY public."Concept"
+ADD CONSTRAINT "Concept_pkey" PRIMARY KEY (id);
+
+ALTER TABLE ONLY public."Concept"
+ADD CONSTRAINT "Concept_represented_by_id_fkey" FOREIGN KEY (
+    represented_by_id
+) REFERENCES public."Content" (id) ON DELETE SET NULL ON UPDATE CASCADE;
+
+CREATE INDEX "Concept_content" ON public."Concept" USING gin (
+ content jsonb_path_ops
+);
+
+CREATE INDEX "Concept_schema" ON public."Concept" USING btree (schema_id);
+
+CREATE INDEX "Concept_space" ON public."Concept" USING btree (space_id);
+
+CREATE UNIQUE INDEX "Concept_represented_by" ON public."Concept" (
+ represented_by_id
+);
+
+
+ALTER TABLE ONLY public."Concept"
+ADD CONSTRAINT "Concept_author_id_fkey" FOREIGN KEY (
+ author_id
+) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE SET NULL;
+
+ALTER TABLE ONLY public."Concept"
+ADD CONSTRAINT "Concept_schema_id_fkey" FOREIGN KEY (
+ schema_id
+) REFERENCES public."Concept" (id) ON UPDATE CASCADE ON DELETE SET NULL;
+
+ALTER TABLE ONLY public."Concept"
+ADD CONSTRAINT "Concept_space_id_fkey" FOREIGN KEY (
+ space_id
+) REFERENCES public."Space" (
+ id
+) ON UPDATE CASCADE ON DELETE CASCADE;
+
+
+GRANT ALL ON TABLE public."Concept" TO anon;
+GRANT ALL ON TABLE public."Concept" TO authenticated;
+GRANT ALL ON TABLE public."Concept" TO service_role;
diff --git a/packages/database/supabase/schemas/content.sql b/packages/database/supabase/schemas/content.sql
new file mode 100644
index 000000000..13478f5f8
--- /dev/null
+++ b/packages/database/supabase/schemas/content.sql
@@ -0,0 +1,148 @@
+CREATE TYPE public."Scale" AS ENUM (
+ 'document',
+ 'post',
+ 'chunk_unit',
+ 'section',
+ 'block',
+ 'field',
+ 'paragraph',
+ 'quote',
+ 'sentence',
+ 'phrase'
+);
+
+ALTER TYPE public."Scale" OWNER TO postgres;
+
+CREATE TABLE IF NOT EXISTS public."Document" (
+ id bigint DEFAULT nextval(
+ 'public.entity_id_seq'::regclass
+ ) NOT NULL,
+ space_id bigint,
+ source_local_id character varying,
+ url character varying,
+ "created" timestamp without time zone NOT NULL,
+ metadata jsonb DEFAULT '{}'::jsonb NOT NULL,
+ last_modified timestamp without time zone NOT NULL,
+ author_id bigint NOT NULL,
+ contents oid
+);
+
+ALTER TABLE ONLY public."Document"
+ADD CONSTRAINT "Document_pkey" PRIMARY KEY (id);
+
+ALTER TABLE ONLY public."Document"
+ADD CONSTRAINT "Document_author_id_fkey" FOREIGN KEY (
+ author_id
+) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+ALTER TABLE ONLY public."Document"
+ADD CONSTRAINT "Document_space_id_fkey" FOREIGN KEY (
+ space_id
+) REFERENCES public."Space" (
+ id
+) ON UPDATE CASCADE ON DELETE CASCADE;
+
+ALTER TABLE public."Document" OWNER TO "postgres";
+
+COMMENT ON COLUMN public."Document".space_id IS 'The space in which the content is located';
+
+COMMENT ON COLUMN public."Document".source_local_id IS 'The unique identifier of the content in the remote source';
+
+COMMENT ON COLUMN public."Document".created IS 'The time when the content was created in the remote source';
+
+COMMENT ON COLUMN public."Document".last_modified IS 'The last time the content was modified in the remote source';
+
+COMMENT ON COLUMN public."Document".author_id IS 'The author of content';
+
+COMMENT ON COLUMN public."Document".contents IS 'A large object OID for the downloaded raw content';
+
+
+CREATE TABLE IF NOT EXISTS public."Content" (
+ id bigint DEFAULT nextval(
+ 'public.entity_id_seq'::regclass
+ ) NOT NULL,
+ document_id bigint NOT NULL,
+ source_local_id character varying,
+ author_id bigint,
+ creator_id bigint,
+ created timestamp without time zone NOT NULL,
+ text text NOT NULL,
+ metadata jsonb DEFAULT '{}'::jsonb NOT NULL,
+ scale public."Scale" NOT NULL,
+ space_id bigint,
+ last_modified timestamp without time zone NOT NULL,
+ part_of_id bigint
+);
+
+ALTER TABLE ONLY public."Content"
+ADD CONSTRAINT "Content_pkey" PRIMARY KEY (id);
+
+ALTER TABLE ONLY public."Content"
+ADD CONSTRAINT "Content_author_id_fkey" FOREIGN KEY (
+ author_id
+) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE SET NULL;
+
+ALTER TABLE ONLY public."Content"
+ADD CONSTRAINT "Content_creator_id_fkey" FOREIGN KEY (
+ creator_id
+) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE SET NULL;
+
+ALTER TABLE ONLY public."Content"
+ADD CONSTRAINT "Content_document_id_fkey" FOREIGN KEY (
+ document_id
+) REFERENCES public."Document" (id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+ALTER TABLE ONLY public."Content"
+ADD CONSTRAINT "Content_part_of_id_fkey" FOREIGN KEY (
+ part_of_id
+) REFERENCES public."Content" (id) ON UPDATE CASCADE ON DELETE SET NULL;
+
+ALTER TABLE ONLY public."Content"
+ADD CONSTRAINT "Content_space_id_fkey" FOREIGN KEY (
+ space_id
+) REFERENCES public."Space" (
+ id
+) ON UPDATE CASCADE ON DELETE CASCADE;
+
+CREATE INDEX "Content_document" ON public."Content" USING btree (
+ document_id
+);
+
+CREATE INDEX "Content_part_of" ON public."Content" USING btree (
+ part_of_id
+);
+
+CREATE INDEX "Content_space" ON public."Content" USING btree (space_id);
+
+CREATE UNIQUE INDEX "Content_space_and_id" ON public."Content" USING btree (
+ space_id, source_local_id
+) WHERE (source_local_id IS NOT NULL);
+
+CREATE INDEX "Content_text" ON public."Content" USING pgroonga (text);
+
+ALTER TABLE public."Content" OWNER TO "postgres";
+
+COMMENT ON TABLE public."Content" IS 'A unit of content';
+
+COMMENT ON COLUMN public."Content".source_local_id IS 'The unique identifier of the content in the remote source';
+
+COMMENT ON COLUMN public."Content".author_id IS 'The author of content';
+
+COMMENT ON COLUMN public."Content".creator_id IS 'The creator of a logical structure, such as a content subdivision';
+
+COMMENT ON COLUMN public."Content".created IS 'The time when the content was created in the remote source';
+
+COMMENT ON COLUMN public."Content".space_id IS 'The space in which the content is located';
+
+COMMENT ON COLUMN public."Content".last_modified IS 'The last time the content was modified in the remote source';
+
+COMMENT ON COLUMN public."Content".part_of_id IS 'This content is part of a larger content unit';
+
+
+GRANT ALL ON TABLE public."Document" TO anon;
+GRANT ALL ON TABLE public."Document" TO authenticated;
+GRANT ALL ON TABLE public."Document" TO service_role;
+
+GRANT ALL ON TABLE public."Content" TO anon;
+GRANT ALL ON TABLE public."Content" TO authenticated;
+GRANT ALL ON TABLE public."Content" TO service_role;
diff --git a/packages/database/supabase/schemas/contributor.sql b/packages/database/supabase/schemas/contributor.sql
new file mode 100644
index 000000000..f83b9a40a
--- /dev/null
+++ b/packages/database/supabase/schemas/contributor.sql
@@ -0,0 +1,52 @@
+CREATE TABLE IF NOT EXISTS public.content_contributors (
+ content_id bigint NOT NULL,
+ contributor_id bigint NOT NULL
+);
+
+ALTER TABLE ONLY public.content_contributors
+ADD CONSTRAINT content_contributors_pkey PRIMARY KEY (
+ content_id, contributor_id
+);
+
+ALTER TABLE ONLY public.content_contributors
+ADD CONSTRAINT content_contributors_content_id_fkey FOREIGN KEY (
+ content_id
+) REFERENCES public."Content" (id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+ALTER TABLE ONLY public.content_contributors
+ADD CONSTRAINT content_contributors_contributor_id_fkey FOREIGN KEY (
+ contributor_id
+) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+ALTER TABLE public.content_contributors OWNER TO "postgres";
+
+
+CREATE TABLE IF NOT EXISTS public.concept_contributors (
+ concept_id bigint NOT NULL,
+ contributor_id bigint NOT NULL
+);
+
+ALTER TABLE public.concept_contributors OWNER TO "postgres";
+
+ALTER TABLE ONLY public.concept_contributors
+ADD CONSTRAINT concept_contributors_concept_id_fkey FOREIGN KEY (
+ concept_id
+) REFERENCES public."Concept" (id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+ALTER TABLE ONLY public.concept_contributors
+ADD CONSTRAINT concept_contributors_contributor_id_fkey FOREIGN KEY (
+ contributor_id
+) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+ALTER TABLE ONLY public.concept_contributors
+ADD CONSTRAINT concept_contributors_pkey PRIMARY KEY (
+ concept_id, contributor_id
+);
+
+GRANT ALL ON TABLE public.concept_contributors TO anon;
+GRANT ALL ON TABLE public.concept_contributors TO authenticated;
+GRANT ALL ON TABLE public.concept_contributors TO service_role;
+
+GRANT ALL ON TABLE public.content_contributors TO anon;
+GRANT ALL ON TABLE public.content_contributors TO authenticated;
+GRANT ALL ON TABLE public.content_contributors TO service_role;
diff --git a/packages/database/supabase/schemas/embedding.sql b/packages/database/supabase/schemas/embedding.sql
new file mode 100644
index 000000000..47a51c371
--- /dev/null
+++ b/packages/database/supabase/schemas/embedding.sql
@@ -0,0 +1,101 @@
+CREATE TYPE public."EmbeddingName" AS ENUM (
+ 'openai_text_embedding_ada2_1536',
+ 'openai_text_embedding_3_small_512',
+ 'openai_text_embedding_3_small_1536',
+ 'openai_text_embedding_3_large_256',
+ 'openai_text_embedding_3_large_1024',
+ 'openai_text_embedding_3_large_3072'
+);
+
+ALTER TYPE public."EmbeddingName" OWNER TO "postgres";
+
+CREATE TABLE IF NOT EXISTS public."ContentEmbedding_openai_text_embedding_3_small_1536" (
+target_id bigint NOT NULL,
+"model" public."EmbeddingName" DEFAULT 'openai_text_embedding_3_small_1536'::public."EmbeddingName" NOT NULL,
+"vector" extensions.vector (1536) NOT NULL,
+obsolete boolean DEFAULT false
+) ;
+
+ALTER TABLE public."ContentEmbedding_openai_text_embedding_3_small_1536" OWNER TO "postgres" ;
+
+ALTER TABLE ONLY public."ContentEmbedding_openai_text_embedding_3_small_1536"
+ADD CONSTRAINT "ContentEmbedding_openai_text_embedding_3_small_1536_pkey" PRIMARY KEY (target_id) ;
+
+ALTER TABLE ONLY public."ContentEmbedding_openai_text_embedding_3_small_1536"
+ADD CONSTRAINT "ContentEmbedding_openai_text_embedding_3_small_1_target_id_fkey" FOREIGN KEY (target_id) REFERENCES public."Content" (id) ON UPDATE CASCADE ON DELETE CASCADE ;
+
+GRANT ALL ON TABLE public."ContentEmbedding_openai_text_embedding_3_small_1536" TO "anon" ;
+GRANT ALL ON TABLE public."ContentEmbedding_openai_text_embedding_3_small_1536" TO "authenticated" ;
+GRANT ALL ON TABLE public."ContentEmbedding_openai_text_embedding_3_small_1536" TO "service_role" ;
+
+SET search_path TO public, extensions;
+
+CREATE OR REPLACE FUNCTION public.match_content_embeddings (
+query_embedding extensions.vector,
+match_threshold double precision,
+match_count integer,
+current_document_id integer DEFAULT NULL::integer)
+RETURNS TABLE (
+content_id bigint,
+roam_uid Text,
+text_content Text,
+similarity double precision)
+LANGUAGE sql STABLE
+AS $$
+SELECT
+ c.id AS content_id,
+ c.source_local_id AS roam_uid,
+ c.text AS text_content,
+ 1 - (ce.vector <=> query_embedding) AS similarity
+FROM public."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce
+JOIN public."Content" AS c ON ce.target_id = c.id
+WHERE 1 - (ce.vector <=> query_embedding) > match_threshold
+ AND ce.obsolete = FALSE
+ AND (current_document_id IS NULL OR c.document_id = current_document_id)
+ORDER BY
+ ce.vector <=> query_embedding ASC
+LIMIT match_count;
+$$ ;
+
+ALTER FUNCTION public.match_content_embeddings (
+query_embedding extensions.vector,
+match_threshold double precision,
+match_count integer,
+current_document_id integer) OWNER TO "postgres" ;
+
+CREATE OR REPLACE FUNCTION public.match_embeddings_for_subset_nodes (
+"p_query_embedding" extensions.vector,
+"p_subset_roam_uids" Text [])
+RETURNS TABLE (content_id bigint,
+roam_uid Text,
+text_content Text,
+similarity double precision)
+LANGUAGE sql STABLE
+AS $$
+WITH subset_content_with_embeddings AS (
+ -- Step 1: Identify content and fetch embeddings ONLY for the nodes in the provided Roam UID subset
+ SELECT
+ c.id AS content_id,
+ c.source_local_id AS roam_uid,
+ c.text AS text_content,
+ ce.vector AS embedding_vector
+ FROM public."Content" AS c
+ JOIN public."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce ON c.id = ce.target_id
+ WHERE
+ c.source_local_id = ANY(p_subset_roam_uids) -- Filter Content by the provided Roam UIDs
+ AND ce.obsolete = FALSE
+)
+SELECT
+ ss_ce.content_id,
+ ss_ce.roam_uid,
+ ss_ce.text_content,
+ 1 - (ss_ce.embedding_vector <=> p_query_embedding) AS similarity
+FROM subset_content_with_embeddings AS ss_ce
+ORDER BY similarity DESC; -- Order by calculated similarity, highest first
+$$ ;
+
+ALTER FUNCTION public.match_embeddings_for_subset_nodes (
+"p_query_embedding" extensions.vector, "p_subset_roam_uids" Text [])
+OWNER TO "postgres" ;
+
+RESET ALL;
diff --git a/packages/database/supabase/schemas/extensions.sql b/packages/database/supabase/schemas/extensions.sql
new file mode 100644
index 000000000..83ccd84bb
--- /dev/null
+++ b/packages/database/supabase/schemas/extensions.sql
@@ -0,0 +1,15 @@
+CREATE SCHEMA IF NOT EXISTS extensions;
+CREATE SCHEMA IF NOT EXISTS graphql;
+CREATE SCHEMA IF NOT EXISTS vault;
+
+CREATE EXTENSION IF NOT EXISTS pg_cron WITH SCHEMA pg_catalog;
+CREATE EXTENSION IF NOT EXISTS pgroonga WITH SCHEMA extensions;
+CREATE EXTENSION IF NOT EXISTS pg_graphql WITH SCHEMA graphql;
+CREATE EXTENSION IF NOT EXISTS pg_jsonschema WITH SCHEMA extensions;
+CREATE EXTENSION IF NOT EXISTS pg_stat_monitor WITH SCHEMA extensions;
+CREATE EXTENSION IF NOT EXISTS pg_stat_statements WITH SCHEMA extensions;
+CREATE EXTENSION IF NOT EXISTS pgcrypto WITH SCHEMA extensions;
+CREATE EXTENSION IF NOT EXISTS pgjwt WITH SCHEMA extensions;
+CREATE EXTENSION IF NOT EXISTS supabase_vault WITH SCHEMA vault;
+CREATE EXTENSION IF NOT EXISTS "uuid-ossp" WITH SCHEMA extensions;
+CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA extensions;
diff --git a/packages/database/supabase/schemas/space.sql b/packages/database/supabase/schemas/space.sql
new file mode 100644
index 000000000..122cdd023
--- /dev/null
+++ b/packages/database/supabase/schemas/space.sql
@@ -0,0 +1,47 @@
+CREATE TABLE IF NOT EXISTS public."Platform" (
+ id bigint DEFAULT nextval(
+ 'public."entity_id_seq"'::regclass
+ ) NOT NULL,
+ name character varying NOT NULL,
+ url character varying NOT NULL
+);
+
+ALTER TABLE public."Platform" OWNER TO "postgres";
+
+COMMENT ON TABLE public."Platform" IS
+'A data platform where discourse happens';
+
+CREATE TABLE IF NOT EXISTS public."Space" (
+ id bigint DEFAULT nextval(
+ 'public."entity_id_seq"'::regclass
+ ) NOT NULL,
+ url character varying,
+ name character varying NOT NULL,
+ platform_id bigint NOT NULL
+);
+
+ALTER TABLE public."Space" OWNER TO "postgres";
+
+COMMENT ON TABLE public."Space" IS
+'A space on a platform representing a community engaged in a conversation';
+
+ALTER TABLE ONLY public."Platform"
+ADD CONSTRAINT "Platform_pkey" PRIMARY KEY (id);
+
+ALTER TABLE ONLY public."Space"
+ADD CONSTRAINT "Space_pkey" PRIMARY KEY (id);
+
+ALTER TABLE ONLY public."Space"
+ADD CONSTRAINT "Space_platform_id_fkey" FOREIGN KEY (
+ platform_id
+) REFERENCES public."Platform" (
+ id
+) ON UPDATE CASCADE ON DELETE CASCADE;
+
+GRANT ALL ON TABLE public."Platform" TO anon;
+GRANT ALL ON TABLE public."Platform" TO authenticated;
+GRANT ALL ON TABLE public."Platform" TO service_role;
+
+GRANT ALL ON TABLE public."Space" TO anon;
+GRANT ALL ON TABLE public."Space" TO authenticated;
+GRANT ALL ON TABLE public."Space" TO service_role;
diff --git a/packages/database/supabase/schemas/sync.sql b/packages/database/supabase/schemas/sync.sql
new file mode 100644
index 000000000..f5bbba07a
--- /dev/null
+++ b/packages/database/supabase/schemas/sync.sql
@@ -0,0 +1,261 @@
+CREATE TYPE public.task_status AS ENUM (
+ 'active',
+ 'timeout',
+ 'complete',
+ 'failed'
+);
+
+ALTER TYPE public.task_status OWNER TO "postgres";
+
+CREATE TABLE IF NOT EXISTS public.sync_info (
+ id integer NOT NULL,
+ sync_target bigint,
+ sync_function character varying(20),
+ status public.task_status DEFAULT 'active'::public.task_status,
+ worker character varying(100) NOT NULL,
+ failure_count smallint DEFAULT 0,
+ last_task_start timestamp with time zone,
+ last_task_end timestamp with time zone,
+ task_times_out_at timestamp with time zone
+);
+
+ALTER TABLE public.sync_info OWNER TO "postgres";
+
+CREATE SEQUENCE IF NOT EXISTS public.sync_info_id_seq
+AS integer
+START WITH 1
+INCREMENT BY 1
+NO MINVALUE
+NO MAXVALUE
+CACHE 1;
+
+ALTER SEQUENCE public.sync_info_id_seq OWNER TO "postgres";
+
+ALTER SEQUENCE public.sync_info_id_seq OWNED BY public.sync_info.id;
+
+ALTER TABLE ONLY public.sync_info ALTER COLUMN id SET DEFAULT nextval(
+ 'public.sync_info_id_seq'::regclass
+);
+
+ALTER TABLE ONLY public.sync_info
+ADD CONSTRAINT sync_info_pkey PRIMARY KEY (id);
+
+CREATE UNIQUE INDEX sync_info_u_idx ON public.sync_info USING btree (
+ "sync_target", sync_function
+);
+
+SET search_path TO public, extensions;
+
+
+CREATE OR REPLACE FUNCTION public.end_sync_task(
+ s_target bigint,
+ s_function character varying,
+ s_worker character varying,
+ s_status public.task_status
+) RETURNS void
+LANGUAGE plpgsql
+AS $$
+DECLARE t_id INTEGER;
+DECLARE t_worker varchar;
+DECLARE t_status task_status;
+DECLARE t_failure_count SMALLINT;
+DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE;
+BEGIN
+ SELECT id, worker, status, failure_count, last_task_end
+ INTO STRICT t_id, t_worker, t_status, t_failure_count, t_last_task_end
+ FROM sync_info WHERE sync_target = s_target AND sync_function = s_function;
+ ASSERT s_status > 'active';
+ ASSERT t_worker = s_worker, 'Wrong worker';
+ ASSERT s_status >= t_status, 'do not go back in status';
+ IF s_status = 'complete' THEN
+ t_last_task_end := now();
+ t_failure_count := 0;
+ ELSE
+ IF t_status != s_status THEN
+ t_failure_count := t_failure_count + 1;
+ END IF;
+ END IF;
+
+ UPDATE sync_info
+ SET status = s_status,
+ task_times_out_at=null,
+ last_task_end=t_last_task_end,
+ failure_count=t_failure_count
+ WHERE id=t_id;
+END;
+$$;
+
+ALTER FUNCTION public.end_sync_task(
+ s_target bigint,
+ s_function character varying,
+ s_worker character varying,
+ s_status public.task_status
+) OWNER TO "postgres";
+
+
+CREATE OR REPLACE FUNCTION public.propose_sync_task(
+ s_target bigint,
+ s_function character varying,
+ s_worker character varying,
+ "timeout" interval,
+ "task_interval" interval
+) RETURNS interval
+LANGUAGE plpgsql
+AS $$
+DECLARE s_id INTEGER;
+DECLARE start_time TIMESTAMP WITH TIME ZONE;
+DECLARE t_status task_status;
+DECLARE t_failure_count SMALLINT;
+DECLARE t_last_task_start TIMESTAMP WITH TIME ZONE;
+DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE;
+DECLARE t_times_out_at TIMESTAMP WITH TIME ZONE;
+DECLARE result INTERVAL = NULL;
+BEGIN
+ ASSERT timeout * 2 < task_interval;
+ ASSERT timeout >= '1s'::interval;
+ ASSERT task_interval >= '5s'::interval;
+ start_time := now();
+ INSERT INTO sync_info (sync_target, sync_function, status, worker, last_task_start, task_times_out_at)
+ VALUES (s_target, s_function, 'active', s_worker, start_time, start_time+timeout)
+ ON CONFLICT DO NOTHING
+ RETURNING id INTO s_id;
+  -- NOTE: ON CONFLICT DO NOTHING makes RETURNING yield NULL when the row already exists
+ IF s_id IS NOT NULL THEN
+ -- totally new_row, I'm on the task
+ RETURN NULL;
+ END IF;
+ -- now we know it pre-existed. Maybe already active.
+ SELECT id INTO STRICT s_id FROM sync_info WHERE sync_target = s_target AND sync_function = s_function;
+ PERFORM pg_advisory_lock(s_id);
+ SELECT status, failure_count, last_task_start, last_task_end, task_times_out_at
+ INTO t_status, t_failure_count, t_last_task_start, t_last_task_end, t_times_out_at
+ FROM sync_info
+ WHERE id = s_id;
+
+ IF t_status = 'active' AND t_last_task_start >= coalesce(t_last_task_end, t_last_task_start) AND start_time > t_times_out_at THEN
+ t_status := 'timeout';
+ t_failure_count := t_failure_count + 1;
+ END IF;
+ -- basic backoff
+ task_interval := task_interval * (1+t_failure_count);
+ IF coalesce(t_last_task_end, t_last_task_start) + task_interval < now() THEN
+ -- we are ready to take on the task
+ UPDATE sync_info
+ SET worker=s_worker, status='active', task_times_out_at = now() + timeout, last_task_start = now(), failure_count=t_failure_count
+ WHERE id=s_id;
+ ELSE
+ -- the task has been tried recently enough
+ IF t_status = 'timeout' THEN
+ UPDATE sync_info
+ SET status=t_status, failure_count=t_failure_count
+ WHERE id=s_id;
+ END IF;
+ result := coalesce(t_last_task_end, t_last_task_start) + task_interval - now();
+ END IF;
+
+ PERFORM pg_advisory_unlock(s_id);
+ RETURN result;
+END;
+$$;
+
+ALTER FUNCTION public.propose_sync_task(
+ s_target bigint,
+ s_function character varying,
+ s_worker character varying,
+ "timeout" interval,
+ "task_interval" interval
+) OWNER TO "postgres";
+
+CREATE OR REPLACE FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb)
+RETURNS TABLE (uid_to_sync text)
+LANGUAGE plpgsql
+AS $function$
+ DECLARE
+ node_info jsonb;
+ roam_node_uid TEXT;
+ roam_node_edit_epoch_ms BIGINT;
+ content_db_last_modified_epoch_ms BIGINT;
+ BEGIN
+ FOR node_info IN SELECT * FROM jsonb_array_elements(nodes_from_roam)
+ LOOP
+ roam_node_uid := (node_info->>'uid')::text;
+ roam_node_edit_epoch_ms := (node_info->>'roam_edit_time')::bigint;
+
+ -- Get the last_modified time from your Content table for the current node, converting it to epoch milliseconds
+ -- Assumes your 'last_modified' column in 'Content' is a timestamp type
+ SELECT EXTRACT(EPOCH FROM c.last_modified) * 1000
+ INTO content_db_last_modified_epoch_ms
+ FROM public."Content" c -- Ensure "Content" matches your table name exactly (case-sensitive if quoted)
+ WHERE c.source_local_id = roam_node_uid;
+
+ IF NOT FOUND THEN
+ -- Node does not exist in Supabase Content table, so it needs sync
+ uid_to_sync := roam_node_uid;
+ RETURN NEXT;
+ ELSE
+ -- Node exists, compare timestamps
+ IF roam_node_edit_epoch_ms > content_db_last_modified_epoch_ms THEN
+ uid_to_sync := roam_node_uid;
+ RETURN NEXT;
+ END IF;
+ END IF;
+ END LOOP;
+ RETURN;
+ END;
+ $function$
+;
+
+GRANT ALL ON TABLE public.sync_info TO "anon";
+GRANT ALL ON TABLE public.sync_info TO "authenticated";
+GRANT ALL ON TABLE public.sync_info TO "service_role";
+
+GRANT ALL ON SEQUENCE public.sync_info_id_seq TO "anon";
+GRANT ALL ON SEQUENCE public.sync_info_id_seq TO "authenticated";
+GRANT ALL ON SEQUENCE public.sync_info_id_seq TO "service_role";
+
+GRANT ALL ON FUNCTION public.end_sync_task(
+ s_target bigint,
+ s_function character varying,
+ s_worker character varying,
+ s_status public.task_status
+) TO "anon";
+GRANT ALL ON FUNCTION public.end_sync_task(
+ s_target bigint,
+ s_function character varying,
+ s_worker character varying,
+ s_status public.task_status
+) TO "authenticated";
+GRANT ALL ON FUNCTION public.end_sync_task(
+ s_target bigint,
+ s_function character varying,
+ s_worker character varying,
+ s_status public.task_status
+) TO "service_role";
+
+GRANT ALL ON FUNCTION public.propose_sync_task(
+ s_target bigint,
+ s_function character varying,
+ s_worker character varying,
+ "timeout" interval,
+ "task_interval" interval
+) TO "anon";
+GRANT ALL ON FUNCTION public.propose_sync_task(
+ s_target bigint,
+ s_function character varying,
+ s_worker character varying,
+ "timeout" interval,
+ "task_interval" interval
+) TO "authenticated";
+GRANT ALL ON FUNCTION public.propose_sync_task(
+ s_target bigint,
+ s_function character varying,
+ s_worker character varying,
+ "timeout" interval,
+ "task_interval" interval
+) TO "service_role";
+
+GRANT ALL ON FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb) TO "anon";
+GRANT ALL ON FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb) TO "authenticated";
+GRANT ALL ON FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb) TO "service_role";
+
+RESET ALL;
diff --git a/packages/database/types.gen.ts b/packages/database/types.gen.ts
new file mode 100644
index 000000000..4c32dc33e
--- /dev/null
+++ b/packages/database/types.gen.ts
@@ -0,0 +1,797 @@
+export type Json =
+ | string
+ | number
+ | boolean
+ | null
+ | { [key: string]: Json | undefined }
+ | Json[]
+
+export type Database = {
+ public: {
+ Tables: {
+ Account: {
+ Row: {
+ active: boolean
+ id: number
+ person_id: number
+ platform_id: number
+ write_permission: boolean
+ }
+ Insert: {
+ active?: boolean
+ id?: number
+ person_id: number
+ platform_id: number
+ write_permission: boolean
+ }
+ Update: {
+ active?: boolean
+ id?: number
+ person_id?: number
+ platform_id?: number
+ write_permission?: boolean
+ }
+ Relationships: [
+ {
+ foreignKeyName: "Account_person_id_fkey"
+ columns: ["person_id"]
+ isOneToOne: false
+ referencedRelation: "Agent"
+ referencedColumns: ["id"]
+ },
+ {
+ foreignKeyName: "Account_platform_id_fkey"
+ columns: ["platform_id"]
+ isOneToOne: false
+ referencedRelation: "DiscoursePlatform"
+ referencedColumns: ["id"]
+ },
+ ]
+ }
+ Agent: {
+ Row: {
+ id: number
+ type: Database["public"]["Enums"]["EntityType"]
+ }
+ Insert: {
+ id?: number
+ type: Database["public"]["Enums"]["EntityType"]
+ }
+ Update: {
+ id?: number
+ type?: Database["public"]["Enums"]["EntityType"]
+ }
+ Relationships: []
+ }
+ AutomatedAgent: {
+ Row: {
+ deterministic: boolean | null
+ id: number
+ metadata: Json
+ name: string
+ version: string | null
+ }
+ Insert: {
+ deterministic?: boolean | null
+ id: number
+ metadata?: Json
+ name: string
+ version?: string | null
+ }
+ Update: {
+ deterministic?: boolean | null
+ id?: number
+ metadata?: Json
+ name?: string
+ version?: string | null
+ }
+ Relationships: [
+ {
+ foreignKeyName: "automated_agent_id_fkey"
+ columns: ["id"]
+ isOneToOne: true
+ referencedRelation: "Agent"
+ referencedColumns: ["id"]
+ },
+ ]
+ }
+ Concept: {
+ Row: {
+ arity: number
+ author_id: number | null
+ content: Json
+ created: string
+ description: string | null
+ epistemic_status: Database["public"]["Enums"]["EpistemicStatus"]
+ id: number
+ is_schema: boolean
+ last_modified: string
+ name: string
+ represented_by_id: number | null
+ schema_id: number | null
+ space_id: number | null
+ }
+ Insert: {
+ arity?: number
+ author_id?: number | null
+ content?: Json
+ created: string
+ description?: string | null
+ epistemic_status?: Database["public"]["Enums"]["EpistemicStatus"]
+ id?: number
+ is_schema?: boolean
+ last_modified: string
+ name: string
+ represented_by_id?: number | null
+ schema_id?: number | null
+ space_id?: number | null
+ }
+ Update: {
+ arity?: number
+ author_id?: number | null
+ content?: Json
+ created?: string
+ description?: string | null
+ epistemic_status?: Database["public"]["Enums"]["EpistemicStatus"]
+ id?: number
+ is_schema?: boolean
+ last_modified?: string
+ name?: string
+ represented_by_id?: number | null
+ schema_id?: number | null
+ space_id?: number | null
+ }
+ Relationships: [
+ {
+ foreignKeyName: "Concept_author_id_fkey"
+ columns: ["author_id"]
+ isOneToOne: false
+ referencedRelation: "Agent"
+ referencedColumns: ["id"]
+ },
+ {
+ foreignKeyName: "Concept_represented_by_id_fkey"
+ columns: ["represented_by_id"]
+ isOneToOne: false
+ referencedRelation: "Content"
+ referencedColumns: ["id"]
+ },
+ {
+ foreignKeyName: "Concept_schema_id_fkey"
+ columns: ["schema_id"]
+ isOneToOne: false
+ referencedRelation: "Concept"
+ referencedColumns: ["id"]
+ },
+ {
+ foreignKeyName: "Concept_space_id_fkey"
+ columns: ["space_id"]
+ isOneToOne: false
+ referencedRelation: "DiscourseSpace"
+ referencedColumns: ["id"]
+ },
+ ]
+ }
+ concept_contributors: {
+ Row: {
+ concept_id: number
+ contributor_id: number
+ }
+ Insert: {
+ concept_id: number
+ contributor_id: number
+ }
+ Update: {
+ concept_id?: number
+ contributor_id?: number
+ }
+ Relationships: [
+ {
+ foreignKeyName: "concept_contributors_concept_id_fkey"
+ columns: ["concept_id"]
+ isOneToOne: false
+ referencedRelation: "Concept"
+ referencedColumns: ["id"]
+ },
+ {
+ foreignKeyName: "concept_contributors_contributor_id_fkey"
+ columns: ["contributor_id"]
+ isOneToOne: false
+ referencedRelation: "Agent"
+ referencedColumns: ["id"]
+ },
+ ]
+ }
+ Content: {
+ Row: {
+ author_id: number | null
+ created: string
+ creator_id: number | null
+ document_id: number
+ id: number
+ last_modified: string
+ metadata: Json
+ part_of_id: number | null
+ scale: Database["public"]["Enums"]["Scale"]
+ source_local_id: string | null
+ space_id: number | null
+ text: string
+ }
+ Insert: {
+ author_id?: number | null
+ created: string
+ creator_id?: number | null
+ document_id: number
+ id?: number
+ last_modified: string
+ metadata?: Json
+ part_of_id?: number | null
+ scale: Database["public"]["Enums"]["Scale"]
+ source_local_id?: string | null
+ space_id?: number | null
+ text: string
+ }
+ Update: {
+ author_id?: number | null
+ created?: string
+ creator_id?: number | null
+ document_id?: number
+ id?: number
+ last_modified?: string
+ metadata?: Json
+ part_of_id?: number | null
+ scale?: Database["public"]["Enums"]["Scale"]
+ source_local_id?: string | null
+ space_id?: number | null
+ text?: string
+ }
+ Relationships: [
+ {
+ foreignKeyName: "Content_author_id_fkey"
+ columns: ["author_id"]
+ isOneToOne: false
+ referencedRelation: "Agent"
+ referencedColumns: ["id"]
+ },
+ {
+ foreignKeyName: "Content_creator_id_fkey"
+ columns: ["creator_id"]
+ isOneToOne: false
+ referencedRelation: "Agent"
+ referencedColumns: ["id"]
+ },
+ {
+ foreignKeyName: "Content_document_id_fkey"
+ columns: ["document_id"]
+ isOneToOne: false
+ referencedRelation: "Document"
+ referencedColumns: ["id"]
+ },
+ {
+ foreignKeyName: "Content_part_of_id_fkey"
+ columns: ["part_of_id"]
+ isOneToOne: false
+ referencedRelation: "Content"
+ referencedColumns: ["id"]
+ },
+ {
+ foreignKeyName: "Content_space_id_fkey"
+ columns: ["space_id"]
+ isOneToOne: false
+ referencedRelation: "DiscourseSpace"
+ referencedColumns: ["id"]
+ },
+ ]
+ }
+ content_contributors: {
+ Row: {
+ content_id: number
+ contributor_id: number
+ }
+ Insert: {
+ content_id: number
+ contributor_id: number
+ }
+ Update: {
+ content_id?: number
+ contributor_id?: number
+ }
+ Relationships: [
+ {
+ foreignKeyName: "content_contributors_content_id_fkey"
+ columns: ["content_id"]
+ isOneToOne: false
+ referencedRelation: "Content"
+ referencedColumns: ["id"]
+ },
+ {
+ foreignKeyName: "content_contributors_contributor_id_fkey"
+ columns: ["contributor_id"]
+ isOneToOne: false
+ referencedRelation: "Agent"
+ referencedColumns: ["id"]
+ },
+ ]
+ }
+ ContentEmbedding_openai_text_embedding_3_small_1536: {
+ Row: {
+ model: Database["public"]["Enums"]["EmbeddingName"]
+ obsolete: boolean | null
+ target_id: number
+ vector: string
+ }
+ Insert: {
+ model?: Database["public"]["Enums"]["EmbeddingName"]
+ obsolete?: boolean | null
+ target_id: number
+ vector: string
+ }
+ Update: {
+ model?: Database["public"]["Enums"]["EmbeddingName"]
+ obsolete?: boolean | null
+ target_id?: number
+ vector?: string
+ }
+ Relationships: [
+ {
+ foreignKeyName: "ContentEmbedding_openai_text_embedding_3_small_1_target_id_fkey"
+ columns: ["target_id"]
+ isOneToOne: true
+ referencedRelation: "Content"
+ referencedColumns: ["id"]
+ },
+ ]
+ }
+ DiscoursePlatform: {
+ Row: {
+ id: number
+ name: string
+ url: string
+ }
+ Insert: {
+ id?: number
+ name: string
+ url: string
+ }
+ Update: {
+ id?: number
+ name?: string
+ url?: string
+ }
+ Relationships: []
+ }
+ DiscourseSpace: {
+ Row: {
+ discourse_platform_id: number
+ id: number
+ name: string
+ url: string | null
+ }
+ Insert: {
+ discourse_platform_id: number
+ id?: number
+ name: string
+ url?: string | null
+ }
+ Update: {
+ discourse_platform_id?: number
+ id?: number
+ name?: string
+ url?: string | null
+ }
+ Relationships: [
+ {
+ foreignKeyName: "DiscourseSpace_discourse_platform_id_fkey"
+ columns: ["discourse_platform_id"]
+ isOneToOne: false
+ referencedRelation: "DiscoursePlatform"
+ referencedColumns: ["id"]
+ },
+ ]
+ }
+ Document: {
+ Row: {
+ author_id: number
+ contents: unknown | null
+ created: string
+ id: number
+ last_modified: string
+ metadata: Json
+ source_local_id: string | null
+ space_id: number | null
+ url: string | null
+ }
+ Insert: {
+ author_id: number
+ contents?: unknown | null
+ created: string
+ id?: number
+ last_modified: string
+ metadata?: Json
+ source_local_id?: string | null
+ space_id?: number | null
+ url?: string | null
+ }
+ Update: {
+ author_id?: number
+ contents?: unknown | null
+ created?: string
+ id?: number
+ last_modified?: string
+ metadata?: Json
+ source_local_id?: string | null
+ space_id?: number | null
+ url?: string | null
+ }
+ Relationships: [
+ {
+ foreignKeyName: "Document_author_id_fkey"
+ columns: ["author_id"]
+ isOneToOne: false
+ referencedRelation: "Agent"
+ referencedColumns: ["id"]
+ },
+ {
+ foreignKeyName: "Document_space_id_fkey"
+ columns: ["space_id"]
+ isOneToOne: false
+ referencedRelation: "DiscourseSpace"
+ referencedColumns: ["id"]
+ },
+ ]
+ }
+ Person: {
+ Row: {
+ email: string
+ id: number
+ name: string
+ orcid: string | null
+ }
+ Insert: {
+ email: string
+ id: number
+ name: string
+ orcid?: string | null
+ }
+ Update: {
+ email?: string
+ id?: number
+ name?: string
+ orcid?: string | null
+ }
+ Relationships: [
+ {
+ foreignKeyName: "person_id_fkey"
+ columns: ["id"]
+ isOneToOne: true
+ referencedRelation: "Agent"
+ referencedColumns: ["id"]
+ },
+ ]
+ }
+ SpaceAccess: {
+ Row: {
+ account_id: number
+ editor: boolean
+ id: number
+ space_id: number | null
+ }
+ Insert: {
+ account_id: number
+ editor: boolean
+ id?: number
+ space_id?: number | null
+ }
+ Update: {
+ account_id?: number
+ editor?: boolean
+ id?: number
+ space_id?: number | null
+ }
+ Relationships: [
+ {
+ foreignKeyName: "SpaceAccess_account_id_fkey"
+ columns: ["account_id"]
+ isOneToOne: false
+ referencedRelation: "Account"
+ referencedColumns: ["id"]
+ },
+ {
+ foreignKeyName: "SpaceAccess_space_id_fkey"
+ columns: ["space_id"]
+ isOneToOne: false
+ referencedRelation: "DiscourseSpace"
+ referencedColumns: ["id"]
+ },
+ ]
+ }
+ sync_info: {
+ Row: {
+ failure_count: number | null
+ id: number
+ last_task_end: string | null
+ last_task_start: string | null
+ status: Database["public"]["Enums"]["task_status"] | null
+ sync_function: string | null
+ sync_target: number | null
+ task_times_out_at: string | null
+ worker: string
+ }
+ Insert: {
+ failure_count?: number | null
+ id?: number
+ last_task_end?: string | null
+ last_task_start?: string | null
+ status?: Database["public"]["Enums"]["task_status"] | null
+ sync_function?: string | null
+ sync_target?: number | null
+ task_times_out_at?: string | null
+ worker: string
+ }
+ Update: {
+ failure_count?: number | null
+ id?: number
+ last_task_end?: string | null
+ last_task_start?: string | null
+ status?: Database["public"]["Enums"]["task_status"] | null
+ sync_function?: string | null
+ sync_target?: number | null
+ task_times_out_at?: string | null
+ worker?: string
+ }
+ Relationships: []
+ }
+ }
+ Views: {
+ [_ in never]: never
+ }
+ Functions: {
+ end_sync_task: {
+ Args: {
+ s_target: number
+ s_function: string
+ s_worker: string
+ s_status: Database["public"]["Enums"]["task_status"]
+ }
+ Returns: undefined
+ }
+ match_content_embeddings: {
+ Args: {
+ query_embedding: string
+ match_threshold: number
+ match_count: number
+ current_document_id?: number
+ }
+ Returns: {
+ content_id: number
+ roam_uid: string
+ text_content: string
+ similarity: number
+ }[]
+ }
+ match_embeddings_for_subset_nodes: {
+ Args: { p_query_embedding: string; p_subset_roam_uids: string[] }
+ Returns: {
+ content_id: number
+ roam_uid: string
+ text_content: string
+ similarity: number
+ }[]
+ }
+ propose_sync_task: {
+ Args: {
+ s_target: number
+ s_function: string
+ s_worker: string
+ timeout: unknown
+ task_interval: unknown
+ }
+ Returns: unknown
+ }
+ }
+ Enums: {
+ EmbeddingName:
+ | "openai_text_embedding_ada2_1536"
+ | "openai_text_embedding_3_small_512"
+ | "openai_text_embedding_3_small_1536"
+ | "openai_text_embedding_3_large_256"
+ | "openai_text_embedding_3_large_1024"
+ | "openai_text_embedding_3_large_3072"
+ EntityType:
+ | "Platform"
+ | "Space"
+ | "Account"
+ | "Person"
+ | "AutomatedAgent"
+ | "Document"
+ | "Content"
+ | "Concept"
+ | "ConceptSchema"
+ | "ContentLink"
+ | "Occurrence"
+ EpistemicStatus:
+ | "certainly_not"
+ | "strong_evidence_against"
+ | "could_be_false"
+ | "unknown"
+ | "uncertain"
+ | "contentious"
+ | "could_be_true"
+ | "strong_evidence_for"
+ | "certain"
+ Scale:
+ | "document"
+ | "post"
+ | "chunk_unit"
+ | "section"
+ | "block"
+ | "field"
+ | "paragraph"
+ | "quote"
+ | "sentence"
+ | "phrase"
+ task_status: "active" | "timeout" | "complete" | "failed"
+ }
+ CompositeTypes: {
+ [_ in never]: never
+ }
+ }
+}
+
+type DefaultSchema = Database[Extract<keyof Database, "public">]
+
+export type Tables<
+ DefaultSchemaTableNameOrOptions extends
+ | keyof (DefaultSchema["Tables"] & DefaultSchema["Views"])
+ | { schema: keyof Database },
+ TableName extends DefaultSchemaTableNameOrOptions extends {
+ schema: keyof Database
+ }
+ ? keyof (Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"] &
+ Database[DefaultSchemaTableNameOrOptions["schema"]]["Views"])
+ : never = never,
+> = DefaultSchemaTableNameOrOptions extends { schema: keyof Database }
+ ? (Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"] &
+ Database[DefaultSchemaTableNameOrOptions["schema"]]["Views"])[TableName] extends {
+ Row: infer R
+ }
+ ? R
+ : never
+ : DefaultSchemaTableNameOrOptions extends keyof (DefaultSchema["Tables"] &
+ DefaultSchema["Views"])
+ ? (DefaultSchema["Tables"] &
+ DefaultSchema["Views"])[DefaultSchemaTableNameOrOptions] extends {
+ Row: infer R
+ }
+ ? R
+ : never
+ : never
+
+export type TablesInsert<
+ DefaultSchemaTableNameOrOptions extends
+ | keyof DefaultSchema["Tables"]
+ | { schema: keyof Database },
+ TableName extends DefaultSchemaTableNameOrOptions extends {
+ schema: keyof Database
+ }
+ ? keyof Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"]
+ : never = never,
+> = DefaultSchemaTableNameOrOptions extends { schema: keyof Database }
+ ? Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"][TableName] extends {
+ Insert: infer I
+ }
+ ? I
+ : never
+ : DefaultSchemaTableNameOrOptions extends keyof DefaultSchema["Tables"]
+ ? DefaultSchema["Tables"][DefaultSchemaTableNameOrOptions] extends {
+ Insert: infer I
+ }
+ ? I
+ : never
+ : never
+
+export type TablesUpdate<
+ DefaultSchemaTableNameOrOptions extends
+ | keyof DefaultSchema["Tables"]
+ | { schema: keyof Database },
+ TableName extends DefaultSchemaTableNameOrOptions extends {
+ schema: keyof Database
+ }
+ ? keyof Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"]
+ : never = never,
+> = DefaultSchemaTableNameOrOptions extends { schema: keyof Database }
+ ? Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"][TableName] extends {
+ Update: infer U
+ }
+ ? U
+ : never
+ : DefaultSchemaTableNameOrOptions extends keyof DefaultSchema["Tables"]
+ ? DefaultSchema["Tables"][DefaultSchemaTableNameOrOptions] extends {
+ Update: infer U
+ }
+ ? U
+ : never
+ : never
+
+export type Enums<
+ DefaultSchemaEnumNameOrOptions extends
+ | keyof DefaultSchema["Enums"]
+ | { schema: keyof Database },
+ EnumName extends DefaultSchemaEnumNameOrOptions extends {
+ schema: keyof Database
+ }
+ ? keyof Database[DefaultSchemaEnumNameOrOptions["schema"]]["Enums"]
+ : never = never,
+> = DefaultSchemaEnumNameOrOptions extends { schema: keyof Database }
+ ? Database[DefaultSchemaEnumNameOrOptions["schema"]]["Enums"][EnumName]
+ : DefaultSchemaEnumNameOrOptions extends keyof DefaultSchema["Enums"]
+ ? DefaultSchema["Enums"][DefaultSchemaEnumNameOrOptions]
+ : never
+
+export type CompositeTypes<
+ PublicCompositeTypeNameOrOptions extends
+ | keyof DefaultSchema["CompositeTypes"]
+ | { schema: keyof Database },
+ CompositeTypeName extends PublicCompositeTypeNameOrOptions extends {
+ schema: keyof Database
+ }
+ ? keyof Database[PublicCompositeTypeNameOrOptions["schema"]]["CompositeTypes"]
+ : never = never,
+> = PublicCompositeTypeNameOrOptions extends { schema: keyof Database }
+ ? Database[PublicCompositeTypeNameOrOptions["schema"]]["CompositeTypes"][CompositeTypeName]
+ : PublicCompositeTypeNameOrOptions extends keyof DefaultSchema["CompositeTypes"]
+ ? DefaultSchema["CompositeTypes"][PublicCompositeTypeNameOrOptions]
+ : never
+
+export const Constants = {
+ public: {
+ Enums: {
+ EmbeddingName: [
+ "openai_text_embedding_ada2_1536",
+ "openai_text_embedding_3_small_512",
+ "openai_text_embedding_3_small_1536",
+ "openai_text_embedding_3_large_256",
+ "openai_text_embedding_3_large_1024",
+ "openai_text_embedding_3_large_3072",
+ ],
+ EntityType: [
+ "Platform",
+ "Space",
+ "Account",
+ "Person",
+ "AutomatedAgent",
+ "Document",
+ "Content",
+ "Concept",
+ "ConceptSchema",
+ "ContentLink",
+ "Occurrence",
+ ],
+ EpistemicStatus: [
+ "certainly_not",
+ "strong_evidence_against",
+ "could_be_false",
+ "unknown",
+ "uncertain",
+ "contentious",
+ "could_be_true",
+ "strong_evidence_for",
+ "certain",
+ ],
+ Scale: [
+ "document",
+ "post",
+ "chunk_unit",
+ "section",
+ "block",
+ "field",
+ "paragraph",
+ "quote",
+ "sentence",
+ "phrase",
+ ],
+ task_status: ["active", "timeout", "complete", "failed"],
+ },
+ },
+} as const
+
diff --git a/turbo.json b/turbo.json
index 45f57fbdd..c52dabc35 100644
--- a/turbo.json
+++ b/turbo.json
@@ -32,7 +32,10 @@
"passThroughEnv": [
"BLOB_READ_WRITE_TOKEN",
"GITHUB_REF_NAME",
- "GITHUB_HEAD_REF"
+ "GITHUB_HEAD_REF",
+ "SUPABASE_PROJECT_ID",
+ "SUPABASE_DB_PASSWORD",
+ "SUPABASE_ACCESS_TOKEN"
]
},
"publish": {