diff --git a/.github/workflows/database-deploy.yaml b/.github/workflows/database-deploy.yaml
new file mode 100644
index 000000000..e9f7faa2b
--- /dev/null
+++ b/.github/workflows/database-deploy.yaml
@@ -0,0 +1,23 @@
+name: Supabase database deploy
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    env:
+      SUPABASE_ACCESS_TOKEN: ${{ secrets.SUPABASE_ACCESS_TOKEN }}
+      SUPABASE_PROJECT_ID: ${{ secrets.SUPABASE_PROJECT_ID_PROD }}
+      SUPABASE_DB_PASSWORD: ${{ secrets.SUPABASE_DB_PASSWORD_PROD }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v3
+        with:
+          node-version: "20"
+      - run: npm ci
+      - uses: supabase/setup-cli@v1
+        with:
+          version: latest
+      - run: npx turbo deploy -F @repo/database
diff --git a/packages/database/.sqruff b/packages/database/.sqruff
new file mode 100644
index 000000000..3effbf331
--- /dev/null
+++ b/packages/database/.sqruff
@@ -0,0 +1,8 @@
+[sqruff]
+dialect = postgres
+exclude_rules = CP05,LT05
+
+[sqruff:indentation]
+indent_unit = space
+tab_space_size = 4
+indented_joins = True
diff --git a/packages/database/README.md b/packages/database/README.md
new file mode 100644
index 000000000..fad141a43
--- /dev/null
+++ b/packages/database/README.md
@@ -0,0 +1,25 @@
+This package contains the database schema for vector embeddings and concepts.
+All CLI commands below should be run in this directory (`packages/database`).
+
+1. Setup
+    1. Install [Docker](https://www.docker.com)
+    2. Install the [supabase CLI](https://supabase.com/docs/guides/local-development). (There is a Homebrew version.)
+    3. `supabase login` with your (account-specific) Supabase access token. (TODO: Create a group access token.)
+    4. `supabase link`. It will ask you for a project name; use `discourse-graphs`. (Production for now.) It will also ask you for the database password (see 1Password).
+    5. Install [sqruff](https://github.com/quarylabs/sqruff)
+2. Usage:
+    1. Use `turbo dev` (an alias for `supabase start`) before you use your local database. URLs will be printed for your local Supabase database, API endpoint, etc.
+    2. You may need to `supabase db pull` if changes are deployed while you work.
+    3. End your work session with `supabase stop` to free Docker resources.
+3. Development: We follow the supabase [Declarative Database Schema](https://supabase.com/docs/guides/local-development/declarative-database-schemas) process.
+    1. Make sure you are working on a feature branch.
+    2. Make changes to the schema by editing files in `packages/database/supabase/schemas`
+    3. If you created a new schema file, make sure to add it to `[db.migrations] schema_paths` in `packages/database/supabase/config.toml`. Schema files are applied in that order, so you may need to be strategic about where you place yours.
+    4. `turbo build`, which will do the following:
+        1. Check your logic with `sqruff lint supabase/schemas`, and, if needed, `sqruff fix supabase/schemas`
+        2. Regenerate the types file with `supabase gen types typescript --local > types.gen.ts`
+        3. Check whether there is a migration to apply with `supabase db diff`
+    5. If applying the new schema fails, repeat step 4
+    6. If you are satisfied with the migration, create a migration file with `npm run dbdiff:save some_meaningful_migration_name`
+        1. If all goes well, there should be a new file named `supabase/migrations/2..._some_meaningful_migration_name.sql`, which you should `git add`.
+    7. You can start using your changes again with `turbo dev`.
diff --git a/packages/database/example.md b/packages/database/example.md
new file mode 100644
index 000000000..f7a4166e4
--- /dev/null
+++ b/packages/database/example.md
@@ -0,0 +1,76 @@
+# example...
+
+Content:
+
+* (nt1pgid) discourse-graphs/nodes/Claim
+* (nt2pgid) discourse-graphs/nodes/Hypothesis
+* (dgpgid) roam/js/discourse-graph
+    * (et1bkid) Opposes
+        * (et1r1bkid) source
+        * (et1r2bkid) destination
+        * (anyid1) If
+            * (et1sr1bkid) Page
+            * (et1sr2bkid) Block
+            * (et1sr3bkid) ParentPage
+            * (et1sr4bkid) PBlock
+            * (et1sr5bkid) SPage
+            * (et1sr6bkid) SBlock
+* (hyp1pgid) [HYP] Some hypothesis
+* (clm1pgid) [CLM] Some claim
+* (somepgid) Some page
+    * (hyp1refbkid) a block referring to [[HYP] Some hypothesis]
+    * (opp1bkid) OpposedBy
+    * (clm1refbkid) a block referring to [[CLM] Some claim]
+
+Documents:
+
+| id | source_local_id |
+|----|-----------------|
+| 1 | nt1pgid |
+| 2 | nt2pgid |
+| 3 | dgpgid |
+| 22 | hyp1pgid |
+| 23 | clm1pgid |
+| 4 | somepgid |
+
+Content:
+
+| id | source_local_id | page_id | scale | represents_id | text |
+|----|-------------|-------------|----------|---------------|----------------------------------------------|
+| 5 | nt1pgid | 1 | document | 16 | discourse-graphs/nodes/Claim |
+| 6 | nt2pgid | 2 | document | 17 | discourse-graphs/nodes/Hypothesis |
+| 7 | et1bkid | 3 | document | 18 | discourse-graphs/edges/OpposedBy |
+| 8 | somepgid | 4 | document | | Some page |
+| 24 | hyp1pgid | 22 | document | 20 | [HYP] Some hypothesis |
+| 25 | clm1pgid | 23 | document | 19 | [CLM] Some claim |
+| 9 | hyp1refbkid | 4 | block | | a block referring to [[HYP] Some hypothesis] |
+| 10 | opp1bkid | 4 | block | 21 | OpposedBy |
+| 11 | clm1refbkid | 4 | block | | a block referring to [[CLM] Some claim] |
+| 13 | et1r1bkid | 3 | block | | source |
+| 14 | et1r2bkid | 3 | block | | destination |
+
+Concept:
+
+| id | is_schema | arity | schema | name | content |
+|----|-----------|-------|--------|-----------------------|-----------|
+| 16 | true | 0 | | Claim | {} |
+| 17 | true | 0 | | Hypothesis | {} |
+| 18 | true | 2 | | Opposed-by | { "roles": ["source", "destination"], "representation": ["source", "sourceref", "destination", "destinationref", "predicate"] } |
+| 19 | false | 0 | 16 | [CLM] Some claim | {} |
+| 20 | false | 0 | 17 | [HYP] Some hypothesis | {} |
+| 21 | false | 2 | 18 | OpposedBy | { "concepts": {"source": 19, "destination": 20}, "occurrences": [{"sourceref": 11, "destinationref": 9, "source": 25, "destination": 24, "predicate": 10 }] } |
+
+Note: Open question whether the occurrence structure matters, and whether it should be materialized in another table.
+(I would tend to say yes to both.)
+
+ContentLink:
+
+| source | destination |
+|--------|-------------|
+| 9 | 24 |
+| 11 | 25 |
+
+Note: I would probably create a sub-Content for the link text and use it as the source, OR use a char_start and char_end.
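Sketch: while the occurrence structure lives in `Concept.content`, the `Concept_content` GIN index added in the migrations below uses `jsonb_path_ops`, which serves exactly this kind of `@>` containment test. Assuming the JSONB layout of concept 21 above (this is an illustration, not a committed access path):

```sql
-- Hypothetical lookup: relation instances whose occurrence structure
-- uses concept 19 as their source, per the layout of concept 21 above.
SELECT id, name
FROM "Concept"
WHERE is_schema = FALSE
  AND content @> '{"concepts": {"source": 19}}';
```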
+
+Missing: Ontology
diff --git a/packages/database/package.json b/packages/database/package.json
new file mode 100644
index 000000000..1c857ebf6
--- /dev/null
+++ b/packages/database/package.json
@@ -0,0 +1,29 @@
+{
+  "name": "@repo/database",
+  "version": "0.0.0",
+  "private": true,
+  "license": "Apache-2.0",
+  "type": "module",
+  "exports": {
+    "./types.gen.ts": "./types.gen.ts"
+  },
+  "scripts": {
+    "init": "supabase login",
+    "dev": "supabase start",
+    "stop": "supabase stop",
+    "build": "npm run lint && npm run gentypes:local && cp ./types.gen.ts ../../apps/website/app/utils/supabase && npm run dbdiff",
+    "lint": "tsx scripts/lint.ts",
+    "lint:fix": "tsx scripts/lint.ts -f",
+    "gentypes:local": "supabase start && supabase gen types typescript --local --schema public > types.gen.ts",
+    "gentypes:production": "supabase start && supabase gen types typescript --project-id \"$SUPABASE_PROJECT_ID\" --schema public > types.gen.ts",
+    "dbdiff": "supabase stop && supabase db diff",
+    "dbdiff:save": "supabase stop && supabase db diff -f",
+    "deploy": "tsx scripts/deploy.ts",
+    "deploy:functions": "tsx scripts/deploy.ts -f"
+  },
+  "devDependencies": {
+    "dotenv": "^16.4.5",
+    "supabase": "^2.22.12",
+    "tsx": "^4.19.2"
+  },
+  "dependencies": {}
+}
diff --git a/packages/database/schema.puml b/packages/database/schema.puml
new file mode 100644
index 000000000..914fc6e49
--- /dev/null
+++ b/packages/database/schema.puml
@@ -0,0 +1,110 @@
+@startuml
+skinparam nodesep 10
+hide circle
+hide empty members
+class "SpaceAccess" [[{An access control entry for a space}]] {
+    {field} editor : boolean
+}
+class "Account" [[{A user account on a platform}]] {
+    {field} id : integer
+    {field} write_permission : boolean
+    {field} active : boolean
+}
+class "Space" [[{A space on a platform representing a community engaged in a conversation}]] {
+    {field} id : integer
+    {field} url : string
+    {field} name : string
+}
+"SpaceAccess" --> "1" "Account" : "account"
+"SpaceAccess" --> "0..1" "Space" : "space"
+class "Platform" [[{A data platform where discourse happens}]] {
+    {field} id : integer
+    {field} name : string
+    {field} url : string
+}
+class "Content" [[{A unit of content}]] {
+    {field} id : integer
+    {field} source_local_id : string
+    {field} created : datetime
+    {field} text : string
+    {field} metadata : JSON
+    {field} scale : Scale
+    {field} last_modified : datetime
+}
+class "Document" [[{None}]] {
+    {field} id : integer
+    {field} source_local_id : string
+    {field} url : string
+    {field} created : datetime
+    {field} metadata : JSON
+    {field} last_modified : datetime
+    {field} contents : blob
+}
+class "Concept" [[{An abstract concept, claim or relation}]] {
+    {field} id : integer
+    {field} epistemic_status : EpistemicStatus
+    {field} name : string
+    {field} description : string
+    {field} created : datetime
+    {field} last_modified : datetime
+    {field} arity : integer
+    {field} content : JSON
+    {field} is_schema : boolean
+}
+"Space" --> "1" "Platform" : "platform"
+"Content" --> "0..1" "Space" : "space"
+"Document" --> "0..1" "Space" : "space"
+"Concept" --> "0..1" "Space" : "space"
+"Account" --> "1" "Platform" : "platform"
+abstract "Agent" [[{An agent that acts in the system}]] {
+    {field} id : integer
+    {field} type : EntityType
+}
+"Document" --> "0..*" "Agent" : "contributors"
+"Document" --> "1" "Agent" : "author"
+"Content" --> "1" "Document" : "document"
+class "ContentEmbedding" [[{None}]] {
+    {field} model : EmbeddingName
+    {field} vector : vector
+    {field} obsolete : boolean
+}
+"ContentEmbedding" --> "1" "Content" : "target"
"target" +"Content" --> "0..1" "Content" : "part_of" +"Content" --> "0..*" "Agent" : "contributors" +"Content" --> "1" "Agent" : "creator" +"Content" --> "1" "Agent" : "author" +"Concept" --> "0..1" "Content" : "represented_by" +class "ConceptSchema" [[{None}]] { + {field} id(i) : integer + {field} epistemic_status(i) : EpistemicStatus + {field} name(i) : string + {field} description(i) : string + {field} created(i) : datetime + {field} last_modified(i) : datetime + {field} arity(i) : integer + {field} content(i) : JSON + {field} is_schema(i) : boolean +} +"Concept" --> "1" "ConceptSchema" : "schema" +"Concept" --> "0..*" "Agent" : "contributors" +"Concept" --> "1" "Agent" : "author" +"Concept" ^-- "ConceptSchema" +class "Person" [[{A person using the system}]] { + {field} name : string + {field} orcid : string + {field} email : string + {field} id(i) : integer + {field} type(i) : EntityType +} +class "AutomatedAgent" [[{An automated agent}]] { + {field} metadata : JSON + {field} name : string + {field} deterministic : boolean + {field} version : string + {field} id(i) : integer + {field} type(i) : EntityType +} +"Account" --> "1" "Agent" : "person" +"Agent" ^-- "Person" +"Agent" ^-- "AutomatedAgent" +@enduml diff --git a/packages/database/schema.svg b/packages/database/schema.svg new file mode 100644 index 000000000..ab1d233e5 --- /dev/null +++ b/packages/database/schema.svg @@ -0,0 +1 @@ +SpaceAccesseditor : booleanAccountid : integerwrite_permission : booleanactive : booleanSpaceid : integerurl : stringname : stringPlatformid : integername : stringurl : stringContentid : integersource_local_id : stringcreated : datetimetext : stringmetadata : JSONscale : Scalelast_modified : datetimeDocumentid : integersource_local_id : stringurl : stringcreated : datetimemetadata : JSONlast_modified : datetimecontents : blobConceptid : integerepistemic_status : EpistemicStatusname : stringdescription : stringcreated : datetimelast_modified : datetimearity : integercontent : JSONis_schema : booleanAgentid : integertype : EntityTypeContentEmbeddingmodel : EmbeddingNamevector : vectorobsolete : booleanConceptSchemaid(i) : integerepistemic_status(i) : EpistemicStatusname(i) : stringdescription(i) : stringcreated(i) : datetimelast_modified(i) : datetimearity(i) : integercontent(i) : JSONis_schema(i) : booleanPersonname : stringorcid : stringemail : stringid(i) : integertype(i) : EntityTypeAutomatedAgentmetadata : JSONname : stringdeterministic : booleanversion : stringid(i) : integertype(i) : EntityTypeaccount1space0..1platform1space0..1space0..1space0..1platform1contributors0..*author1document1target1part_of0..1contributors0..*creator1author1represented_by0..1schema1contributors0..*author1person1 \ No newline at end of file diff --git a/packages/database/schema.yaml b/packages/database/schema.yaml new file mode 100644 index 000000000..363f9ff8b --- /dev/null +++ b/packages/database/schema.yaml @@ -0,0 +1,417 @@ +id: https://discoursegraphs.com/schemas/v0# +name: discoursegraphs +prefixes: + linkml: https://w3id.org/linkml/ + dg: https://discoursegraphs.com/schemas/v0# +default_prefix: dg +default_range: string +imports: + - linkml:types +enums: + Scale: + description: scale value of a Content + permissible_values: + document: + post: + chunk_unit: + section: + block: + description: A block of content in an outline system, such as a Roam node + field: + paragraph: + quote: + sentence: + phrase: + Validation: + description: Whether a given value was given by a person, or suggested by an automated agent (and 
+    permissible_values:
+      infirmed:
+        description: Infirmed by a trusted agent
+      suggested:
+        description: Suggested by a fallible agent (probabilistic, heuristic, person in training...)
+      confirmed:
+        description: Confirmed by a trusted agent
+      intrinsic:
+        description: No validation needed, this is intrinsic to the data
+  EpistemicStatus:
+    description: The epistemic status of a claim
+    permissible_values:
+      certainly_not:
+      strong_evidence_against:
+      could_be_false:
+      unknown:
+      uncertain:
+      contentious:
+      could_be_true:
+      strong_evidence_for:
+      certain:
+  EntityType:
+    description: The type of an entity
+    permissible_values:
+      Platform:
+      Space:
+      Account:
+      Person:
+      AutomatedAgent:
+      Document:
+      Content:
+      Concept:
+      ConceptSchema:
+      ContentLink:
+      Occurrence:
+  EmbeddingName:
+    description: an embedding name
+    permissible_values:
+      openai_text_embedding_ada2_1536:
+      openai_text_embedding_3_small_512:
+      openai_text_embedding_3_small_1536:
+      openai_text_embedding_3_large_256:
+      openai_text_embedding_3_large_1024:
+      openai_text_embedding_3_large_3072:
+  DerivedTextVariant:
+    description: Is the text taken as-is, or is it a computed variant?
+    permissible_values:
+      as_is:
+      neighbourhood_parent_and_children:
+      neighbourhood_parent_and_level_2_descendants:
+      neighbourhood_children:
+      neighbourhood_level_2_descendants:
+      summary:
+types:
+  JSON:
+    uri: xsd:string
+    # base: dict
+    base: str
+    description: JSON data
+    # annotations:
+    #   sql_type: sqlalchemy.dialects.postgresql.JSONB
+  JSONSchema:
+    uri: xsd:string
+    base: dict
+    description: A JSON schema
+    # annotations:
+    #   sql_type: sqlalchemy.dialects.postgresql.JSONB
+  vector:
+    uri: xsd:float
+    base: float
+    description: A vector of floats
+    annotations:
+      sql_type: pgvector.sqlalchemy.Vector
+  blob:
+    uri: xsd:base64Binary
+    base: bytes
+    annotations:
+      sql_type: sqlalchemy.dialects.postgresql.BLOB
+    description: A binary large object
+classes:
+  Agent:
+    description: An agent that acts in the system
+    abstract: true
+    slots:
+      - id
+      - type
+  Person:
+    description: A person using the system
+    is_a: Agent
+    slots:
+      - name
+      - orcid
+    attributes:
+      email:
+        required: true
+    # TODO: known skills, i.e. what processes can they confirm.
+  AutomatedAgent:
+    description: An automated agent
+    is_a: Agent
+    slots:
+      - metadata
+      - name
+    attributes:
+      deterministic:
+        range: boolean
+        ifabsent: false
+      version:
+        range: string
+
+  Platform:
+    description: A data platform where discourse happens
+    slots:
+      - id
+      - name
+    attributes:
+      url:
+        required: true
+  Account:
+    description: A user account on a platform
+    slots:
+      - id
+      - platform
+    attributes:
+      person:
+        range: Agent
+        required: true
+      write_permission:
+        range: boolean
+        required: true
+      active:
+        range: boolean
+        required: true
+        ifabsent: true
+  Space:
+    description: A space on a platform representing a community engaged in a conversation
+    slots:
+      - id
+      - url
+      - name
+    attributes:
+      platform:
+        range: Platform
+        required: true
+  SpaceAccess:
+    description: An access control entry for a space
+    slots:
+      - space
+    attributes:
+      account:
+        range: Account
+        required: true
+      editor:
+        range: boolean
+        required: true
+    unique_keys:
+      main:
+        description: Primary key for space access
+        unique_key_slots:
+          - account
+          - space
+  Content:
+    description: A unit of content
+    slots:
+      - id
+      - document
+      - source_local_id
+      - author
+      - creator
+      - created
+      - text
+      - metadata
+      - scale
+      # - position
+      # - char_position
+      - space
+      - contributors
+      - last_modified
+    attributes:
+      part_of:
+        description: This content is part of a larger content unit
+        range: Content
+  # ContentDerivation:
+  #   description: A derivation relation between content units
+  #   attributes:
+  #     derived_content:
+  #       description: The derived content unit
+  #       range: Content
+  #       required: true
+  #       identifier: true
+  #     derived_from:
+  #       description: The content unit that this variant was derived from
+  #       range: Content
+  #       required: true
+  #     derived_variant:
+  #       description: This content is a variant derived from another content unit
+  #       range: DerivedTextVariant
+  #       required: true
+  Document:
+    slots:
+      - id
+      - space
+      - source_local_id
+      - url
+      - created
+      - metadata
+      - last_modified
+      - author
+      - contributors
+    attributes:
+      contents:
+        range: blob
+  # Article:
+  #   description: an article
+  #   is_a: Document
+  #   slots:
+  #     - issn
+  #     - abstract
+  ContentEmbedding:
+    # abstract: true
+    attributes:
+      target:
+        range: Content
+        required: true
+        identifier: true
+      model:
+        range: EmbeddingName
+        required: true
+      vector:
+        range: vector
+        array:
+          minimum_number_dimensions: 1
+          maximum_number_dimensions: 1
+        required: true
+      obsolete:
+        description: Whether this embedding is obsolete (because the Content was modified)
+        range: boolean
+        ifabsent: false
+  Concept:
+    description: An abstract concept, claim or relation
+    slots:
+      - id
+      - epistemic_status
+      - name
+      - description
+      - author
+      - contributors
+      - created
+      - last_modified
+      - space
+    attributes:
+      arity:
+        range: integer
+        required: true
+        ifabsent: 0
+        description: The number of roles in this relation; nodes have zero, binary relations have 2, etc.
+      schema:
+        range: ConceptSchema
+        required: true
+      content:
+        range: JSON
+        required: true
+      is_schema:
+        range: boolean
+        required: true
+        ifabsent: false
+      represented_by:
+        description: This concept is explicitly represented by a given content unit
+        range: Content
+    # update status
+    # concept has occurrences and possibly a representation in a space.
+    # Are concepts space-specific? Tending to yes. So the point of convergence should be distinct.
+    # Can a concept have multiple representations? One case is a representation
+    # of an equivalent concept in another space.
+    # do non-claim concepts have epistemic status?
+    # The other big deal is who has authority on concept definition.
+    # Finally... concept schema. Yeah. Is it per-space? Likely.
+    # Damn, concept schema is a concept, is it not?
+    # Now, if a concept has a complex structure based on a complex content...
+    # AH, it should be based on occurrences.
+
+  ConceptSchema:
+    is_a: Concept
+
+  # Reference:
+  #   abstract: true
+  #   description: A link from a content fragment to something else
+  #   attributes:
+  #     source:
+  #       range: Content
+  #       required: true
+  #   slots:
+  #     - creator
+  #     - created
+  #     - validation
+  #     - type
+  #   # This is an aggregate of validation events
+  #   # Q: What is the relationship between occurrences and links? Links to Concepts in particular?
+  #   # What if the concept has been materialized as content?
+  # ContentLink:
+  #   description: An explicit link from a content fragment to another content.
+  #   is_a: Reference
+  #   attributes:
+  #     target:
+  #       range: Content
+  #       required: true
+  # Occurrence:
+  #   description: A link from a content fragment to a Concept. May be an interpretation.
+  #   is_a: Reference
+  #   attributes:
+  #     target:
+  #       range: Concept
+  #       required: true
+slots:
+  id:
+    range: integer
+    identifier: true
+    required: true
+  type:
+    range: EntityType
+    required: true
+    designates_type: true
+  name:
+    required: true
+  author:
+    range: Agent
+    description: The author of content
+    required: true
+  creator:
+    range: Agent
+    description: The creator of a logical structure, such as a content subdivision
+    required: true
+  contributors:
+    multivalued: true
+    range: Agent
+  text:
+    required: true
+  description:
+  created:
+    range: datetime
+    required: true
+    description: The time when the content was created in the remote source
+  last_modified:
+    range: datetime
+    required: true
+    description: The last time the content was modified in the remote source
+  last_synced:
+    range: datetime
+    required: true
+    description: The last time the content was synced with the remote source
+  metadata:
+    range: JSON
+  orcid:
+    range: string
+  url:
+    range: string
+  platform:
+    range: Platform
+    required: true
+  issn:
+  abstract:
+  scale:
+    range: Scale
+    required: true
+  position:
+    description: The ordinal position of the content within its parent, relative to other content units of the same scale
+    range: integer
+    ifabsent: 0
+    required: true
+  char_position:
+    description: The character position of the content within its parent.
+    # Does not apply to outline sub-elements
+    range: integer
+    ifabsent: 0
+  validation:
+    range: Validation
+    required: true
+    ifabsent: Validation(suggested)
+  epistemic_status:
+    range: EpistemicStatus
+    required: true
+    ifabsent: EpistemicStatus(unknown)
+  space:
+    range: Space
+    description: The space in which the content is located
+  document:
+    range: Document
+    required: true
+  source_local_id:
+    range: string
+    description: The unique identifier of the content in the remote source
+    # In the case of a document, could it be the URL?
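Note: the `Agent`/`Person`/`AutomatedAgent` hierarchy above (`designates_type` on the `type` slot) materializes in the migrations below as joined-table inheritance: `Person.id` and `AutomatedAgent.id` are each both a primary key and a foreign key to `Agent.id`. A minimal sketch of creating a person under that layout (the names and values are illustrative, not part of the schema):

```sql
-- Create the base Agent row, then the Person row sharing its id.
WITH new_agent AS (
    INSERT INTO "Agent" (type) VALUES ('Person') RETURNING id
)
INSERT INTO "Person" (id, name, email)
SELECT id, 'Ada Lovelace', 'ada@example.com'
FROM new_agent;
```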
diff --git a/packages/database/scripts/deploy.ts b/packages/database/scripts/deploy.ts
new file mode 100644
index 000000000..a4af380be
--- /dev/null
+++ b/packages/database/scripts/deploy.ts
@@ -0,0 +1,71 @@
+// Deploy the migrations (and, with -f, the edge functions) to the linked Supabase project.
+// Refuses to run off the main branch or with uncommitted changes.
+import { exec } from "node:child_process";
+import dotenv from "dotenv";
+
+dotenv.config();
+
+const main = () => {
+  try {
+    exec("git status -s -b -uno", (err, stdout, stderr) => {
+      if (err) {
+        console.error("Is git installed?");
+        process.exit(1);
+      }
+      const lines = stdout.trim().split("\n");
+      if (lines[0] !== "## main...main") {
+        console.log("Not on main branch, not deploying database");
+        process.exit(0);
+      }
+      if (lines.length > 1) {
+        console.log(
+          "You seem to have uncommitted changes, not deploying database",
+        );
+        process.exit(0);
+      }
+      const { SUPABASE_PROJECT_ID, SUPABASE_DB_PASSWORD } = process.env;
+      if (!SUPABASE_PROJECT_ID) {
+        console.log("Please define SUPABASE_PROJECT_ID");
+        process.exit(1);
+      }
+      if (!SUPABASE_DB_PASSWORD) {
+        console.log("Please define SUPABASE_DB_PASSWORD");
+        process.exit(1);
+      }
+      // Pass the database password through the environment rather than on the command line
+      exec(
+        `supabase link --project-ref ${SUPABASE_PROJECT_ID}`,
+        { env: { ...process.env, SUPABASE_DB_PASSWORD } },
+        (err, stdout, stderr) => {
+          console.log(`${stdout}`);
+          console.error(`${stderr}`);
+          if (err) {
+            process.exit(err.code ?? 1);
+          }
+          exec("supabase db push", (err, stdout, stderr) => {
+            console.log(`${stdout}`);
+            console.error(`${stderr}`);
+            if (err) {
+              process.exit(err.code ?? 1);
+            }
+            if (process.argv.length === 3 && process.argv[2] === "-f") {
+              // Also push functions
+              exec(
+                `supabase functions deploy --project-ref ${SUPABASE_PROJECT_ID}`,
+                (err, stdout, stderr) => {
+                  console.log(`${stdout}`);
+                  console.error(`${stderr}`);
+                  if (err) {
+                    process.exit(err.code ?? 1);
+                  }
+                },
+              );
+            }
+          });
+        },
+      );
+    });
+  } catch (error) {
+    console.error("error:", error);
+    process.exit(1);
+  }
+};
+if (import.meta.url === `file://${process.argv[1]}`) main();
diff --git a/packages/database/scripts/lint.ts b/packages/database/scripts/lint.ts
new file mode 100644
index 000000000..0d8005806
--- /dev/null
+++ b/packages/database/scripts/lint.ts
@@ -0,0 +1,24 @@
+// Run sqruff over the declarative schema files; pass -f to autofix instead of lint.
+import { exec } from "node:child_process";
+
+const main = () => {
+  try {
+    exec("which sqruff", (err, stdout, stderr) => {
+      if (err) {
+        console.error("Could not find sqruff, you may want to install it.");
+        // Fail gracefully
+        process.exit(0);
+      }
+      const command =
+        process.argv.length === 3 && process.argv[2] === "-f" ? "fix" : "lint";
+      exec(`sqruff ${command} supabase/schemas`, {}, (err, stdout, stderr) => {
+        console.log(`${stdout}`);
+        console.log(`${stderr}`);
+        process.exit(err ? (err.code ?? 1) : 0);
+      });
+    });
+  } catch (error) {
+    console.error("error:", error);
+    process.exit(1);
+  }
+};
+if (import.meta.url === `file://${process.argv[1]}`) main();
diff --git a/packages/database/supabase/.gitignore b/packages/database/supabase/.gitignore
new file mode 100644
index 000000000..ad9264f0b
--- /dev/null
+++ b/packages/database/supabase/.gitignore
@@ -0,0 +1,8 @@
+# Supabase
+.branches
+.temp
+
+# dotenvx
+.env.keys
+.env.local
+.env.*.local
diff --git a/packages/database/supabase/config.toml b/packages/database/supabase/config.toml
new file mode 100644
index 000000000..ca2dc47aa
--- /dev/null
+++ b/packages/database/supabase/config.toml
@@ -0,0 +1,323 @@
+# For detailed configuration reference documentation, visit:
+# https://supabase.com/docs/guides/local-development/cli/config
+# A string used to distinguish different Supabase projects on the same host. Defaults to the
+# working directory name when running `supabase init`.
+project_id = "discourse-graphs"
+
+[api]
+enabled = true
+# Port to use for the API URL.
+port = 54321
+# Schemas to expose in your API. Tables, views and stored procedures in this schema will get API
+# endpoints. `public` and `graphql_public` schemas are included by default.
+schemas = ["public", "graphql_public"]
+# Extra schemas to add to the search_path of every request.
+extra_search_path = ["public", "extensions"]
+# The maximum number of rows returned from a view, table, or stored procedure. Limits payload size
+# for accidental or malicious requests.
+max_rows = 1000
+
+[api.tls]
+# Enable HTTPS endpoints locally using a self-signed certificate.
+enabled = false
+
+[db]
+# Port to use for the local database URL.
+port = 54322
+# Port used by db diff command to initialize the shadow database.
+shadow_port = 54320
+# The database major version to use. This has to be the same as your remote database's. Run `SHOW
+# server_version;` on the remote database to check.
+major_version = 15
+
+[db.pooler]
+enabled = false
+# Port to use for the local connection pooler.
+port = 54329
+# Specifies when a server connection can be reused by other clients.
+# Configure one of the supported pooler modes: `transaction`, `session`.
+pool_mode = "transaction"
+# How many server connections to allow per user/database pair.
+default_pool_size = 20
+# Maximum number of client connections allowed.
+max_client_conn = 100
+
+# [db.vault]
+# secret_key = "env(SECRET_VALUE)"
+
+[db.migrations]
+# Specifies an ordered list of schema files that describe your database.
+# Supports glob patterns relative to supabase directory: "./schemas/*.sql"
+schema_paths = [
+  './schemas/base.sql',
+  './schemas/extensions.sql',
+  './schemas/agent.sql',
+  './schemas/space.sql',
+  './schemas/account.sql',
+  './schemas/content.sql',
+  './schemas/embedding.sql',
+  './schemas/concept.sql',
+  './schemas/contributor.sql',
+  './schemas/sync.sql',
+]
+
+[db.seed]
+# If enabled, seeds the database after migrations during a db reset.
+enabled = true
+# Specifies an ordered list of seed files to load during db reset.
+# Supports glob patterns relative to supabase directory: "./seeds/*.sql"
+sql_paths = ["./seed.sql"]
+
+[realtime]
+enabled = true
+# Bind realtime via either IPv4 or IPv6. (default: IPv4)
+# ip_version = "IPv6"
+# The maximum length in bytes of HTTP request headers. (default: 4096)
+# max_header_length = 4096
+
+[studio]
+enabled = true
+# Port to use for Supabase Studio.
+port = 54323
+# External URL of the API server that frontend connects to.
+api_url = "http://127.0.0.1" +# OpenAI API Key to use for Supabase AI in the Supabase Studio. +openai_api_key = "env(OPENAI_API_KEY)" + +# Email testing server. Emails sent with the local dev setup are not actually sent - rather, they +# are monitored, and you can view the emails that would have been sent from the web interface. +[inbucket] +enabled = true +# Port to use for the email testing server web interface. +port = 54324 +# Uncomment to expose additional ports for testing user applications that send emails. +# smtp_port = 54325 +# pop3_port = 54326 +# admin_email = "admin@email.com" +# sender_name = "Admin" + +[storage] +enabled = true +# The maximum file size allowed (e.g. "5MB", "500KB"). +file_size_limit = "50MiB" + +# Image transformation API is available to Supabase Pro plan. +# [storage.image_transformation] +# enabled = true + +# Uncomment to configure local storage buckets +# [storage.buckets.images] +# public = false +# file_size_limit = "50MiB" +# allowed_mime_types = ["image/png", "image/jpeg"] +# objects_path = "./images" + +[auth] +enabled = true +# The base URL of your website. Used as an allow-list for redirects and for constructing URLs used +# in emails. +# site_url = "http://127.0.0.1:3000" +site_url = "https://discourse-graph-discourse-graphs.vercel.app/" +# A list of *exact* URLs that auth providers are permitted to redirect to post authentication. +additional_redirect_urls = [ + "https://discourse-graph-discourse-graphs.vercel.app/**", + "https://discourse-graph-*-discourse-graphs.vercel.app/**", +] +# How long tokens are valid for, in seconds. Defaults to 3600 (1 hour), maximum 604,800 (1 week). +jwt_expiry = 3600 +# If disabled, the refresh token will never expire. +enable_refresh_token_rotation = true +# Allows refresh tokens to be reused after expiry, up to the specified interval in seconds. +# Requires enable_refresh_token_rotation = true. +refresh_token_reuse_interval = 10 +# Allow/disallow new user signups to your project. +enable_signup = true +# Allow/disallow anonymous sign-ins to your project. +enable_anonymous_sign_ins = false +# Allow/disallow testing manual linking of accounts +enable_manual_linking = false +# Passwords shorter than this value will be rejected as weak. Minimum 6, recommended 8 or more. +minimum_password_length = 6 +# Passwords that do not meet the following requirements will be rejected as weak. Supported values +# are: `letters_digits`, `lower_upper_letters_digits`, `lower_upper_letters_digits_symbols` +password_requirements = "" + +[auth.rate_limit] +# Number of emails that can be sent per hour. Requires auth.email.smtp to be enabled. +email_sent = 2 +# Number of SMS messages that can be sent per hour. Requires auth.sms to be enabled. +sms_sent = 30 +# Number of anonymous sign-ins that can be made per hour per IP address. Requires enable_anonymous_sign_ins = true. +anonymous_users = 30 +# Number of sessions that can be refreshed in a 5 minute interval per IP address. +token_refresh = 150 +# Number of sign up and sign-in requests that can be made in a 5 minute interval per IP address (excludes anonymous users). +sign_in_sign_ups = 30 +# Number of OTP / Magic link verifications that can be made in a 5 minute interval per IP address. +token_verifications = 30 + +# Configure one of the supported captcha providers: `hcaptcha`, `turnstile`. +# [auth.captcha] +# enabled = true +# provider = "hcaptcha" +# secret = "" + +[auth.email] +# Allow/disallow new user signups via email to your project. 
+enable_signup = true +# If enabled, a user will be required to confirm any email change on both the old, and new email +# addresses. If disabled, only the new email is required to confirm. +double_confirm_changes = true +# If enabled, users need to confirm their email address before signing in. +enable_confirmations = true +# If enabled, users will need to reauthenticate or have logged in recently to change their password. +secure_password_change = false +# Controls the minimum amount of time that must pass before sending another signup confirmation or password reset email. +max_frequency = "1m0s" +# Number of characters used in the email OTP. +otp_length = 6 +# Number of seconds before the email OTP expires (defaults to 1 hour). +otp_expiry = 86400 + +# Use a production-ready SMTP server +# [auth.email.smtp] +# enabled = true +# host = "smtp.sendgrid.net" +# port = 587 +# user = "apikey" +# pass = "env(SENDGRID_API_KEY)" +# admin_email = "admin@email.com" +# sender_name = "Admin" + +# Uncomment to customize email template +# [auth.email.template.invite] +# subject = "You have been invited" +# content_path = "./supabase/templates/invite.html" + +[auth.sms] +# Allow/disallow new user signups via SMS to your project. +enable_signup = false +# If enabled, users need to confirm their phone number before signing in. +enable_confirmations = false +# Template for sending OTP to users +template = "Your code is {{ .Code }}" +# Controls the minimum amount of time that must pass before sending another sms otp. +max_frequency = "5s" + +# Use pre-defined map of phone number to OTP for testing. +# [auth.sms.test_otp] +# 4152127777 = "123456" + +# Configure logged in session timeouts. +# [auth.sessions] +# Force log out after the specified duration. +# timebox = "24h" +# Force log out if the user has been inactive longer than the specified duration. +# inactivity_timeout = "8h" + +# This hook runs before a token is issued and allows you to add additional claims based on the authentication method used. +# [auth.hook.custom_access_token] +# enabled = true +# uri = "pg-functions:////" + +# Configure one of the supported SMS providers: `twilio`, `twilio_verify`, `messagebird`, `textlocal`, `vonage`. +[auth.sms.twilio] +enabled = false +account_sid = "" +message_service_sid = "" +# DO NOT commit your Twilio auth token to git. Use environment variable substitution instead: +auth_token = "env(SUPABASE_AUTH_SMS_TWILIO_AUTH_TOKEN)" + +# Multi-factor-authentication is available to Supabase Pro plan. +[auth.mfa] +# Control how many MFA factors can be enrolled at once per user. +max_enrolled_factors = 10 + +# Control MFA via App Authenticator (TOTP) +[auth.mfa.totp] +enroll_enabled = true +verify_enabled = true + +# Configure MFA via Phone Messaging +[auth.mfa.phone] +enroll_enabled = false +verify_enabled = false +otp_length = 6 +template = "Your code is {{ .Code }}" +max_frequency = "5s" + +# Configure MFA via WebAuthn +# [auth.mfa.web_authn] +# enroll_enabled = true +# verify_enabled = true + +# Use an external OAuth provider. The full list of providers are: `apple`, `azure`, `bitbucket`, +# `discord`, `facebook`, `github`, `gitlab`, `google`, `keycloak`, `linkedin_oidc`, `notion`, `twitch`, +# `twitter`, `slack`, `spotify`, `workos`, `zoom`. +[auth.external.apple] +enabled = false +client_id = "" +# DO NOT commit your OAuth provider secret to git. Use environment variable substitution instead: +secret = "env(SUPABASE_AUTH_EXTERNAL_APPLE_SECRET)" +# Overrides the default auth redirectUrl. 
+redirect_uri = "" +# Overrides the default auth provider URL. Used to support self-hosted gitlab, single-tenant Azure, +# or any other third-party OIDC providers. +url = "" +# If enabled, the nonce check will be skipped. Required for local sign in with Google auth. +skip_nonce_check = false + +# Use Firebase Auth as a third-party provider alongside Supabase Auth. +[auth.third_party.firebase] +enabled = false +# project_id = "my-firebase-project" + +# Use Auth0 as a third-party provider alongside Supabase Auth. +[auth.third_party.auth0] +enabled = false +# tenant = "my-auth0-tenant" +# tenant_region = "us" + +# Use AWS Cognito (Amplify) as a third-party provider alongside Supabase Auth. +[auth.third_party.aws_cognito] +enabled = false +# user_pool_id = "my-user-pool-id" +# user_pool_region = "us-east-1" + +# Use Clerk as a third-party provider alongside Supabase Auth. +[auth.third_party.clerk] +enabled = false +# Obtain from https://clerk.com/setup/supabase +# domain = "example.clerk.accounts.dev" + +[edge_runtime] +enabled = true +# Configure one of the supported request policies: `oneshot`, `per_worker`. +# Use `oneshot` for hot reload, or `per_worker` for load testing. +policy = "oneshot" +# Port to attach the Chrome inspector for debugging edge functions. +inspector_port = 8083 +# The Deno major version to use. +deno_version = 1 + +# [edge_runtime.secrets] +# secret_key = "env(SECRET_VALUE)" + +[analytics] +enabled = true +port = 54327 +# Configure one of the supported backends: `postgres`, `bigquery`. +backend = "postgres" + +# Experimental features may be deprecated any time +[experimental] +# Configures Postgres storage engine to use OrioleDB (S3) +orioledb_version = "" +# Configures S3 bucket URL, eg. .s3-.amazonaws.com +s3_host = "env(S3_HOST)" +# Configures S3 bucket region, eg. 
us-east-1 +s3_region = "env(S3_REGION)" +# Configures AWS_ACCESS_KEY_ID for S3 bucket +s3_access_key = "env(S3_ACCESS_KEY)" +# Configures AWS_SECRET_ACCESS_KEY for S3 bucket +s3_secret_key = "env(S3_SECRET_KEY)" diff --git a/packages/database/supabase/migrations/20250504195841_remote_schema.sql b/packages/database/supabase/migrations/20250504195841_remote_schema.sql new file mode 100644 index 000000000..89b3d4e4c --- /dev/null +++ b/packages/database/supabase/migrations/20250504195841_remote_schema.sql @@ -0,0 +1,9 @@ +create extension if not exists "pg_jsonschema" with schema "extensions"; + +create extension if not exists "pg_stat_monitor" with schema "extensions"; + +create extension if not exists "pgroonga" with schema "extensions"; + +create extension if not exists "vector" with schema "extensions"; + + diff --git a/packages/database/supabase/migrations/20250504202930_content_tables.sql b/packages/database/supabase/migrations/20250504202930_content_tables.sql new file mode 100644 index 000000000..52f3cda7c --- /dev/null +++ b/packages/database/supabase/migrations/20250504202930_content_tables.sql @@ -0,0 +1,211 @@ + +CREATE SEQUENCE IF NOT EXISTS public.entity_id_seq + AS BIGINT + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +CREATE TYPE "EntityType" AS ENUM ('Platform', 'Space', 'Account', 'Person', 'AutomatedAgent', 'Document', 'Content', 'Concept', 'ConceptSchema', 'ContentLink', 'Occurrence'); + +CREATE TYPE "Scale" AS ENUM ('document', 'post', 'chunk_unit', 'section', 'block', 'field', 'paragraph', 'quote', 'sentence', 'phrase'); + +CREATE TYPE "EmbeddingName" AS ENUM ('openai_text_embedding_ada2_1536', 'openai_text_embedding_3_small_512', 'openai_text_embedding_3_small_1536', 'openai_text_embedding_3_large_256', 'openai_text_embedding_3_large_1024', 'openai_text_embedding_3_large_3072'); + +CREATE TYPE "EpistemicStatus" AS ENUM ('certainly_not', 'strong_evidence_against', 'could_be_false', 'unknown', 'uncertain', 'contentious', 'could_be_true', 'strong_evidence_for', 'certain'); + +CREATE TABLE "Agent" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + type "EntityType" NOT NULL +); +COMMENT ON TABLE "Agent" IS 'An agent that acts in the system'; + + +CREATE TABLE "Person" ( + id BIGINT NOT NULL PRIMARY KEY, + name VARCHAR NOT NULL, + orcid VARCHAR(20), + email VARCHAR NOT NULL, + CONSTRAINT person_id_fkey FOREIGN KEY (id) + REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE +); +COMMENT ON TABLE "Person" IS 'A person using the system'; + + +CREATE TABLE "AutomatedAgent" ( + id BIGINT NOT NULL PRIMARY KEY, + name VARCHAR NOT NULL, + metadata JSONB NOT NULL DEFAULT '{}', + deterministic BOOLEAN DEFAULT FALSE, + version VARCHAR, + CONSTRAINT person_id_fkey FOREIGN KEY (id) + REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE +); +COMMENT ON TABLE "AutomatedAgent" IS 'An automated agent'; + + +CREATE TABLE "DiscoursePlatform" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + name VARCHAR NOT NULL, + url VARCHAR NOT NULL +); +COMMENT ON TABLE "DiscoursePlatform" IS 'A data platform where discourse happens'; + + +CREATE TABLE "Account" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + platform_id BIGINT NOT NULL, + person_id BIGINT NOT NULL, + write_permission BOOLEAN NOT NULL, + active BOOLEAN NOT NULL DEFAULT TRUE, + FOREIGN KEY(platform_id) REFERENCES "DiscoursePlatform" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN 
KEY(person_id) REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE +); +COMMENT ON TABLE "Account" IS 'A user account on a discourse platform'; + + +CREATE TABLE "DiscourseSpace" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + url VARCHAR, + name VARCHAR NOT NULL, + discourse_platform_id BIGINT NOT NULL, + FOREIGN KEY(discourse_platform_id) REFERENCES "DiscoursePlatform" (id) ON DELETE CASCADE ON UPDATE CASCADE +); +COMMENT ON TABLE "DiscourseSpace" IS 'A space on a discourse platform representing a community engaged in a conversation'; + + +CREATE TABLE "SpaceAccess" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + space_id BIGINT, + account_id BIGINT NOT NULL, + editor BOOLEAN NOT NULL, + UNIQUE (account_id, space_id), + FOREIGN KEY(space_id) REFERENCES "DiscourseSpace" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(account_id) REFERENCES "Account" (id) ON DELETE CASCADE ON UPDATE CASCADE +); +COMMENT ON TABLE "SpaceAccess" IS 'An access control entry for a space'; +COMMENT ON COLUMN "SpaceAccess".space_id IS 'The space in which the content is located'; + + +CREATE TABLE "Document" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + space_id BIGINT, + source_local_id VARCHAR, + url VARCHAR, + last_synced TIMESTAMP WITHOUT TIME ZONE NOT NULL, + created TIMESTAMP WITHOUT TIME ZONE NOT NULL, + metadata JSONB NOT NULL DEFAULT '{}', + last_modified TIMESTAMP WITHOUT TIME ZONE NOT NULL, + author_id BIGINT NOT NULL, + contents OID, + FOREIGN KEY(space_id) REFERENCES "DiscourseSpace" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(author_id) REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE +); +COMMENT ON COLUMN "Document".space_id IS 'The space in which the content is located'; +COMMENT ON COLUMN "Document".source_local_id IS 'The unique identifier of the content in the remote source'; +COMMENT ON COLUMN "Document".last_synced IS 'The last time the content was synced with the remote source'; +COMMENT ON COLUMN "Document".created IS 'The time when the content was created in the remote source'; +COMMENT ON COLUMN "Document".last_modified IS 'The last time the content was modified in the remote source'; +COMMENT ON COLUMN "Document".author_id IS 'The author of content'; +COMMENT ON COLUMN "Document".contents IS 'A large object OID for the downloaded raw content'; + +CREATE TABLE "Concept" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + epistemic_status "EpistemicStatus" NOT NULL DEFAULT 'unknown', + name VARCHAR NOT NULL, + description TEXT, + author_id BIGINT, + created TIMESTAMP WITHOUT TIME ZONE NOT NULL, + last_modified TIMESTAMP WITHOUT TIME ZONE NOT NULL, + last_synced TIMESTAMP WITHOUT TIME ZONE NOT NULL, + space_id BIGINT, + arity SMALLINT NOT NULL DEFAULT 0, + schema_id BIGINT, + content JSONB NOT NULL DEFAULT '{}', + is_schema BOOLEAN NOT NULL DEFAULT FALSE, + FOREIGN KEY(author_id) REFERENCES "Agent" (id) ON DELETE SET NULL ON UPDATE CASCADE, + FOREIGN KEY(space_id) REFERENCES "DiscourseSpace" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(schema_id) REFERENCES "Concept" (id) ON DELETE SET NULL ON UPDATE CASCADE +); +CREATE INDEX "Concept_space" ON "Concept" (space_id); +CREATE INDEX "Concept_schema" ON "Concept" (schema_id); +CREATE INDEX "Concept_content" ON "Concept" USING GIN (content jsonb_path_ops); + + +COMMENT ON TABLE "Concept" IS 'An abstract concept, claim or relation'; +COMMENT 
ON COLUMN "Concept".author_id IS 'The author of content'; +COMMENT ON COLUMN "Concept".created IS 'The time when the content was created in the remote source'; +COMMENT ON COLUMN "Concept".last_modified IS 'The last time the content was modified in the remote source'; +COMMENT ON COLUMN "Concept".last_synced IS 'The last time the content was synced with the remote source'; +COMMENT ON COLUMN "Concept".space_id IS 'The space in which the content is located'; + + +CREATE TABLE "Content" ( + id BIGINT NOT NULL PRIMARY KEY DEFAULT nextval('public.entity_id_seq'::regclass), + document_id BIGINT NOT NULL, + source_local_id VARCHAR, + author_id BIGINT, + creator_id BIGINT, + created TIMESTAMP WITHOUT TIME ZONE NOT NULL, + text TEXT NOT NULL, + metadata JSONB NOT NULL DEFAULT '{}', + scale "Scale" NOT NULL, + space_id BIGINT, + last_modified TIMESTAMP WITHOUT TIME ZONE NOT NULL, + last_synced TIMESTAMP WITHOUT TIME ZONE NOT NULL, + part_of_id BIGINT, + represents_id BIGINT, + FOREIGN KEY(document_id) REFERENCES "Document" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(author_id) REFERENCES "Agent" (id) ON DELETE SET NULL ON UPDATE CASCADE, + FOREIGN KEY(creator_id) REFERENCES "Agent" (id) ON DELETE SET NULL ON UPDATE CASCADE, + FOREIGN KEY(space_id) REFERENCES "DiscourseSpace" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(part_of_id) REFERENCES "Content" (id) ON DELETE SET NULL ON UPDATE CASCADE, + FOREIGN KEY(represents_id) REFERENCES "Concept" (id) ON DELETE SET NULL ON UPDATE CASCADE +); + +CREATE INDEX "Content_text" ON "Content" USING pgroonga (text); +CREATE INDEX "Content_space" ON "Content" (space_id); +CREATE INDEX "Content_document" ON "Content" (document_id); +CREATE INDEX "Content_part_of" ON "Content" (part_of_id); +CREATE INDEX "Content_represents" ON "Content" (represents_id); + +COMMENT ON TABLE "Content" IS 'A unit of content'; +COMMENT ON COLUMN "Content".source_local_id IS 'The unique identifier of the content in the remote source'; +COMMENT ON COLUMN "Content".author_id IS 'The author of content'; +COMMENT ON COLUMN "Content".creator_id IS 'The creator of a logical structure, such as a content subdivision'; +COMMENT ON COLUMN "Content".created IS 'The time when the content was created in the remote source'; +COMMENT ON COLUMN "Content".space_id IS 'The space in which the content is located'; +COMMENT ON COLUMN "Content".last_modified IS 'The last time the content was modified in the remote source'; +COMMENT ON COLUMN "Content".last_synced IS 'The last time the content was synced with the remote source'; +COMMENT ON COLUMN "Content".part_of_id IS 'This content is part of a larger content unit'; +COMMENT ON COLUMN "Content".represents_id IS 'This content explicitly represents a concept'; + + +CREATE TABLE concept_contributors ( + concept_id BIGINT, + contributor_id BIGINT, + PRIMARY KEY (concept_id, contributor_id), + FOREIGN KEY(concept_id) REFERENCES "Concept" (id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(contributor_id) REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE +); + + +CREATE TABLE "ContentEmbedding_openai_text_embedding_3_small_1536" ( + target_id BIGINT NOT NULL, + model "EmbeddingName" NOT NULL DEFAULT 'openai_text_embedding_3_small_1536', + vector extensions.vector(1536) NOT NULL, + obsolete BOOLEAN DEFAULT FALSE, + PRIMARY KEY (target_id), + FOREIGN KEY(target_id) REFERENCES "Content" (id) ON DELETE CASCADE ON UPDATE CASCADE +); + + +CREATE TABLE content_contributors ( + content_id BIGINT, + contributor_id BIGINT, + PRIMARY 
KEY (content_id, contributor_id),
+    FOREIGN KEY(content_id) REFERENCES "Content" (id) ON DELETE CASCADE ON UPDATE CASCADE,
+    FOREIGN KEY(contributor_id) REFERENCES "Agent" (id) ON DELETE CASCADE ON UPDATE CASCADE
+);
diff --git a/packages/database/supabase/migrations/20250506174523_content_idx_id.sql b/packages/database/supabase/migrations/20250506174523_content_idx_id.sql
new file mode 100644
index 000000000..8e9ddfc55
--- /dev/null
+++ b/packages/database/supabase/migrations/20250506174523_content_idx_id.sql
@@ -0,0 +1,2 @@
+CREATE UNIQUE INDEX "Content_space_and_id" ON "Content" (space_id, source_local_id) WHERE
+source_local_id IS NOT NULL;
diff --git a/packages/database/supabase/migrations/20250512142307_sync_table.sql b/packages/database/supabase/migrations/20250512142307_sync_table.sql
new file mode 100644
index 000000000..51a7b7a14
--- /dev/null
+++ b/packages/database/supabase/migrations/20250512142307_sync_table.sql
@@ -0,0 +1,107 @@
+CREATE TYPE task_status AS ENUM ('active', 'timeout', 'complete', 'failed');
+
+CREATE TABLE sync_info (
+    id SERIAL PRIMARY KEY,
+    sync_target BIGINT,
+    sync_function VARCHAR(20),
+    status task_status DEFAULT 'active',
+    worker varchar(100) NOT NULL,
+    failure_count SMALLINT DEFAULT 0,
+    last_task_start TIMESTAMP WITH TIME ZONE,
+    last_task_end TIMESTAMP WITH TIME ZONE,
+    task_times_out_at TIMESTAMP WITH TIME ZONE
+);
+
+CREATE UNIQUE INDEX sync_info_u_idx on sync_info (sync_target, sync_function);
+
+CREATE OR REPLACE FUNCTION propose_sync_task(s_target BIGINT, s_function VARCHAR(20), s_worker varchar(100), timeout INTERVAL, task_interval INTERVAL)
+    RETURNS INTERVAL AS $$
+DECLARE s_id INTEGER;
+DECLARE timeout_as TIMESTAMP WITH TIME ZONE;
+DECLARE start_time TIMESTAMP WITH TIME ZONE;
+DECLARE t_worker VARCHAR;
+DECLARE t_status task_status;
+DECLARE t_failure_count SMALLINT;
+DECLARE t_last_task_start TIMESTAMP WITH TIME ZONE;
+DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE;
+DECLARE t_times_out_at TIMESTAMP WITH TIME ZONE;
+DECLARE result INTERVAL = NULL;
+BEGIN
+    ASSERT timeout * 2 < task_interval;
+    ASSERT timeout >= '1s'::interval;
+    ASSERT task_interval >= '5s'::interval;
+    start_time := now();
+    INSERT INTO sync_info (sync_target, sync_function, status, worker, last_task_start, task_times_out_at)
+    VALUES (s_target, s_function, 'active', s_worker, start_time, start_time+timeout)
+    ON CONFLICT DO NOTHING
+    RETURNING id INTO s_id;
+    -- alas, with ON CONFLICT DO NOTHING, RETURNING yields null for a pre-existing row...
+    IF s_id IS NOT NULL THEN
+        -- brand-new row: this worker owns the task
+        RETURN NULL;
+    END IF;
+    -- now we know it pre-existed. Maybe already active.
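+    -- Re-read the existing row's id and take an advisory lock on it, so that
+    -- competing workers serialize the read-modify-write that follows.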
+ SELECT id INTO STRICT s_id FROM sync_info WHERE sync_target = s_target AND sync_function = s_function; + PERFORM pg_advisory_lock(s_id); + SELECT worker, status, failure_count, last_task_start, last_task_end, task_times_out_at + INTO t_worker, t_status, t_failure_count, t_last_task_start, t_last_task_end, t_times_out_at + FROM sync_info + WHERE id = s_id; + + IF t_status = 'active' AND t_last_task_start >= coalesce(t_last_task_end, t_last_task_start) AND start_time > t_times_out_at THEN + t_status := 'timeout'; + t_failure_count := t_failure_count + 1; + END IF; + -- basic backoff + task_interval := task_interval * (1+t_failure_count); + IF coalesce(t_last_task_end, t_last_task_start) + task_interval < now() THEN + -- we are ready to take on the task + UPDATE sync_info + SET worker=s_worker, status='active', task_times_out_at = now() + timeout, last_task_start = now(), failure_count=t_failure_count + WHERE id=s_id; + ELSE + -- the task has been tried recently enough + IF t_status = 'timeout' THEN + UPDATE sync_info + SET status=t_status, failure_count=t_failure_count + WHERE id=s_id; + END IF; + result := coalesce(t_last_task_end, t_last_task_start) + task_interval - now(); + END IF; + + PERFORM pg_advisory_unlock(s_id); + RETURN result; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION end_sync_task(s_target BIGINT, s_function VARCHAR(20), s_worker varchar(100), s_status task_status) RETURNS VOID AS $$ +DECLARE t_id INTEGER; +DECLARE t_target varchar; +DECLARE t_worker varchar; +DECLARE t_status task_status; +DECLARE t_failure_count SMALLINT; +DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE; +BEGIN + SELECT id, worker, status, failure_count, last_task_end + INTO STRICT t_id, t_worker, t_status, t_failure_count, t_last_task_end + FROM sync_info WHERE sync_target = s_target AND sync_function = s_function; + ASSERT s_status > 'active'; + ASSERT t_worker = s_worker, "Wrong worker"; + ASSERT s_status >= t_status, "do not go back in status"; + IF s_status = 'complete' THEN + t_last_task_end := now(); + t_failure_count := 0; + ELSE + IF t_status != s_status THEN + t_failure_count := t_failure_count + 1; + END IF; + END IF; + + UPDATE sync_info + SET status = s_status, + task_times_out_at=null, + last_task_end=t_last_task_end, + failure_count=t_failure_count + WHERE id=t_id; +END; +$$ LANGUAGE plpgsql; diff --git a/packages/database/supabase/migrations/20250513173724_content_concept_key.sql b/packages/database/supabase/migrations/20250513173724_content_concept_key.sql new file mode 100644 index 000000000..2891c3ac2 --- /dev/null +++ b/packages/database/supabase/migrations/20250513173724_content_concept_key.sql @@ -0,0 +1,91 @@ +-- rename constraint + +alter table "public"."AutomatedAgent" drop constraint "person_id_fkey"; + +alter table "public"."AutomatedAgent" add constraint "automated_agent_id_fkey" FOREIGN KEY (id) REFERENCES "Agent"(id) ON UPDATE CASCADE ON DELETE CASCADE; + +-- now handled by sync_table + +alter table "public"."Concept" drop column "last_synced"; + +alter table "public"."Content" drop column "last_synced"; + +alter table "public"."Document" drop column "last_synced"; + +-- transfer of column + +alter table "public"."Concept" add column "represented_by_id" bigint; + +alter table "public"."Concept" add constraint "Concept_represented_by_id_fkey" FOREIGN KEY (represented_by_id) REFERENCES "Content"(id) ON UPDATE CASCADE ON DELETE SET NULL; + +CREATE UNIQUE INDEX "Concept_represented_by" ON public."Concept" (represented_by_id); + +-- transfer data + +UPDATE 
public."Concept" SET represented_by_id = public."Content".id + FROM public."Content" + WHERE public."Concept".id=represents_id; + +-- drop the Content column + +alter table "public"."Content" drop constraint "Content_represents_id_fkey"; + +drop index if exists "public"."Content_represents"; + +alter table "public"."Content" drop column "represents_id"; + +-- Content embedding functions + +set check_function_bodies = off; + +-- strangely the check fails to interpret <=>, despite the vector extension being installed. + +CREATE OR REPLACE FUNCTION public.match_content_embeddings(query_embedding vector, match_threshold double precision, match_count integer, current_document_id integer DEFAULT NULL::integer) + RETURNS TABLE(content_id bigint, roam_uid text, text_content text, similarity double precision) + LANGUAGE sql + STABLE +AS $function$ +SELECT + c.id AS content_id, + c.source_local_id AS roam_uid, + c.text AS text_content, + 1 - (ce.vector <=> query_embedding) AS similarity +FROM "public"."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce +JOIN "public"."Content" AS c ON ce.target_id = c.id +WHERE 1 - (ce.vector <=> query_embedding) > match_threshold + AND ce.obsolete = FALSE +ORDER BY + ce.vector <=> query_embedding ASC +LIMIT match_count; +$function$ +; + +CREATE OR REPLACE FUNCTION public.match_embeddings_for_subset_nodes(p_query_embedding vector, p_subset_roam_uids text[]) + RETURNS TABLE(content_id bigint, roam_uid text, text_content text, similarity double precision) + LANGUAGE sql + STABLE +AS $function$ +WITH subset_content_with_embeddings AS ( + -- Step 1: Identify content and fetch embeddings ONLY for the nodes in the provided Roam UID subset + SELECT + c.id AS content_id, + c.source_local_id AS roam_uid, + c.text AS text_content, + ce.vector AS embedding_vector + FROM "public"."Content" AS c + JOIN "public"."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce ON c.id = ce.target_id + WHERE + c.source_local_id = ANY(p_subset_roam_uids) -- Filter Content by the provided Roam UIDs + AND ce.obsolete = FALSE +) +SELECT + ss_ce.content_id, + ss_ce.roam_uid, + ss_ce.text_content, + 1 - (ss_ce.embedding_vector <=> p_query_embedding) AS similarity +FROM subset_content_with_embeddings AS ss_ce +ORDER BY similarity DESC; -- Order by calculated similarity, highest first +$function$ +; + +set check_function_bodies = on; diff --git a/packages/database/supabase/migrations/20250517154122_plpgsql_linting.sql b/packages/database/supabase/migrations/20250517154122_plpgsql_linting.sql new file mode 100644 index 000000000..18b0b242e --- /dev/null +++ b/packages/database/supabase/migrations/20250517154122_plpgsql_linting.sql @@ -0,0 +1,95 @@ +CREATE OR REPLACE FUNCTION public.end_sync_task(s_target bigint, s_function character varying, s_worker character varying, s_status task_status) + RETURNS void + LANGUAGE plpgsql +AS $function$ +DECLARE t_id INTEGER; +DECLARE t_worker varchar; +DECLARE t_status task_status; +DECLARE t_failure_count SMALLINT; +DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE; +BEGIN + SELECT id, worker, status, failure_count, last_task_end + INTO STRICT t_id, t_worker, t_status, t_failure_count, t_last_task_end + FROM sync_info WHERE sync_target = s_target AND sync_function = s_function; + ASSERT s_status > 'active'; + ASSERT t_worker = s_worker, 'Wrong worker'; + ASSERT s_status >= t_status, 'do not go back in status'; + IF s_status = 'complete' THEN + t_last_task_end := now(); + t_failure_count := 0; + ELSE + IF t_status != s_status THEN + t_failure_count := 
t_failure_count + 1;
+        END IF;
+    END IF;
+
+    UPDATE sync_info
+    SET status = s_status,
+        task_times_out_at=null,
+        last_task_end=t_last_task_end,
+        failure_count=t_failure_count
+    WHERE id=t_id;
+END;
+$function$
+;
+
+CREATE OR REPLACE FUNCTION public.propose_sync_task(s_target bigint, s_function character varying, s_worker character varying, timeout interval, task_interval interval)
+ RETURNS interval
+ LANGUAGE plpgsql
+AS $function$
+DECLARE s_id INTEGER;
+DECLARE start_time TIMESTAMP WITH TIME ZONE;
+DECLARE t_status task_status;
+DECLARE t_failure_count SMALLINT;
+DECLARE t_last_task_start TIMESTAMP WITH TIME ZONE;
+DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE;
+DECLARE t_times_out_at TIMESTAMP WITH TIME ZONE;
+DECLARE result INTERVAL = NULL;
+BEGIN
+    ASSERT timeout * 2 < task_interval;
+    ASSERT timeout >= '1s'::interval;
+    ASSERT task_interval >= '5s'::interval;
+    start_time := now();
+    INSERT INTO sync_info (sync_target, sync_function, status, worker, last_task_start, task_times_out_at)
+    VALUES (s_target, s_function, 'active', s_worker, start_time, start_time+timeout)
+    ON CONFLICT DO NOTHING
+    RETURNING id INTO s_id;
+    -- annoyingly, RETURNING yields null when the row already exists...
+    IF s_id IS NOT NULL THEN
+        -- brand-new row: this worker takes the task
+        RETURN NULL;
+    END IF;
+    -- now we know the row pre-existed. Maybe the task is already active.
+    SELECT id INTO STRICT s_id FROM sync_info WHERE sync_target = s_target AND sync_function = s_function;
+    PERFORM pg_advisory_lock(s_id);
+    SELECT status, failure_count, last_task_start, last_task_end, task_times_out_at
+    INTO t_status, t_failure_count, t_last_task_start, t_last_task_end, t_times_out_at
+    FROM sync_info
+    WHERE id = s_id;
+
+    IF t_status = 'active' AND t_last_task_start >= coalesce(t_last_task_end, t_last_task_start) AND start_time > t_times_out_at THEN
+        t_status := 'timeout';
+        t_failure_count := t_failure_count + 1;
+    END IF;
+    -- basic backoff
+    task_interval := task_interval * (1+t_failure_count);
+    IF coalesce(t_last_task_end, t_last_task_start) + task_interval < now() THEN
+        -- we are ready to take on the task
+        UPDATE sync_info
+        SET worker=s_worker, status='active', task_times_out_at = now() + timeout, last_task_start = now(), failure_count=t_failure_count
+        WHERE id=s_id;
+    ELSE
+        -- the task has been tried recently enough
+        IF t_status = 'timeout' THEN
+            UPDATE sync_info
+            SET status=t_status, failure_count=t_failure_count
+            WHERE id=s_id;
+        END IF;
+        result := coalesce(t_last_task_end, t_last_task_start) + task_interval - now();
+    END IF;
+
+    PERFORM pg_advisory_unlock(s_id);
+    RETURN result;
+END;
+$function$
+; diff --git a/packages/database/supabase/migrations/20250520132747_restrict_search_by_document.sql b/packages/database/supabase/migrations/20250520132747_restrict_search_by_document.sql new file mode 100644 index 000000000..1e9b32a95 --- /dev/null +++ b/packages/database/supabase/migrations/20250520132747_restrict_search_by_document.sql @@ -0,0 +1,48 @@ +CREATE OR REPLACE FUNCTION public.match_content_embeddings(query_embedding vector, match_threshold double precision, match_count integer, current_document_id integer DEFAULT NULL::integer)
+ RETURNS TABLE(content_id bigint, roam_uid text, text_content text, similarity double precision)
+ LANGUAGE sql
+ STABLE
+AS $function$
+SELECT
+    c.id AS content_id,
+    c.source_local_id AS roam_uid,
+    c.text AS text_content,
+    1 - (ce.vector <=> query_embedding) AS similarity
+FROM public."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce
+JOIN public."Content" AS c ON ce.target_id = c.id
+WHERE 1 - (ce.vector 
<=> query_embedding) > match_threshold + AND ce.obsolete = FALSE + AND (current_document_id IS NULL OR c.document_id = current_document_id) +ORDER BY + ce.vector <=> query_embedding ASC +LIMIT match_count; +$function$; + +-- Supabase wants to replace this function for no obvious reason. Letting it. + +CREATE OR REPLACE FUNCTION public.match_embeddings_for_subset_nodes(p_query_embedding vector, p_subset_roam_uids text[]) + RETURNS TABLE(content_id bigint, roam_uid text, text_content text, similarity double precision) + LANGUAGE sql + STABLE +AS $function$ +WITH subset_content_with_embeddings AS ( + -- Step 1: Identify content and fetch embeddings ONLY for the nodes in the provided Roam UID subset + SELECT + c.id AS content_id, + c.source_local_id AS roam_uid, + c.text AS text_content, + ce.vector AS embedding_vector + FROM public."Content" AS c + JOIN public."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce ON c.id = ce.target_id + WHERE + c.source_local_id = ANY(p_subset_roam_uids) -- Filter Content by the provided Roam UIDs + AND ce.obsolete = FALSE +) +SELECT + ss_ce.content_id, + ss_ce.roam_uid, + ss_ce.text_content, + 1 - (ss_ce.embedding_vector <=> p_query_embedding) AS similarity +FROM subset_content_with_embeddings AS ss_ce +ORDER BY similarity DESC; -- Order by calculated similarity, highest first +$function$; diff --git a/packages/database/supabase/migrations/20250520133551_nodes_needing_sync.sql b/packages/database/supabase/migrations/20250520133551_nodes_needing_sync.sql new file mode 100644 index 000000000..ec23fd887 --- /dev/null +++ b/packages/database/supabase/migrations/20250520133551_nodes_needing_sync.sql @@ -0,0 +1,38 @@ +CREATE OR REPLACE FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb) + RETURNS TABLE(uid_to_sync text) + LANGUAGE plpgsql +AS $function$ + DECLARE + node_info jsonb; + roam_node_uid TEXT; + roam_node_edit_epoch_ms BIGINT; + content_db_last_modified_epoch_ms BIGINT; + BEGIN + FOR node_info IN SELECT * FROM jsonb_array_elements(nodes_from_roam) + LOOP + roam_node_uid := (node_info->>'uid')::text; + roam_node_edit_epoch_ms := (node_info->>'roam_edit_time')::bigint; + + -- Get the last_modified time from your Content table for the current node, converting it to epoch milliseconds + -- Assumes your 'last_modified' column in 'Content' is a timestamp type + SELECT EXTRACT(EPOCH FROM c.last_modified) * 1000 + INTO content_db_last_modified_epoch_ms + FROM public."Content" c -- Ensure "Content" matches your table name exactly (case-sensitive if quoted) + WHERE c.source_local_id = roam_node_uid; + + IF NOT FOUND THEN + -- Node does not exist in Supabase Content table, so it needs sync + uid_to_sync := roam_node_uid; + RETURN NEXT; + ELSE + -- Node exists, compare timestamps + IF roam_node_edit_epoch_ms > content_db_last_modified_epoch_ms THEN + uid_to_sync := roam_node_uid; + RETURN NEXT; + END IF; + END IF; + END LOOP; + RETURN; + END; + $function$ +; diff --git a/packages/database/supabase/migrations/20250522193823_rename_discourse_space.sql b/packages/database/supabase/migrations/20250522193823_rename_discourse_space.sql new file mode 100644 index 000000000..dcc2e7851 --- /dev/null +++ b/packages/database/supabase/migrations/20250522193823_rename_discourse_space.sql @@ -0,0 +1,11 @@ +ALTER TABLE public."DiscoursePlatform" RENAME TO "Platform"; +ALTER TABLE public."Platform" RENAME CONSTRAINT "DiscoursePlatform_pkey" TO "Platform_pkey"; + +ALTER TABLE public."DiscourseSpace" RENAME TO "Space"; +ALTER TABLE public."Space" RENAME CONSTRAINT 
"DiscourseSpace_pkey" TO "Space_pkey"; +ALTER TABLE public."Space" RENAME COLUMN discourse_platform_id TO platform_id; +ALTER TABLE PUBLIC."Space" RENAME CONSTRAINT "DiscourseSpace_discourse_platform_id_fkey" TO "Space_platform_id_fkey"; + +COMMENT ON TABLE public."Space" IS +'A space on a platform representing a community engaged in a conversation'; +COMMENT ON TABLE public."Account" IS 'A user account on a platform'; diff --git a/packages/database/supabase/schemas/account.sql b/packages/database/supabase/schemas/account.sql new file mode 100644 index 000000000..9914188d9 --- /dev/null +++ b/packages/database/supabase/schemas/account.sql @@ -0,0 +1,76 @@ +CREATE TABLE IF NOT EXISTS public."Account" ( + id bigint DEFAULT nextval( + 'public.entity_id_seq'::regclass + ) NOT NULL, + platform_id bigint NOT NULL, + person_id bigint NOT NULL, + write_permission boolean NOT NULL, + active boolean DEFAULT true NOT NULL +); + +ALTER TABLE public."Account" OWNER TO "postgres"; + +COMMENT ON TABLE public."Account" IS 'A user account on a platform'; + + +ALTER TABLE ONLY public."Account" +ADD CONSTRAINT "Account_person_id_fkey" FOREIGN KEY ( + person_id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY public."Account" +ADD CONSTRAINT "Account_platform_id_fkey" FOREIGN KEY ( + platform_id +) REFERENCES public."Platform" ( + id +) ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY public."Account" +ADD CONSTRAINT "Account_pkey" PRIMARY KEY (id); + + +CREATE TABLE IF NOT EXISTS public."SpaceAccess" ( + id bigint DEFAULT nextval( + 'public.entity_id_seq'::regclass + ) NOT NULL, + space_id bigint, + account_id bigint NOT NULL, + editor boolean NOT NULL +); + +ALTER TABLE ONLY public."SpaceAccess" +ADD CONSTRAINT "SpaceAccess_account_id_space_id_key" UNIQUE ( + account_id, space_id +); + +ALTER TABLE ONLY public."SpaceAccess" +ADD CONSTRAINT "SpaceAccess_pkey" PRIMARY KEY (id); + + +ALTER TABLE public."SpaceAccess" OWNER TO "postgres"; + +COMMENT ON TABLE public."SpaceAccess" IS 'An access control entry for a space'; + +COMMENT ON COLUMN public."SpaceAccess".space_id IS 'The space in which the content is located'; + + +ALTER TABLE ONLY public."SpaceAccess" +ADD CONSTRAINT "SpaceAccess_account_id_fkey" FOREIGN KEY ( + account_id +) REFERENCES public."Account" (id) ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY public."SpaceAccess" +ADD CONSTRAINT "SpaceAccess_space_id_fkey" FOREIGN KEY ( + space_id +) REFERENCES public."Space" ( + id +) ON UPDATE CASCADE ON DELETE CASCADE; + +GRANT ALL ON TABLE public."SpaceAccess" TO anon; +GRANT ALL ON TABLE public."SpaceAccess" TO authenticated; +GRANT ALL ON TABLE public."SpaceAccess" TO service_role; + + +GRANT ALL ON TABLE public."Account" TO anon; +GRANT ALL ON TABLE public."Account" TO authenticated; +GRANT ALL ON TABLE public."Account" TO service_role; diff --git a/packages/database/supabase/schemas/agent.sql b/packages/database/supabase/schemas/agent.sql new file mode 100644 index 000000000..ecbf3f511 --- /dev/null +++ b/packages/database/supabase/schemas/agent.sql @@ -0,0 +1,68 @@ +CREATE TABLE IF NOT EXISTS public."Agent" ( + id bigint DEFAULT nextval( + 'public.entity_id_seq'::regclass + ) NOT NULL, + type public."EntityType" NOT NULL +); + + +ALTER TABLE ONLY public."Agent" +ADD CONSTRAINT "Agent_pkey" PRIMARY KEY (id); + +ALTER TABLE public."Agent" OWNER TO "postgres"; + +COMMENT ON TABLE public."Agent" IS 'An agent that acts in the system'; + +CREATE TABLE IF NOT EXISTS public."AutomatedAgent" ( + id 
bigint NOT NULL, + name character varying NOT NULL, + metadata jsonb DEFAULT '{}'::jsonb NOT NULL, + deterministic boolean DEFAULT false, + version character varying +); + +ALTER TABLE ONLY public."AutomatedAgent" +ADD CONSTRAINT "AutomatedAgent_pkey" PRIMARY KEY (id); + +ALTER TABLE ONLY public."AutomatedAgent" +ADD CONSTRAINT automated_agent_id_fkey FOREIGN KEY ( + id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE; + + +ALTER TABLE public."AutomatedAgent" OWNER TO "postgres"; + +COMMENT ON TABLE public."AutomatedAgent" IS 'An automated agent'; + +CREATE TABLE IF NOT EXISTS public."Person" ( + id bigint NOT NULL, + name character varying NOT NULL, + orcid character varying(20), + email character varying NOT NULL +); + +ALTER TABLE ONLY public."Person" +ADD CONSTRAINT "Person_pkey" PRIMARY KEY (id); + +ALTER TABLE ONLY public."Person" +ADD CONSTRAINT person_id_fkey FOREIGN KEY ( + id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE; + + +ALTER TABLE public."Person" OWNER TO "postgres"; + +COMMENT ON TABLE public."Person" IS 'A person using the system'; + + +GRANT ALL ON TABLE public."Agent" TO anon; +GRANT ALL ON TABLE public."Agent" TO authenticated; +GRANT ALL ON TABLE public."Agent" TO service_role; + +GRANT ALL ON TABLE public."AutomatedAgent" TO anon; +GRANT ALL ON TABLE public."AutomatedAgent" TO authenticated; +GRANT ALL ON TABLE public."AutomatedAgent" TO service_role; + +GRANT ALL ON TABLE public."Person" TO anon; +GRANT ALL ON TABLE public."Person" TO authenticated; +GRANT ALL ON TABLE public."Person" TO service_role; diff --git a/packages/database/supabase/schemas/base.sql b/packages/database/supabase/schemas/base.sql new file mode 100644 index 000000000..64947f9e2 --- /dev/null +++ b/packages/database/supabase/schemas/base.sql @@ -0,0 +1,65 @@ +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = true; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = on; +SET default_tablespace = ''; +SET default_table_access_method = heap; + +COMMENT ON SCHEMA public IS 'standard public schema'; + + +ALTER PUBLICATION supabase_realtime OWNER TO postgres; + +GRANT USAGE ON SCHEMA public TO postgres; +GRANT USAGE ON SCHEMA public TO anon; +GRANT USAGE ON SCHEMA public TO authenticated; +GRANT USAGE ON SCHEMA public TO service_role; + +CREATE TYPE public."EntityType" AS ENUM ( + 'Platform', + 'Space', + 'Account', + 'Person', + 'AutomatedAgent', + 'Document', + 'Content', + 'Concept', + 'ConceptSchema', + 'ContentLink', + 'Occurrence' +); + +ALTER TYPE public."EntityType" OWNER TO postgres; + +CREATE SEQUENCE IF NOT EXISTS public.entity_id_seq +START WITH 1 +INCREMENT BY 1 +NO MINVALUE +NO MAXVALUE +CACHE 1; + +ALTER SEQUENCE public.entity_id_seq OWNER TO "postgres"; + + +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON SEQUENCES TO postgres; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON SEQUENCES TO anon; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON SEQUENCES TO authenticated; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON SEQUENCES TO service_role; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON FUNCTIONS TO postgres; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON 
FUNCTIONS TO anon; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON FUNCTIONS TO authenticated; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON FUNCTIONS TO service_role; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON TABLES TO postgres; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON TABLES TO anon; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON TABLES TO authenticated; +ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public GRANT ALL ON TABLES TO service_role; + +GRANT ALL ON SEQUENCE public.entity_id_seq TO anon; +GRANT ALL ON SEQUENCE public.entity_id_seq TO authenticated; +GRANT ALL ON SEQUENCE public.entity_id_seq TO service_role; diff --git a/packages/database/supabase/schemas/concept.sql b/packages/database/supabase/schemas/concept.sql new file mode 100644 index 000000000..dbbc3686e --- /dev/null +++ b/packages/database/supabase/schemas/concept.sql @@ -0,0 +1,88 @@ +CREATE TYPE public."EpistemicStatus" AS ENUM ( + 'certainly_not', + 'strong_evidence_against', + 'could_be_false', + 'unknown', + 'uncertain', + 'contentious', + 'could_be_true', + 'strong_evidence_for', + 'certain' +); + +ALTER TYPE public."EpistemicStatus" OWNER TO postgres; + + +CREATE TABLE IF NOT EXISTS public."Concept" ( + id bigint DEFAULT nextval( + 'public.entity_id_seq'::regclass + ) NOT NULL, + epistemic_status public."EpistemicStatus" DEFAULT 'unknown'::public."EpistemicStatus" NOT NULL, + name character varying NOT NULL, + description text, + author_id bigint, + created timestamp without time zone NOT NULL, + last_modified timestamp without time zone NOT NULL, + space_id bigint, + arity smallint DEFAULT 0 NOT NULL, + schema_id bigint, + content jsonb DEFAULT '{}'::jsonb NOT NULL, + is_schema boolean DEFAULT false NOT NULL, + represented_by_id bigint +); + +ALTER TABLE public."Concept" OWNER TO "postgres"; + +COMMENT ON TABLE public."Concept" IS 'An abstract concept, claim or relation'; + +COMMENT ON COLUMN public."Concept".author_id IS 'The author of content'; + +COMMENT ON COLUMN public."Concept".created IS 'The time when the content was created in the remote source'; + +COMMENT ON COLUMN public."Concept".last_modified IS 'The last time the content was modified in the remote source'; + +COMMENT ON COLUMN public."Concept".space_id IS 'The space in which the content is located'; + + +ALTER TABLE ONLY public."Concept" +ADD CONSTRAINT "Concept_pkey" PRIMARY KEY (id); + +ALTER TABLE ONLY public."Concept" +ADD FOREIGN KEY (represented_by_id) REFERENCES public."Content" ( + id +) ON DELETE SET NULL ON UPDATE CASCADE; + +CREATE INDEX "Concept_content" ON public."Concept" USING gin ( + content jsonb_path_ops +); + +CREATE INDEX "Concept_schema" ON public."Concept" USING btree (schema_id); + +CREATE INDEX "Concept_space" ON public."Concept" USING btree (space_id); + +CREATE UNIQUE INDEX "Concept_represented_by" ON public."Concept" ( + represented_by_id +); + + +ALTER TABLE ONLY public."Concept" +ADD CONSTRAINT "Concept_author_id_fkey" FOREIGN KEY ( + author_id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE SET NULL; + +ALTER TABLE ONLY public."Concept" +ADD CONSTRAINT "Concept_schema_id_fkey" FOREIGN KEY ( + schema_id +) REFERENCES public."Concept" (id) ON UPDATE CASCADE ON DELETE SET NULL; + +ALTER TABLE ONLY public."Concept" +ADD CONSTRAINT "Concept_space_id_fkey" FOREIGN KEY ( + space_id +) REFERENCES public."Space" ( + id +) ON UPDATE CASCADE ON DELETE CASCADE; + 
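+-- Note: "Concept_represented_by" above is a UNIQUE index, so a given Content
+-- row can represent at most one Concept (a one-to-one representation link).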
+ +GRANT ALL ON TABLE public."Concept" TO anon; +GRANT ALL ON TABLE public."Concept" TO authenticated; +GRANT ALL ON TABLE public."Concept" TO service_role; diff --git a/packages/database/supabase/schemas/content.sql b/packages/database/supabase/schemas/content.sql new file mode 100644 index 000000000..13478f5f8 --- /dev/null +++ b/packages/database/supabase/schemas/content.sql @@ -0,0 +1,148 @@ +CREATE TYPE public."Scale" AS ENUM ( + 'document', + 'post', + 'chunk_unit', + 'section', + 'block', + 'field', + 'paragraph', + 'quote', + 'sentence', + 'phrase' +); + +ALTER TYPE public."Scale" OWNER TO postgres; + +CREATE TABLE IF NOT EXISTS public."Document" ( + id bigint DEFAULT nextval( + 'public.entity_id_seq'::regclass + ) NOT NULL, + space_id bigint, + source_local_id character varying, + url character varying, + "created" timestamp without time zone NOT NULL, + metadata jsonb DEFAULT '{}'::jsonb NOT NULL, + last_modified timestamp without time zone NOT NULL, + author_id bigint NOT NULL, + contents oid +); + +ALTER TABLE ONLY public."Document" +ADD CONSTRAINT "Document_pkey" PRIMARY KEY (id); + +ALTER TABLE ONLY public."Document" +ADD CONSTRAINT "Document_author_id_fkey" FOREIGN KEY ( + author_id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY public."Document" +ADD CONSTRAINT "Document_space_id_fkey" FOREIGN KEY ( + space_id +) REFERENCES public."Space" ( + id +) ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE public."Document" OWNER TO "postgres"; + +COMMENT ON COLUMN public."Document".space_id IS 'The space in which the content is located'; + +COMMENT ON COLUMN public."Document".source_local_id IS 'The unique identifier of the content in the remote source'; + +COMMENT ON COLUMN public."Document".created IS 'The time when the content was created in the remote source'; + +COMMENT ON COLUMN public."Document".last_modified IS 'The last time the content was modified in the remote source'; + +COMMENT ON COLUMN public."Document".author_id IS 'The author of content'; + +COMMENT ON COLUMN public."Document".contents IS 'A large object OID for the downloaded raw content'; + + +CREATE TABLE IF NOT EXISTS public."Content" ( + id bigint DEFAULT nextval( + 'public.entity_id_seq'::regclass + ) NOT NULL, + document_id bigint NOT NULL, + source_local_id character varying, + author_id bigint, + creator_id bigint, + created timestamp without time zone NOT NULL, + text text NOT NULL, + metadata jsonb DEFAULT '{}'::jsonb NOT NULL, + scale public."Scale" NOT NULL, + space_id bigint, + last_modified timestamp without time zone NOT NULL, + part_of_id bigint +); + +ALTER TABLE ONLY public."Content" +ADD CONSTRAINT "Content_pkey" PRIMARY KEY (id); + +ALTER TABLE ONLY public."Content" +ADD CONSTRAINT "Content_author_id_fkey" FOREIGN KEY ( + author_id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE SET NULL; + +ALTER TABLE ONLY public."Content" +ADD CONSTRAINT "Content_creator_id_fkey" FOREIGN KEY ( + creator_id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE SET NULL; + +ALTER TABLE ONLY public."Content" +ADD CONSTRAINT "Content_document_id_fkey" FOREIGN KEY ( + document_id +) REFERENCES public."Document" (id) ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY public."Content" +ADD CONSTRAINT "Content_part_of_id_fkey" FOREIGN KEY ( + part_of_id +) REFERENCES public."Content" (id) ON UPDATE CASCADE ON DELETE SET NULL; + +ALTER TABLE ONLY public."Content" +ADD CONSTRAINT "Content_space_id_fkey" FOREIGN KEY ( + space_id +) REFERENCES 
public."Space" ( + id +) ON UPDATE CASCADE ON DELETE CASCADE; + +CREATE INDEX "Content_document" ON public."Content" USING btree ( + document_id +); + +CREATE INDEX "Content_part_of" ON public."Content" USING btree ( + part_of_id +); + +CREATE INDEX "Content_space" ON public."Content" USING btree (space_id); + +CREATE UNIQUE INDEX "Content_space_and_id" ON public."Content" USING btree ( + space_id, source_local_id +) WHERE (source_local_id IS NOT NULL); + +CREATE INDEX "Content_text" ON public."Content" USING pgroonga (text); + +ALTER TABLE public."Content" OWNER TO "postgres"; + +COMMENT ON TABLE public."Content" IS 'A unit of content'; + +COMMENT ON COLUMN public."Content".source_local_id IS 'The unique identifier of the content in the remote source'; + +COMMENT ON COLUMN public."Content".author_id IS 'The author of content'; + +COMMENT ON COLUMN public."Content".creator_id IS 'The creator of a logical structure, such as a content subdivision'; + +COMMENT ON COLUMN public."Content".created IS 'The time when the content was created in the remote source'; + +COMMENT ON COLUMN public."Content".space_id IS 'The space in which the content is located'; + +COMMENT ON COLUMN public."Content".last_modified IS 'The last time the content was modified in the remote source'; + +COMMENT ON COLUMN public."Content".part_of_id IS 'This content is part of a larger content unit'; + + +GRANT ALL ON TABLE public."Document" TO anon; +GRANT ALL ON TABLE public."Document" TO authenticated; +GRANT ALL ON TABLE public."Document" TO service_role; + +GRANT ALL ON TABLE public."Content" TO anon; +GRANT ALL ON TABLE public."Content" TO authenticated; +GRANT ALL ON TABLE public."Content" TO service_role; diff --git a/packages/database/supabase/schemas/contributor.sql b/packages/database/supabase/schemas/contributor.sql new file mode 100644 index 000000000..f83b9a40a --- /dev/null +++ b/packages/database/supabase/schemas/contributor.sql @@ -0,0 +1,52 @@ +CREATE TABLE IF NOT EXISTS public.content_contributors ( + content_id bigint NOT NULL, + contributor_id bigint NOT NULL +); + +ALTER TABLE ONLY public.content_contributors +ADD CONSTRAINT content_contributors_pkey PRIMARY KEY ( + content_id, contributor_id +); + +ALTER TABLE ONLY public.content_contributors +ADD CONSTRAINT content_contributors_content_id_fkey FOREIGN KEY ( + content_id +) REFERENCES public."Content" (id) ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY public.content_contributors +ADD CONSTRAINT content_contributors_contributor_id_fkey FOREIGN KEY ( + contributor_id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE public.content_contributors OWNER TO "postgres"; + + +CREATE TABLE IF NOT EXISTS public.concept_contributors ( + concept_id bigint NOT NULL, + contributor_id bigint NOT NULL +); + +ALTER TABLE public.concept_contributors OWNER TO "postgres"; + +ALTER TABLE ONLY public.concept_contributors +ADD CONSTRAINT concept_contributors_concept_id_fkey FOREIGN KEY ( + concept_id +) REFERENCES public."Concept" (id) ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY public.concept_contributors +ADD CONSTRAINT concept_contributors_contributor_id_fkey FOREIGN KEY ( + contributor_id +) REFERENCES public."Agent" (id) ON UPDATE CASCADE ON DELETE CASCADE; + +ALTER TABLE ONLY public.concept_contributors +ADD CONSTRAINT concept_contributors_pkey PRIMARY KEY ( + concept_id, contributor_id +); + +GRANT ALL ON TABLE public.concept_contributors TO anon; +GRANT ALL ON TABLE public.concept_contributors TO authenticated; 
+GRANT ALL ON TABLE public.concept_contributors TO service_role; + +GRANT ALL ON TABLE public.content_contributors TO anon; +GRANT ALL ON TABLE public.content_contributors TO authenticated; +GRANT ALL ON TABLE public.content_contributors TO service_role; diff --git a/packages/database/supabase/schemas/embedding.sql b/packages/database/supabase/schemas/embedding.sql new file mode 100644 index 000000000..47a51c371 --- /dev/null +++ b/packages/database/supabase/schemas/embedding.sql @@ -0,0 +1,101 @@ +CREATE TYPE public."EmbeddingName" AS ENUM ( + 'openai_text_embedding_ada2_1536', + 'openai_text_embedding_3_small_512', + 'openai_text_embedding_3_small_1536', + 'openai_text_embedding_3_large_256', + 'openai_text_embedding_3_large_1024', + 'openai_text_embedding_3_large_3072' +); + +ALTER TYPE public."EmbeddingName" OWNER TO "postgres"; + +CREATE TABLE IF NOT EXISTS public."ContentEmbedding_openai_text_embedding_3_small_1536" ( +target_id bigint NOT NULL, +"model" public."EmbeddingName" DEFAULT 'openai_text_embedding_3_small_1536'::public."EmbeddingName" NOT NULL, +"vector" extensions.vector (1536) NOT NULL, +obsolete boolean DEFAULT false +) ; + +ALTER TABLE public."ContentEmbedding_openai_text_embedding_3_small_1536" OWNER TO "postgres" ; + +ALTER TABLE ONLY public."ContentEmbedding_openai_text_embedding_3_small_1536" +ADD CONSTRAINT "ContentEmbedding_openai_text_embedding_3_small_1536_pkey" PRIMARY KEY (target_id) ; + +ALTER TABLE ONLY public."ContentEmbedding_openai_text_embedding_3_small_1536" +ADD CONSTRAINT "ContentEmbedding_openai_text_embedding_3_small_1_target_id_fkey" FOREIGN KEY (target_id) REFERENCES public."Content" (id) ON UPDATE CASCADE ON DELETE CASCADE ; + +GRANT ALL ON TABLE public."ContentEmbedding_openai_text_embedding_3_small_1536" TO "anon" ; +GRANT ALL ON TABLE public."ContentEmbedding_openai_text_embedding_3_small_1536" TO "authenticated" ; +GRANT ALL ON TABLE public."ContentEmbedding_openai_text_embedding_3_small_1536" TO "service_role" ; + +set search_path to public, extensions ; + +CREATE OR REPLACE FUNCTION public.match_content_embeddings ( +query_embedding extensions.vector, +match_threshold double precision, +match_count integer, +current_document_id integer DEFAULT NULL::integer) +RETURNS TABLE ( +content_id bigint, +roam_uid Text, +text_content Text, +similarity double precision) +LANGUAGE sql STABLE +AS $$ +SELECT + c.id AS content_id, + c.source_local_id AS roam_uid, + c.text AS text_content, + 1 - (ce.vector <=> query_embedding) AS similarity +FROM public."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce +JOIN public."Content" AS c ON ce.target_id = c.id +WHERE 1 - (ce.vector <=> query_embedding) > match_threshold + AND ce.obsolete = FALSE + AND (current_document_id IS NULL OR c.document_id = current_document_id) +ORDER BY + ce.vector <=> query_embedding ASC +LIMIT match_count; +$$ ; + +ALTER FUNCTION public.match_content_embeddings ( +query_embedding extensions.vector, +match_threshold double precision, +match_count integer, +current_document_id integer) OWNER TO "postgres" ; + +CREATE OR REPLACE FUNCTION public.match_embeddings_for_subset_nodes ( +"p_query_embedding" extensions.vector, +"p_subset_roam_uids" Text []) +RETURNS TABLE (content_id bigint, +roam_uid Text, +text_content Text, +similarity double precision) +LANGUAGE sql STABLE +AS $$ +WITH subset_content_with_embeddings AS ( + -- Step 1: Identify content and fetch embeddings ONLY for the nodes in the provided Roam UID subset + SELECT + c.id AS content_id, + c.source_local_id AS roam_uid, 
+ c.text AS text_content, + ce.vector AS embedding_vector + FROM public."Content" AS c + JOIN public."ContentEmbedding_openai_text_embedding_3_small_1536" AS ce ON c.id = ce.target_id + WHERE + c.source_local_id = ANY(p_subset_roam_uids) -- Filter Content by the provided Roam UIDs + AND ce.obsolete = FALSE +) +SELECT + ss_ce.content_id, + ss_ce.roam_uid, + ss_ce.text_content, + 1 - (ss_ce.embedding_vector <=> p_query_embedding) AS similarity +FROM subset_content_with_embeddings AS ss_ce +ORDER BY similarity DESC; -- Order by calculated similarity, highest first +$$ ; + +ALTER FUNCTION public.match_embeddings_for_subset_nodes ( +"p_query_embedding" extensions.vector, "p_subset_roam_uids" Text []) +OWNER TO "postgres" ; + +RESET ALL; diff --git a/packages/database/supabase/schemas/extensions.sql b/packages/database/supabase/schemas/extensions.sql new file mode 100644 index 000000000..83ccd84bb --- /dev/null +++ b/packages/database/supabase/schemas/extensions.sql @@ -0,0 +1,15 @@ +CREATE SCHEMA IF NOT EXISTS extensions; +CREATE SCHEMA IF NOT EXISTS graphql; +CREATE SCHEMA IF NOT EXISTS vault; + +CREATE EXTENSION IF NOT EXISTS pg_cron WITH SCHEMA pg_catalog; +CREATE EXTENSION IF NOT EXISTS pgroonga WITH SCHEMA extensions; +CREATE EXTENSION IF NOT EXISTS pg_graphql WITH SCHEMA graphql; +CREATE EXTENSION IF NOT EXISTS pg_jsonschema WITH SCHEMA extensions; +CREATE EXTENSION IF NOT EXISTS pg_stat_monitor WITH SCHEMA extensions; +CREATE EXTENSION IF NOT EXISTS pg_stat_statements WITH SCHEMA extensions; +CREATE EXTENSION IF NOT EXISTS pgcrypto WITH SCHEMA extensions; +CREATE EXTENSION IF NOT EXISTS pgjwt WITH SCHEMA extensions; +CREATE EXTENSION IF NOT EXISTS supabase_vault WITH SCHEMA vault; +CREATE EXTENSION IF NOT EXISTS "uuid-ossp" WITH SCHEMA extensions; +CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA extensions; diff --git a/packages/database/supabase/schemas/space.sql b/packages/database/supabase/schemas/space.sql new file mode 100644 index 000000000..122cdd023 --- /dev/null +++ b/packages/database/supabase/schemas/space.sql @@ -0,0 +1,47 @@ +CREATE TABLE IF NOT EXISTS public."Platform" ( + id bigint DEFAULT nextval( + 'public."entity_id_seq"'::regclass + ) NOT NULL, + name character varying NOT NULL, + url character varying NOT NULL +); + +ALTER TABLE public."Platform" OWNER TO "postgres"; + +COMMENT ON TABLE public."Platform" IS +'A data platform where discourse happens'; + +CREATE TABLE IF NOT EXISTS public."Space" ( + id bigint DEFAULT nextval( + 'public."entity_id_seq"'::regclass + ) NOT NULL, + url character varying, + name character varying NOT NULL, + platform_id bigint NOT NULL +); + +ALTER TABLE public."Space" OWNER TO "postgres"; + +COMMENT ON TABLE public."Space" IS +'A space on a platform representing a community engaged in a conversation'; + +ALTER TABLE ONLY public."Platform" +ADD CONSTRAINT "Platform_pkey" PRIMARY KEY (id); + +ALTER TABLE ONLY public."Space" +ADD CONSTRAINT "Space_pkey" PRIMARY KEY (id); + +ALTER TABLE ONLY public."Space" +ADD CONSTRAINT "Space_platform_id_fkey" FOREIGN KEY ( + platform_id +) REFERENCES public."Platform" ( + id +) ON UPDATE CASCADE ON DELETE CASCADE; + +GRANT ALL ON TABLE public."Platform" TO anon; +GRANT ALL ON TABLE public."Platform" TO authenticated; +GRANT ALL ON TABLE public."Platform" TO service_role; + +GRANT ALL ON TABLE public."Space" TO anon; +GRANT ALL ON TABLE public."Space" TO authenticated; +GRANT ALL ON TABLE public."Space" TO service_role; diff --git a/packages/database/supabase/schemas/sync.sql 
b/packages/database/supabase/schemas/sync.sql new file mode 100644 index 000000000..f5bbba07a --- /dev/null +++ b/packages/database/supabase/schemas/sync.sql @@ -0,0 +1,261 @@ +CREATE TYPE public.task_status AS ENUM ( + 'active', + 'timeout', + 'complete', + 'failed' +); + +ALTER TYPE public.task_status OWNER TO "postgres"; + +CREATE TABLE IF NOT EXISTS public.sync_info ( + id integer NOT NULL, + sync_target bigint, + sync_function character varying(20), + status public.task_status DEFAULT 'active'::public.task_status, + worker character varying(100) NOT NULL, + failure_count smallint DEFAULT 0, + last_task_start timestamp with time zone, + last_task_end timestamp with time zone, + task_times_out_at timestamp with time zone +); + +ALTER TABLE public.sync_info OWNER TO "postgres"; + +CREATE SEQUENCE IF NOT EXISTS public.sync_info_id_seq +AS integer +START WITH 1 +INCREMENT BY 1 +NO MINVALUE +NO MAXVALUE +CACHE 1; + +ALTER TABLE public.sync_info_id_seq OWNER TO "postgres"; + +ALTER SEQUENCE public.sync_info_id_seq OWNED BY public.sync_info.id; + +ALTER TABLE ONLY public.sync_info ALTER COLUMN id SET DEFAULT nextval( + 'public.sync_info_id_seq'::regclass +); + +ALTER TABLE ONLY public.sync_info +ADD CONSTRAINT sync_info_pkey PRIMARY KEY (id); + +CREATE UNIQUE INDEX sync_info_u_idx ON public.sync_info USING btree ( + "sync_target", sync_function +); + +set search_path to public, extensions ; + + +CREATE OR REPLACE FUNCTION public.end_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + s_status public.task_status +) RETURNS void +LANGUAGE plpgsql +AS $$ +DECLARE t_id INTEGER; +DECLARE t_worker varchar; +DECLARE t_status task_status; +DECLARE t_failure_count SMALLINT; +DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE; +BEGIN + SELECT id, worker, status, failure_count, last_task_end + INTO STRICT t_id, t_worker, t_status, t_failure_count, t_last_task_end + FROM sync_info WHERE sync_target = s_target AND sync_function = s_function; + ASSERT s_status > 'active'; + ASSERT t_worker = s_worker, 'Wrong worker'; + ASSERT s_status >= t_status, 'do not go back in status'; + IF s_status = 'complete' THEN + t_last_task_end := now(); + t_failure_count := 0; + ELSE + IF t_status != s_status THEN + t_failure_count := t_failure_count + 1; + END IF; + END IF; + + UPDATE sync_info + SET status = s_status, + task_times_out_at=null, + last_task_end=t_last_task_end, + failure_count=t_failure_count + WHERE id=t_id; +END; +$$; + +ALTER FUNCTION public.end_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + s_status public.task_status +) OWNER TO "postgres"; + + +CREATE OR REPLACE FUNCTION public.propose_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + "timeout" interval, + "task_interval" interval +) RETURNS interval +LANGUAGE plpgsql +AS $$ +DECLARE s_id INTEGER; +DECLARE start_time TIMESTAMP WITH TIME ZONE; +DECLARE t_status task_status; +DECLARE t_failure_count SMALLINT; +DECLARE t_last_task_start TIMESTAMP WITH TIME ZONE; +DECLARE t_last_task_end TIMESTAMP WITH TIME ZONE; +DECLARE t_times_out_at TIMESTAMP WITH TIME ZONE; +DECLARE result INTERVAL = NULL; +BEGIN + ASSERT timeout * 2 < task_interval; + ASSERT timeout >= '1s'::interval; + ASSERT task_interval >= '5s'::interval; + start_time := now(); + INSERT INTO sync_info (sync_target, sync_function, status, worker, last_task_start, task_times_out_at) + VALUES (s_target, s_function, 'active', s_worker, start_time, start_time+timeout) + ON 
CONFLICT DO NOTHING
+    RETURNING id INTO s_id;
+-- annoyingly, RETURNING yields null when the row already exists...
+    IF s_id IS NOT NULL THEN
+        -- brand-new row: this worker takes the task
+        RETURN NULL;
+    END IF;
+    -- now we know the row pre-existed. Maybe the task is already active.
+    SELECT id INTO STRICT s_id FROM sync_info WHERE sync_target = s_target AND sync_function = s_function;
+    PERFORM pg_advisory_lock(s_id);
+    SELECT status, failure_count, last_task_start, last_task_end, task_times_out_at
+    INTO t_status, t_failure_count, t_last_task_start, t_last_task_end, t_times_out_at
+    FROM sync_info
+    WHERE id = s_id;
+
+    IF t_status = 'active' AND t_last_task_start >= coalesce(t_last_task_end, t_last_task_start) AND start_time > t_times_out_at THEN
+        t_status := 'timeout';
+        t_failure_count := t_failure_count + 1;
+    END IF;
+    -- basic backoff
+    task_interval := task_interval * (1+t_failure_count);
+    IF coalesce(t_last_task_end, t_last_task_start) + task_interval < now() THEN
+        -- we are ready to take on the task
+        UPDATE sync_info
+        SET worker=s_worker, status='active', task_times_out_at = now() + timeout, last_task_start = now(), failure_count=t_failure_count
+        WHERE id=s_id;
+    ELSE
+        -- the task has been tried recently enough
+        IF t_status = 'timeout' THEN
+            UPDATE sync_info
+            SET status=t_status, failure_count=t_failure_count
+            WHERE id=s_id;
+        END IF;
+        result := coalesce(t_last_task_end, t_last_task_start) + task_interval - now();
+    END IF;
+
+    PERFORM pg_advisory_unlock(s_id);
+    RETURN result;
+END;
+$$;
+
+ALTER FUNCTION public.propose_sync_task(
+    s_target bigint,
+    s_function character varying,
+    s_worker character varying,
+    "timeout" interval,
+    "task_interval" interval
+) OWNER TO "postgres";
+
+CREATE OR REPLACE FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb)
+RETURNS TABLE (uid_to_sync text)
+LANGUAGE plpgsql
+AS $function$
+    DECLARE
+        node_info jsonb;
+        roam_node_uid TEXT;
+        roam_node_edit_epoch_ms BIGINT;
+        content_db_last_modified_epoch_ms BIGINT;
+    BEGIN
+        FOR node_info IN SELECT * FROM jsonb_array_elements(nodes_from_roam)
+        LOOP
+            roam_node_uid := (node_info->>'uid')::text;
+            roam_node_edit_epoch_ms := (node_info->>'roam_edit_time')::bigint;
+
+            -- Get the last_modified time from your Content table for the current node, converting it to epoch milliseconds
+            -- Assumes your 'last_modified' column in 'Content' is a timestamp type
+            SELECT EXTRACT(EPOCH FROM c.last_modified) * 1000
+            INTO content_db_last_modified_epoch_ms
+            FROM public."Content" c -- Ensure "Content" matches your table name exactly (case-sensitive if quoted)
+            WHERE c.source_local_id = roam_node_uid;
+
+            IF NOT FOUND THEN
+                -- Node does not exist in Supabase Content table, so it needs sync
+                uid_to_sync := roam_node_uid;
+                RETURN NEXT;
+            ELSE
+                -- Node exists, compare timestamps
+                IF roam_node_edit_epoch_ms > content_db_last_modified_epoch_ms THEN
+                    uid_to_sync := roam_node_uid;
+                    RETURN NEXT;
+                END IF;
+            END IF;
+        END LOOP;
+        RETURN;
+    END;
+    $function$
+;
+
+GRANT ALL ON TABLE public.sync_info TO "anon";
+GRANT ALL ON TABLE public.sync_info TO "authenticated";
+GRANT ALL ON TABLE public.sync_info TO "service_role";
+
+GRANT ALL ON SEQUENCE public.sync_info_id_seq TO "anon";
+GRANT ALL ON SEQUENCE public.sync_info_id_seq TO "authenticated";
+GRANT ALL ON SEQUENCE public.sync_info_id_seq TO "service_role";
+
+GRANT ALL ON FUNCTION public.end_sync_task(
+    s_target bigint,
+    s_function 
character varying, + s_worker character varying, + s_status public.task_status +) TO "authenticated"; +GRANT ALL ON FUNCTION public.end_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + s_status public.task_status +) TO "service_role"; + +GRANT ALL ON FUNCTION public.propose_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + "timeout" interval, + "task_interval" interval +) TO "anon"; +GRANT ALL ON FUNCTION public.propose_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + "timeout" interval, + "task_interval" interval +) TO "authenticated"; +GRANT ALL ON FUNCTION public.propose_sync_task( + s_target bigint, + s_function character varying, + s_worker character varying, + "timeout" interval, + "task_interval" interval +) TO "service_role"; + +GRANT ALL ON FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb) TO "anon"; +GRANT ALL ON FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb) TO "authenticated"; +GRANT ALL ON FUNCTION public.get_nodes_needing_sync(nodes_from_roam jsonb) TO "service_role"; + +RESET ALL; diff --git a/packages/database/types.gen.ts b/packages/database/types.gen.ts new file mode 100644 index 000000000..4c32dc33e --- /dev/null +++ b/packages/database/types.gen.ts @@ -0,0 +1,797 @@ +export type Json = + | string + | number + | boolean + | null + | { [key: string]: Json | undefined } + | Json[] + +export type Database = { + public: { + Tables: { + Account: { + Row: { + active: boolean + id: number + person_id: number + platform_id: number + write_permission: boolean + } + Insert: { + active?: boolean + id?: number + person_id: number + platform_id: number + write_permission: boolean + } + Update: { + active?: boolean + id?: number + person_id?: number + platform_id?: number + write_permission?: boolean + } + Relationships: [ + { + foreignKeyName: "Account_person_id_fkey" + columns: ["person_id"] + isOneToOne: false + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Account_platform_id_fkey" + columns: ["platform_id"] + isOneToOne: false + referencedRelation: "DiscoursePlatform" + referencedColumns: ["id"] + }, + ] + } + Agent: { + Row: { + id: number + type: Database["public"]["Enums"]["EntityType"] + } + Insert: { + id?: number + type: Database["public"]["Enums"]["EntityType"] + } + Update: { + id?: number + type?: Database["public"]["Enums"]["EntityType"] + } + Relationships: [] + } + AutomatedAgent: { + Row: { + deterministic: boolean | null + id: number + metadata: Json + name: string + version: string | null + } + Insert: { + deterministic?: boolean | null + id: number + metadata?: Json + name: string + version?: string | null + } + Update: { + deterministic?: boolean | null + id?: number + metadata?: Json + name?: string + version?: string | null + } + Relationships: [ + { + foreignKeyName: "automated_agent_id_fkey" + columns: ["id"] + isOneToOne: true + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + ] + } + Concept: { + Row: { + arity: number + author_id: number | null + content: Json + created: string + description: string | null + epistemic_status: Database["public"]["Enums"]["EpistemicStatus"] + id: number + is_schema: boolean + last_modified: string + name: string + represented_by_id: number | null + schema_id: number | null + space_id: number | null + } + Insert: { + arity?: number + author_id?: number | null + content?: Json + created: string + description?: string 
| null + epistemic_status?: Database["public"]["Enums"]["EpistemicStatus"] + id?: number + is_schema?: boolean + last_modified: string + name: string + represented_by_id?: number | null + schema_id?: number | null + space_id?: number | null + } + Update: { + arity?: number + author_id?: number | null + content?: Json + created?: string + description?: string | null + epistemic_status?: Database["public"]["Enums"]["EpistemicStatus"] + id?: number + is_schema?: boolean + last_modified?: string + name?: string + represented_by_id?: number | null + schema_id?: number | null + space_id?: number | null + } + Relationships: [ + { + foreignKeyName: "Concept_author_id_fkey" + columns: ["author_id"] + isOneToOne: false + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Concept_represented_by_id_fkey" + columns: ["represented_by_id"] + isOneToOne: false + referencedRelation: "Content" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Concept_schema_id_fkey" + columns: ["schema_id"] + isOneToOne: false + referencedRelation: "Concept" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Concept_space_id_fkey" + columns: ["space_id"] + isOneToOne: false + referencedRelation: "DiscourseSpace" + referencedColumns: ["id"] + }, + ] + } + concept_contributors: { + Row: { + concept_id: number + contributor_id: number + } + Insert: { + concept_id: number + contributor_id: number + } + Update: { + concept_id?: number + contributor_id?: number + } + Relationships: [ + { + foreignKeyName: "concept_contributors_concept_id_fkey" + columns: ["concept_id"] + isOneToOne: false + referencedRelation: "Concept" + referencedColumns: ["id"] + }, + { + foreignKeyName: "concept_contributors_contributor_id_fkey" + columns: ["contributor_id"] + isOneToOne: false + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + ] + } + Content: { + Row: { + author_id: number | null + created: string + creator_id: number | null + document_id: number + id: number + last_modified: string + metadata: Json + part_of_id: number | null + scale: Database["public"]["Enums"]["Scale"] + source_local_id: string | null + space_id: number | null + text: string + } + Insert: { + author_id?: number | null + created: string + creator_id?: number | null + document_id: number + id?: number + last_modified: string + metadata?: Json + part_of_id?: number | null + scale: Database["public"]["Enums"]["Scale"] + source_local_id?: string | null + space_id?: number | null + text: string + } + Update: { + author_id?: number | null + created?: string + creator_id?: number | null + document_id?: number + id?: number + last_modified?: string + metadata?: Json + part_of_id?: number | null + scale?: Database["public"]["Enums"]["Scale"] + source_local_id?: string | null + space_id?: number | null + text?: string + } + Relationships: [ + { + foreignKeyName: "Content_author_id_fkey" + columns: ["author_id"] + isOneToOne: false + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Content_creator_id_fkey" + columns: ["creator_id"] + isOneToOne: false + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Content_document_id_fkey" + columns: ["document_id"] + isOneToOne: false + referencedRelation: "Document" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Content_part_of_id_fkey" + columns: ["part_of_id"] + isOneToOne: false + referencedRelation: "Content" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Content_space_id_fkey" + columns: 
["space_id"] + isOneToOne: false + referencedRelation: "DiscourseSpace" + referencedColumns: ["id"] + }, + ] + } + content_contributors: { + Row: { + content_id: number + contributor_id: number + } + Insert: { + content_id: number + contributor_id: number + } + Update: { + content_id?: number + contributor_id?: number + } + Relationships: [ + { + foreignKeyName: "content_contributors_content_id_fkey" + columns: ["content_id"] + isOneToOne: false + referencedRelation: "Content" + referencedColumns: ["id"] + }, + { + foreignKeyName: "content_contributors_contributor_id_fkey" + columns: ["contributor_id"] + isOneToOne: false + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + ] + } + ContentEmbedding_openai_text_embedding_3_small_1536: { + Row: { + model: Database["public"]["Enums"]["EmbeddingName"] + obsolete: boolean | null + target_id: number + vector: string + } + Insert: { + model?: Database["public"]["Enums"]["EmbeddingName"] + obsolete?: boolean | null + target_id: number + vector: string + } + Update: { + model?: Database["public"]["Enums"]["EmbeddingName"] + obsolete?: boolean | null + target_id?: number + vector?: string + } + Relationships: [ + { + foreignKeyName: "ContentEmbedding_openai_text_embedding_3_small_1_target_id_fkey" + columns: ["target_id"] + isOneToOne: true + referencedRelation: "Content" + referencedColumns: ["id"] + }, + ] + } + DiscoursePlatform: { + Row: { + id: number + name: string + url: string + } + Insert: { + id?: number + name: string + url: string + } + Update: { + id?: number + name?: string + url?: string + } + Relationships: [] + } + DiscourseSpace: { + Row: { + discourse_platform_id: number + id: number + name: string + url: string | null + } + Insert: { + discourse_platform_id: number + id?: number + name: string + url?: string | null + } + Update: { + discourse_platform_id?: number + id?: number + name?: string + url?: string | null + } + Relationships: [ + { + foreignKeyName: "DiscourseSpace_discourse_platform_id_fkey" + columns: ["discourse_platform_id"] + isOneToOne: false + referencedRelation: "DiscoursePlatform" + referencedColumns: ["id"] + }, + ] + } + Document: { + Row: { + author_id: number + contents: unknown | null + created: string + id: number + last_modified: string + metadata: Json + source_local_id: string | null + space_id: number | null + url: string | null + } + Insert: { + author_id: number + contents?: unknown | null + created: string + id?: number + last_modified: string + metadata?: Json + source_local_id?: string | null + space_id?: number | null + url?: string | null + } + Update: { + author_id?: number + contents?: unknown | null + created?: string + id?: number + last_modified?: string + metadata?: Json + source_local_id?: string | null + space_id?: number | null + url?: string | null + } + Relationships: [ + { + foreignKeyName: "Document_author_id_fkey" + columns: ["author_id"] + isOneToOne: false + referencedRelation: "Agent" + referencedColumns: ["id"] + }, + { + foreignKeyName: "Document_space_id_fkey" + columns: ["space_id"] + isOneToOne: false + referencedRelation: "DiscourseSpace" + referencedColumns: ["id"] + }, + ] + } + Person: { + Row: { + email: string + id: number + name: string + orcid: string | null + } + Insert: { + email: string + id: number + name: string + orcid?: string | null + } + Update: { + email?: string + id?: number + name?: string + orcid?: string | null + } + Relationships: [ + { + foreignKeyName: "person_id_fkey" + columns: ["id"] + isOneToOne: true + referencedRelation: "Agent" + 
referencedColumns: ["id"] + }, + ] + } + SpaceAccess: { + Row: { + account_id: number + editor: boolean + id: number + space_id: number | null + } + Insert: { + account_id: number + editor: boolean + id?: number + space_id?: number | null + } + Update: { + account_id?: number + editor?: boolean + id?: number + space_id?: number | null + } + Relationships: [ + { + foreignKeyName: "SpaceAccess_account_id_fkey" + columns: ["account_id"] + isOneToOne: false + referencedRelation: "Account" + referencedColumns: ["id"] + }, + { + foreignKeyName: "SpaceAccess_space_id_fkey" + columns: ["space_id"] + isOneToOne: false + referencedRelation: "DiscourseSpace" + referencedColumns: ["id"] + }, + ] + } + sync_info: { + Row: { + failure_count: number | null + id: number + last_task_end: string | null + last_task_start: string | null + status: Database["public"]["Enums"]["task_status"] | null + sync_function: string | null + sync_target: number | null + task_times_out_at: string | null + worker: string + } + Insert: { + failure_count?: number | null + id?: number + last_task_end?: string | null + last_task_start?: string | null + status?: Database["public"]["Enums"]["task_status"] | null + sync_function?: string | null + sync_target?: number | null + task_times_out_at?: string | null + worker: string + } + Update: { + failure_count?: number | null + id?: number + last_task_end?: string | null + last_task_start?: string | null + status?: Database["public"]["Enums"]["task_status"] | null + sync_function?: string | null + sync_target?: number | null + task_times_out_at?: string | null + worker?: string + } + Relationships: [] + } + } + Views: { + [_ in never]: never + } + Functions: { + end_sync_task: { + Args: { + s_target: number + s_function: string + s_worker: string + s_status: Database["public"]["Enums"]["task_status"] + } + Returns: undefined + } + match_content_embeddings: { + Args: { + query_embedding: string + match_threshold: number + match_count: number + current_document_id?: number + } + Returns: { + content_id: number + roam_uid: string + text_content: string + similarity: number + }[] + } + match_embeddings_for_subset_nodes: { + Args: { p_query_embedding: string; p_subset_roam_uids: string[] } + Returns: { + content_id: number + roam_uid: string + text_content: string + similarity: number + }[] + } + propose_sync_task: { + Args: { + s_target: number + s_function: string + s_worker: string + timeout: unknown + task_interval: unknown + } + Returns: unknown + } + } + Enums: { + EmbeddingName: + | "openai_text_embedding_ada2_1536" + | "openai_text_embedding_3_small_512" + | "openai_text_embedding_3_small_1536" + | "openai_text_embedding_3_large_256" + | "openai_text_embedding_3_large_1024" + | "openai_text_embedding_3_large_3072" + EntityType: + | "Platform" + | "Space" + | "Account" + | "Person" + | "AutomatedAgent" + | "Document" + | "Content" + | "Concept" + | "ConceptSchema" + | "ContentLink" + | "Occurrence" + EpistemicStatus: + | "certainly_not" + | "strong_evidence_against" + | "could_be_false" + | "unknown" + | "uncertain" + | "contentious" + | "could_be_true" + | "strong_evidence_for" + | "certain" + Scale: + | "document" + | "post" + | "chunk_unit" + | "section" + | "block" + | "field" + | "paragraph" + | "quote" + | "sentence" + | "phrase" + task_status: "active" | "timeout" | "complete" | "failed" + } + CompositeTypes: { + [_ in never]: never + } + } +} + +type DefaultSchema = Database[Extract] + +export type Tables< + DefaultSchemaTableNameOrOptions extends + | keyof 
(DefaultSchema["Tables"] & DefaultSchema["Views"]) + | { schema: keyof Database }, + TableName extends DefaultSchemaTableNameOrOptions extends { + schema: keyof Database + } + ? keyof (Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"] & + Database[DefaultSchemaTableNameOrOptions["schema"]]["Views"]) + : never = never, +> = DefaultSchemaTableNameOrOptions extends { schema: keyof Database } + ? (Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"] & + Database[DefaultSchemaTableNameOrOptions["schema"]]["Views"])[TableName] extends { + Row: infer R + } + ? R + : never + : DefaultSchemaTableNameOrOptions extends keyof (DefaultSchema["Tables"] & + DefaultSchema["Views"]) + ? (DefaultSchema["Tables"] & + DefaultSchema["Views"])[DefaultSchemaTableNameOrOptions] extends { + Row: infer R + } + ? R + : never + : never + +export type TablesInsert< + DefaultSchemaTableNameOrOptions extends + | keyof DefaultSchema["Tables"] + | { schema: keyof Database }, + TableName extends DefaultSchemaTableNameOrOptions extends { + schema: keyof Database + } + ? keyof Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"] + : never = never, +> = DefaultSchemaTableNameOrOptions extends { schema: keyof Database } + ? Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"][TableName] extends { + Insert: infer I + } + ? I + : never + : DefaultSchemaTableNameOrOptions extends keyof DefaultSchema["Tables"] + ? DefaultSchema["Tables"][DefaultSchemaTableNameOrOptions] extends { + Insert: infer I + } + ? I + : never + : never + +export type TablesUpdate< + DefaultSchemaTableNameOrOptions extends + | keyof DefaultSchema["Tables"] + | { schema: keyof Database }, + TableName extends DefaultSchemaTableNameOrOptions extends { + schema: keyof Database + } + ? keyof Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"] + : never = never, +> = DefaultSchemaTableNameOrOptions extends { schema: keyof Database } + ? Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"][TableName] extends { + Update: infer U + } + ? U + : never + : DefaultSchemaTableNameOrOptions extends keyof DefaultSchema["Tables"] + ? DefaultSchema["Tables"][DefaultSchemaTableNameOrOptions] extends { + Update: infer U + } + ? U + : never + : never + +export type Enums< + DefaultSchemaEnumNameOrOptions extends + | keyof DefaultSchema["Enums"] + | { schema: keyof Database }, + EnumName extends DefaultSchemaEnumNameOrOptions extends { + schema: keyof Database + } + ? keyof Database[DefaultSchemaEnumNameOrOptions["schema"]]["Enums"] + : never = never, +> = DefaultSchemaEnumNameOrOptions extends { schema: keyof Database } + ? Database[DefaultSchemaEnumNameOrOptions["schema"]]["Enums"][EnumName] + : DefaultSchemaEnumNameOrOptions extends keyof DefaultSchema["Enums"] + ? DefaultSchema["Enums"][DefaultSchemaEnumNameOrOptions] + : never + +export type CompositeTypes< + PublicCompositeTypeNameOrOptions extends + | keyof DefaultSchema["CompositeTypes"] + | { schema: keyof Database }, + CompositeTypeName extends PublicCompositeTypeNameOrOptions extends { + schema: keyof Database + } + ? keyof Database[PublicCompositeTypeNameOrOptions["schema"]]["CompositeTypes"] + : never = never, +> = PublicCompositeTypeNameOrOptions extends { schema: keyof Database } + ? Database[PublicCompositeTypeNameOrOptions["schema"]]["CompositeTypes"][CompositeTypeName] + : PublicCompositeTypeNameOrOptions extends keyof DefaultSchema["CompositeTypes"] + ? 
DefaultSchema["CompositeTypes"][PublicCompositeTypeNameOrOptions] + : never + +export const Constants = { + public: { + Enums: { + EmbeddingName: [ + "openai_text_embedding_ada2_1536", + "openai_text_embedding_3_small_512", + "openai_text_embedding_3_small_1536", + "openai_text_embedding_3_large_256", + "openai_text_embedding_3_large_1024", + "openai_text_embedding_3_large_3072", + ], + EntityType: [ + "Platform", + "Space", + "Account", + "Person", + "AutomatedAgent", + "Document", + "Content", + "Concept", + "ConceptSchema", + "ContentLink", + "Occurrence", + ], + EpistemicStatus: [ + "certainly_not", + "strong_evidence_against", + "could_be_false", + "unknown", + "uncertain", + "contentious", + "could_be_true", + "strong_evidence_for", + "certain", + ], + Scale: [ + "document", + "post", + "chunk_unit", + "section", + "block", + "field", + "paragraph", + "quote", + "sentence", + "phrase", + ], + task_status: ["active", "timeout", "complete", "failed"], + }, + }, +} as const + diff --git a/turbo.json b/turbo.json index 45f57fbdd..c52dabc35 100644 --- a/turbo.json +++ b/turbo.json @@ -32,7 +32,10 @@ "passThroughEnv": [ "BLOB_READ_WRITE_TOKEN", "GITHUB_REF_NAME", - "GITHUB_HEAD_REF" + "GITHUB_HEAD_REF", + "SUPABASE_PROJECT_ID", + "SUPABASE_DB_PASSWORD", + "SUPABASE_ACCESS_TOKEN" ] }, "publish": {