diff --git a/scripts/create_registry_schema.py b/scripts/create_registry_schema.py index 11aabd1..fa790e9 100644 --- a/scripts/create_registry_schema.py +++ b/scripts/create_registry_schema.py @@ -131,26 +131,29 @@ def _get_table_metadata(schema, table): "__tablename__": table, } - if ( - "index" in schema_data[table].keys() - and "unique_constraints" in schema_data[table].keys() - ): - meta["__table_args__"] = ( - UniqueConstraint( - *schema_data[table]["unique_constraints"]["unique_list"], - name=schema_data[table]["unique_constraints"]["name"], - ), - Index(*schema_data[table]["index"]["index_list"]), - {"schema": schema}, - ) - elif "unique_constraints" in schema_data[table].keys(): - meta["__table_args__"] = ( - UniqueConstraint( - *schema_data[table]["unique_constraints"]["unique_list"], - name=schema_data[table]["unique_constraints"]["name"], - ), - {"schema": schema}, - ) + table_args = [] + + # Handle column indexes (each key under `indexes` is the name of the index) + if "indexes" in schema_data[table].keys(): + for index_att in schema_data[table]["indexes"].keys(): + table_args.append( + Index(index_att, *schema_data[table]["indexes"][index_att]["index_list"]) + ) + + # Handle unique constraints + if "unique_constraints" in schema_data[table].keys(): + for uq_att in schema_data[table]["unique_constraints"].keys(): + table_args.append( + UniqueConstraint( + *schema_data[table]["unique_constraints"][uq_att]["unique_list"], + name=uq_att, + ) + ) + + # Bring it together + if len(table_args) > 0: + table_args.append({"schema": schema}) + meta["__table_args__"] = tuple(table_args) else: meta["__table_args__"] = {"schema": schema} @@ -332,14 +335,7 @@ def _BuildTable(schema, table_name, has_production, production): print(f"Could not grant access to {acct} on schema {schema}") # Create the tables -for table_name in [ - "dataset", - "dataset_alias", - "dependency", - "execution", - "execution_alias", - "provenance", -]: +for table_name in schema_data.keys(): _BuildTable(schema, table_name, db_connection.dialect != "sqlite", 
prod_schema) # Generate the database diff --git a/src/dataregistry/schema/load_schema.py b/src/dataregistry/schema/load_schema.py index efe4cab..e2b1501 100644 --- a/src/dataregistry/schema/load_schema.py +++ b/src/dataregistry/schema/load_schema.py @@ -28,6 +28,8 @@ def _populate_defaults(mydict): for row in mydict[table]["column_definitions"].keys(): for att in atts.keys(): if att not in mydict[table]["column_definitions"][row].keys(): + if att not in atts.keys(): + raise ValueError(f"The {att} attribute has no default value") mydict[table]["column_definitions"][row][att] = atts[att] diff --git a/src/dataregistry/schema/schema.yaml b/src/dataregistry/schema/schema.yaml index f7881ef..c8fe2c0 100644 --- a/src/dataregistry/schema/schema.yaml +++ b/src/dataregistry/schema/schema.yaml @@ -1,4 +1,68 @@ --- +# Stores table information for the schema. +# +# List each table name under the `tables` key. +# +# Column definitions +# ------------------ +# For each table, the `column_definitions` key is required, which lists, as its keys, each column in the table. Each column contains metadata to describe it, including: +# +# - `type` (required): +# "String", "Integer", "DateTime", "StringShort", "StringLong", "Boolean", +# "Float" +# - `description` (required): +# Text description of the column +# - `primary_key` (optional, default=False): +# True if this column is the primary key of the table +# - `nullable` (optional, default=False): +# True if column is nullable +# - `foreign_key` (optional, default=False): +# True if this column is a foreign key, in which case you must also define +# `foreign_key_schema`, `foreign_key_table` and `foreign_key_column`. 
+# - `cli_optional` (optional, default=False): +# Add this column as an optional choice during CLI registration +# - `cli_default` (optional, default=None): +# Default value in the CLI +# - `modifiable` (optional, default=False): +# Is this column modifiable after registration +# - `choices` (optional, default=None): +# If the column only has fixed allowed values +# +# Indexes +# ------- +# Tables can have an optional `indexes` key, which can have under it any number +# of indexes for that table. Each key under `indexes` will be the referenced +# `name` of the index in the database, and must contain an `index_list` entry +# underneath, which is a list of columns that builds that index. +# +# For example: +# +# tables: +# mytable: +# indexes: +# index_name: +# index_list: ["column1", "column2"] +# second_index_name: +# index_list: ["column1", "column4"] +# +# Unique constraints +# ------------------ +# Tables can have an optional `unique_constraints` key, which can have under it +# any number of unique constraints for that table. Each key under +# `unique_constraints` will be the referenced `name` of the constraint in the +# database, and must contain a `unique_list` entry underneath, which is a list +# of columns that builds that unique constraint. 
+# +# For example: +# +# tables: +# mytable: +# unique_constraints: +# unique_constraint_name: +# unique_list: ["column1", "column2"] +# second_unique_constraint_name: +# unique_list: ["column1", "column4"] + tables: execution: @@ -92,8 +156,8 @@ tables: execution_alias: unique_constraints: - unique_list: ["alias","register_date"] - name: "execution_alias_unique" + execution_alias_unique: + unique_list: ["alias","register_date"] column_definitions: execution_alias_id: @@ -126,8 +190,8 @@ tables: dataset_alias: unique_constraints: - unique_list: ["alias","register_date"] - name: "dataset_alias_unique" + dataset_alias_unique: + unique_list: ["alias","register_date"] column_definitions: dataset_alias_id: @@ -192,12 +256,13 @@ tables: dataset: - index: - index_list: ["relative_path", "owner", "owner_type"] + indexes: + dataset_index: + index_list: ["relative_path", "owner", "owner_type"] unique_constraints: - unique_list: ["name", "version_string", "version_suffix", "owner", "owner_type"] - name: "dataset_unique" + dataset_unique: + unique_list: ["name", "version_string", "version_suffix", "owner", "owner_type"] column_definitions: dataset_id: