Skip to content

Commit

Permalink
Allow for multiple unique constraints and column indexes
Browse files Browse the repository at this point in the history
  • Loading branch information
stuartmcalpine committed Jun 4, 2024
1 parent ca87303 commit c4070ae
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 36 deletions.
52 changes: 24 additions & 28 deletions scripts/create_registry_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,26 +131,29 @@ def _get_table_metadata(schema, table):
"__tablename__": table,
}

if (
"index" in schema_data[table].keys()
and "unique_constraints" in schema_data[table].keys()
):
meta["__table_args__"] = (
UniqueConstraint(
*schema_data[table]["unique_constraints"]["unique_list"],
name=schema_data[table]["unique_constraints"]["name"],
),
Index(*schema_data[table]["index"]["index_list"]),
{"schema": schema},
)
elif "unique_constraints" in schema_data[table].keys():
meta["__table_args__"] = (
UniqueConstraint(
*schema_data[table]["unique_constraints"]["unique_list"],
name=schema_data[table]["unique_constraints"]["name"],
),
{"schema": schema},
)
table_args = []

# Handle column indexes.
# NOTE(review): the previous code checked for the key "indexs" but then
# read "index", raising KeyError for any table that actually defined
# indexes (the schema YAML spells it "indexs"). Accept both the
# documented "indexes" spelling and the legacy "indexs", and read back
# whichever key we matched.
index_key = next(
    (k for k in ("indexes", "indexs") if k in schema_data[table]), None
)
if index_key is not None:
    for index_att in schema_data[table][index_key].keys():
        table_args.append(
            Index(*schema_data[table][index_key][index_att]["index_list"])
        )

# Handle unique constraints: one UniqueConstraint per named entry, with
# the YAML key used as the constraint's name in the database.
if "unique_constraints" in schema_data[table].keys():
    for uq_att in schema_data[table]["unique_constraints"].keys():
        table_args.append(
            UniqueConstraint(
                *schema_data[table]["unique_constraints"][uq_att]["unique_list"],
                name=uq_att,
            )
        )

# Bring it together: SQLAlchemy accepts __table_args__ as either a plain
# dict of table kwargs, or a tuple whose last element is that dict.
if len(table_args) > 0:
    table_args.append({"schema": schema})
    meta["__table_args__"] = tuple(table_args)
else:
    meta["__table_args__"] = {"schema": schema}

Expand Down Expand Up @@ -332,14 +335,7 @@ def _BuildTable(schema, table_name, has_production, production):
print(f"Could not grant access to {acct} on schema {schema}")

# Create the tables
for table_name in [
"dataset",
"dataset_alias",
"dependency",
"execution",
"execution_alias",
"provenance",
]:
for table_name in schema_data.keys():
_BuildTable(schema, table_name, db_connection.dialect != "sqlite", prod_schema)

# Generate the database
Expand Down
2 changes: 2 additions & 0 deletions src/dataregistry/schema/load_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ def _populate_defaults(mydict):
# Fill in any per-column attribute the schema file omitted with its
# default value from `atts`.
# NOTE(review): the previous version re-checked `att not in atts.keys()`
# inside this loop and raised ValueError — but `att` is drawn from
# `atts.keys()` itself, so that branch was unreachable dead code and has
# been removed.
for row in mydict[table]["column_definitions"].keys():
    for att in atts.keys():
        if att not in mydict[table]["column_definitions"][row].keys():
            mydict[table]["column_definitions"][row][att] = atts[att]


Expand Down
81 changes: 73 additions & 8 deletions src/dataregistry/schema/schema.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,68 @@
---
# Stores table information for the schema.
#
# List each table name under the `tables` key.
#
# Column definitions
# ------------------
# For each table, the `column_definitions` key is required, which lists, as its keys, each column in the table. Each column contains metadata to describe it, including:
#
# - `type` (required):
# "String", "Integer", "DateTime", "StringShort", "StringLong", "Boolean",
# "Float"
# - `description` (required):
# Text description of the column
# - `primary_key` (optional, default=False):
# True if this column is the primary key of the table
# - `nullable` (optional, default=False):
# True if column is nullable
# - `foreign_key` (optional, default=False):
# True if this is a foreign_key, then you must also define
# `foreign_key_schema`, `foreign_key_table` and `foreign_key_column`.
# - `cli_optional` (optional, default=False):
# Add this column as an optional choice during CLI registration
# - `cli_default` (optional, default=None):
# Default value in the CLI
# - `modifiable` (optional, default=False):
# Is this column modifiable after registration
# - `choices` (optional, default=None):
# If the column only has fixed allowed values
#
# Indexes
# -------
# Tables can have an optional `indexes` key, which can have under it any number
# of indexes for that table. Each key under `indexes` will be the referenced
# `name` of the index in the database, and must contain an `index_list` entry
# underneath, which is a list of columns that builds that index.
#
# For example:
#
# tables:
# mytable:
# indexes:
# index_name:
# index_list: ["column1", "column2"]
# second_index_name:
# index_list: ["column1", "column4"]
#
# Unique constraints
# ------------------
# Tables can have an optional `unique_constraints` key, which can have under it
# any number of unique constraints for that table. Each key under
# `unique_constraints` will be the referenced `name` of the constraint in the
# database, and must contain a `unique_list` entry underneath, which is a list
# of columns that builds that unique constraint.
#
# For example:
#
# tables:
# mytable:
# unique_constraints:
# unique_constraint_name:
# unique_list: ["column1", "column2"]
# second_unique_constraint_name:
# unique_list: ["column1", "column4"]

tables:

execution:
Expand Down Expand Up @@ -92,8 +156,8 @@ tables:
execution_alias:

unique_constraints:
unique_list: ["alias","register_date"]
name: "execution_alias_unique"
execution_alias_unique:
unique_list: ["alias","register_date"]

column_definitions:
execution_alias_id:
Expand Down Expand Up @@ -126,8 +190,8 @@ tables:
dataset_alias:

unique_constraints:
unique_list: ["alias","register_date"]
name: "dataset_alias_unique"
dataset_alias_unique:
unique_list: ["alias","register_date"]

column_definitions:
dataset_alias_id:
Expand Down Expand Up @@ -192,12 +256,13 @@ tables:

dataset:

index:
index_list: ["relative_path", "owner", "owner_type"]
indexs:
dataset_index:
index_list: ["relative_path", "owner", "owner_type"]

unique_constraints:
unique_list: ["name", "version_string", "version_suffix", "owner", "owner_type"]
name: "dataset_unique"
dataset_unique:
unique_list: ["name", "version_string", "version_suffix", "owner", "owner_type"]

column_definitions:
dataset_id:
Expand Down

0 comments on commit c4070ae

Please sign in to comment.