Skip to content

Commit

Permalink
don't allow commas in model and dataset names (#700)
Browse files Browse the repository at this point in the history
  • Loading branch information
ekorman authored Aug 5, 2024
1 parent de3f327 commit 93b4bb0
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 0 deletions.
8 changes: 8 additions & 0 deletions api/tests/unit-tests/schemas/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ def test_dataset(metadata):
metadata=[{123: 12434}, "123"], # type: ignore - purposefully throwing error
)

with pytest.raises(ValidationError) as exc_info:
schemas.Dataset(name="name,with,commas")
assert "cannot contain commas" in str(exc_info)


def test_model(metadata):
# valid
Expand Down Expand Up @@ -111,6 +115,10 @@ def test_model(metadata):
metadata=[{123: 12434}, "123"], # type: ignore - purposefully throwing error
)

with pytest.raises(ValidationError) as exc_info:
schemas.Model(name="name,with,commas")
assert "cannot contain commas" in str(exc_info)


def test_datum(metadata):
# valid
Expand Down
16 changes: 16 additions & 0 deletions api/valor_api/schemas/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,14 @@ def validate_metadata_values(cls, v: dict) -> dict:
validate_metadata(v)
return v

@field_validator("name")
@classmethod
def validate_name_no_commans(cls, v: str) -> str:
"""Validates the 'name' field has no commas in it."""
if "," in v:
raise ValueError("Dataset names cannot contain commas.")
return v


class Model(BaseModel):
"""
Expand All @@ -575,6 +583,14 @@ def validate_name(cls, v: str) -> str:
validate_type_string(v)
return v

@field_validator("name")
@classmethod
def validate_name_no_commans(cls, v: str) -> str:
"""Validates the 'name' field has no commas in it."""
if "," in v:
raise ValueError("Model names cannot contain commas.")
return v

@field_validator("metadata")
@classmethod
def validate_metadata_values(cls, v: dict) -> dict:
Expand Down
Empty file.
63 changes: 63 additions & 0 deletions migrations/sql/00000013_disallow_commas.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
-- this migration goes through dataset names and model names and replaces commas with underscores
-- if the resulting name happens to exist, it adds underscores until it gets a name that doesn't
-- this will update the following tables: Model, Dataset, Evaluation

-- Function to get a unique name by adding underscores
CREATE OR REPLACE FUNCTION get_unique_name(base_name TEXT, table_name TEXT)
RETURNS TEXT AS $$
DECLARE
unique_name TEXT := base_name;
name_exists INT;
BEGIN
EXECUTE format('SELECT COUNT(*) FROM %I WHERE name = $1', table_name) INTO name_exists USING unique_name;

WHILE name_exists > 0 LOOP
unique_name := unique_name || '_';
EXECUTE format('SELECT COUNT(*) FROM %I WHERE name = $1', table_name) INTO name_exists USING unique_name;
END LOOP;

RETURN unique_name;
END;
$$ LANGUAGE plpgsql;

DO $$
DECLARE
old_name TEXT;
new_name TEXT;
BEGIN
FOR old_name IN SELECT name FROM model WHERE POSITION(',' IN name) > 0 LOOP
new_name := get_unique_name(REPLACE(old_name, ',', '_'), 'model');

UPDATE Model SET name = new_name WHERE name = old_name;

UPDATE Evaluation SET model_name = new_name WHERE model_name = old_name;
END LOOP;
END;
$$;

DO $$
DECLARE
old_name TEXT;
new_name TEXT;
BEGIN
FOR old_name IN SELECT name FROM Dataset WHERE POSITION(',' IN name) > 0 LOOP
new_name := get_unique_name(REPLACE(old_name, ',', '_'), 'dataset');

UPDATE Dataset SET name = new_name WHERE name = old_name;

UPDATE Evaluation
SET dataset_names = (
SELECT jsonb_agg(
CASE
WHEN elem = old_name THEN new_name
ELSE elem
END
)
FROM jsonb_array_elements_text(dataset_names) AS elem
)
WHERE dataset_names @> jsonb_build_array(old_name);
END LOOP;
END;
$$;

DROP FUNCTION get_unique_name;

0 comments on commit 93b4bb0

Please sign in to comment.