diff --git a/renku/core/commands/dataset.py b/renku/core/commands/dataset.py index 2a3fc934b5..1d768c4589 100644 --- a/renku/core/commands/dataset.py +++ b/renku/core/commands/dataset.py @@ -419,10 +419,10 @@ def _export_dataset(name, provider_name, publish, tag, client_dispatcher: IClien selected_tag = _prompt_tag_selection(tags) if selected_tag: - dataset = datasets_provenance.get_by_id(selected_tag.dataset_id, immutable=True) + dataset = datasets_provenance.get_by_id(selected_tag.dataset_id.value, immutable=True) if not dataset: - raise DatasetNotFound(message=f"Cannot find dataset with id: '{selected_tag.dataset_id}'") + raise DatasetNotFound(message=f"Cannot find dataset with id: '{selected_tag.dataset_id.value}'") data_dir = get_dataset_data_dir(client, dataset) dataset = DynamicProxy(dataset) diff --git a/renku/core/commands/format/dataset_tags.py b/renku/core/commands/format/dataset_tags.py index 2ecc1741c3..7d527f6b6c 100644 --- a/renku/core/commands/format/dataset_tags.py +++ b/renku/core/commands/format/dataset_tags.py @@ -35,7 +35,7 @@ def tabular(tags): ("name", None), ("description", None), ("dataset", None), - ("dataset_id", "dataset id"), + ("dataset_id.value", "dataset id"), ) ), # workaround for tabulate issue 181 diff --git a/renku/core/management/dataset/datasets_provenance.py b/renku/core/management/dataset/datasets_provenance.py index 1920226cee..e8610b4388 100644 --- a/renku/core/management/dataset/datasets_provenance.py +++ b/renku/core/management/dataset/datasets_provenance.py @@ -24,7 +24,7 @@ from renku.core import errors from renku.core.management.command_builder.command import inject from renku.core.management.interface.dataset_gateway import IDatasetGateway -from renku.core.models.dataset import Dataset, DatasetTag +from renku.core.models.dataset import Dataset, DatasetTag, Url from renku.core.models.provenance.agent import Person from renku.core.utils import communication @@ -194,6 +194,9 @@ def _process_dataset_tags(self, dataset: Dataset, tags: List[DatasetTag]): if tag.name in current_tag_names: continue tag = DatasetTag( - dataset_id=dataset.id, date_created=tag.date_created, description=tag.description, name=tag.name + dataset_id=Url(url_id=dataset.id), + date_created=tag.date_created, + description=tag.description, + name=tag.name, ) self.add_tag(dataset, tag) diff --git a/renku/core/management/dataset/tag.py b/renku/core/management/dataset/tag.py index 302c5d295e..6144233a77 100644 --- a/renku/core/management/dataset/tag.py +++ b/renku/core/management/dataset/tag.py @@ -22,7 +22,7 @@ from renku.core import errors from renku.core.management.dataset.datasets_provenance import DatasetsProvenance -from renku.core.models.dataset import Dataset, DatasetTag +from renku.core.models.dataset import Dataset, DatasetTag, Url def add_dataset_tag(dataset: Dataset, tag: str, description="", force=False): @@ -49,7 +49,7 @@ def add_dataset_tag(dataset: Dataset, tag: str, description="", force=False): raise errors.ParameterError(f"Tag '{tag}' already exists") datasets_provenance.remove_tag(dataset, existing_tag) - new_tag = DatasetTag(dataset_id=dataset.id, description=description, name=tag) + new_tag = DatasetTag(dataset_id=Url(url_id=dataset.id), description=description, name=tag) datasets_provenance.add_tag(dataset, new_tag) diff --git a/renku/core/management/migrations/utils/conversion.py b/renku/core/management/migrations/utils/conversion.py index 36b3222cbb..3c11c27df6 100644 --- a/renku/core/management/migrations/utils/conversion.py +++ b/renku/core/management/migrations/utils/conversion.py @@ -36,7 +36,11 @@ def _convert_dataset_tag(tag: Optional[old_datasets.DatasetTag]) -> Optional[Dat """Convert old DatasetTag to new DatasetTag.""" if not tag: return - return DatasetTag(dataset_id="dummy-id", date_created=tag.created, description=tag.description, name=tag.name) + + # NOTE: ``dataset_id`` field will be set later when processing the migrated commit. + return DatasetTag( + dataset_id=Url(url_id="dummy-id"), date_created=tag.created, description=tag.description, name=tag.name + ) def _convert_language(language: Optional[old_datasets.Language]) -> Optional[Language]: diff --git a/renku/core/metadata/gateway/dataset_gateway.py b/renku/core/metadata/gateway/dataset_gateway.py index 0a12d6eada..d800897f8d 100644 --- a/renku/core/metadata/gateway/dataset_gateway.py +++ b/renku/core/metadata/gateway/dataset_gateway.py @@ -59,7 +59,7 @@ def add_tag(self, dataset: Dataset, tag: DatasetTag): tags = PersistentList() self.database_dispatcher.current_database["datasets-tags"].add(tags, key=dataset.name) - assert tag.dataset_id == dataset.id, f"Tag has wrong dataset id: {tag.dataset_id} != {dataset.id}" + assert tag.dataset_id.value == dataset.id, f"Tag has wrong dataset id: {tag.dataset_id.value} != {dataset.id}" tags.append(tag) diff --git a/renku/core/models/dataset.py b/renku/core/models/dataset.py index 2843f7611e..c2167254ce 100644 --- a/renku/core/models/dataset.py +++ b/renku/core/models/dataset.py @@ -116,16 +116,16 @@ class DatasetTag(Persistent): def __init__( self, *, - dataset_id: str, + dataset_id: Url, date_created: datetime = None, description: str = None, id: str = None, name: str, ): if not id: - id = DatasetTag.generate_id(dataset_id=dataset_id, name=name) + id = DatasetTag.generate_id(dataset_id=dataset_id.value, name=name) - self.dataset_id: str = dataset_id + self.dataset_id: Url = dataset_id self.date_created: datetime = parse_date(date_created) or local_now() self.description: str = description self.id: str = id @@ -614,7 +614,7 @@ class Meta: model = DatasetTag unknown = EXCLUDE - dataset_id = fields.String(schema.location) + dataset_id = Nested(schema.about, UrlSchema, missing=None) date_created = fields.DateTime(schema.startDate, missing=None, format="iso", extra_formats=("%Y-%m-%d",)) description = fields.String(schema.description, missing=None) id = fields.Id() diff --git a/renku/data/shacl_shape.json b/renku/data/shacl_shape.json index a317ad32fb..d9d668d4f4 100644 --- a/renku/data/shacl_shape.json +++ b/renku/data/shacl_shape.json @@ -321,10 +321,7 @@ "datatype": { "@id": "xsd:string" }, - "maxCount": 1, - "sh:lessThanOrEquals": { - "@id": "schema:datePublished" - } + "maxCount": 1 }, { "nodeKind": "sh:Literal", @@ -553,19 +550,9 @@ "maxCount": 1 }, { - "nodeKind": "sh:Literal", - "path": "schema:location", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", "path": "schema:about", - "datatype": { - "@id": "xsd:string" + "sh:class": { + "@id": "schema:URL" }, "minCount": 1, "maxCount": 1 diff --git a/tests/cli/test_datasets.py b/tests/cli/test_datasets.py index b6f6c2313c..ef98b4a02e 100644 --- a/tests/cli/test_datasets.py +++ b/tests/cli/test_datasets.py @@ -1114,7 +1114,7 @@ def test_dataset_provider_resolution_dataverse(doi_responses, uri): assert type(provider) is DataverseProvider -def test_dataset_tag(tmpdir, runner, project, client, subdirectory): +def test_dataset_tag(tmpdir, runner, client, subdirectory, get_datasets_provenance_with_injection): result = runner.invoke(cli, ["dataset", "create", "my-dataset"]) assert 0 == result.exit_code, format_result_exception(result) assert "OK" in result.output @@ -1139,6 +1139,11 @@ def test_dataset_tag(tmpdir, runner, project, client, subdirectory): result = runner.invoke(cli, ["dataset", "tag", "my-dataset", "aBc9.34-11_55.t"], catch_exceptions=False) assert 0 == result.exit_code, format_result_exception(result) + with get_datasets_provenance_with_injection(client) as datasets_provenance: + dataset = datasets_provenance.get_by_name("my-dataset") + all_tags = datasets_provenance.get_all_tags(dataset) + assert {dataset.id} == {t.dataset_id.value for t in all_tags} + @pytest.mark.parametrize("form", ["tabular", "json-ld"]) def test_dataset_ls_tags(tmpdir, runner, project, client, form, load_dataset_with_injection):