Skip to content

Commit

Permalink
fix(dataset): fix datasets shacl shape (#2368)
Browse files Browse the repository at this point in the history
  • Loading branch information
m-alisafaee committed Sep 24, 2021
1 parent c37f7aa commit 449ec7b
Show file tree
Hide file tree
Showing 9 changed files with 29 additions and 30 deletions.
4 changes: 2 additions & 2 deletions renku/core/commands/dataset.py
Expand Up @@ -419,10 +419,10 @@ def _export_dataset(name, provider_name, publish, tag, client_dispatcher: IClien
selected_tag = _prompt_tag_selection(tags)

if selected_tag:
- dataset = datasets_provenance.get_by_id(selected_tag.dataset_id, immutable=True)
+ dataset = datasets_provenance.get_by_id(selected_tag.dataset_id.value, immutable=True)

if not dataset:
- raise DatasetNotFound(message=f"Cannot find dataset with id: '{selected_tag.dataset_id}'")
+ raise DatasetNotFound(message=f"Cannot find dataset with id: '{selected_tag.dataset_id.value}'")

data_dir = get_dataset_data_dir(client, dataset)
dataset = DynamicProxy(dataset)
Expand Down
2 changes: 1 addition & 1 deletion renku/core/commands/format/dataset_tags.py
Expand Up @@ -35,7 +35,7 @@ def tabular(tags):
("name", None),
("description", None),
("dataset", None),
- ("dataset_id", "dataset id"),
+ ("dataset_id.value", "dataset id"),
)
),
# workaround for tabulate issue 181
Expand Down
7 changes: 5 additions & 2 deletions renku/core/management/dataset/datasets_provenance.py
Expand Up @@ -24,7 +24,7 @@
from renku.core import errors
from renku.core.management.command_builder.command import inject
from renku.core.management.interface.dataset_gateway import IDatasetGateway
- from renku.core.models.dataset import Dataset, DatasetTag
+ from renku.core.models.dataset import Dataset, DatasetTag, Url
from renku.core.models.provenance.agent import Person
from renku.core.utils import communication

Expand Down Expand Up @@ -194,6 +194,9 @@ def _process_dataset_tags(self, dataset: Dataset, tags: List[DatasetTag]):
if tag.name in current_tag_names:
continue
tag = DatasetTag(
- dataset_id=dataset.id, date_created=tag.date_created, description=tag.description, name=tag.name
+ dataset_id=Url(url_id=dataset.id),
+ date_created=tag.date_created,
+ description=tag.description,
+ name=tag.name,
)
self.add_tag(dataset, tag)
4 changes: 2 additions & 2 deletions renku/core/management/dataset/tag.py
Expand Up @@ -22,7 +22,7 @@

from renku.core import errors
from renku.core.management.dataset.datasets_provenance import DatasetsProvenance
- from renku.core.models.dataset import Dataset, DatasetTag
+ from renku.core.models.dataset import Dataset, DatasetTag, Url


def add_dataset_tag(dataset: Dataset, tag: str, description="", force=False):
Expand All @@ -49,7 +49,7 @@ def add_dataset_tag(dataset: Dataset, tag: str, description="", force=False):
raise errors.ParameterError(f"Tag '{tag}' already exists")
datasets_provenance.remove_tag(dataset, existing_tag)

- new_tag = DatasetTag(dataset_id=dataset.id, description=description, name=tag)
+ new_tag = DatasetTag(dataset_id=Url(url_id=dataset.id), description=description, name=tag)

datasets_provenance.add_tag(dataset, new_tag)

Expand Down
6 changes: 5 additions & 1 deletion renku/core/management/migrations/utils/conversion.py
Expand Up @@ -36,7 +36,11 @@ def _convert_dataset_tag(tag: Optional[old_datasets.DatasetTag]) -> Optional[Dat
"""Convert old DatasetTag to new DatasetTag."""
if not tag:
return
- return DatasetTag(dataset_id="dummy-id", date_created=tag.created, description=tag.description, name=tag.name)
+
+ # NOTE: ``dataset_id`` field will be set later when processing the migrated commit.
+ return DatasetTag(
+ dataset_id=Url(url_id="dummy-id"), date_created=tag.created, description=tag.description, name=tag.name
+ )


def _convert_language(language: Optional[old_datasets.Language]) -> Optional[Language]:
Expand Down
2 changes: 1 addition & 1 deletion renku/core/metadata/gateway/dataset_gateway.py
Expand Up @@ -59,7 +59,7 @@ def add_tag(self, dataset: Dataset, tag: DatasetTag):
tags = PersistentList()
self.database_dispatcher.current_database["datasets-tags"].add(tags, key=dataset.name)

- assert tag.dataset_id == dataset.id, f"Tag has wrong dataset id: {tag.dataset_id} != {dataset.id}"
+ assert tag.dataset_id.value == dataset.id, f"Tag has wrong dataset id: {tag.dataset_id.value} != {dataset.id}"

tags.append(tag)

Expand Down
8 changes: 4 additions & 4 deletions renku/core/models/dataset.py
Expand Up @@ -116,16 +116,16 @@ class DatasetTag(Persistent):
def __init__(
self,
*,
- dataset_id: str,
+ dataset_id: Url,
date_created: datetime = None,
description: str = None,
id: str = None,
name: str,
):
if not id:
- id = DatasetTag.generate_id(dataset_id=dataset_id, name=name)
+ id = DatasetTag.generate_id(dataset_id=dataset_id.value, name=name)

- self.dataset_id: str = dataset_id
+ self.dataset_id: Url = dataset_id
self.date_created: datetime = parse_date(date_created) or local_now()
self.description: str = description
self.id: str = id
Expand Down Expand Up @@ -614,7 +614,7 @@ class Meta:
model = DatasetTag
unknown = EXCLUDE

- dataset_id = fields.String(schema.location)
+ dataset_id = Nested(schema.about, UrlSchema, missing=None)
date_created = fields.DateTime(schema.startDate, missing=None, format="iso", extra_formats=("%Y-%m-%d",))
description = fields.String(schema.description, missing=None)
id = fields.Id()
Expand Down
19 changes: 3 additions & 16 deletions renku/data/shacl_shape.json
Expand Up @@ -321,10 +321,7 @@
"datatype": {
"@id": "xsd:string"
},
- "maxCount": 1,
- "sh:lessThanOrEquals": {
- "@id": "schema:datePublished"
- }
+ "maxCount": 1
},
{
"nodeKind": "sh:Literal",
Expand Down Expand Up @@ -553,19 +550,9 @@
"maxCount": 1
},
{
- "nodeKind": "sh:Literal",
- "path": "schema:location",
- "datatype": {
- "@id": "xsd:string"
- },
- "minCount": 1,
- "maxCount": 1
- },
- {
- "nodeKind": "sh:Literal",
"path": "schema:about",
- "datatype": {
- "@id": "xsd:string"
+ "sh:class": {
+ "@id": "schema:URL"
},
"minCount": 1,
"maxCount": 1
Expand Down
7 changes: 6 additions & 1 deletion tests/cli/test_datasets.py
Expand Up @@ -1114,7 +1114,7 @@ def test_dataset_provider_resolution_dataverse(doi_responses, uri):
assert type(provider) is DataverseProvider


- def test_dataset_tag(tmpdir, runner, project, client, subdirectory):
+ def test_dataset_tag(tmpdir, runner, client, subdirectory, get_datasets_provenance_with_injection):
result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
assert 0 == result.exit_code, format_result_exception(result)
assert "OK" in result.output
Expand All @@ -1139,6 +1139,11 @@ def test_dataset_tag(tmpdir, runner, project, client, subdirectory):
result = runner.invoke(cli, ["dataset", "tag", "my-dataset", "aBc9.34-11_55.t"], catch_exceptions=False)
assert 0 == result.exit_code, format_result_exception(result)

+ with get_datasets_provenance_with_injection(client) as datasets_provenance:
+ dataset = datasets_provenance.get_by_name("my-dataset")
+ all_tags = datasets_provenance.get_all_tags(dataset)
+ assert {dataset.id} == {t.dataset_id.value for t in all_tags}


@pytest.mark.parametrize("form", ["tabular", "json-ld"])
def test_dataset_ls_tags(tmpdir, runner, project, client, form, load_dataset_with_injection):
Expand Down

0 comments on commit 449ec7b

Please sign in to comment.