From 00b499b435608b52041ba7160cdece85ea7c20fd Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Wed, 8 Sep 2021 10:46:37 +0200 Subject: [PATCH] feat(core): allow adding custom metadata to projects (#2313) --- renku/cli/init.py | 37 ++++++++++++++++++- renku/cli/project.py | 26 ++++++++++++- renku/core/commands/init.py | 7 +++- renku/core/commands/project.py | 6 +-- renku/core/management/repository.py | 5 ++- renku/core/models/project.py | 27 +++++++++++--- renku/service/controllers/project_edit.py | 6 ++- .../controllers/templates_create_project.py | 1 + renku/service/serializers/project.py | 1 + renku/service/serializers/templates.py | 1 + tests/cli/test_init.py | 25 +++++++++++++ tests/cli/test_project.py | 17 ++++++++- tests/service/fixtures/service_client.py | 6 +++ tests/service/views/test_project_views.py | 12 ++++++ 14 files changed, 160 insertions(+), 17 deletions(-) diff --git a/renku/cli/init.py b/renku/cli/init.py index 55e840fa8f..13617e901a 100644 --- a/renku/cli/init.py +++ b/renku/cli/init.py @@ -113,6 +113,24 @@ automatically added to the list of parameters forwarded to the ``init`` command. +Provide custom metadata +~~~~~~~~~~~~~~~~~~~~~~~ + +Custom metadata can be added to the project's knowledge graph by writing +it to a JSON file and passing that via the ``--metadata`` option. + +.. code-block:: console + + $ echo '{"@id": "https://example.com/id1", \ + "@type": "https://schema.org/Organization", \ + "https://schema.org/legalName": "ETHZ"}' > metadata.json + + $ renku init --template-id python-minimal --parameter \ + "description"="my new shiny project" --metadata metadata.json + + Initializing new Renku repository... 
OK + + Update an existing project ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -171,6 +189,7 @@ """ import configparser +import json import os from pathlib import Path from tempfile import mkdtemp @@ -262,7 +281,7 @@ def check_git_user_config(): @click.option( "-p", "--parameter", - "metadata", + "parameters", multiple=True, type=click.STRING, callback=parse_parameters, @@ -271,6 +290,14 @@ def check_git_user_config(): 'Please specify the values as follow: --parameter "param1"="value"' ), ) +@click.option( + "-m", + "--metadata", + "metadata", + default=None, + type=click.Path(exists=True, dir_okay=False), + help="Custom metadata to be associated with the project.", +) @click.option("-l", "--list-templates", is_flag=True, help="List templates available in the template-source.") @click.option("-d", "--describe", is_flag=True, help="Show description for templates and parameters") @click.option("--force", is_flag=True, help="Override target path.") @@ -287,6 +314,7 @@ def init( template_index, template_source, template_ref, + parameters, metadata, list_templates, force, @@ -309,6 +337,10 @@ def init( if template_ref and not template_source: raise errors.ParameterError("Can't use '--template-ref' without specifying '--template-source'") + custom_metadata = None + if metadata: + custom_metadata = json.loads(Path(metadata).read_text()) + communicator = ClickCallback() init_command().with_communicator(communicator).build().execute( ctx=ctx, @@ -320,7 +352,8 @@ def init( template_index=template_index, template_source=template_source, template_ref=template_ref, - metadata=metadata, + metadata=parameters, + custom_metadata=custom_metadata, list_templates=list_templates, force=force, describe=describe, diff --git a/renku/cli/project.py b/renku/cli/project.py index 73a886f844..cb4e60e626 100644 --- a/renku/cli/project.py +++ b/renku/cli/project.py @@ -35,8 +35,14 @@ | | Accepted format is | | | 'Forename Surname [affiliation]'. 
| +-------------------+------------------------------------------------------+ +| -m, --metadata | Path to json file containing custom metadata to be | +| | added to the project knowledge graph. | ++-------------------+------------------------------------------------------+ """ +import json +from pathlib import Path + import click from renku.cli.utils.callback import ClickCallback @@ -57,9 +63,25 @@ def project(): type=click.STRING, help="Creator's name, email, and affiliation. Accepted format is 'Forename Surname [affiliation]'.", ) -def edit(description, creator): +@click.option( + "-m", + "--metadata", + default=None, + type=click.Path(exists=True, dir_okay=False), + help="Custom metadata to be associated with the project.", +) +def edit(description, creator, metadata): """Edit project metadata.""" - result = edit_project_command().build().execute(description=description, creator=creator) + custom_metadata = None + + if metadata: + custom_metadata = json.loads(Path(metadata).read_text()) + + result = ( + edit_project_command() + .build() + .execute(description=description, creator=creator, custom_metadata=custom_metadata) + ) updated, no_email_warning = result.output diff --git a/renku/core/commands/init.py b/renku/core/commands/init.py index edb43fc381..585064c8bc 100644 --- a/renku/core/commands/init.py +++ b/renku/core/commands/init.py @@ -241,6 +241,7 @@ def _init( template_source, template_ref, metadata, + custom_metadata, list_templates, force, describe, @@ -334,6 +335,7 @@ def _init( client=client, name=name, metadata=metadata, + custom_metadata=custom_metadata, template_version=template_version, immutable_template_files=template_data.get("immutable_template_files", []), automated_update=template_data.get("allow_template_update", False), @@ -499,6 +501,7 @@ def create_from_template( client, name=None, metadata={}, + custom_metadata=None, template_version=None, immutable_template_files=[], automated_update=False, @@ -518,7 +521,7 @@ def 
create_from_template( metadata["name"] = name with client.commit(commit_message=commit_message, commit_only=commit_only, skip_dirty_checks=True): - with client.with_metadata(name=name, description=description) as project: + with client.with_metadata(name=name, description=description, custom_metadata=custom_metadata) as project: project.template_source = metadata["__template_source__"] project.template_ref = metadata["__template_ref__"] project.template_id = metadata["__template_id__"] @@ -542,6 +545,7 @@ def _create_from_template_local( name, client_dispatcher: IClientDispatcher, metadata={}, + custom_metadata=None, default_metadata={}, template_version=None, immutable_template_files=[], @@ -571,6 +575,7 @@ def _create_from_template_local( client=client, name=name, metadata=metadata, + custom_metadata=custom_metadata, template_version=template_version, immutable_template_files=immutable_template_files, automated_update=automated_template_update, diff --git a/renku/core/commands/project.py b/renku/core/commands/project.py index 6ac7568829..8726ab03fe 100644 --- a/renku/core/commands/project.py +++ b/renku/core/commands/project.py @@ -25,9 +25,9 @@ @inject.autoparams() -def _edit_project(description, creator, project_gateway: IProjectGateway): +def _edit_project(description, creator, custom_metadata, project_gateway: IProjectGateway): """Edit dataset metadata.""" - possible_updates = {"creator": creator, "description": description} + possible_updates = {"creator": creator, "description": description, "custom_metadata": custom_metadata} creator, no_email_warnings = construct_creator(creator, ignore_email=True) @@ -35,7 +35,7 @@ def _edit_project(description, creator, project_gateway: IProjectGateway): if updated: project = project_gateway.get_project() - project.update_metadata(creator=creator, description=description) + project.update_metadata(creator=creator, description=description, custom_metadata=custom_metadata) project_gateway.update_project(project) return 
updated, no_email_warnings diff --git a/renku/core/management/repository.py b/renku/core/management/repository.py index 19f281e6c6..ea413c9e68 100644 --- a/renku/core/management/repository.py +++ b/renku/core/management/repository.py @@ -331,13 +331,16 @@ def with_metadata( read_only=False, name=None, description=None, + custom_metadata=None, ): """Yield an editable metadata object.""" try: project = project_gateway.get_project() except ValueError: - project = Project.from_client(name=name, description=description, client=self) + project = Project.from_client( + name=name, description=description, custom_metadata=custom_metadata, client=self + ) yield project diff --git a/renku/core/models/project.py b/renku/core/models/project.py index 7e819a4f5c..8d789d7fb5 100644 --- a/renku/core/models/project.py +++ b/renku/core/models/project.py @@ -18,15 +18,16 @@ """Project class.""" from datetime import datetime -from typing import List +from typing import Dict, List from urllib.parse import quote from marshmallow import EXCLUDE from renku.core import errors from renku.core.metadata.database import persistent -from renku.core.models.calamus import DateTimeList, JsonLDSchema, Nested, StringList, fields, prov, renku, schema +from renku.core.models.calamus import DateTimeList, JsonLDSchema, Nested, StringList, fields, oa, prov, renku, schema from renku.core.models.provenance.agent import Person, PersonSchema +from renku.core.models.provenance.annotation import Annotation, AnnotationSchema from renku.core.utils.datetime8601 import fix_timezone, local_now, parse_date @@ -37,6 +38,7 @@ def __init__( self, *, agent_version: str = None, + annotations: List[Annotation] = None, automated_update: bool = False, creator: Person, date_created: datetime = None, @@ -61,6 +63,7 @@ def __init__( id = Project.generate_id(namespace=namespace, name=name) self.agent_version: str = agent_version + self.annotations: List[Annotation] = annotations or [] self.automated_update: bool = 
automated_update self.creator: Person = creator self.date_created: datetime = fix_timezone(date_created) or local_now() @@ -76,16 +79,22 @@ def __init__( self.version: str = version @classmethod - def from_client(cls, client, name: str = None, description: str = None, creator: Person = None) -> "Project": + def from_client( + cls, client, name: str = None, description: str = None, custom_metadata: Dict = None, creator: Person = None + ) -> "Project": """Create an instance from a LocalClient.""" namespace, name = cls.get_namespace_and_name(client=client, name=name, creator=creator) creator = creator or Person.from_git(client.repo) + annotations = None + + if custom_metadata: + annotations = [Annotation(id=Annotation.generate_id(), body=custom_metadata, source="renku")] if not creator: raise ValueError("Project Creator not set") id = cls.generate_id(namespace=namespace, name=name) - return cls(creator=creator, id=id, name=name, description=description) + return cls(creator=creator, id=id, name=name, description=description, annotations=annotations) @staticmethod def get_namespace_and_name(*, client=None, name: str = None, creator: Person = None): @@ -116,7 +125,7 @@ def generate_id(namespace: str, name: str): return f"/projects/{namespace}/{name}" - def update_metadata(self, **kwargs): + def update_metadata(self, custom_metadata=None, **kwargs): """Updates metadata.""" editable_attributes = ["creator", "description"] for name, value in kwargs.items(): @@ -125,6 +134,13 @@ def update_metadata(self, **kwargs): if value and value != getattr(self, name): setattr(self, name, value) + if custom_metadata: + existing_metadata = [a for a in self.annotations if a.source != "renku"] + + existing_metadata.append(Annotation(id=Annotation.generate_id(), body=custom_metadata, source="renku")) + + self.annotations = existing_metadata + class ProjectSchema(JsonLDSchema): """Project Schema.""" @@ -137,6 +153,7 @@ class Meta: unknown = EXCLUDE agent_version = StringList(schema.agent, 
missing="pre-0.11.0") + annotations = Nested(oa.hasTarget, AnnotationSchema, reverse=True, many=True) automated_update = fields.Boolean(renku.automatedTemplateUpdate, missing=False) creator = Nested(schema.creator, PersonSchema, missing=None) date_created = DateTimeList(schema.dateCreated, missing=None, format="iso", extra_formats=("%Y-%m-%d",)) diff --git a/renku/service/controllers/project_edit.py b/renku/service/controllers/project_edit.py index cb42e27e92..7dd65d0e9a 100644 --- a/renku/service/controllers/project_edit.py +++ b/renku/service/controllers/project_edit.py @@ -50,7 +50,11 @@ def renku_op(self): edit_project_command() .with_commit_message(self.ctx["commit_message"]) .build() - .execute(description=self.ctx.get("description"), creator=self.ctx.get("creator")) + .execute( + description=self.ctx.get("description"), + creator=self.ctx.get("creator"), + custom_metadata=self.ctx.get("custom_metadata"), + ) ) edited, warning = result.output diff --git a/renku/service/controllers/templates_create_project.py b/renku/service/controllers/templates_create_project.py index b61c7eb2d8..701d92e6e4 100644 --- a/renku/service/controllers/templates_create_project.py +++ b/renku/service/controllers/templates_create_project.py @@ -146,6 +146,7 @@ def new_project(self): self.ctx["project_name"], metadata=provided_parameters, default_metadata=self.default_metadata, + custom_metadata=self.ctx["project_custom_metadata"], template_version=self.template_version, immutable_template_files=self.template.get("immutable_template_files", []), automated_template_update=self.template.get("allow_template_update", False), diff --git a/renku/service/serializers/project.py b/renku/service/serializers/project.py index 7195db497d..9b40d0f99e 100644 --- a/renku/service/serializers/project.py +++ b/renku/service/serializers/project.py @@ -34,6 +34,7 @@ class ProjectEditRequest(AsyncSchema, LocalRepositorySchema, RemoteRepositorySch description = fields.String(default=None) creator = 
fields.Nested(DatasetCreators) + custom_metadata = fields.Dict(default=None) class ProjectEditResponse(RenkuSyncSchema): diff --git a/renku/service/serializers/templates.py b/renku/service/serializers/templates.py index 137cf508b2..817376db22 100644 --- a/renku/service/serializers/templates.py +++ b/renku/service/serializers/templates.py @@ -61,6 +61,7 @@ class ProjectTemplateRequest(ProjectCloneContext, ManifestTemplatesRequest): project_repository = fields.String(required=True) project_slug = fields.String(required=True) project_description = fields.String(missing=None) + project_custom_metadata = fields.Dict(missing=None) new_project_url = fields.String(required=True) project_name_stripped = fields.String(required=True) diff --git a/tests/cli/test_init.py b/tests/cli/test_init.py index 810d064636..4ebd932eeb 100644 --- a/tests/cli/test_init.py +++ b/tests/cli/test_init.py @@ -16,6 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Test ``init`` command.""" +import json import os import shutil from pathlib import Path @@ -353,6 +354,30 @@ def test_init_with_parameters(isolated_runner, project_init, template): assert "These parameters are not used by the template and were ignored:" in result.output +def test_init_with_custom_metadata(isolated_runner, project_init, template): + """Test project initialization using custom metadata.""" + data, commands = project_init + + metadata = { + "@id": "https://example.com/annotation1", + "@type": "https://schema.org/specialType", + "https://schema.org/specialProperty": "some_unique_value", + } + metadata_path = Path("metadata.json") + metadata_path.write_text(json.dumps(metadata)) + + # create the project + new_project = Path(data["test_project"]) + assert not new_project.exists() + result = isolated_runner.invoke(cli, commands["init_test"] + commands["id"] + ["--metadata", str(metadata_path)]) + assert 0 == result.exit_code + + database = Database.from_path(new_project / ".renku" / "metadata") + project = database.get("project") + + assert metadata == project.annotations[0].body + + @pytest.mark.parametrize("data_dir", ["dir", "nested/dir/s"]) def test_init_with_data_dir(isolated_runner, data_dir, directory_tree, project_init): """Test initializing with data directory.""" diff --git a/tests/cli/test_project.py b/tests/cli/test_project.py index 246c01d2de..19fc74c851 100644 --- a/tests/cli/test_project.py +++ b/tests/cli/test_project.py @@ -17,6 +17,8 @@ # limitations under the License. 
"""Test ``project`` command.""" +import json + from renku.cli import cli from renku.core.metadata.gateway.project_gateway import ProjectGateway from renku.core.models.provenance.agent import Person @@ -29,12 +31,22 @@ def test_project_edit(runner, client, subdirectory, client_database_injection_ma creator = "Forename Surname [Affiliation]" + metadata = { + "@id": "https://example.com/annotation1", + "@type": "https://schema.org/specialType", + "https://schema.org/specialProperty": "some_unique_value", + } + metadata_path = client.path / "metadata.json" + metadata_path.write_text(json.dumps(metadata)) + commit_sha_before = client.repo.head.object.hexsha - result = runner.invoke(cli, ["project", "edit", "-d", " new description ", "-c", creator]) + result = runner.invoke( + cli, ["project", "edit", "-d", " new description ", "-c", creator, "--metadata", str(metadata_path)] + ) assert 0 == result.exit_code, format_result_exception(result) - assert "Successfully updated: creator, description." in result.output + assert "Successfully updated: creator, description, custom_metadata." 
in result.output assert "Warning: No email or wrong format for: Forename Surname" in result.output with client_database_injection_manager(client): @@ -45,6 +57,7 @@ def test_project_edit(runner, client, subdirectory, client_database_injection_ma assert isinstance(project.creator, Person) assert "Forename Surname" == project.creator.name assert "Affiliation" == project.creator.affiliation + assert metadata == project.annotations[0].body assert client.repo.is_dirty() commit_sha_after = client.repo.head.object.hexsha diff --git a/tests/service/fixtures/service_client.py b/tests/service/fixtures/service_client.py index 999d629afe..18582d56e5 100644 --- a/tests/service/fixtures/service_client.py +++ b/tests/service/fixtures/service_client.py @@ -219,6 +219,12 @@ def svc_client_templates_creation(svc_client_with_templates): "project_namespace": "renku-python-integration-tests", "project_repository": "https://dev.renku.ch/gitlab", "project_description": "new service project", + "project_custom_metadata": { + "@id": "http://example.com/metadata12", + "@type": "https://schema.org/myType", + "https://schema.org/property1": 1, + "https://schema.org/property2": "test", + }, } # clenup by invoking the GitLab delete API diff --git a/tests/service/views/test_project_views.py b/tests/service/views/test_project_views.py index ceae2da111..47e1a9728a 100644 --- a/tests/service/views/test_project_views.py +++ b/tests/service/views/test_project_views.py @@ -43,6 +43,12 @@ def test_edit_project_view(svc_client_with_repo): "project_id": project_id, "description": "my new title", "creator": {"name": "name123", "email": "name123@ethz.ch", "affiliation": "ethz"}, + "custom_metadata": { + "@id": "http://example.com/metadata12", + "@type": "https://schema.org/myType", + "https://schema.org/property1": 1, + "https://schema.org/property2": "test", + }, } response = svc_client.post("/project.edit", data=json.dumps(edit_payload), headers=headers) @@ -53,6 +59,12 @@ def 
test_edit_project_view(svc_client_with_repo): assert { "description": "my new title", "creator": {"name": "name123", "email": "name123@ethz.ch", "affiliation": "ethz"}, + "custom_metadata": { + "@id": "http://example.com/metadata12", + "@type": "https://schema.org/myType", + "https://schema.org/property1": 1, + "https://schema.org/property2": "test", + }, } == response.json["result"]["edited"]