From c2ae2bf30806b0de23eac700225e8a8cdcfcd368 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Thu, 25 Aug 2022 11:38:16 +0200 Subject: [PATCH] feat(service): add support for dataset data directory in endpoints (#3089) --- renku/command/init.py | 3 ++ renku/domain_model/dataset.py | 2 +- renku/ui/cli/dataset.py | 2 +- .../ui/service/controllers/datasets_create.py | 1 + .../ui/service/controllers/datasets_import.py | 1 + .../controllers/templates_create_project.py | 1 + renku/ui/service/jobs/datasets.py | 9 +++++- renku/ui/service/serializers/datasets.py | 8 +++++ renku/ui/service/serializers/templates.py | 21 +++++++------ renku/ui/service/views/api_versions.py | 11 ++++--- tests/service/views/test_dataset_views.py | 31 +++++++++++++++++-- tests/service/views/test_templates_views.py | 3 ++ 12 files changed, 73 insertions(+), 20 deletions(-) diff --git a/renku/command/init.py b/renku/command/init.py index be01c33759..f7a6cefbec 100644 --- a/renku/command/init.py +++ b/renku/command/init.py @@ -333,6 +333,7 @@ def _create_from_template_local( commit_message: Optional[str] = None, description: Optional[str] = None, keywords: Optional[List[str]] = None, + data_dir: Optional[str] = None, ): """Initialize a new project from a template. @@ -352,6 +353,7 @@ def _create_from_template_local( commit_message(Optional[str]): Message of initial commit (Default value = None). description(Optional[str]): Project description (Default value = None). keywords(Optional[List[str]]): Project keywords (Default value = None). + data_dir(Optional[str]): Project base data directory (Default value = None). """ client = client_dispatcher.current_client @@ -406,6 +408,7 @@ def _create_from_template_local( commit_message=commit_message, description=description, keywords=keywords, + data_dir=data_dir, ) diff --git a/renku/domain_model/dataset.py b/renku/domain_model/dataset.py index 59b2aef384..6d698329c9 100644 --- a/renku/domain_model/dataset.py +++ b/renku/domain_model/dataset.py @@ -650,7 +650,7 @@ class DatasetDetailsJson(marshmallow.Schema): annotations = marshmallow.fields.List(marshmallow.fields.Nested(AnnotationJson)) - datadir = marshmallow.fields.Method("get_datadir") + data_directory = marshmallow.fields.Method("get_datadir") def get_datadir(self, obj): """Get data directory.""" diff --git a/renku/ui/cli/dataset.py b/renku/ui/cli/dataset.py index 5b03057267..6f739491d7 100644 --- a/renku/ui/cli/dataset.py +++ b/renku/ui/cli/dataset.py @@ -764,7 +764,7 @@ def show(tag, name): click.echo(click.style("Name: ", bold=True, fg=color.MAGENTA) + click.style(ds["name"], bold=True)) click.echo(click.style("Created: ", bold=True, fg=color.MAGENTA) + (ds.get("created_at", "") or "")) - click.echo(click.style("Data Directory: ", bold=True, fg=color.MAGENTA) + str(ds.get("datadir", "") or "")) + click.echo(click.style("Data Directory: ", bold=True, fg=color.MAGENTA) + str(ds.get("data_directory", "") or "")) creators = [] for creator in ds.get("creators", []): diff --git a/renku/ui/service/controllers/datasets_create.py b/renku/ui/service/controllers/datasets_create.py index 73a60114d4..7ef046a86f 100644 --- a/renku/ui/service/controllers/datasets_create.py +++ b/renku/ui/service/controllers/datasets_create.py @@ -79,6 +79,7 @@ def renku_op(self): keywords=self.ctx.get("keywords"), images=images, custom_metadata=self.ctx.get("custom_metadata"), + datadir=self.ctx.get("data_directory"), ) ) diff --git a/renku/ui/service/controllers/datasets_import.py b/renku/ui/service/controllers/datasets_import.py index 08004801d1..c4b21f3269 100644 --- a/renku/ui/service/controllers/datasets_import.py +++ b/renku/ui/service/controllers/datasets_import.py @@ -68,6 +68,7 @@ def renku_op(self): job_timeout=int(os.getenv("WORKER_DATASET_JOBS_TIMEOUT", 1800)), result_ttl=int(os.getenv("WORKER_DATASET_JOBS_RESULT_TTL", 500)), commit_message=self.ctx["commit_message"], + data_directory=self.ctx.get("data_directory"), ) return job diff --git a/renku/ui/service/controllers/templates_create_project.py b/renku/ui/service/controllers/templates_create_project.py index e4f339ddb4..0e5cf3d6b8 100644 --- a/renku/ui/service/controllers/templates_create_project.py +++ b/renku/ui/service/controllers/templates_create_project.py @@ -162,6 +162,7 @@ def new_project(self): initial_branch=self.ctx["initial_branch"], commit_message=self.ctx["commit_message"], description=self.ctx["project_description"], + data_dir=self.ctx.get("data_directory"), ) self.new_project_push(new_project_path) diff --git a/renku/ui/service/jobs/datasets.py b/renku/ui/service/jobs/datasets.py index e9ffa31dd5..750f151363 100644 --- a/renku/ui/service/jobs/datasets.py +++ b/renku/ui/service/jobs/datasets.py @@ -43,6 +43,7 @@ def dataset_import( tag=None, timeout=None, commit_message=None, + data_directory=None, ): """Job for dataset import.""" user = cache.ensure_user(user) @@ -62,7 +63,13 @@ def dataset_import( command = import_dataset_command().with_commit_message(commit_message) command.with_communicator(communicator).build().execute( - uri=dataset_uri, name=name, extract=extract, tag=tag, yes=True, gitlab_token=gitlab_token + uri=dataset_uri, + name=name, + extract=extract, + tag=tag, + yes=True, + gitlab_token=gitlab_token, + datadir=data_directory, ) worker_log.debug("operation successful - syncing with remote") diff --git a/renku/ui/service/serializers/datasets.py b/renku/ui/service/serializers/datasets.py index 4e775f8da5..1e22a87c0d 100644 --- a/renku/ui/service/serializers/datasets.py +++ b/renku/ui/service/serializers/datasets.py @@ -58,6 +58,11 @@ class DatasetCreateRequest( ): """Request schema for a dataset create view.""" + # NOTE: Override field in DatasetDetails + data_directory = fields.String( # type: ignore + missing=None, description="Base dataset data directory. '/' by default" + ) + class DatasetCreateResponse(DatasetNameSchema, RenkuSyncSchema): """Response schema for a dataset create view.""" @@ -182,6 +187,9 @@ class DatasetImportRequest(AsyncSchema, LocalRepositorySchema, RemoteRepositoryS name = fields.String(description="Optional dataset name.") extract = fields.Boolean() tag = fields.String(description="Dataset version to import.") + data_directory = fields.String( + missing=None, description="Base dataset data directory. '/' by default" + ) class DatasetImportResponseRPC(JsonRPCResponse): diff --git a/renku/ui/service/serializers/templates.py b/renku/ui/service/serializers/templates.py index fdd999f1ce..7618fdf1ad 100644 --- a/renku/ui/service/serializers/templates.py +++ b/renku/ui/service/serializers/templates.py @@ -53,15 +53,18 @@ class TemplateParameterSchema(Schema): class ProjectTemplateRequest(ProjectCloneContext, ManifestTemplatesRequest): """Request schema for listing manifest templates.""" - identifier = fields.String(required=True) - initial_branch = fields.String(missing=None) - parameters = fields.List(fields.Nested(TemplateParameterSchema), missing=[]) - project_name = fields.String(required=True) - project_namespace = fields.String(required=True) - project_repository = fields.String(required=True) - project_description = fields.String(missing=None) - project_keywords = fields.List(fields.String(), missing=None) - project_custom_metadata = fields.Dict(missing=None) + identifier = fields.String(required=True, description="Indentifier of the template") + initial_branch = fields.String(missing=None, description="Name for the initial branch in the new project.") + parameters = fields.List(fields.Nested(TemplateParameterSchema), missing=[], description="Template parameters") + project_name = fields.String(required=True, description="Project name") + project_namespace = fields.String(required=True, description="Project namespace") + project_repository = fields.String(required=True, description="Project remote repository") + project_description = fields.String(missing=None, description="Project description") + project_keywords = fields.List(fields.String(), missing=None, description="Project keywords") + project_custom_metadata = fields.Dict(missing=None, description="Project custom JSON-LD metadata") + data_directory = fields.String( + missing=None, description="Base dataset data directory in project. Defaults to 'data/'" + ) @post_load() def add_required_fields(self, data, **kwargs): diff --git a/renku/ui/service/views/api_versions.py b/renku/ui/service/views/api_versions.py index b7ab1f2e72..ec845a98f6 100644 --- a/renku/ui/service/views/api_versions.py +++ b/renku/ui/service/views/api_versions.py @@ -60,11 +60,12 @@ def add_url_rule( V0_9 = ApiVersion("0.9") V1_0 = ApiVersion("1.0") V1_1 = ApiVersion("1.1") -V1_2 = ApiVersion("1.2", is_base_version=True) +V1_2 = ApiVersion("1.2") +V1_3 = ApiVersion("1.3", is_base_version=True) -ALL_VERSIONS = [V0_9, V1_0, V1_1, V1_2] -VERSIONS_FROM_V1_0 = [V1_0, V1_1, V1_2] -VERSIONS_FROM_V1_1 = [V1_1, V1_2] +VERSIONS_FROM_V1_1 = [V1_1, V1_2, V1_3] +VERSIONS_FROM_V1_0 = [V1_0] + VERSIONS_FROM_V1_1 +ALL_VERSIONS = [V0_9] + VERSIONS_FROM_V1_0 MINIMUM_VERSION = V0_9 -MAXIMUM_VERSION = V1_2 +MAXIMUM_VERSION = V1_3 diff --git a/tests/service/views/test_dataset_views.py b/tests/service/views/test_dataset_views.py index 769654da70..fdcadabefe 100644 --- a/tests/service/views/test_dataset_views.py +++ b/tests/service/views/test_dataset_views.py @@ -86,6 +86,31 @@ def test_create_dataset_view(svc_client_with_repo): assert payload["name"] == response.json["result"]["name"] +@pytest.mark.service +@pytest.mark.integration +@retry_failed +def test_create_dataset_view_with_datadir(svc_client_with_repo): + """Create a new dataset successfully.""" + svc_client, headers, project_id, _ = svc_client_with_repo + + payload = {"project_id": project_id, "name": uuid.uuid4().hex, "data_directory": "my-folder/"} + + response = svc_client.post("/datasets.create", data=json.dumps(payload), headers=headers) + assert_rpc_response(response) + + assert {"name", "remote_branch"} == set(response.json["result"].keys()) + assert payload["name"] == response.json["result"]["name"] + + params = { + "project_id": project_id, + } + response = svc_client.get("/datasets.list", query_string=params, headers=headers) + + assert_rpc_response(response) + ds = next(ds for ds in response.json["result"]["datasets"] if ds["name"] == payload["name"]) + assert ds["data_directory"] == "my-folder" + + @pytest.mark.service @pytest.mark.integration @retry_failed @@ -606,7 +631,7 @@ def test_list_datasets_view(svc_client_with_repo): "keywords", "annotations", "storage", - "datadir", + "data_directory", } == set(response.json["result"]["datasets"][0].keys()) @@ -669,7 +694,7 @@ def test_list_datasets_view_remote(svc_client_with_repo, it_remote_repo_url): "keywords", "annotations", "storage", - "datadir", + "data_directory", } == set(response.json["result"]["datasets"][0].keys()) @@ -787,7 +812,7 @@ def test_create_and_list_datasets_view(svc_client_with_repo): "keywords", "annotations", "storage", - "datadir", + "data_directory", } == set(response.json["result"]["datasets"][0].keys()) assert payload["name"] in [ds["name"] for ds in response.json["result"]["datasets"]] diff --git a/tests/service/views/test_templates_views.py b/tests/service/views/test_templates_views.py index 8a156c1738..5ddaf1fc77 100644 --- a/tests/service/views/test_templates_views.py +++ b/tests/service/views/test_templates_views.py @@ -130,6 +130,8 @@ def test_create_project_from_template(svc_client_templates_creation, client_data svc_client, headers, payload, rm_remote = svc_client_templates_creation + payload["data_directory"] = "my-folder/" + response = svc_client.post("/templates.create_project", data=json.dumps(payload), headers=headers) assert response @@ -158,6 +160,7 @@ def test_create_project_from_template(svc_client_templates_creation, client_data expected_id = f"/projects/{payload['project_namespace']}/{stripped_name}" assert expected_id == project.id + assert client.data_dir == "my-folder/" # NOTE: Assert backwards compatibility metadata.yml was created old_metadata_path = project_path / ".renku/metadata.yml"