diff --git a/poetry.lock b/poetry.lock index 4265b34707..25b75575e3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1580,6 +1580,22 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "python-gitlab" +version = "2.10.1" +description = "Interact with GitLab API" +category = "main" +optional = false +python-versions = ">=3.6.0" + +[package.dependencies] +requests = ">=2.25.0" +requests-toolbelt = ">=0.9.1" + +[package.extras] +autocompletion = ["argcomplete (>=1.10.0,<2)"] +yaml = ["PyYaml (>=5.2)"] + [[package]] name = "pytz" version = "2021.3" @@ -1688,6 +1704,17 @@ urllib3 = ">=1.21.1,<1.27" socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] +[[package]] +name = "requests-toolbelt" +version = "0.9.1" +description = "A utility belt for advanced users of python-requests" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +requests = ">=2.0.1,<3.0.0" + [[package]] name = "responses" version = "0.14.0" @@ -3392,6 +3419,10 @@ python-editor = [ {file = "python_editor-1.0.4-py3-none-any.whl", hash = "sha256:1bf6e860a8ad52a14c3ee1252d5dc25b2030618ed80c022598f00176adc8367d"}, {file = "python_editor-1.0.4-py3.5.egg", hash = "sha256:c3da2053dbab6b29c94e43c486ff67206eafbe7eb52dbec7390b5e2fb05aac77"}, ] +python-gitlab = [ + {file = "python-gitlab-2.10.1.tar.gz", hash = "sha256:7afa7d7c062fa62c173190452265a30feefb844428efc58ea5244f3b9fc0d40f"}, + {file = "python_gitlab-2.10.1-py3-none-any.whl", hash = "sha256:581a219759515513ea9399e936ed7137437cfb681f52d2641626685c492c999d"}, +] pytz = [ {file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"}, {file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"}, @@ -3557,6 +3588,10 @@ requests = [ {file = "requests-2.26.0-py2.py3-none-any.whl", hash = 
"sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"}, {file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"}, ] +requests-toolbelt = [ + {file = "requests-toolbelt-0.9.1.tar.gz", hash = "sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0"}, + {file = "requests_toolbelt-0.9.1-py2.py3-none-any.whl", hash = "sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f"}, +] responses = [ {file = "responses-0.14.0-py2.py3-none-any.whl", hash = "sha256:57bab4e9d4d65f31ea5caf9de62095032c4d81f591a8fac2f5858f7777b8567b"}, {file = "responses-0.14.0.tar.gz", hash = "sha256:93f774a762ee0e27c0d9d7e06227aeda9ff9f5f69392f72bb6c6b73f8763563e"}, diff --git a/pyproject.toml b/pyproject.toml index 3fd728310d..863410168b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,6 +120,7 @@ pytest-xdist = { version = "<2.5.0,>=1.34.0", optional = true } python-dateutil = "<2.8.3,>=2.6.1" python-dotenv = { version = "==0.19.0", optional = true } python-editor = "==1.0.4" +python-gitlab = ">=2.10.1,<2.11" pyyaml = "<6.1.0,>=5.4" rdflib = "<7.0,>=6.0.0" redis = { version = "==3.5.3", optional = true } diff --git a/renku/core/management/migrate.py b/renku/core/management/migrate.py index 32b496fe81..8a88c55f40 100644 --- a/renku/core/management/migrate.py +++ b/renku/core/management/migrate.py @@ -58,7 +58,6 @@ read_project_version, ) from renku.core.utils import communication -from renku.core.utils.git import is_valid_git_repository SUPPORTED_PROJECT_VERSION = 9 @@ -348,9 +347,6 @@ def is_renku_project(client_dispatcher: IClientDispatcher) -> bool: """Check if repository is a renku project.""" client = client_dispatcher.current_client - if not is_valid_git_repository(client.repository): - return False - try: return client.project is not None except ValueError: # NOTE: Error in loading due to an older schema diff --git a/renku/core/models/git.py b/renku/core/models/git.py 
@property
def instance_url(self):
    """Get the URL of the Git instance that hosts this repository.

    The instance URL is ``href`` with its path cut down to whatever precedes
    the ``<owner>/`` segment of ``pathname`` (for example a deployment prefix
    such as ``/gitlab/``); every other URL component is kept as parsed.

    Returns:
        str: The instance URL.
    """
    url = urlparse(self.href)

    owner = self.owner
    # Anchor on the LAST "<owner>/" occurrence: a plain ``split(owner)`` cuts at
    # the first substring match, which truncates the path when the owner text
    # also appears earlier (e.g. owner "lab" below a "/gitlab/" prefix).
    head, found, _ = self.pathname.rpartition(f"{owner}/") if owner else ("", "", "")
    # Fall back to the previous behaviour when "<owner>/" is not present.
    path = head if found else self.pathname.split(owner, 1)[0]

    return url._replace(path=path).geturl()
def _fast_op_without_cache(self):
    """Run ``renku_op`` on a throw-away directory instead of a cached clone.

    Only the files listed below are requested from the Git API provider and
    placed in a temporary directory, which then serves as the working
    directory for the operation.

    Raises:
        RenkuException: If the request context has no ``git_url``.
    """
    if "git_url" not in self.context:
        raise RenkuException("context does not contain `project_id` or `git_url`")

    required_files = [".renku/metadata/root", ".renku/metadata/project", ".renku/metadata.yml", "Dockerfile"]

    with tempfile.TemporaryDirectory() as tmp_name:
        workdir = Path(tmp_name)

        self.git_api_provider.download_files_from_api(
            required_files,
            workdir,
            remote=self.ctx["git_url"],
            ref=self.request_data.get("ref", None),
            token=self.user_data.get("token", ANONYMOUS_SESSION),
        )

        # The temporary directory is removed when the outer ``with`` exits,
        # so the operation has to run while it is still open.
        with click_context(workdir, "renku_op"):
            return self.renku_op()
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service adapters/gateways.""" diff --git a/renku/service/gateways/gitlab_api_provider.py b/renku/service/gateways/gitlab_api_provider.py new file mode 100644 index 0000000000..f0b1ab7282 --- /dev/null +++ b/renku/service/gateways/gitlab_api_provider.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class GitlabAPIProvider(IGitAPIProvider):
    """GitLab implementation of the Git API provider interface."""

    def download_files_from_api(
        self,
        paths: List[Union[Path, str]],
        target_folder: Union[Path, str],
        remote: str,
        token: str,
        ref: Optional[str] = None,
    ) -> List[Path]:
        """Download files through the GitLab API.

        Args:
            paths: Repository-relative paths of the files to fetch.
            target_folder: Directory the files are written to; the relative
                layout of ``paths`` is recreated below it.
            remote: URL of the remote repository, parsed with ``GitURL.parse``.
            token: Token passed to the GitLab client as ``private_token``.
            ref: Git reference to read the files from; ``"HEAD"`` when empty.

        Returns:
            The local paths of the files that were downloaded. Files for which
            the API raises ``gitlab.GitlabGetError`` are skipped.
        """
        if not ref:
            ref = "HEAD"

        target_folder = Path(target_folder)

        git_data = GitURL.parse(remote)
        gl = gitlab.Gitlab(git_data.instance_url, private_token=token)
        project = gl.projects.get(f"{git_data.owner}/{git_data.name}")

        result_paths = []

        for path in paths:
            full_path = target_folder / path

            full_path.parent.mkdir(parents=True, exist_ok=True)

            try:
                with open(full_path, "wb") as f:
                    # The API expects a "/"-separated string, while callers may pass ``Path`` objects.
                    project.files.raw(file_path=Path(path).as_posix(), ref=ref, streamed=True, action=f.write)
            except gitlab.GitlabGetError:
                # The target was already opened for writing; remove the leftover file.
                delete_file(full_path)
                continue

            result_paths.append(full_path)

        # Previously the collected paths were built but never handed back to the caller.
        return result_paths
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service interfaces for IoC adapters/gateways.""" diff --git a/renku/service/interfaces/git_api_provider.py b/renku/service/interfaces/git_api_provider.py new file mode 100644 index 0000000000..510ba9f3cc --- /dev/null +++ b/renku/service/interfaces/git_api_provider.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class IGitAPIProvider(ABC):
    """Interface for a provider that reads repository files through a Git hosting API."""

    def download_files_from_api(
        self,
        paths: List[Union[Path, str]],
        target_folder: Union[Path, str],
        remote: str,
        token: str,
        ref: Optional[str] = None,
    ):
        """Download the files at ``paths`` from ``remote`` into ``target_folder``.

        This base implementation always raises ``NotImplementedError``;
        concrete providers are expected to override it.
        """
        raise NotImplementedError
import handle_common_except, requires_cache, requires_identity @@ -46,7 +47,7 @@ def migration_check_project_view_0_9(user_data, cache): tags: - cache """ - ctrl = MigrationsCheckCtrl(cache, user_data, dict(request.args)) + ctrl = MigrationsCheckCtrl(cache, user_data, dict(request.args), GitlabAPIProvider()) ctrl.RESPONSE_SERIALIZER = ProjectMigrationCheckResponseRPC_0_9() return ctrl.to_response()