Skip to content

Commit

Permalink
feat(service): improve migrations_check performance (#2443)
Browse files Browse the repository at this point in the history
  • Loading branch information
Panaetius committed Dec 1, 2021
1 parent 9b9e6a1 commit 28dde77
Show file tree
Hide file tree
Showing 11 changed files with 221 additions and 8 deletions.
35 changes: 35 additions & 0 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Expand Up @@ -120,6 +120,7 @@ pytest-xdist = { version = "<2.5.0,>=1.34.0", optional = true }
python-dateutil = "<2.8.3,>=2.6.1"
python-dotenv = { version = "==0.19.0", optional = true }
python-editor = "==1.0.4"
python-gitlab = ">=2.10.1,<2.11"
pyyaml = "<6.1.0,>=5.4"
rdflib = "<7.0,>=6.0.0"
redis = { version = "==3.5.3", optional = true }
Expand Down
4 changes: 0 additions & 4 deletions renku/core/management/migrate.py
Expand Up @@ -58,7 +58,6 @@
read_project_version,
)
from renku.core.utils import communication
from renku.core.utils.git import is_valid_git_repository

SUPPORTED_PROJECT_VERSION = 9

Expand Down Expand Up @@ -348,9 +347,6 @@ def is_renku_project(client_dispatcher: IClientDispatcher) -> bool:
"""Check if repository is a renku project."""
client = client_dispatcher.current_client

if not is_valid_git_repository(client.repository):
return False

try:
return client.project is not None
except ValueError: # NOTE: Error in loading due to an older schema
Expand Down
10 changes: 10 additions & 0 deletions renku/core/models/git.py
Expand Up @@ -140,6 +140,16 @@ def parse(cls, href):
else:
raise errors.GitConfigurationError(f"`{href}` is not a valid Git remote")

@property
def instance_url(self):
    """Return the URL of the Git hosting instance this remote points at.

    The path of ``href`` is replaced by whatever precedes the first occurrence
    of the owner in ``pathname``; all other URL components are kept as parsed.
    """
    parsed = urlparse(self.href)

    # Keep only the part of the path in front of the owner (e.g. a sub-path the instance is served from).
    prefix, *_ = self.pathname.split(self.owner, 1)

    return parsed._replace(path=prefix).geturl()

@property
def image(self):
"""Return image name."""
Expand Down
35 changes: 33 additions & 2 deletions renku/service/controllers/cache_migrations_check.py
Expand Up @@ -17,9 +17,16 @@
# limitations under the License.
"""Renku service migrations check controller."""

import tempfile
from pathlib import Path

from renku.core.commands.migrate import migrations_check
from renku.core.errors import RenkuException
from renku.core.utils.contexts import click_context
from renku.service.controllers.api.abstract import ServiceCtrl
from renku.service.controllers.api.mixins import RenkuOperationMixin
from renku.service.controllers.utils.remote_project import ANONYMOUS_SESSION
from renku.service.interfaces.git_api_provider import IGitAPIProvider
from renku.service.serializers.cache import ProjectMigrationCheckRequest, ProjectMigrationCheckResponseRPC
from renku.service.views import result_response

Expand All @@ -30,20 +37,44 @@ class MigrationsCheckCtrl(ServiceCtrl, RenkuOperationMixin):
REQUEST_SERIALIZER = ProjectMigrationCheckRequest()
RESPONSE_SERIALIZER = ProjectMigrationCheckResponseRPC()

def __init__(self, cache, user_data, request_data, git_api_provider: IGitAPIProvider):
    """Construct migration check controller.

    :param cache: forwarded unchanged to the base controller initializer.
    :param user_data: forwarded unchanged to the base controller initializer.
    :param request_data: raw request arguments; loaded through ``REQUEST_SERIALIZER`` into the context.
    :param git_api_provider: provider used to download individual files through the Git hosting API.
    """
    # NOTE: Context and provider are set before the base initializer is invoked.
    self.ctx = MigrationsCheckCtrl.REQUEST_SERIALIZER.load(request_data)
    self.git_api_provider = git_api_provider
    super(MigrationsCheckCtrl, self).__init__(cache, user_data, request_data)

@property
def context(self):
    """Return the context object stored on this controller as ``ctx``."""
    return self.ctx

def _fast_op_without_cache(self):
    """Run ``renku_op`` on a minimal set of files fetched through the Git API.

    Only the files listed below are downloaded into a temporary directory, so the
    repository does not have to be cloned. Raises ``RenkuException`` when the
    context has no ``git_url``.
    """
    if "git_url" not in self.context:
        raise RenkuException("context does not contain `project_id` or `git_url`")

    required_files = [".renku/metadata/root", ".renku/metadata/project", ".renku/metadata.yml", "Dockerfile"]

    with tempfile.TemporaryDirectory() as scratch:
        workdir = Path(scratch)

        self.git_api_provider.download_files_from_api(
            required_files,
            workdir,
            remote=self.ctx["git_url"],
            ref=self.request_data.get("ref", None),
            token=self.user_data.get("token", ANONYMOUS_SESSION),
        )

        # Execute the operation with the temporary directory as its working context.
        with click_context(workdir, "renku_op"):
            return self.renku_op()

def renku_op(self):
    """Build and execute the migrations check command and return its output."""
    command = migrations_check().build()
    result = command.execute()
    return result.output

def to_response(self):
    """Execute controller flow and serialize to service response.

    Requests whose context contains ``project_id`` go through the regular
    ``execute_op`` flow; all other requests use ``_fast_op_without_cache``.
    """
    if "project_id" in self.context:
        # use regular flow using cache
        return result_response(self.RESPONSE_SERIALIZER, self.execute_op())

    return result_response(self.RESPONSE_SERIALIZER, self._fast_op_without_cache())
18 changes: 18 additions & 0 deletions renku/service/gateways/__init__.py
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
#
# Copyright 2020 - Swiss Data Science Center (SDSC)
# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Renku service adapters/gateways."""
65 changes: 65 additions & 0 deletions renku/service/gateways/gitlab_api_provider.py
@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
#
# Copyright 2020 - Swiss Data Science Center (SDSC)
# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Git API provider implementation for GitLab."""

from pathlib import Path
from typing import List, Optional, Union

import gitlab

from renku.core.models.git import GitURL
from renku.core.utils.os import delete_file
from renku.service.interfaces.git_api_provider import IGitAPIProvider


class GitlabAPIProvider(IGitAPIProvider):
    """Git API provider that downloads files through the GitLab API."""

    def download_files_from_api(
        self,
        paths: List[Union[Path, str]],
        target_folder: Union[Path, str],
        remote: str,
        token: str,
        ref: Optional[str] = None,
    ):
        """Download files through a remote Git API.

        :param paths: repository-relative paths of the files to download.
        :param target_folder: local folder the files are written to, keeping their relative paths.
        :param remote: Git remote URL of the project.
        :param token: private token passed to the GitLab client.
        :param ref: Git reference to read the files from; ``HEAD`` is used when empty.
        """
        if not ref:
            ref = "HEAD"

        target_folder = Path(target_folder)

        git_data = GitURL.parse(remote)
        gl = gitlab.Gitlab(git_data.instance_url, private_token=token)
        project = gl.projects.get(f"{git_data.owner}/{git_data.name}")

        for path in paths:
            full_path = target_folder / path

            full_path.parent.mkdir(parents=True, exist_ok=True)

            try:
                with open(full_path, "wb") as f:
                    # NOTE: ``paths`` may contain ``Path`` objects, but the GitLab client expects a string path
                    # with forward slashes.
                    project.files.raw(file_path=Path(path).as_posix(), ref=ref, streamed=True, action=f.write)
            except gitlab.GitlabGetError:
                # NOTE: Files that cannot be fetched are skipped; remove the file that was opened for them.
                delete_file(full_path)
18 changes: 18 additions & 0 deletions renku/service/interfaces/__init__.py
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
#
# Copyright 2020 - Swiss Data Science Center (SDSC)
# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Renku service interfaces for IoC adapters/gateways."""
37 changes: 37 additions & 0 deletions renku/service/interfaces/git_api_provider.py
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
#
# Copyright 2020 - Swiss Data Science Center (SDSC)
# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Git API provider interface."""

from abc import ABC
from pathlib import Path
from typing import List, Optional, Union


class IGitAPIProvider(ABC):
    """Contract for gateways that fetch repository files through a Git hosting API."""

    def download_files_from_api(
        self,
        paths: List[Union[Path, str]],
        target_folder: Union[Path, str],
        remote: str,
        token: str,
        ref: Optional[str] = None,
    ):
        """Download the given files from a remote through its Git API.

        The base version always raises ``NotImplementedError``; concrete
        providers are expected to override it.
        """
        raise NotImplementedError
3 changes: 2 additions & 1 deletion renku/service/views/cache.py
Expand Up @@ -25,6 +25,7 @@
from renku.service.controllers.cache_migrate_project import MigrateProjectCtrl
from renku.service.controllers.cache_migrations_check import MigrationsCheckCtrl
from renku.service.controllers.cache_project_clone import ProjectCloneCtrl
from renku.service.gateways.gitlab_api_provider import GitlabAPIProvider
from renku.service.views.api_versions import V0_9, V1_0, VersionedBlueprint
from renku.service.views.decorators import (
accepts_json,
Expand Down Expand Up @@ -201,7 +202,7 @@ def migration_check_project_view(user_data, cache):
tags:
- cache
"""
return MigrationsCheckCtrl(cache, user_data, dict(request.args)).to_response()
return MigrationsCheckCtrl(cache, user_data, dict(request.args), GitlabAPIProvider()).to_response()


cache_blueprint = add_v0_9_specific_endpoints(cache_blueprint)
3 changes: 2 additions & 1 deletion renku/service/views/v0_9/cache.py
Expand Up @@ -19,6 +19,7 @@
from flask import request

from renku.service.controllers.cache_migrations_check import MigrationsCheckCtrl
from renku.service.gateways.gitlab_api_provider import GitlabAPIProvider
from renku.service.serializers.v0_9.cache import ProjectMigrationCheckResponseRPC_0_9
from renku.service.views.api_versions import V0_9
from renku.service.views.decorators import handle_common_except, requires_cache, requires_identity
Expand Down Expand Up @@ -46,7 +47,7 @@ def migration_check_project_view_0_9(user_data, cache):
tags:
- cache
"""
ctrl = MigrationsCheckCtrl(cache, user_data, dict(request.args))
ctrl = MigrationsCheckCtrl(cache, user_data, dict(request.args), GitlabAPIProvider())
ctrl.RESPONSE_SERIALIZER = ProjectMigrationCheckResponseRPC_0_9()
return ctrl.to_response()

Expand Down

0 comments on commit 28dde77

Please sign in to comment.