Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(service): accept commit SHA in read endpoints #3608

Merged
merged 8 commits into from Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 12 additions & 10 deletions renku/core/util/git.py
Expand Up @@ -29,6 +29,8 @@
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union, cast
from uuid import uuid4

import git

from renku.core import errors
from renku.infrastructure.repository import DiffChangeType

Expand Down Expand Up @@ -712,7 +714,7 @@ def clone_repository(
skip_smudge=True,
recursive=False,
depth=None,
progress=None,
progress: Optional[git.RemoteProgress] = None,
config: Optional[dict] = None,
raise_git_except=False,
checkout_revision=None,
Expand Down Expand Up @@ -746,10 +748,8 @@ def clone_repository(

path = Path(path) if path else Path(get_repository_name(url))

def handle_git_exception():
"""Handle git exceptions."""
if raise_git_except:
return
def error_from_progress(progress: Optional[git.RemoteProgress], url: str) -> errors.GitError:
"""Format a Git command error into a more user-friendly format."""

message = f"Cannot clone repo from {url}"

Expand All @@ -758,7 +758,7 @@ def handle_git_exception():
error = "".join([f"\n\t{line}" for line in lines if line.strip()])
message += f" - error message:\n {error}"

raise errors.GitError(message)
return errors.GitError(message)

def clean_directory(clean: bool):
if not clean or not path:
Expand Down Expand Up @@ -825,8 +825,9 @@ def clone(branch, depth):
repository = clone(branch=checkout_revision, depth=depth)
except errors.GitCommandError:
if not checkout_revision:
handle_git_exception()
raise
if raise_git_except:
raise
raise error_from_progress(progress, url)

# NOTE: Delete the partially-cloned repository
clean_directory(clean=True)
Expand All @@ -835,8 +836,9 @@ def clone(branch, depth):
try:
repository = clone(branch=None, depth=None)
except errors.GitCommandError:
handle_git_exception()
raise
if raise_git_except:
raise
raise error_from_progress(progress, url)

if checkout_revision is not None and not no_checkout:
try:
Expand Down
13 changes: 8 additions & 5 deletions renku/infrastructure/repository.py
Expand Up @@ -30,7 +30,6 @@
from pathlib import Path
from typing import (
Any,
Callable,
Dict,
Generator,
List,
Expand Down Expand Up @@ -1001,20 +1000,24 @@ def clone_from(
branch: Optional[str] = None,
recursive: bool = False,
depth: Optional[int] = None,
progress: Optional[Callable] = None,
progress: Optional[git.RemoteProgress] = None,
no_checkout: bool = False,
env: Optional[dict] = None,
clone_options: Optional[List[str]] = None,
) -> "Repository":
"""Clone a remote repository and create an instance."""
"""Clone a remote repository and create an instance.

Since this is just a thin wrapper around GitPython note that the branch parameter
can work and accept either a branch name or a tag. But it will not work with a commit SHA.
"""
try:
repository = git.Repo.clone_from(
url=url,
to_path=path,
branch=branch,
branch=branch, # NOTE: Git python will accept tag or branch here but not SHA
recursive=recursive,
depth=depth,
progress=progress,
progress=progress, # type: ignore[arg-type]
no_checkout=no_checkout,
env=env,
multi_options=clone_options,
Expand Down
15 changes: 11 additions & 4 deletions renku/ui/service/cache/models/project.py
Expand Up @@ -26,11 +26,11 @@

from renku.ui.service.cache.base import BaseCache
from renku.ui.service.config import CACHE_PROJECTS_PATH
from renku.ui.service.utils import normalize_git_url

MAX_CONCURRENT_PROJECT_REQUESTS = 10
LOCK_TIMEOUT = 15
NO_BRANCH_FOLDER = "__default_branch__"
DETACHED_HEAD_FOLDER_PREFIX = "__detached_head_"


class Project(Model):
Expand All @@ -55,14 +55,21 @@ class Project(Model):
description = TextField()
owner = TextField()
initialized = BooleanField()
commit_sha = TextField()
branch = TextField()

@property
def abs_path(self) -> Path:
"""Full path of cached project."""
branch = self.branch
folder_name = self.branch
if not self.branch:
branch = NO_BRANCH_FOLDER
return CACHE_PROJECTS_PATH / self.user_id / self.owner / normalize_git_url(self.slug) / branch
if self.commit_sha:
# NOTE: Detached head state
folder_name = f"{DETACHED_HEAD_FOLDER_PREFIX}{self.commit_sha}"
else:
# NOTE: We are on the default branch (i.e. main)
folder_name = NO_BRANCH_FOLDER
return CACHE_PROJECTS_PATH / self.user_id / self.owner / self.slug / folder_name

def read_lock(self, timeout: Optional[float] = None):
"""Shared read lock on the project."""
Expand Down
13 changes: 11 additions & 2 deletions renku/ui/service/cache/serializers/project.py
Expand Up @@ -17,7 +17,7 @@
import uuid
from datetime import datetime

from marshmallow import fields, post_load
from marshmallow import ValidationError, fields, post_load, validates_schema

from renku.ui.service.cache.models.project import Project
from renku.ui.service.serializers.common import AccessSchema, CreationSchema, MandatoryUserSchema
Expand All @@ -39,11 +39,20 @@ class ProjectSchema(CreationSchema, AccessSchema, MandatoryUserSchema):
description = fields.String(load_default=None)
owner = fields.String(required=True)
initialized = fields.Boolean(dump_default=False)
commit_sha = fields.String(required=False, load_default=None, dump_default=None)
branch = fields.String(required=False, load_default=None, dump_default=None)

@post_load
def make_project(self, data, **options):
"""Construct project object."""
data["git_url"] = normalize_git_url(data["git_url"])
if data.get("git_url"):
data["git_url"] = normalize_git_url(data["git_url"])
data["name"] = normalize_git_url(data["name"])
data["slug"] = normalize_git_url(data["slug"])
return Project(**data)

@validates_schema
def ensure_only_commit_sha_or_branch(self, data, **kwargs):
"""Checks that only a commit SHA or branch is set and not both."""
if data.get("commit_sha") and data.get("branch"):
raise ValidationError("You cannot specify a commit SHA and a branch, only one or the other")
1 change: 1 addition & 0 deletions renku/ui/service/controllers/api/mixins.py
Expand Up @@ -188,6 +188,7 @@ def local(self):
self.request_data.get("branch"),
self.user,
self.clone_depth is not None,
self.request_data.get("commit_sha"),
)

self.context["project_id"] = project.project_id
Expand Down
31 changes: 26 additions & 5 deletions renku/ui/service/gateways/repository_cache.py
Expand Up @@ -45,7 +45,13 @@ class LocalRepositoryCache(IRepositoryCache):
"""Cache for project repos stored on local disk."""

def get(
self, cache: ServiceCache, git_url: str, branch: Optional[str], user: User, shallow: bool = True
self,
cache: ServiceCache,
git_url: str,
branch: Optional[str],
user: User,
shallow: bool = True,
commit_sha: Optional[str] = None,
) -> Project:
"""Get a project from cache (clone if necessary)."""
if git_url is None:
Expand All @@ -58,12 +64,12 @@ def get(
)
except ValueError:
# project not found in DB
return self._clone_project(cache, git_url, branch, user, shallow)
return self._clone_project(cache, git_url, branch, user, shallow, commit_sha)

if not project.abs_path.exists():
# cache folder doesn't exist anymore
project.delete()
return self._clone_project(cache, git_url, branch, user, shallow)
return self._clone_project(cache, git_url, branch, user, shallow, commit_sha)

if not shallow and project.is_shallow:
self._unshallow_project(project, user)
Expand Down Expand Up @@ -100,7 +106,13 @@ def _update_project_access_date(self, project: Project):
project.save()

def _clone_project(
self, cache: ServiceCache, git_url: str, branch: Optional[str], user: User, shallow: bool = True
self,
cache: ServiceCache,
git_url: str,
branch: Optional[str],
user: User,
shallow: bool = True,
commit_sha: Optional[str] = None,
) -> Project:
"""Clone a project to cache."""
git_url = normalize_git_url(git_url)
Expand All @@ -124,6 +136,7 @@ def _clone_project(
"branch": branch,
"git_url": git_url,
"user_id": user.user_id,
"commit_sha": commit_sha,
}
project = cache.make_project(user, project_data, persist=False)

Expand All @@ -139,6 +152,7 @@ def _clone_project(
(Project.user_id == user.user_id)
& (Project.git_url == git_url)
& (Project.branch == branch)
& (Project.commit_sha == commit_sha)
& (Project.project_id != project.project_id)
)
except ValueError:
Expand Down Expand Up @@ -170,7 +184,7 @@ def _clone_project(
"user.email": user.email,
"pull.rebase": False,
},
checkout_revision=project.branch,
checkout_revision=commit_sha or project.branch,
)
).output
project.save()
Expand All @@ -186,6 +200,9 @@ def _clone_project(

def _unshallow_project(self, project: Project, user: User):
"""Turn a shallow clone into a full clone."""
if project.commit_sha is not None:
# NOTE: A project in a detached head state at a specific commit SHA does not make sense to be unshallowed
return
try:
with project.write_lock(), Repository(project.abs_path) as repository:
try:
Expand All @@ -208,6 +225,10 @@ def _maybe_update_cache(self, project: Project, user: User):
if project.fetch_age < PROJECT_FETCH_TIME:
return

if project.commit_sha is not None:
# NOTE: A project in a detached head state at a specific commit SHA cannot be updated
return

try:
with project.write_lock(), Repository(project.abs_path) as repository:
try:
Expand Down
6 changes: 6 additions & 0 deletions renku/ui/service/serializers/common.py
Expand Up @@ -67,6 +67,12 @@ def set_branch_from_ref(self, data, **_):
return data


class GitCommitSHA:
"""Schema for a commit SHA."""

commit_sha = fields.String(load_default=None, metadata={"description": "Git commit SHA."})


class AsyncSchema(Schema):
"""Schema for adding a commit at the end of the operation."""

Expand Down
10 changes: 8 additions & 2 deletions renku/ui/service/serializers/config.py
Expand Up @@ -17,11 +17,17 @@

from marshmallow import Schema, fields

from renku.ui.service.serializers.common import AsyncSchema, MigrateSchema, RemoteRepositorySchema, RenkuSyncSchema
from renku.ui.service.serializers.common import (
AsyncSchema,
GitCommitSHA,
MigrateSchema,
RemoteRepositorySchema,
RenkuSyncSchema,
)
from renku.ui.service.serializers.rpc import JsonRPCResponse


class ConfigShowRequest(RemoteRepositorySchema):
class ConfigShowRequest(RemoteRepositorySchema, GitCommitSHA):
"""Request schema for config show."""


Expand Down
5 changes: 3 additions & 2 deletions renku/ui/service/serializers/datasets.py
Expand Up @@ -22,6 +22,7 @@
from renku.domain_model.dataset import ImageObjectRequestJson
from renku.ui.service.serializers.common import (
AsyncSchema,
GitCommitSHA,
JobDetailsResponse,
MigrateSchema,
RemoteRepositorySchema,
Expand Down Expand Up @@ -120,7 +121,7 @@ class DatasetAddResponseRPC(JsonRPCResponse):
result = fields.Nested(DatasetAddResponse)


class DatasetListRequest(RemoteRepositorySchema):
class DatasetListRequest(RemoteRepositorySchema, GitCommitSHA):
"""Request schema for dataset list view."""


Expand All @@ -142,7 +143,7 @@ class DatasetListResponseRPC(JsonRPCResponse):
result = fields.Nested(DatasetListResponse)


class DatasetFilesListRequest(DatasetSlugSchema, RemoteRepositorySchema):
class DatasetFilesListRequest(DatasetSlugSchema, RemoteRepositorySchema, GitCommitSHA):
"""Request schema for dataset files list view."""


Expand Down
4 changes: 2 additions & 2 deletions renku/ui/service/serializers/graph.py
Expand Up @@ -16,11 +16,11 @@
"""Renku graph serializers."""
from marshmallow import Schema, fields, validate

from renku.ui.service.serializers.common import AsyncSchema, MigrateSchema, RemoteRepositorySchema
from renku.ui.service.serializers.common import AsyncSchema, GitCommitSHA, MigrateSchema, RemoteRepositorySchema
from renku.ui.service.serializers.rpc import JsonRPCResponse


class GraphExportRequest(AsyncSchema, RemoteRepositorySchema, MigrateSchema):
class GraphExportRequest(AsyncSchema, RemoteRepositorySchema, MigrateSchema, GitCommitSHA):
"""Request schema for dataset list view."""

callback_url = fields.URL()
Expand Down
3 changes: 2 additions & 1 deletion renku/ui/service/serializers/project.py
Expand Up @@ -21,6 +21,7 @@
from renku.domain_model.dataset import ImageObjectRequestJson
from renku.ui.service.serializers.common import (
AsyncSchema,
GitCommitSHA,
MigrateSchema,
RemoteRepositoryBaseSchema,
RemoteRepositorySchema,
Expand All @@ -29,7 +30,7 @@
from renku.ui.service.serializers.rpc import JsonRPCResponse


class ProjectShowRequest(AsyncSchema, RemoteRepositorySchema, MigrateSchema):
class ProjectShowRequest(AsyncSchema, RemoteRepositorySchema, MigrateSchema, GitCommitSHA):
"""Project show metadata request."""


Expand Down
8 changes: 4 additions & 4 deletions renku/ui/service/serializers/workflows.py
Expand Up @@ -22,11 +22,11 @@
from renku.domain_model.dataset import DatasetCreatorsJson
from renku.infrastructure.persistent import Persistent
from renku.ui.cli.utils.plugins import get_supported_formats
from renku.ui.service.serializers.common import RemoteRepositorySchema
from renku.ui.service.serializers.common import GitCommitSHA, RemoteRepositorySchema
from renku.ui.service.serializers.rpc import JsonRPCResponse


class WorkflowPlansListRequest(RemoteRepositorySchema):
class WorkflowPlansListRequest(RemoteRepositorySchema, GitCommitSHA):
"""Request schema for plan list view."""


Expand Down Expand Up @@ -85,7 +85,7 @@ class WorkflowPlansListResponseRPC(JsonRPCResponse):
result = fields.Nested(WorkflowPlansListResponse)


class WorkflowPlansShowRequest(RemoteRepositorySchema):
class WorkflowPlansShowRequest(RemoteRepositorySchema, GitCommitSHA):
"""Request schema for plan show view."""

plan_id = fields.String(required=True)
Expand Down Expand Up @@ -222,7 +222,7 @@ class WorkflowPlansShowResponseRPC(JsonRPCResponse):
)


class WorkflowPlansExportRequest(RemoteRepositorySchema):
class WorkflowPlansExportRequest(RemoteRepositorySchema, GitCommitSHA):
"""Request schema for exporting a plan."""

plan_id = fields.String(required=True)
Expand Down