diff --git a/renku/core/util/contexts.py b/renku/core/util/contexts.py index aea78db8b5..3288349ded 100644 --- a/renku/core/util/contexts.py +++ b/renku/core/util/contexts.py @@ -108,12 +108,13 @@ def measure(message="TOTAL"): @contextlib.contextmanager -def renku_project_context(path): +def renku_project_context(path, check_git_path=True): """Provide a project context with repo path injected.""" from renku.core.util.git import get_git_path from renku.domain_model.project_context import project_context - path = get_git_path(path) + if check_git_path: + path = get_git_path(path) with project_context.with_path(path=path), chdir(path): project_context.external_storage_requested = True diff --git a/renku/ui/service/cache/models/project.py b/renku/ui/service/cache/models/project.py index 4555678442..7bf5682d66 100644 --- a/renku/ui/service/cache/models/project.py +++ b/renku/ui/service/cache/models/project.py @@ -58,7 +58,7 @@ class Project(Model): @property def abs_path(self): """Full path of cached project.""" - return CACHE_PROJECTS_PATH / self.user_id / self.project_id / self.owner / self.slug + return CACHE_PROJECTS_PATH / self.user_id / self.owner / self.slug def read_lock(self, timeout: Optional[float] = None): """Shared read lock on the project.""" diff --git a/renku/ui/service/cache/projects.py b/renku/ui/service/cache/projects.py index 4e874663cb..9d67fb4fa3 100644 --- a/renku/ui/service/cache/projects.py +++ b/renku/ui/service/cache/projects.py @@ -30,12 +30,14 @@ class ProjectManagementCache(BaseCache): project_schema = ProjectSchema() - def make_project(self, user, project_data): + def make_project(self, user, project_data, persist=True): """Store user project metadata.""" project_data.update({"user_id": user.user_id}) project_obj = self.project_schema.load(project_data, unknown=EXCLUDE) - project_obj.save() + + if persist: + project_obj.save() return project_obj diff --git a/renku/ui/service/controllers/utils/project_clone.py b/renku/ui/service/controllers/utils/project_clone.py index 5852497c7c..8b4ae3a4f1 100644 --- a/renku/ui/service/controllers/utils/project_clone.py +++ b/renku/ui/service/controllers/utils/project_clone.py @@ -17,8 +17,8 @@ # limitations under the License. """Utilities for renku service controllers.""" from renku.command.clone import project_clone_command -from renku.core.errors import GitCommandError from renku.core.util.contexts import renku_project_context +from renku.ui.service.cache.models.project import Project from renku.ui.service.logger import service_log from renku.ui.service.views.decorators import requires_cache @@ -30,31 +30,42 @@ def user_project_clone(cache, user_data, project_data): project_data.pop("project_id") user = cache.ensure_user(user_data) - project = cache.make_project(user, project_data) + project = cache.make_project(user, project_data, persist=False) project.abs_path.mkdir(parents=True, exist_ok=True) - try: - with project.write_lock(), renku_project_context(project.abs_path): - repo, project.initialized = ( - project_clone_command() - .build() - .execute( - project_data["url_with_auth"], - path=project.abs_path, - depth=project_data["depth"], - raise_git_except=True, - config={ - "user.name": project_data["fullname"], - "user.email": project_data["email"], - "pull.rebase": False, - }, - checkout_revision=project_data["ref"], + with project.write_lock(), renku_project_context(project.abs_path): + # NOTE: If two requests ran at the same time, by the time we acquire the lock a project might already + # be cloned by an earlier request. + if "git_url" in project_data and project_data["git_url"] is not None: + try: + found_project = Project.get( + (Project.user_id == user_data["user_id"]) + & (Project.git_url == project_data["git_url"]) + & (Project.project_id != project.project_id) ) - ).output - project.save() - except GitCommandError as e: - cache.invalidate_project(user, project.project_id) - raise e + + service_log.debug(f"project already cloned, skipping clone: {project_data['git_url']}") + service_log.debug(f"project folder exists: {found_project.exists()}") + return found_project + except ValueError: + pass + repo, project.initialized = ( + project_clone_command() + .build() + .execute( + project_data["url_with_auth"], + path=project.abs_path, + depth=project_data["depth"], + raise_git_except=True, + config={ + "user.name": project_data["fullname"], + "user.email": project_data["email"], + "pull.rebase": False, + }, + checkout_revision=project_data["ref"], + ) + ).output + project.save() service_log.debug(f"project successfully cloned: {repo}") service_log.debug(f"project folder exists: {project.exists()}") diff --git a/renku/ui/service/errors.py b/renku/ui/service/errors.py index a1dd557d7f..7f80a3be7d 100644 --- a/renku/ui/service/errors.py +++ b/renku/ui/service/errors.py @@ -632,7 +632,7 @@ class IntermittentProjectIdError(ServiceError): """ code = SVC_ERROR_INTERMITTENT + 1 - userMessage = "An unexpcted error occurred. This may be a temporary problem. Please try again in a few minutes." + userMessage = "An unexpected error occurred. This may be a temporary problem. Please try again in a few minutes." devMessage = ( "Project id cannot be found. It may be a temporary problem. Check the Sentry exception for further details." ) diff --git a/renku/ui/service/utils/__init__.py b/renku/ui/service/utils/__init__.py index 0126a46dbb..2dab2cf54a 100644 --- a/renku/ui/service/utils/__init__.py +++ b/renku/ui/service/utils/__init__.py @@ -27,7 +27,7 @@ def make_project_path(user, project): valid_project = project and "owner" in project and "name" in project and "project_id" in project if valid_user and valid_project: - return CACHE_PROJECTS_PATH / user["user_id"] / project["project_id"] / project["owner"] / project["slug"] + return CACHE_PROJECTS_PATH / user["user_id"] / project["owner"] / project["slug"] def make_file_path(user, cached_file): diff --git a/renku/ui/service/views/error_handlers.py b/renku/ui/service/views/error_handlers.py index c39f3c057f..a7118e8afd 100644 --- a/renku/ui/service/views/error_handlers.py +++ b/renku/ui/service/views/error_handlers.py @@ -197,7 +197,7 @@ def decorated_function(*args, **kwargs): except GitError as e: return error_response(ProgramGitError(e, e.message if e.message else None)) except (Exception, BaseException, OSError, IOError) as e: - if hasattr(e, "stderr"): + if hasattr(e, "stderr") and e.stderr: error_message = " ".join(e.stderr.strip().split("\n")) else: error_message = str(e) diff --git a/tests/service/controllers/utils/test_project_clone.py b/tests/service/controllers/utils/test_project_clone.py index 65dbf91182..9c4bfa5d84 100644 --- a/tests/service/controllers/utils/test_project_clone.py +++ b/tests/service/controllers/utils/test_project_clone.py @@ -70,7 +70,7 @@ def test_service_user_project_clone(svc_client_cache): assert project_two.exists() new_path = project_two.abs_path - assert old_path != new_path + assert old_path == new_path user = cache.get_user(user_data["user_id"]) projects = [project.project_id for project in cache.get_projects(user)] assert project_one.project_id in projects diff --git a/tests/service/views/test_cache_views.py b/tests/service/views/test_cache_views.py index 1343719922..93ab72c58e 100644 --- a/tests/service/views/test_cache_views.py +++ b/tests/service/views/test_cache_views.py @@ -514,9 +514,10 @@ def test_clone_projects_multiple(svc_client, identity_headers, it_remote_repo_ur pids = [p["project_id"] for p in response.json["result"]["projects"]] assert last_pid in pids + assert 1 == len(pids) for inserted in project_ids: - assert inserted["project_id"] not in pids + assert inserted["project_id"] == last_pid @pytest.mark.service diff --git a/tests/service/views/test_dataset_views.py b/tests/service/views/test_dataset_views.py index c709120298..b03d8d0b57 100644 --- a/tests/service/views/test_dataset_views.py +++ b/tests/service/views/test_dataset_views.py @@ -160,7 +160,7 @@ def test_create_dataset_wrong_ref_view(svc_client_with_repo): response = svc_client.post("/datasets.create", data=json.dumps(payload), headers=headers) assert_rpc_response(response, "error") - assert IntermittentProjectIdError.code == response.json["error"]["code"] + assert IntermittentProjectIdError.code == response.json["error"]["code"], response.json @pytest.mark.service diff --git a/tests/service/views/test_templates_views.py b/tests/service/views/test_templates_views.py index b98f3528ed..569c91487d 100644 --- a/tests/service/views/test_templates_views.py +++ b/tests/service/views/test_templates_views.py @@ -143,13 +143,7 @@ def test_create_project_from_template(svc_client_templates_creation, with_inject # NOTE: assert correct git user is set on new project user_data = RenkuHeaders.decode_user(headers["Renku-User"]) - project_path = ( - CACHE_PROJECTS_PATH - / user_data["user_id"] - / response.json["result"]["project_id"] - / payload["project_namespace"] - / stripped_name - ) + project_path = CACHE_PROJECTS_PATH / user_data["user_id"] / payload["project_namespace"] / stripped_name reader = Repository(project_path).get_configuration() assert reader.get_value("user", "email") == user_data["email"] assert reader.get_value("user", "name") == user_data["name"]