Skip to content

Commit

Permalink
Fix multiple projects in cache issue
Browse files Browse the repository at this point in the history
  • Loading branch information
Panaetius committed Oct 13, 2022
1 parent 7bb8aeb commit 0d15b36
Show file tree
Hide file tree
Showing 11 changed files with 50 additions and 41 deletions.
5 changes: 3 additions & 2 deletions renku/core/util/contexts.py
Expand Up @@ -108,12 +108,13 @@ def measure(message="TOTAL"):


@contextlib.contextmanager
def renku_project_context(path):
def renku_project_context(path, check_git_path=True):
"""Provide a project context with repo path injected."""
from renku.core.util.git import get_git_path
from renku.domain_model.project_context import project_context

path = get_git_path(path)
if check_git_path:
path = get_git_path(path)

with project_context.with_path(path=path), chdir(path):
project_context.external_storage_requested = True
Expand Down
2 changes: 1 addition & 1 deletion renku/ui/service/cache/models/project.py
Expand Up @@ -58,7 +58,7 @@ class Project(Model):
@property
def abs_path(self):
"""Full path of cached project."""
return CACHE_PROJECTS_PATH / self.user_id / self.project_id / self.owner / self.slug
return CACHE_PROJECTS_PATH / self.user_id / self.owner / self.slug

def read_lock(self, timeout: Optional[float] = None):
"""Shared read lock on the project."""
Expand Down
6 changes: 4 additions & 2 deletions renku/ui/service/cache/projects.py
Expand Up @@ -30,12 +30,14 @@ class ProjectManagementCache(BaseCache):

project_schema = ProjectSchema()

def make_project(self, user, project_data):
def make_project(self, user, project_data, persist=True):
"""Store user project metadata."""
project_data.update({"user_id": user.user_id})

project_obj = self.project_schema.load(project_data, unknown=EXCLUDE)
project_obj.save()

if persist:
project_obj.save()

return project_obj

Expand Down
57 changes: 34 additions & 23 deletions renku/ui/service/controllers/utils/project_clone.py
Expand Up @@ -17,8 +17,8 @@
# limitations under the License.
"""Utilities for renku service controllers."""
from renku.command.clone import project_clone_command
from renku.core.errors import GitCommandError
from renku.core.util.contexts import renku_project_context
from renku.ui.service.cache.models.project import Project
from renku.ui.service.logger import service_log
from renku.ui.service.views.decorators import requires_cache

Expand All @@ -30,31 +30,42 @@ def user_project_clone(cache, user_data, project_data):
project_data.pop("project_id")

user = cache.ensure_user(user_data)
project = cache.make_project(user, project_data)
project = cache.make_project(user, project_data, persist=False)
project.abs_path.mkdir(parents=True, exist_ok=True)

try:
with project.write_lock(), renku_project_context(project.abs_path):
repo, project.initialized = (
project_clone_command()
.build()
.execute(
project_data["url_with_auth"],
path=project.abs_path,
depth=project_data["depth"],
raise_git_except=True,
config={
"user.name": project_data["fullname"],
"user.email": project_data["email"],
"pull.rebase": False,
},
checkout_revision=project_data["ref"],
with project.write_lock(), renku_project_context(project.abs_path):
# NOTE: If two requests ran at the same time, by the time we acquire the lock a project might already
# be cloned by an earlier request.
if "git_url" in project_data and project_data["git_url"] is not None:
try:
found_project = Project.get(
(Project.user_id == user_data["user_id"])
& (Project.git_url == project_data["git_url"])
& (Project.project_id != project.project_id)
)
).output
project.save()
except GitCommandError as e:
cache.invalidate_project(user, project.project_id)
raise e

service_log.debug(f"project already cloned, skipping clone: {project_data['git_url']}")
service_log.debug(f"project folder exists: {found_project.exists()}")
return found_project
except ValueError:
pass
repo, project.initialized = (
project_clone_command()
.build()
.execute(
project_data["url_with_auth"],
path=project.abs_path,
depth=project_data["depth"],
raise_git_except=True,
config={
"user.name": project_data["fullname"],
"user.email": project_data["email"],
"pull.rebase": False,
},
checkout_revision=project_data["ref"],
)
).output
project.save()

service_log.debug(f"project successfully cloned: {repo}")
service_log.debug(f"project folder exists: {project.exists()}")
Expand Down
2 changes: 1 addition & 1 deletion renku/ui/service/errors.py
Expand Up @@ -632,7 +632,7 @@ class IntermittentProjectIdError(ServiceError):
"""

code = SVC_ERROR_INTERMITTENT + 1
userMessage = "An unexpcted error occurred. This may be a temporary problem. Please try again in a few minutes."
userMessage = "An unexpected error occurred. This may be a temporary problem. Please try again in a few minutes."
devMessage = (
"Project id cannot be found. It may be a temporary problem. Check the Sentry exception for further details."
)
Expand Down
2 changes: 1 addition & 1 deletion renku/ui/service/utils/__init__.py
Expand Up @@ -27,7 +27,7 @@ def make_project_path(user, project):
valid_project = project and "owner" in project and "name" in project and "project_id" in project

if valid_user and valid_project:
return CACHE_PROJECTS_PATH / user["user_id"] / project["project_id"] / project["owner"] / project["slug"]
return CACHE_PROJECTS_PATH / user["user_id"] / project["owner"] / project["slug"]


def make_file_path(user, cached_file):
Expand Down
2 changes: 1 addition & 1 deletion renku/ui/service/views/error_handlers.py
Expand Up @@ -197,7 +197,7 @@ def decorated_function(*args, **kwargs):
except GitError as e:
return error_response(ProgramGitError(e, e.message if e.message else None))
except (Exception, BaseException, OSError, IOError) as e:
if hasattr(e, "stderr"):
if hasattr(e, "stderr") and e.stderr:
error_message = " ".join(e.stderr.strip().split("\n"))
else:
error_message = str(e)
Expand Down
2 changes: 1 addition & 1 deletion tests/service/controllers/utils/test_project_clone.py
Expand Up @@ -70,7 +70,7 @@ def test_service_user_project_clone(svc_client_cache):
assert project_two.exists()

new_path = project_two.abs_path
assert old_path != new_path
assert old_path == new_path
user = cache.get_user(user_data["user_id"])
projects = [project.project_id for project in cache.get_projects(user)]
assert project_one.project_id in projects
Expand Down
3 changes: 2 additions & 1 deletion tests/service/views/test_cache_views.py
Expand Up @@ -514,9 +514,10 @@ def test_clone_projects_multiple(svc_client, identity_headers, it_remote_repo_ur

pids = [p["project_id"] for p in response.json["result"]["projects"]]
assert last_pid in pids
assert 1 == len(pids)

for inserted in project_ids:
assert inserted["project_id"] not in pids
assert inserted["project_id"] == last_pid


@pytest.mark.service
Expand Down
2 changes: 1 addition & 1 deletion tests/service/views/test_dataset_views.py
Expand Up @@ -160,7 +160,7 @@ def test_create_dataset_wrong_ref_view(svc_client_with_repo):

response = svc_client.post("/datasets.create", data=json.dumps(payload), headers=headers)
assert_rpc_response(response, "error")
assert IntermittentProjectIdError.code == response.json["error"]["code"]
assert IntermittentProjectIdError.code == response.json["error"]["code"], response.json


@pytest.mark.service
Expand Down
8 changes: 1 addition & 7 deletions tests/service/views/test_templates_views.py
Expand Up @@ -143,13 +143,7 @@ def test_create_project_from_template(svc_client_templates_creation, with_inject

# NOTE: assert correct git user is set on new project
user_data = RenkuHeaders.decode_user(headers["Renku-User"])
project_path = (
CACHE_PROJECTS_PATH
/ user_data["user_id"]
/ response.json["result"]["project_id"]
/ payload["project_namespace"]
/ stripped_name
)
project_path = CACHE_PROJECTS_PATH / user_data["user_id"] / payload["project_namespace"] / stripped_name
reader = Repository(project_path).get_configuration()
assert reader.get_value("user", "email") == user_data["email"]
assert reader.get_value("user", "name") == user_data["name"]
Expand Down

0 comments on commit 0d15b36

Please sign in to comment.