Skip to content

Commit

Permalink
fix(core): update template checksum when updating Dockerfile
Browse files Browse the repository at this point in the history
  • Loading branch information
m-alisafaee committed Mar 7, 2023
1 parent 4220f4e commit a1b882a
Show file tree
Hide file tree
Showing 18 changed files with 244 additions and 51 deletions.
2 changes: 1 addition & 1 deletion renku/command/clone.py
Expand Up @@ -62,8 +62,8 @@ def _project_clone(
Tuple of cloned ``Repository`` and whether it's a Renku project or not.
"""
from renku.command.mergetool import setup_mergetool
from renku.core.migration.migrate import is_renku_project
from renku.core.util.git import clone_renku_repository
from renku.core.util.metadata import is_renku_project

install_lfs = project_context.external_storage_requested

Expand Down
8 changes: 2 additions & 6 deletions renku/command/migrate.py
Expand Up @@ -177,13 +177,9 @@ def check_project():


def _check_project():
from renku.core.migration.migrate import (
is_docker_update_possible,
is_migration_required,
is_project_unsupported,
is_renku_project,
)
from renku.core.migration.migrate import is_docker_update_possible, is_migration_required, is_project_unsupported
from renku.core.template.usecase import check_for_template_update
from renku.core.util.metadata import is_renku_project

if not is_renku_project():
return NON_RENKU_REPOSITORY
Expand Down
4 changes: 2 additions & 2 deletions renku/core/migration/m_0009__new_metadata_storage.py
Expand Up @@ -349,7 +349,7 @@ def _process_workflows(
migration_context: MigrationContext, activity_gateway: IActivityGateway, commit: "Commit", remove: bool
):

for file in commit.get_changes(paths=f"{project_context.metadata_path}/workflow/*.yaml"):
for file in commit.get_changes(f"{project_context.metadata_path}/workflow/*.yaml"):
if file.deleted:
continue

Expand Down Expand Up @@ -640,7 +640,7 @@ def _process_datasets(
is_last_commit,
preserve_identifiers,
):
changes = commit.get_changes(paths=".renku/datasets/*/*.yml")
changes = commit.get_changes(".renku/datasets/*/*.yml")
changed_paths = [c.b_path for c in changes if not c.deleted]
paths = [p for p in changed_paths if len(Path(p).parents) == 4] # Exclude files that are not in the right place
deleted_paths = [c.a_path for c in changes if c.deleted]
Expand Down
43 changes: 22 additions & 21 deletions renku/core/migration/migrate.py
Expand Up @@ -47,8 +47,15 @@
)
from renku.core.interface.project_gateway import IProjectGateway
from renku.core.migration.models.migration import MigrationContext, MigrationType
from renku.core.migration.utils import OLD_METADATA_PATH, is_using_temporary_datasets_path, read_project_version
from renku.core.migration.utils import is_using_temporary_datasets_path, read_project_version
from renku.core.template.usecase import update_dockerfile_checksum
from renku.core.util import communication
from renku.core.util.metadata import (
is_renku_project,
read_renku_version_from_dockerfile,
replace_renku_version_in_dockerfile,
)
from renku.core.util.os import hash_string
from renku.domain_model.project import ProjectTemplateMetadata
from renku.domain_model.project_context import project_context

Expand Down Expand Up @@ -209,37 +216,39 @@ def _update_dockerfile(check_only=False):
"""Update the dockerfile to the newest version of renku."""
from renku import __version__

if not project_context.docker_path.exists():
if not project_context.dockerfile_path.exists():
return False, None, None

communication.echo("Updating dockerfile...")

with open(project_context.docker_path, "r") as f:
with open(project_context.dockerfile_path, "r") as f:
dockerfile_content = f.read()

current_version = Version(__version__)
m = re.search(r"^ARG RENKU_VERSION=(\d+\.\d+\.\d+)$", dockerfile_content, flags=re.MULTILINE)
if not m:
docker_version = read_renku_version_from_dockerfile()
if not docker_version:
if check_only:
return False, None, None
raise DockerfileUpdateError(
"Couldn't update renku-python version in Dockerfile, as it doesn't contain an 'ARG RENKU_VERSION=...' line."
)

docker_version = Version(m.group(1))

current_version = Version(__version__)
if docker_version >= current_version:
return True, False, str(docker_version)

if check_only:
return True, True, str(docker_version)

dockerfile_content = re.sub(
r"^ARG RENKU_VERSION=\d+\.\d+\.\d+$", f"ARG RENKU_VERSION={__version__}", dockerfile_content, flags=re.MULTILINE
)
new_content = replace_renku_version_in_dockerfile(dockerfile_content=dockerfile_content, version=__version__)
new_checksum = hash_string(new_content)

with open(project_context.docker_path, "w") as f:
f.write(dockerfile_content)
try:
update_dockerfile_checksum(new_checksum=new_checksum)
except DockerfileUpdateError:
pass

with open(project_context.dockerfile_path, "w") as f:
f.write(new_content)

communication.echo("Updated dockerfile.")

Expand All @@ -254,14 +263,6 @@ def get_project_version():
return 1


def is_renku_project() -> bool:
"""Check if repository is a renku project."""
try:
return project_context.project is not None
except ValueError: # NOTE: Error in loading due to an older schema
return project_context.metadata_path.joinpath(OLD_METADATA_PATH).exists()


def get_migrations():
"""Return a sorted list of versions and migration modules."""
migrations = []
Expand Down
2 changes: 1 addition & 1 deletion renku/core/session/session.py
Expand Up @@ -134,7 +134,7 @@ def session_start(
abort=True,
)
with communication.busy(msg=f"Building image {image_name}"):
provider_api.build_image(project_context.docker_path.parent, image_name, config)
provider_api.build_image(project_context.dockerfile_path.parent, image_name, config)
communication.echo(f"Image {image_name} built successfully.")
else:
if not provider_api.find_image(image_name, config):
Expand Down
6 changes: 6 additions & 0 deletions renku/core/template/template.py
Expand Up @@ -184,6 +184,8 @@ def get_file_actions(
rendered_template: RenderedTemplate, template_action: TemplateAction, interactive
) -> Dict[str, FileAction]:
"""Render a template regarding files in a project."""
from renku.core.template.usecase import is_dockerfile_updated_by_user

if interactive and not communication.has_prompt():
raise errors.ParameterError("Cannot use interactive mode with no prompt")

Expand Down Expand Up @@ -220,6 +222,8 @@ def get_action_for_set(relative_path: str, destination: Path, new_checksum: Opti
elif interactive:
overwrite = communication.confirm(f"Overwrite {relative_path}?", default=True)
return FileAction.OVERWRITE if overwrite else FileAction.KEEP
elif relative_path == "Dockerfile" and not is_dockerfile_updated_by_user():
return FileAction.OVERWRITE
elif should_keep(relative_path):
return FileAction.KEEP
else:
Expand Down Expand Up @@ -248,6 +252,8 @@ def get_action_for_update(
return FileAction.OVERWRITE if overwrite else FileAction.KEEP
elif not remote_changes:
return FileAction.IGNORE_UNCHANGED_REMOTE
elif relative_path == "Dockerfile" and not is_dockerfile_updated_by_user():
return FileAction.OVERWRITE
elif file_deleted or local_changes:
if relative_path in immutable_files:
# NOTE: There are local changes in a file that should not be changed by users, and the file was
Expand Down
65 changes: 63 additions & 2 deletions renku/core/template/usecase.py
Expand Up @@ -16,7 +16,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Template use cases."""

import json
import os
import tempfile
from pathlib import Path
Expand All @@ -29,7 +29,6 @@
from renku.command.view_model.template import TemplateChangeViewModel, TemplateViewModel
from renku.core import errors
from renku.core.interface.project_gateway import IProjectGateway
from renku.core.migration.migrate import is_renku_project
from renku.core.template.template import (
FileAction,
RepositoryTemplates,
Expand All @@ -38,16 +37,78 @@
fetch_templates_source,
get_file_actions,
has_template_checksum,
read_template_checksum,
set_template_parameters,
write_template_checksum,
)
from renku.core.util import communication
from renku.core.util.metadata import is_renku_project, replace_renku_version_in_dockerfile
from renku.core.util.os import hash_file, hash_string
from renku.core.util.tabulate import tabulate
from renku.domain_model.project import Project
from renku.domain_model.project_context import project_context
from renku.domain_model.template import RenderedTemplate, Template, TemplateMetadata, TemplatesSource
from renku.infrastructure.repository import Repository


def update_dockerfile_checksum(new_checksum: str):
    """Update ``Dockerfile`` template checksum if possible.

    Args:
        new_checksum(str): Checksum to record for the ``Dockerfile`` entry of the template checksums.

    Raises:
        errors.DockerfileUpdateError: If the project has no ``Dockerfile`` or if the ``Dockerfile`` is considered
            modified by the user (in which case the recorded checksum must stay untouched).
    """
    if not project_context.dockerfile_path.exists():
        raise errors.DockerfileUpdateError("Project doesn't have a Dockerfile")
    if is_dockerfile_updated_by_user():
        raise errors.DockerfileUpdateError("Cannot update Dockerfile checksum because it was updated by the user")

    # NOTE: Only the ``Dockerfile`` entry is replaced; all other recorded checksums are written back unchanged
    checksums = read_template_checksum()
    checksums["Dockerfile"] = new_checksum
    write_template_checksum(checksums)


def does_dockerfile_contains_only_version_change() -> bool:
    """Return True if every recorded change to the Dockerfile only swaps the Renku version line.

    Each commit touching the Dockerfile (except the last one yielded by ``iterate_commits``) is inspected; a commit
    qualifies when its patch consists of exactly one added and one deleted line, both containing
    ``ARG RENKU_VERSION=``.
    """
    dockerfile = project_context.dockerfile_path
    history = list(project_context.repository.iterate_commits(dockerfile))

    # NOTE: The last entry is skipped since it is the commit that added the Dockerfile (this presumes that
    # ``iterate_commits`` yields newest commits first -- verify against the repository implementation)
    for revision in history[:-1]:
        file_changes = revision.get_changes(dockerfile, patch=True)
        if file_changes:
            hunks = file_changes[0].diff
            # NOTE: A pure version bump is one removed line plus one added line, both being the version line
            is_version_swap = (
                len(hunks) == 2
                and {hunk.change_type for hunk in hunks} == {"A", "D"}
                and all("ARG RENKU_VERSION=" in hunk.text for hunk in hunks)
            )
            if not is_version_swap:
                return False

    return True


def is_dockerfile_updated_by_user() -> bool:
    """Return whether the ``Dockerfile`` is considered modified by the user.

    The file counts as unmodified when no template checksum or no Dockerfile exists, when its current checksum
    equals the recorded one, when it equals the recorded one after putting back the Renku version stored in the
    template metadata, or when the Dockerfile history only contains Renku version changes.
    """
    path = project_context.dockerfile_path

    if not (has_template_checksum() and path.exists()):
        return False

    recorded_checksum = read_template_checksum().get("Dockerfile")
    if recorded_checksum == hash_file(path):  # Dockerfile was never updated
        return False

    # NOTE: Re-create the Dockerfile as it would look with the Renku version stored in the template metadata and
    # compare its checksum with the one recorded when the template was set/updated
    template_metadata = json.loads(project_context.project.template_metadata.metadata)
    recorded_renku_version = template_metadata.get("__renku_version__")
    reverted_content = replace_renku_version_in_dockerfile(path.read_text(), recorded_renku_version)
    if recorded_checksum == hash_string(reverted_content):
        return False

    return not does_dockerfile_contains_only_version_change()


@validate_arguments(config=dict(arbitrary_types_allowed=True))
def list_templates(source: Optional[str], reference: Optional[str]) -> List[TemplateViewModel]:
"""Return available templates from a source."""
Expand Down
30 changes: 25 additions & 5 deletions renku/core/util/metadata.py
Expand Up @@ -29,6 +29,7 @@
from renku.core import errors
from renku.core.config import get_value, set_value
from renku.core.constant import RENKU_HOME, RENKU_PROTECTED_PATHS, RENKU_TMP
from renku.core.migration.utils import OLD_METADATA_PATH
from renku.core.util import communication
from renku.core.util.os import is_subpath

Expand Down Expand Up @@ -97,25 +98,34 @@ def is_external_file(path: Union[Path, str], project_path: Path):
return str(os.path.join(RENKU_HOME, POINTERS)) in pointer


def read_renku_version_from_dockerfile(path: Optional[Union[Path, str]] = None) -> Optional[str]:
def read_renku_version_from_dockerfile(path: Optional[Union[Path, str]] = None) -> Optional[Version]:
"""Read RENKU_VERSION from the content of path if a valid version is available."""
from renku.domain_model.project_context import project_context

path = Path(path) if path else project_context.docker_path
path = Path(path) if path else project_context.dockerfile_path
if not path.exists():
return None

docker_content = path.read_text()
m = re.search(r"^\s*ARG RENKU_VERSION=(.+)$", docker_content, flags=re.MULTILINE)
m = re.search(r"^\s*ARG RENKU_VERSION=(\d+\.\d+\.\d+\S*)$", path.read_text(), flags=re.MULTILINE)
if not m:
return None

try:
return str(Version(m.group(1)))
return Version(m.group(1))
except ValueError:
return None


def replace_renku_version_in_dockerfile(dockerfile_content: str, version: str) -> str:
    """Replace Renku version in the Dockerfile.

    Only lines of the form ``ARG RENKU_VERSION=<major>.<minor>.<patch>[suffix]`` are rewritten; everything else,
    including blank lines before such a line and the line's own indentation, is returned unchanged.

    Args:
        dockerfile_content(str): Full text of the Dockerfile.
        version(str): Version to put after ``ARG RENKU_VERSION=``.

    Returns:
        str: The Dockerfile content with the version replaced; the input content if no version line is found.
    """

    def _rewrite(match: "re.Match") -> str:
        # NOTE: A callable is used so that ``version`` is inserted literally (no backslash/group expansion)
        return f"{match.group(1)}ARG RENKU_VERSION={version}"

    # NOTE: The leading whitespace is limited to spaces/tabs: with ``re.MULTILINE`` a leading ``\s*`` would also
    # match preceding newlines and silently delete the blank lines in front of the version line
    return re.sub(
        r"^([ \t]*)ARG RENKU_VERSION=\d+\.\d+\.\d+\S*$",
        _rewrite,
        dockerfile_content,
        flags=re.MULTILINE,
    )


def make_project_temp_dir(project_path: Path) -> Path:
"""Create a temporary directory inside project's temp path."""
base = project_path / RENKU_HOME / RENKU_TMP
Expand Down Expand Up @@ -180,3 +190,13 @@ def is_protected_path(path: Path) -> bool:
return True

return False


def is_renku_project() -> bool:
    """Check if repository is a renku project.

    Returns:
        bool: True if the project metadata can be loaded and is not ``None``; if loading raises ``ValueError``,
            True only if the old metadata path exists inside the project's metadata directory.
    """
    from renku.domain_model.project_context import project_context

    try:
        project = project_context.project
    except ValueError:  # NOTE: Error in loading due to an older schema
        legacy_metadata = project_context.metadata_path.joinpath(OLD_METADATA_PATH)
        return legacy_metadata.exists()
    else:
        return project is not None
9 changes: 9 additions & 0 deletions renku/core/util/os.py
Expand Up @@ -20,6 +20,7 @@
import fnmatch
import glob
import hashlib
import io
import os
import re
import shutil
Expand Down Expand Up @@ -248,6 +249,14 @@ def hash_file(path: Union[Path, str], hash_type: str = "sha256") -> Optional[str
return hash_file_descriptor(f, hash_type)


def hash_string(content: str, hash_type: str = "sha256") -> str:
    """Hash a string.

    The string is UTF-8 encoded and fed through ``hash_file_descriptor`` as an in-memory binary stream.

    Args:
        content(str): Text to hash.
        hash_type(str): Name of the hash algorithm, passed on to ``hash_file_descriptor`` (Default value = "sha256").

    Returns:
        str: The value returned by ``hash_file_descriptor`` for the encoded content.
    """
    return hash_file_descriptor(io.BytesIO(content.encode("utf-8")), hash_type)


def hash_file_descriptor(file: BinaryIO, hash_type: str = "sha256") -> str:
"""Hash content of a file descriptor."""
hash_type = hash_type.lower()
Expand Down
2 changes: 1 addition & 1 deletion renku/domain_model/project_context.py
Expand Up @@ -95,7 +95,7 @@ def dataset_images_path(self) -> Path:
return self.path / RENKU_HOME / DATASET_IMAGES

@property
def docker_path(self) -> Path:
def dockerfile_path(self) -> Path:
"""Path to the Dockerfile."""
return self.path / DOCKERFILE

Expand Down
2 changes: 1 addition & 1 deletion renku/domain_model/template.py
Expand Up @@ -534,7 +534,7 @@ def from_project(cls, project: Optional["Project"]) -> "TemplateMetadata":
# NOTE: Always set __renku_version__ to the value read from the Dockerfile (if available) since setting/updating
# the template doesn't change project's metadata version and shouldn't update the Renku version either
renku_version = metadata.get("__renku_version__")
metadata["__renku_version__"] = read_renku_version_from_dockerfile() or renku_version or __version__
metadata["__renku_version__"] = str(read_renku_version_from_dockerfile()) or renku_version or __version__

return cls(metadata=metadata, immutable_files=immutable_files)

Expand Down
2 changes: 1 addition & 1 deletion renku/infrastructure/gateway/database_gateway.py
Expand Up @@ -148,7 +148,7 @@ def get_modified_objects_from_revision(self, revision_or_range: str) -> Generato
commits = [repository.get_commit(revision_or_range)]

for commit in commits:
for file in commit.get_changes(paths=f"{project_context.database_path}/**"):
for file in commit.get_changes(f"{project_context.database_path}/**"):
if file.deleted:
continue

Expand Down

0 comments on commit a1b882a

Please sign in to comment.