diff --git a/helm-chart/renku-core/values.yaml b/helm-chart/renku-core/values.yaml index f7f1e04d39..fca1371ef6 100644 --- a/helm-chart/renku-core/values.yaml +++ b/helm-chart/renku-core/values.yaml @@ -91,6 +91,15 @@ sentry: # versions is the list of different deployment that support different metadata versions. versions: latest: + name: v10 + prefix: "10" + nameOverride: "" + fullnameOverride: "" + image: + repository: renku/renku-core + tag: "v2.0.0" + pullPolicy: IfNotPresent + v9: name: v9 prefix: "9" nameOverride: "" diff --git a/renku/command/command_builder/database.py b/renku/command/command_builder/database.py index 39874ac0e6..858bec18c7 100644 --- a/renku/command/command_builder/database.py +++ b/renku/command/command_builder/database.py @@ -20,7 +20,7 @@ import json import os -from typing import TYPE_CHECKING, Optional +from typing import Optional from packaging.version import Version @@ -40,9 +40,6 @@ from renku.infrastructure.gateway.project_gateway import ProjectGateway from renku.infrastructure.storage.factory import StorageFactory -if TYPE_CHECKING: - from renku.domain_model.project import Project - class DatabaseCommand(Command): """Builder to get a database connection.""" @@ -50,12 +47,12 @@ class DatabaseCommand(Command): PRE_ORDER = 4 POST_ORDER = 5 - def __init__(self, builder: Command, write: bool = False, path: str = None, create: bool = False) -> None: + def __init__(self, builder: Command, write: bool = False, path: Optional[str] = None, create: bool = False) -> None: self._builder = builder self._write = write self._path = path self._create = create - self.project: Optional["Project"] = None + self.project_found: bool = False def _injection_pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: """Create a Database singleton.""" @@ -80,8 +77,9 @@ def _injection_pre_hook(self, builder: Command, context: dict, *args, **kwargs) return try: - self.project = project_gateway.get_project() - minimum_renku_version = 
Version(self.project.minimum_renku_version) + project = project_gateway.get_project() + minimum_renku_version = Version(project.minimum_renku_version) + self.project_found = True except (KeyError, ImportError, ValueError): try: with open(project_context.database_path / "project", "r") as f: @@ -102,13 +100,14 @@ def _injection_pre_hook(self, builder: Command, context: dict, *args, **kwargs) def _post_hook(self, builder: Command, context: dict, result: CommandResult, *args, **kwargs) -> None: from renku.domain_model.project import Project - if ( - self._write - and self.project is not None - and Version(self.project.minimum_renku_version) < Version(Project.minimum_renku_version) - ): - # NOTE: update minimum renku version on write as migrations might happen on the fly - self.project.minimum_renku_version = Project.minimum_renku_version + if self._write and self.project_found: + # NOTE: Fetch project again in case it was updated (the current reference would be out of date) + project_gateway = ProjectGateway() + project = project_gateway.get_project() + + if Version(project.minimum_renku_version) < Version(Project.minimum_renku_version): + # NOTE: update minimum renku version on write as migrations might happen on the fly + project.minimum_renku_version = Project.minimum_renku_version project_context.pop_context() diff --git a/renku/command/migrate.py b/renku/command/migrate.py index 20ea5f41ef..9ac360c944 100644 --- a/renku/command/migrate.py +++ b/renku/command/migrate.py @@ -92,10 +92,10 @@ def _template_migration_check(): try: project = project_context.project - template_source = project.template_source - template_ref = project.template_ref - template_id = project.template_id - except ValueError: + template_source = project.template_metadata.template_source + template_ref = project.template_metadata.template_ref + template_id = project.template_metadata.template_id + except (ValueError, AttributeError): project = None template_source = None template_ref = None @@ 
-188,6 +188,10 @@ def _check_project(): _ = project_context.project except ValueError: return MIGRATION_REQUIRED + else: + if hasattr(project_context.project, "template_source"): + # NOTE: v10 migration not done + return MIGRATION_REQUIRED # NOTE: ``project.automated_update`` is deprecated and we always allow template update for a project status = AUTOMATED_TEMPLATE_UPDATE_SUPPORTED @@ -213,7 +217,7 @@ def _check_immutable_template_files(paths: List[str]): Returns: List of immutable template files. """ - immutable_template_files = project_context.project.immutable_template_files or [] + immutable_template_files = project_context.project.template_metadata.immutable_template_files or [] return [p for p in paths if str(p) in immutable_template_files] diff --git a/renku/command/rollback.py b/renku/command/rollback.py index ab9c6102ae..eec3069294 100644 --- a/renku/command/rollback.py +++ b/renku/command/rollback.py @@ -266,14 +266,14 @@ def _get_modification_type_from_db(path: str) -> Optional[Tuple[str, str, str, d derived = database.get_by_id(db_object.derived_from) if db_object.name == derived.name: change_type = "modified" - if db_object.invalidated_at: + if db_object.date_removed: change_type = "restored" return ( f"Plan: {db_object.name}", change_type, f"plan_{db_object.name}", - db_object.invalidated_at or db_object.date_created, + db_object.date_removed or db_object.date_created, ) elif isinstance(db_object, Dataset): change_type = "removed" diff --git a/renku/command/schema/activity.py b/renku/command/schema/activity.py index 9c253d3cda..d19795efdc 100644 --- a/renku/command/schema/activity.py +++ b/renku/command/schema/activity.py @@ -17,7 +17,7 @@ # limitations under the License. 
"""Activity JSON-LD schema.""" -from marshmallow import EXCLUDE, pre_dump +from marshmallow import EXCLUDE from renku.command.schema.agent import PersonSchema, SoftwareAgentSchema from renku.command.schema.annotation import AnnotationSchema @@ -35,29 +35,6 @@ from renku.domain_model.provenance.parameter import ParameterValue -class _ObjectWrapper: - """Object wrapper that allows temporarily overriding fields of immutable objects.""" - - def __init__(self, wrapped, **override): - self.__wrapped = wrapped - self.__override = override - - def __getattr__(self, name): - if name in self.__override: - return self.__override[name] - - return getattr(self.__wrapped, name) - - -def _fix_id(obj): - """Fix ids under an activity that were wrong due to a bug.""" - - if not obj.id.startswith("/activities/") and not obj.id.startswith("/workflow-file-activity-collection/"): - obj = _ObjectWrapper(obj, id=f"/activities/{obj.id}") - - return obj - - class AssociationSchema(JsonLDSchema): """Association schema.""" @@ -72,11 +49,6 @@ class Meta: id = fields.Id() plan = Nested(prov.hadPlan, [PlanSchema, WorkflowFilePlanSchema, WorkflowFileCompositePlanSchema]) - @pre_dump - def _pre_dump(self, obj, **kwargs): - """Pre-dump hook.""" - return _fix_id(obj) - class UsageSchema(JsonLDSchema): """Usage schema.""" @@ -92,11 +64,6 @@ class Meta: # TODO: DatasetSchema, DatasetFileSchema entity = Nested(prov.entity, [EntitySchema, CollectionSchema]) - @pre_dump - def _pre_dump(self, obj, **kwargs): - """Pre-dump hook.""" - return _fix_id(obj) - class GenerationSchema(JsonLDSchema): """Generation schema.""" @@ -112,11 +79,6 @@ class Meta: # TODO: DatasetSchema, DatasetFileSchema entity = Nested(prov.qualifiedGeneration, [EntitySchema, CollectionSchema], reverse=True) - @pre_dump - def _pre_dump(self, obj, **kwargs): - """Pre-dump hook.""" - return _fix_id(obj) - class ParameterValueSchema(JsonLDSchema): """ParameterValue schema.""" @@ -133,11 +95,6 @@ class Meta: parameter = 
fields.IRI(schema.valueReference, attribute="parameter_id") value = fields.Raw(schema.value) - @pre_dump - def _pre_dump(self, obj, **kwargs): - """Pre-dump hook.""" - return _fix_id(obj) - class ActivitySchema(JsonLDSchema): """Activity schema.""" @@ -167,11 +124,6 @@ class Meta: started_at_time = fields.DateTime(prov.startedAtTime, add_value_types=True) usages = Nested(prov.qualifiedUsage, UsageSchema, many=True) - @pre_dump - def _pre_dump(self, obj, **kwargs): - """Pre-dump hook.""" - return _fix_id(obj) - class WorkflowFileActivityCollectionSchema(JsonLDSchema): """WorkflowFileActivityCollection schema.""" diff --git a/renku/command/schema/composite_plan.py b/renku/command/schema/composite_plan.py index 5d1951e526..978839f53c 100644 --- a/renku/command/schema/composite_plan.py +++ b/renku/command/schema/composite_plan.py @@ -41,7 +41,8 @@ class Meta: creators = Nested(schema.creator, PersonSchema, many=True) mappings = Nested(renku.hasMappings, [ParameterMappingSchema], many=True, load_default=None) date_created = fields.DateTime(schema.dateCreated, format="iso") - invalidated_at = fields.DateTime(prov.invalidatedAtTime, format="iso") + date_modified = fields.DateTime(schema.dateModified, format="iso") + date_removed = fields.DateTime(prov.invalidatedAtTime, format="iso") keywords = fields.List(schema.keywords, fields.String(), load_default=None) name = fields.String(schema.name, load_default=None) derived_from = fields.IRI(prov.wasDerivedFrom, load_default=None) diff --git a/renku/command/schema/dataset.py b/renku/command/schema/dataset.py index 24f88d25b9..9509a7b022 100644 --- a/renku/command/schema/dataset.py +++ b/renku/command/schema/dataset.py @@ -148,6 +148,7 @@ class Meta: annotations = Nested(oa.hasTarget, AnnotationSchema, reverse=True, many=True) creators = Nested(schema.creator, PersonSchema, many=True) date_created = fields.DateTime(schema.dateCreated, load_default=None, format="iso", extra_formats=("%Y-%m-%d",)) + date_modified = 
fields.DateTime(schema.dateModified, load_default=None, format="iso", extra_formats=("%Y-%m-%d",)) date_removed = fields.DateTime(prov.invalidatedAtTime, load_default=None, format="iso") date_published = fields.DateTime( schema.datePublished, load_default=None, format="%Y-%m-%d", extra_formats=("iso", "%Y-%m-%dT%H:%M:%S") diff --git a/renku/command/schema/plan.py b/renku/command/schema/plan.py index d15815ba89..af315fa80d 100644 --- a/renku/command/schema/plan.py +++ b/renku/command/schema/plan.py @@ -17,8 +17,6 @@ # limitations under the License. """Represent run templates.""" -from datetime import timezone - import marshmallow from renku.command.schema.agent import PersonSchema @@ -46,7 +44,8 @@ class Meta: id = fields.Id() inputs = Nested(renku.hasInputs, CommandInputSchema, many=True, load_default=None) date_created = fields.DateTime(schema.dateCreated, format="iso") - invalidated_at = fields.DateTime(prov.invalidatedAtTime, format="iso") + date_modified = fields.DateTime(schema.dateModified, format="iso") + date_removed = fields.DateTime(prov.invalidatedAtTime, format="iso") keywords = fields.List(schema.keywords, fields.String(), load_default=None) name = fields.String(schema.name, load_default=None) derived_from = fields.IRI(prov.wasDerivedFrom, load_default=None) @@ -55,15 +54,3 @@ class Meta: parameters = Nested(renku.hasArguments, CommandParameterSchema, many=True, load_default=None) success_codes = fields.List(renku.successCodes, fields.Integer(), load_default=[0]) annotations = Nested(oa.hasTarget, AnnotationSchema, reverse=True, many=True) - - @marshmallow.pre_dump - def _pre_dump(self, in_data, **kwargs): - """Fix data on dumping.""" - if in_data.invalidated_at is not None and in_data.invalidated_at.tzinfo is None: - # NOTE: There was a bug that caused invalidated_at to be set without timezone (as UTC time) - # so we patch in the timezone here - in_data.unfreeze() - in_data.invalidated_at = 
in_data.invalidated_at.replace(microsecond=0).astimezone(timezone.utc) - in_data.freeze() - - return in_data diff --git a/renku/command/schema/project.py b/renku/command/schema/project.py index c0091a7487..83551e4f3c 100644 --- a/renku/command/schema/project.py +++ b/renku/command/schema/project.py @@ -37,17 +37,25 @@ class Meta: agent_version = StringList(schema.agent, load_default="pre-0.11.0") annotations = Nested(oa.hasTarget, AnnotationSchema, reverse=True, many=True) - automated_update = fields.Boolean(renku.automatedTemplateUpdate, load_default=True) creator = Nested(schema.creator, PersonSchema, load_default=None) date_created = DateTimeList(schema.dateCreated, load_default=None, format="iso", extra_formats=("%Y-%m-%d",)) description = fields.String(schema.description, load_default=None) id = fields.Id(load_default=None) - immutable_template_files = fields.List(renku.immutableTemplateFiles, fields.String(), load_default=list()) + immutable_template_files = fields.List( + renku.immutableTemplateFiles, + fields.String(), + load_default=list(), + attribute="template_metadata.immutable_template_files", + ) name = fields.String(schema.name, load_default=None) - template_id = fields.String(renku.templateId, load_default=None) - template_metadata = fields.String(renku.templateMetadata, load_default=None) - template_ref = fields.String(renku.templateReference, load_default=None) - template_source = fields.String(renku.templateSource, load_default=None) - template_version = fields.String(renku.templateVersion, load_default=None) + template_id = fields.String(renku.templateId, load_default=None, attribute="template_metadata.template_id") + template_metadata = fields.String(renku.templateMetadata, load_default=None, attribute="template_metadata.metadata") + template_ref = fields.String(renku.templateReference, load_default=None, attribute="template_metadata.template_ref") + template_source = fields.String( + renku.templateSource, load_default=None, 
attribute="template_metadata.template_source" + ) + template_version = fields.String( + renku.templateVersion, load_default=None, attribute="template_metadata.template_version" + ) version = StringList(schema.schemaVersion, load_default="1") keywords = fields.List(schema.keywords, fields.String(), load_default=None) diff --git a/renku/command/view_model/log.py b/renku/command/view_model/log.py index 089544ee91..bac54b2e53 100644 --- a/renku/command/view_model/log.py +++ b/renku/command/view_model/log.py @@ -197,10 +197,10 @@ def from_dataset(cls, dataset: "Dataset") -> "DatasetLogViewModel": descriptions.append("deleted") details.deleted = True - previous_dataset = None + previous_dataset: Optional[Dataset] = None if dataset.is_derivation(): - previous_dataset = dataset_gateway.get_by_id(dataset.derived_from.url_id) # type: ignore + previous_dataset = dataset_gateway.get_by_id(dataset.derived_from.value) # type: ignore current_files = {f for f in dataset.dataset_files if not f.date_removed} previous_files = set() @@ -222,7 +222,7 @@ def from_dataset(cls, dataset: "Dataset") -> "DatasetLogViewModel": descriptions.append(f"{len(new_files)} file(s) added") details.files_added = [str(f.entity.path) for f in new_files] - details.modified = True + details.modified = bool(previous_files) if previous_files and {f.id for f in previous_files}.difference({f.id for f in current_files}): # NOTE: Files removed @@ -277,8 +277,8 @@ def from_dataset(cls, dataset: "Dataset") -> "DatasetLogViewModel": details.keywords_removed = list(previous_keywords.difference(current_keywords)) modified = True - current_images = set(dataset.images) - previous_images = set(previous_dataset.images) + current_images = set(dataset.images) if dataset.images else set() + previous_images = set(previous_dataset.images) if previous_dataset.images else set() if current_images != previous_images: details.images_changed_to = [i.content_url for i in current_images] diff --git 
a/renku/command/view_model/project.py b/renku/command/view_model/project.py index 82050929bd..9f48296321 100644 --- a/renku/command/view_model/project.py +++ b/renku/command/view_model/project.py @@ -65,11 +65,16 @@ def from_project(cls, project: Project): """ template_info = "" - if project.template_source: - if project.template_source == "renku": - template_info = f"{project.template_id} ({project.template_version})" + if project.template_metadata.template_source: + if project.template_metadata.template_source == "renku": + template_info = ( + f"{project.template_metadata.template_id} ({project.template_metadata.template_version})" + ) else: - template_info = f"{project.template_source}@{project.template_ref}: {project.template_id}" + template_info = ( + f"{project.template_metadata.template_source}@" + f"{project.template_metadata.template_ref}: {project.template_metadata.template_id}" + ) return cls( id=project.id, diff --git a/renku/core/migration/m_0009__new_metadata_storage.py b/renku/core/migration/m_0009__new_metadata_storage.py index 771ab3d25c..10651b503f 100644 --- a/renku/core/migration/m_0009__new_metadata_storage.py +++ b/renku/core/migration/m_0009__new_metadata_storage.py @@ -25,7 +25,7 @@ from hashlib import sha1 from itertools import chain from pathlib import Path, PurePosixPath -from typing import TYPE_CHECKING, List, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Union, cast from urllib.parse import urlparse from renku.command.command_builder import inject @@ -35,6 +35,7 @@ from renku.core.interface.database_gateway import IDatabaseGateway from renku.core.interface.project_gateway import IProjectGateway from renku.core.migration.models import v9 as old_schema +from renku.core.migration.models import v10 as new_schema from renku.core.migration.models.migration import DatasetMigrationContext, MigrationContext, MigrationType from renku.core.migration.utils import ( OLD_DATASETS_PATH, @@ -47,7 +48,6 @@ from renku.core.util 
import communication from renku.core.util.yaml import load_yaml from renku.domain_model.entity import NON_EXISTING_ENTITY_CHECKSUM, Collection, Entity -from renku.domain_model.project import Project from renku.domain_model.project_context import has_graph_files, project_context from renku.domain_model.provenance.activity import Activity, Association, Generation, Usage from renku.domain_model.provenance.agent import Person, SoftwareAgent @@ -117,28 +117,28 @@ def _maybe_migrate_project_to_database(project_gateway: IProjectGateway): metadata_path = project_context.metadata_path.joinpath(OLD_METADATA_PATH) if metadata_path.exists(): - old_project = old_schema.Project.from_yaml(metadata_path) + old_project = cast(old_schema.Project, old_schema.Project.from_yaml(metadata_path)) id_path = urlparse(old_project._id).path id_path = id_path.replace("/projects/", "") id_path = Path(id_path) namespace, name = str(id_path.parent), id_path.name - id = Project.generate_id(namespace=namespace, name=name) + id = new_schema.Project.generate_id(namespace=namespace, name=name) - new_project = Project( + new_project = new_schema.Project( agent_version=old_project.agent_version, - automated_update=old_project.automated_update, creator=_old_agent_to_new_agent(old_project.creator), date_created=old_project.created, id=id, - immutable_template_files=old_project.immutable_template_files, name=old_project.name, + version=old_project.version, template_id=old_project.template_id, template_metadata=old_project.template_metadata, template_ref=old_project.template_ref, template_source=old_project.template_source, template_version=old_project.template_version, - version=old_project.version, + immutable_template_files=old_project.immutable_template_files, + automated_update=old_project.automated_update, ) project_gateway.update_project(new_project) diff --git a/renku/core/migration/m_0010__metadata_fixes.py b/renku/core/migration/m_0010__metadata_fixes.py new file mode 100644 index 
0000000000..bcde354bbd --- /dev/null +++ b/renku/core/migration/m_0010__metadata_fixes.py @@ -0,0 +1,317 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Various metadata migrations for v10.""" +import io +import json +import os +from datetime import timezone +from itertools import chain +from pathlib import Path +from typing import Any, Dict, List, Tuple, cast + +import zstandard as zstd + +from renku.command.command_builder import inject +from renku.core.interface.activity_gateway import IActivityGateway +from renku.core.interface.dataset_gateway import IDatasetGateway +from renku.core.interface.plan_gateway import IPlanGateway +from renku.core.interface.project_gateway import IProjectGateway +from renku.core.migration.models import v10 as old_schema +from renku.core.migration.models.migration import MigrationContext, MigrationType +from renku.core.util import communication +from renku.core.util.datetime8601 import local_now +from renku.domain_model.dataset import Dataset +from renku.domain_model.project import Project, ProjectTemplateMetadata +from renku.domain_model.project_context import project_context +from renku.domain_model.provenance.activity import Activity +from renku.domain_model.workflow.plan import AbstractPlan + + +def 
migrate(migration_context: MigrationContext): + """Migration function.""" + from renku.infrastructure.gateway.activity_gateway import reindex_catalog + + migrate_project_template_data() + migrate_old_metadata_namespaces() + + if MigrationType.WORKFLOWS in migration_context.options.type: + migrate_activity_ids() + fix_plan_times() + + migrate_remote_entity_ids() + fix_dataset_date_modified() + + # NOTE: Rebuild all workflow catalogs since ids and times have changed + communication.echo("Rebuilding workflow metadata") + reindex_catalog(project_context.database) + + +def migrate_old_metadata_namespaces(): + """Migrate old namespaces (e.g. renku.cli.* -> renku.ui.cli.*).""" + communication.echo("Migrating old namespaces") + compressor = zstd.ZstdCompressor() + decompressor = zstd.ZstdDecompressor() + for root, _, files in os.walk(project_context.database_path): + root = Path(root) + for file_name in files: + path = root / file_name + compressed = False + with open(path, "rb") as file: + header = int.from_bytes(file.read(4), "little") + file.seek(0) + if header == zstd.MAGIC_NUMBER: + with decompressor.stream_reader(file) as zfile: + data = json.load(zfile) + compressed = True + else: + data = json.load(file) + + # modify namespace + transformation = [ + ("renku.core.models.", "renku.domain_model."), + ("renku.core.metadata.", "renku.infrastructure."), + ] + nested_update(data, target_key="@renku_data_type", transforms=transformation) + + if compressed: + with open(path, "wb") as fb, compressor.stream_writer(fb) as compression_writer: + with io.TextIOWrapper(compression_writer) as out: + json.dump(data, out, ensure_ascii=False) + else: + with open(path, "wt") as ft: + json.dump(data, ft, ensure_ascii=False, sort_keys=True, indent=2) + + +def nested_update(data: Dict[str, Any], target_key: str, transforms: List[Tuple[str, str]]) -> None: + """Update a key's value based on transformations (from, to) in a deeply nested dictionary.""" + for k in list(data.keys()): + value = 
data[k] + if isinstance(value, str) and k == target_key: + for lookup, replacement in transforms: + value = value.replace(lookup, replacement) + data[k] = value + elif isinstance(value, dict): + nested_update(value, target_key=target_key, transforms=transforms) + elif isinstance(value, list) and len(value) > 0 and any(isinstance(v, dict) for v in value): + result = [] + for v in value: + if isinstance(v, dict): + nested_update(v, target_key=target_key, transforms=transforms) + + result.append(v) + data[k] = result + + +def migrate_activity_ids(): + """Fix activity related ids. + + Fixes ids not starting with `/activities/`. + Fixes generations having `/usages/` in their id. + """ + communication.echo("Fixing activity ids") + + database = project_context.database + + activities: List[Activity] = list(database["activities"].values()) + + def fix_id(id: str, changed: bool) -> Tuple[str, bool]: + """Fix an activity related id.""" + if not id.startswith("/activities/"): + return f"/activities/{id.lstrip('/')}", True + return id, changed + + for activity in activities: + changed = False + activity.unfreeze() + old_activity_id = activity.id + activity.id, changed = fix_id(activity.id, changed) + activity.association.id, changed = fix_id(activity.association.id, changed) + + for generation in activity.generations: + id, changed = fix_id(generation.id, changed) + + if "/usages/" in generation.id: + id = id.replace("/usages/", "/generations/") + changed = True + object.__setattr__(generation, "id", id) + + for entity in chain(activity.usages, activity.parameters): + id, changed = fix_id(entity.id, changed) + object.__setattr__(entity, "id", id) + + if changed: + database["activities"].pop(old_activity_id) + activity._p_changed = True + old_id = str(activity._p_oid) + os.unlink(project_context.database_path / old_id[0:2] / old_id[2:4] / old_id) + activity.reassign_oid() + database["activities"].add(activity) + activity.freeze() + database["activities"]._p_changed = 
True + database.commit() + + +def migrate_remote_entity_ids(): + """Change `remote-entity` to `remote-entities` in ids.""" + database = project_context.database + + datasets: List[Dataset] = list(database["datasets"].values()) + + for dataset in datasets: + changed = False + for file in dataset.files: + if file.based_on is not None: + file.based_on.id = file.based_on.id.replace("/remote-entity/", "/remote-entities/") + changed = True + + if changed: + dataset._p_changed = True + + database.commit() + + +@inject.autoparams() +def migrate_project_template_data(project_gateway: IProjectGateway): + """Migrate project template metadata to nested model.""" + if not hasattr(project_context.project, "template_source"): + # NOTE: already on newest version + return + + old_project = cast( + old_schema.Project, + project_context.database.get_from_path( + "project", override_type=f"{old_schema.Project.__module__}.{old_schema.Project.__qualname__}" + ), + ) + + project = Project( + agent_version=old_project.agent_version, + annotations=old_project.annotations, + creator=old_project.creator, + date_created=old_project.date_created, + description=old_project.description, + id=old_project.id, + name=old_project.name, + version=old_project.version, + keywords=old_project.keywords, + template_metadata=ProjectTemplateMetadata( + template_id=old_project.template_id, + metadata=old_project.template_metadata, + template_ref=old_project.template_ref, + template_source=old_project.template_source, + template_version=old_project.template_version, + immutable_template_files=old_project.immutable_template_files, + ), + ) + + project_gateway.update_project(project) + project_context.database.commit() + + +@inject.autoparams("activity_gateway", "plan_gateway") +def fix_plan_times(activity_gateway: IActivityGateway, plan_gateway: IPlanGateway): + """Add timezone to plan invalidations.""" + database = project_context.database + + plans: List[AbstractPlan] = plan_gateway.get_all_plans() + 
all_activities = activity_gateway.get_all_activities() + activity_map: Dict[str, Activity] = {} + + for activity in all_activities: + plan_id = activity.association.plan.id + if plan_id not in activity_map or activity.started_at_time < activity_map[plan_id].started_at_time: + activity_map[plan_id] = activity + + for plan in plans: + plan.unfreeze() + if hasattr(plan, "invalidated_at"): + plan.date_removed = plan.invalidated_at + del plan.invalidated_at + elif not hasattr(plan, "date_removed"): + plan.date_removed = None + + if plan.date_removed is not None: + if plan.date_removed < plan.date_created: + # NOTE: Fix invalidation times set before creation date on plans + plan.date_removed = plan.date_created + if plan.date_removed.tzinfo is None: + # NOTE: There was a bug that caused date_removed to be set without timezone (as UTC time) + # so we patch in the timezone here + plan.date_removed = plan.date_removed.replace(microsecond=0).astimezone(timezone.utc) + if plan.id in activity_map and plan.date_created > activity_map[plan.id].started_at_time: + plan.date_created = activity_map[plan.id].started_at_time + plan.freeze() + + # NOTE: switch creation date for modification date + for tail in plan_gateway.get_newest_plans_by_names(include_deleted=True).values(): + stack: List[AbstractPlan] = [] + stack.append(tail) + creation_date = tail.date_created + plan = tail + + while plan.is_derivation(): + plan = cast(AbstractPlan, plan_gateway.get_by_id(plan.derived_from)) + creation_date = plan.date_created + stack.append(plan) + + while stack: + plan = stack.pop() + plan.unfreeze() + plan.date_modified = plan.date_created + plan.date_created = creation_date + plan.freeze() + + database.commit() + + +@inject.autoparams("dataset_gateway") +def fix_dataset_date_modified(dataset_gateway: IDatasetGateway): + """Change date_created and date_modified to have correct semantics.""" + tails = dataset_gateway.get_provenance_tails() + + for dataset_tail in tails: + dataset = dataset_tail 
+ found_datasets: List[Dataset] = [] + previous_modification_date = local_now() + + while dataset.derived_from is not None: + modification_date = dataset.date_removed or dataset.date_created + + if modification_date is not None: + assert modification_date <= previous_modification_date + dataset.unfreeze() + dataset.date_modified = modification_date + dataset.freeze() + previous_modification_date = modification_date + found_datasets.append(dataset) + dataset = dataset_gateway.get_by_id(dataset.derived_from.value) + + # NOTE: first dataset in chain + modification_date = dataset.date_created or dataset.date_published + if modification_date is not None: + dataset.unfreeze() + dataset.date_modified = modification_date + dataset.freeze() + + for child in found_datasets: + child.unfreeze() + child.date_created = dataset.date_created + child.date_published = dataset.date_published + child.freeze() + + project_context.database.commit() diff --git a/renku/core/migration/migrate.py b/renku/core/migration/migrate.py index 3a164ae33c..9253de02ac 100644 --- a/renku/core/migration/migrate.py +++ b/renku/core/migration/migrate.py @@ -49,6 +49,7 @@ from renku.core.migration.models.migration import MigrationContext, MigrationType from renku.core.migration.utils import OLD_METADATA_PATH, is_using_temporary_datasets_path, read_project_version from renku.core.util import communication +from renku.domain_model.project import ProjectTemplateMetadata from renku.domain_model.project_context import project_context try: @@ -56,7 +57,7 @@ except ImportError: import importlib.resources as importlib_resources # type: ignore -SUPPORTED_PROJECT_VERSION = 9 +SUPPORTED_PROJECT_VERSION = 10 def check_for_migration(): @@ -100,6 +101,7 @@ def migrate_project( NOTE: The project path must be pushed to the project_context before calling this function. Args: + project_gateway(IProjectGateway): The injected project gateway. 
force_template_update: Whether to force update the template (Default value = False). skip_template_update: Whether to skip updating the template (Default value = False). skip_docker_update: Whether to skip updating the Dockerfile (Default value = False). @@ -122,7 +124,13 @@ def migrate_project( except ValueError: project = None - if not skip_template_update and project and project.template_source: + if ( + not skip_template_update + and project + and hasattr(project, "template_metadata") + and isinstance(project.template_metadata, ProjectTemplateMetadata) + and project.template_metadata.template_source + ): try: template_updated = _update_template() except TemplateUpdateError: diff --git a/renku/core/migration/models/v10.py b/renku/core/migration/models/v10.py new file mode 100644 index 0000000000..306e22ab7d --- /dev/null +++ b/renku/core/migration/models/v10.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2017-2022 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Migration models V10."""
+
+from datetime import datetime
+from typing import TYPE_CHECKING, List, Optional, cast
+from urllib.parse import quote
+
+import persistent
+
+from renku.core.util.datetime8601 import fix_datetime, local_now, parse_date
+from renku.core.util.git import get_git_user
+from renku.core.util.os import normalize_to_ascii
+from renku.domain_model.provenance.agent import Person
+from renku.domain_model.provenance.annotation import Annotation
+from renku.version import __minimum_project_version__
+
+if TYPE_CHECKING:
+    from renku.domain_model.project_context import ProjectRemote
+    from renku.infrastructure.repository import Repository
+
+
+class Project(persistent.Persistent):
+    """Represent a project."""
+
+    keywords: List[str] = list()
+
+    # NOTE: the minimum version of renku needed to work with a project
+    # This should be bumped on metadata version changes and when we do not have forward-compatible on-the-fly migrations
+    minimum_renku_version: str = __minimum_project_version__
+
+    def __init__(
+        self,
+        *,
+        agent_version: Optional[str] = None,
+        annotations: Optional[List[Annotation]] = None,
+        automated_update: bool = False,
+        creator: Person,
+        date_created: Optional[datetime] = None,
+        description: Optional[str] = None,
+        id: Optional[str] = None,
+        immutable_template_files: Optional[List[str]] = None,
+        name: Optional[str] = None,
+        template_id: Optional[str] = None,
+        template_metadata: str = "{}",
+        template_ref: Optional[str] = None,
+        template_source: Optional[str] = None,
+        template_version: Optional[str] = None,
+        version: Optional[str] = None,
+        keywords: Optional[List[str]] = None,
+    ):
+        from renku.core.migration.migrate import SUPPORTED_PROJECT_VERSION
+
+        version = cast(str, version or SUPPORTED_PROJECT_VERSION)
+        date_created = parse_date(date_created) or local_now()
+
+        if id is None:
+            namespace, generated_name = Project.get_namespace_and_name(name=name, creator=creator)
+            assert generated_name is not None, "Cannot
generate Project id with no name" + id = Project.generate_id(namespace=namespace, name=generated_name) + + self.agent_version: Optional[str] = agent_version + self.annotations: List[Annotation] = annotations or [] + self.automated_update: bool = automated_update + self.creator: Person = creator + self.date_created: datetime = fix_datetime(date_created) or local_now() + self.description: Optional[str] = description + self.id: str = id + self.immutable_template_files: Optional[List[str]] = immutable_template_files + self.name: Optional[str] = name + self.template_id: Optional[str] = template_id + self.template_metadata: str = template_metadata + self.template_ref: Optional[str] = template_ref + self.template_source: Optional[str] = template_source + self.template_version: Optional[str] = template_version + self.version: str = version + self.keywords = keywords or [] + + # NOTE: We copy this over as class variables don't get saved in the DB + self.minimum_renku_version = Project.minimum_renku_version + + @staticmethod + def get_namespace_and_name( + *, + remote: Optional["ProjectRemote"] = None, + repository: Optional["Repository"] = None, + name: Optional[str] = None, + namespace: Optional[str] = None, + creator: Optional[Person] = None, + ): + """Return Project's namespace and name from various objects.""" + if remote: + namespace = namespace or remote.owner + name = name or remote.name + + if not creator and repository: + creator = get_git_user(repository=repository) + + if not namespace and creator: + namespace = creator.email.split("@")[0] + + return namespace, name + + @staticmethod + def generate_id(namespace: str, name: str): + """Generate an id for Project.""" + assert namespace, "Cannot generate Project id with no namespace" + assert name, "Cannot generate Project id with no name" + + namespace = quote(namespace.strip("/"), safe="/") + slug = normalize_to_ascii(name) + + return f"/projects/{namespace}/{slug}" diff --git a/renku/core/migration/models/v9.py 
b/renku/core/migration/models/v9.py index e8bb480408..475014b9a2 100644 --- a/renku/core/migration/models/v9.py +++ b/renku/core/migration/models/v9.py @@ -35,6 +35,7 @@ from attr.validators import instance_of from marshmallow import EXCLUDE, pre_dump +from renku.command.schema.agent import PersonSchema from renku.command.schema.annotation import AnnotationSchema from renku.command.schema.calamus import ( DateTimeList, @@ -49,9 +50,10 @@ renku, schema, ) -from renku.command.schema.project import ProjectSchema as NewProjectSchema +from renku.command.schema.project import ProjectSchema as V10ProjectSchema from renku.core import errors from renku.core.migration.migrate import SUPPORTED_PROJECT_VERSION +from renku.core.migration.models import v10 as new_schema from renku.core.migration.models.refs import LinkReference from renku.core.migration.utils import ( OLD_METADATA_PATH, @@ -1813,7 +1815,9 @@ class Meta: path = fields.String(prov.atLocation) _id = fields.Id(init_name="id") _label = fields.String(rdfs.label, init_name="label", load_default=None) - _project = Nested(schema.isPartOf, [ProjectSchema, NewProjectSchema], init_name="project", load_default=None) + _project = Nested( + schema.isPartOf, [ProjectSchema, "V9ProjectSchema", V10ProjectSchema], init_name="project", load_default=None + ) class OldEntitySchema(OldCommitMixinSchema): @@ -2260,3 +2264,31 @@ class Meta: unknown = EXCLUDE _processes = Nested(wfprov.wasPartOfWorkflowRun, ProcessRunSchema, reverse=True, many=True, init_name="processes") + + +class V9ProjectSchema(JsonLDSchema): + """Project Schema.""" + + class Meta: + """Meta class.""" + + rdf_type = [schema.Project, prov.Location] + model = new_schema.Project + unknown = EXCLUDE + + agent_version = StringList(schema.agent, load_default="pre-0.11.0") + annotations = Nested(oa.hasTarget, AnnotationSchema, reverse=True, many=True) + automated_update = fields.Boolean(renku.automatedTemplateUpdate, load_default=True) + creator = Nested(schema.creator, 
PersonSchema, load_default=None) + date_created = DateTimeList(schema.dateCreated, load_default=None, format="iso", extra_formats=("%Y-%m-%d",)) + description = fields.String(schema.description, load_default=None) + id = fields.Id(load_default=None) + immutable_template_files = fields.List(renku.immutableTemplateFiles, fields.String(), load_default=list()) + name = fields.String(schema.name, load_default=None) + template_id = fields.String(renku.templateId, load_default=None) + template_metadata = fields.String(renku.templateMetadata, load_default=None) + template_ref = fields.String(renku.templateReference, load_default=None) + template_source = fields.String(renku.templateSource, load_default=None) + template_version = fields.String(renku.templateVersion, load_default=None) + version = StringList(schema.schemaVersion, load_default="1") + keywords = fields.List(schema.keywords, fields.String(), load_default=None) diff --git a/renku/core/migration/utils/conversion.py b/renku/core/migration/utils/conversion.py index 8aa32119aa..942c5926f8 100644 --- a/renku/core/migration/utils/conversion.py +++ b/renku/core/migration/utils/conversion.py @@ -196,6 +196,7 @@ def convert_license(license): date_created=dataset.date_created, date_published=dataset.date_published, date_removed=None, + date_modified=dataset.date_created or dataset.date_published, derived_from=convert_derived_from(dataset.derived_from, dataset.same_as), description=dataset.description, id=id, diff --git a/renku/core/template/template.py b/renku/core/template/template.py index ca4596161d..a59d25cc75 100644 --- a/renku/core/template/template.py +++ b/renku/core/template/template.py @@ -24,7 +24,7 @@ import tempfile from enum import Enum, IntEnum, auto from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast +from typing import Any, Dict, List, Optional, Tuple, cast from packaging.version import Version @@ -50,8 +50,7 @@ except ImportError: import importlib.resources 
as importlib_resources # type:ignore -if TYPE_CHECKING: - from renku.domain_model.project import Project +from renku.domain_model.project import Project, ProjectTemplateMetadata TEMPLATE_KEEP_FILES = ["readme.md", "readme.rst", "readme.txt", "readme"] TEMPLATE_INIT_APPEND_FILES = [".gitignore"] @@ -126,13 +125,14 @@ def copy_template_metadata_to_project(): """Update template-related metadata in a project.""" write_template_checksum(rendered_template.checksums) - project.template_source = rendered_template.template.source - project.template_ref = rendered_template.template.reference - project.template_id = rendered_template.template.id - project.template_version = rendered_template.template.version - project.immutable_template_files = rendered_template.template.immutable_files.copy() - project.automated_update = rendered_template.template.allow_update - project.template_metadata = json.dumps(rendered_template.metadata) + project.template_metadata = ProjectTemplateMetadata( + template_id=rendered_template.template.id, + template_source=rendered_template.template.source, + template_ref=rendered_template.template.reference, + template_version=rendered_template.template.version, + immutable_template_files=rendered_template.template.immutable_files.copy(), + metadata=json.dumps(rendered_template.metadata), + ) actions_mapping: Dict[FileAction, Tuple[str, str]] = { FileAction.APPEND: ("append", "Appending to"), @@ -188,7 +188,7 @@ def get_file_actions( old_checksums = read_template_checksum() try: - immutable_files = project_context.project.immutable_template_files or [] + immutable_files = project_context.project.template_metadata.immutable_template_files or [] except (AttributeError, ValueError): # NOTE: Project is not set immutable_files = [] diff --git a/renku/core/template/usecase.py b/renku/core/template/usecase.py index 4ca3618188..053188ec23 100644 --- a/renku/core/template/usecase.py +++ b/renku/core/template/usecase.py @@ -102,7 +102,7 @@ def set_template( 
"""Set template for a project.""" project = project_context.project - if project.template_source and not force: + if project.template_metadata.template_source and not force: raise errors.TemplateUpdateError("Project already has a template: To set a template use '-f/--force' flag") templates_source = fetch_templates_source(source=source, reference=reference) diff --git a/renku/core/workflow/activity.py b/renku/core/workflow/activity.py index 7688d08762..c4bc2a450f 100644 --- a/renku/core/workflow/activity.py +++ b/renku/core/workflow/activity.py @@ -30,7 +30,6 @@ from renku.core import errors from renku.core.interface.activity_gateway import IActivityGateway from renku.core.util import communication -from renku.core.util.datetime8601 import local_now from renku.core.workflow.plan import get_activities, is_plan_removed, remove_plan from renku.domain_model.entity import Entity from renku.domain_model.project_context import project_context @@ -471,8 +470,6 @@ def revert_activity( """ repository = project_context.repository - delete_time = local_now() - def delete_associated_plan(activity): if not delete_plan: return @@ -483,7 +480,7 @@ def delete_associated_plan(activity): if used_by_other_activities: return - remove_plan(name_or_id=plan.id, force=True, when=delete_time) + remove_plan(name_or_id=plan.id, force=True) def revert_generations(activity) -> Tuple[Set[str], Set[str]]: """Either revert each generation to an older version (created by an earlier activity) or delete it.""" @@ -541,7 +538,7 @@ def revert_generations(activity) -> Tuple[Set[str], Set[str]]: revert_generations(activity) activity_gateway.remove(activity, force=force) # NOTE: Delete the activity after processing metadata or otherwise we won't see the activity as the latest generator - activity.delete(when=delete_time) + activity.delete() return activity diff --git a/renku/core/workflow/plan.py b/renku/core/workflow/plan.py index 6689e6549d..35d38e6595 100644 --- a/renku/core/workflow/plan.py +++ 
b/renku/core/workflow/plan.py @@ -19,7 +19,7 @@ import itertools import os -from datetime import datetime, timedelta +from datetime import timedelta from pathlib import Path from typing import Any, Dict, Generator, List, Optional, Set, Tuple, Union, cast, overload @@ -34,7 +34,6 @@ from renku.core.interface.project_gateway import IProjectGateway from renku.core.plugin.workflow_file_parser import read_workflow_file from renku.core.util import communication -from renku.core.util.datetime8601 import local_now from renku.core.util.git import get_git_user from renku.core.util.os import are_paths_related, get_relative_paths from renku.core.util.util import NO_VALUE, NoValueType @@ -224,7 +223,7 @@ def show_workflow(name_or_id_or_path: str, activity_gateway: IActivityGateway, w @inject.autoparams("plan_gateway") @validate_arguments(config=dict(arbitrary_types_allowed=True)) -def remove_plan(name_or_id: str, force: bool, plan_gateway: IPlanGateway, when: datetime = local_now()): +def remove_plan(name_or_id: str, force: bool, plan_gateway: IPlanGateway): """Remove the workflow by its name or id. 
Args: @@ -263,7 +262,7 @@ def remove_plan(name_or_id: str, force: bool, plan_gateway: IPlanGateway, when: communication.confirm(prompt_text, abort=True, warning=True) derived_plan = latest_version.derive() - derived_plan.delete(when=when) + derived_plan.delete() plan_gateway.add(derived_plan) diff --git a/renku/data/shacl_shape.json b/renku/data/shacl_shape.json index 5a8f8fea87..33dd9ec47a 100644 --- a/renku/data/shacl_shape.json +++ b/renku/data/shacl_shape.json @@ -396,6 +396,18 @@ "maxCount": 1, "sh:pattern": "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\+\\d{2}:\\d{2}" }, + { + "nodeKind": "sh:Literal", + "path": "schema:dateModified", + "datatype": { + "@id": "xsd:string" + }, + "sh:moreThanOrEquals": { + "@id": "schema:dateCreated" + }, + "maxCount": 1, + "sh:pattern": "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\+\\d{2}:\\d{2}" + }, { "nodeKind": "sh:Literal", "path": "prov:invalidatedAtTime", @@ -726,7 +738,7 @@ "@id": "prov:Entity" }, "maxCount": 1, - "sh:pattern": "http(s)?://[^/]+/remote-entity/\\S+" + "sh:pattern": "http(s)?://[^/]+/remote-entities/\\S+" }, { "nodeKind": "sh:Literal", @@ -1128,6 +1140,19 @@ "maxCount": 1, "sh:pattern": "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\+\\d{2}:\\d{2}" }, + { + "nodeKind": "sh:Literal", + "path": "schema:dateModified", + "datatype": { + "@id": "xsd:string" + }, + "sh:moreThanOrEquals": { + "@id": "schema:dateCreated" + }, + "minCount": 1, + "maxCount": 1, + "sh:pattern": "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\+\\d{2}:\\d{2}" + }, { "nodeKind": "sh:Literal", "path": "prov:invalidatedAtTime", diff --git a/renku/domain_model/dataset.py b/renku/domain_model/dataset.py index 43679bef4e..345399bede 100644 --- a/renku/domain_model/dataset.py +++ b/renku/domain_model/dataset.py @@ -215,7 +215,7 @@ def generate_id(checksum: str, path: Union[Path, str], url: str) -> str: parsed_url = urlparse(url) prefix = quote(posixpath.join(parsed_url.netloc, parsed_url.path)) path = quote(str(path)) - return 
f"/remote-entity/{prefix}/{checksum}/{path}" + return f"/remote-entities/{prefix}/{checksum}/{path}" def __eq__(self, other): if self is other: @@ -336,6 +336,7 @@ def __init__( date_created: Optional[datetime] = None, date_published: Optional[datetime] = None, date_removed: Optional[datetime] = None, + date_modified: Optional[datetime] = None, derived_from: Optional[Url] = None, description: Optional[str] = None, id: Optional[str] = None, @@ -376,7 +377,7 @@ def __init__( # `dataset_files` includes existing files and those that have been removed in the previous version self.dataset_files: List[DatasetFile] = dataset_files or [] self.date_created: Optional[datetime] = date_created - self.date_modified: datetime = local_now() + self.date_modified: datetime = date_modified or local_now() self.date_published: Optional[datetime] = fix_datetime(date_published) self.date_removed: Optional[datetime] = fix_datetime(date_removed) self.derived_from: Optional[Url] = derived_from @@ -491,9 +492,9 @@ def derive_from( self.initial_identifier = dataset.initial_identifier self.derived_from = Url(url_id=dataset.id) self.same_as = None - self.date_created = date_created or local_now() + self.date_created = date_created or dataset.date_created self.date_modified = local_now() - self.date_published = None + self.date_published = dataset.date_published if creator and hasattr(creator, "email") and not any(c for c in self.creators if c.email == creator.email): self.creators.append(creator) diff --git a/renku/domain_model/project.py b/renku/domain_model/project.py index c5431463d1..3d21bb4c52 100644 --- a/renku/domain_model/project.py +++ b/renku/domain_model/project.py @@ -17,6 +17,7 @@ # limitations under the License. 
"""Project class.""" +from dataclasses import dataclass from datetime import datetime from typing import TYPE_CHECKING, Dict, List, Optional, cast from urllib.parse import quote @@ -37,6 +38,18 @@ from renku.infrastructure.repository import Repository +@dataclass +class ProjectTemplateMetadata: + """Metadata about the template used in a project.""" + + template_id: Optional[str] = None + metadata: str = "" + template_ref: Optional[str] = None + template_source: Optional[str] = None + template_version: Optional[str] = None + immutable_template_files: Optional[List[str]] = None + + class Project(persistent.Persistent): """Represent a project.""" @@ -51,18 +64,12 @@ def __init__( *, agent_version: Optional[str] = None, annotations: Optional[List[Annotation]] = None, - automated_update: bool = False, creator: Person, date_created: Optional[datetime] = None, description: Optional[str] = None, id: Optional[str] = None, - immutable_template_files: Optional[List[str]] = None, name: Optional[str] = None, - template_id: Optional[str] = None, - template_metadata: str = "{}", - template_ref: Optional[str] = None, - template_source: Optional[str] = None, - template_version: Optional[str] = None, + template_metadata: Optional[ProjectTemplateMetadata] = None, version: Optional[str] = None, keywords: Optional[List[str]] = None, ): @@ -76,23 +83,18 @@ def __init__( assert generated_name is not None, "Cannot generate Project id with no name" id = Project.generate_id(namespace=namespace, name=generated_name) + self.name: Optional[str] = name self.agent_version: Optional[str] = agent_version self.annotations: List[Annotation] = annotations or [] - self.automated_update: bool = automated_update self.creator: Person = creator self.date_created: datetime = fix_datetime(date_created) or local_now() self.description: Optional[str] = description self.id: str = id - self.immutable_template_files: Optional[List[str]] = immutable_template_files - self.name: Optional[str] = name - 
self.template_id: Optional[str] = template_id - self.template_metadata: str = template_metadata - self.template_ref: Optional[str] = template_ref - self.template_source: Optional[str] = template_source - self.template_version: Optional[str] = template_version self.version: str = version self.keywords = keywords or [] + self.template_metadata: ProjectTemplateMetadata = template_metadata or ProjectTemplateMetadata() + # NOTE: We copy this over as class variables don't get saved in the DB self.minimum_renku_version = Project.minimum_renku_version diff --git a/renku/domain_model/template.py b/renku/domain_model/template.py index b8837ee263..59e8d8f2ae 100644 --- a/renku/domain_model/template.py +++ b/renku/domain_model/template.py @@ -515,16 +515,16 @@ def from_project(cls, project: Optional["Project"]) -> "TemplateMetadata": metadata = {} immutable_files: List[str] = [] else: - metadata = json.loads(project.template_metadata) if project.template_metadata else {} + metadata = json.loads(project.template_metadata.metadata) if project.template_metadata.metadata else {} # NOTE: Make sure project's template metadata is updated - metadata["__template_source__"] = project.template_source - metadata["__template_ref__"] = project.template_ref - metadata["__template_version__"] = project.template_version - metadata["__template_id__"] = project.template_id + metadata["__template_source__"] = project.template_metadata.template_source + metadata["__template_ref__"] = project.template_metadata.template_ref + metadata["__template_version__"] = project.template_metadata.template_version + metadata["__template_id__"] = project.template_metadata.template_id # NOTE: Ignore Project.automated_update since it's default is False and won't allow any update at all - immutable_files = project.immutable_template_files or [] + immutable_files = project.template_metadata.immutable_template_files or [] # NOTE: Always set __renku_version__ to the value read from the Dockerfile (if available) since 
setting/updating # the template doesn't change project's metadata version and shouldn't update the Renku version either diff --git a/renku/domain_model/workflow/composite_plan.py b/renku/domain_model/workflow/composite_plan.py index fe60ebdc42..360c035d53 100644 --- a/renku/domain_model/workflow/composite_plan.py +++ b/renku/domain_model/workflow/composite_plan.py @@ -50,7 +50,8 @@ def __init__( description: Optional[str] = None, id: str, date_created: Optional[datetime] = None, - invalidated_at: Optional[datetime] = None, + date_modified: Optional[datetime] = None, + date_removed: Optional[datetime] = None, keywords: Optional[List[str]] = None, links: Optional[List[ParameterLink]] = None, mappings: Optional[List[ParameterMapping]] = None, @@ -65,7 +66,8 @@ def __init__( description=description, id=id, date_created=date_created, - invalidated_at=invalidated_at, + date_modified=date_modified, + date_removed=date_removed, keywords=keywords, name=name, project_id=project_id, @@ -396,7 +398,7 @@ def derive(self, creator: Optional[Person] = None) -> "CompositePlan": """Create a new ``CompositePlan`` that is derived from self.""" derived = copy.copy(self) derived.derived_from = self.id - derived.date_created = local_now() + derived.date_modified = local_now() derived.plans = self.plans.copy() derived.mappings = self.mappings.copy() derived.links = self.links.copy() diff --git a/renku/domain_model/workflow/plan.py b/renku/domain_model/workflow/plan.py index 469e3da395..c7d2dad59c 100644 --- a/renku/domain_model/workflow/plan.py +++ b/renku/domain_model/workflow/plan.py @@ -56,7 +56,8 @@ def __init__( description: Optional[str] = None, id: str, date_created: Optional[datetime] = None, - invalidated_at: Optional[datetime] = None, + date_modified: Optional[datetime] = None, + date_removed: Optional[datetime] = None, keywords: Optional[List[str]] = None, name: Optional[str] = None, project_id: Optional[str] = None, @@ -66,7 +67,8 @@ def __init__( self.description: 
Optional[str] = description self.id: str = id self.date_created: datetime = date_created or local_now() - self.invalidated_at: Optional[datetime] = invalidated_at + self.date_modified: datetime = date_modified or local_now() + self.date_removed: Optional[datetime] = date_removed self.keywords: List[str] = keywords or [] if creators: @@ -87,7 +89,7 @@ def __repr__(self): @property def deleted(self) -> bool: """True if plan is deleted.""" - return self.invalidated_at is not None + return self.date_removed is not None @staticmethod def generate_id(*, uuid: Optional[str] = None, **_) -> str: @@ -151,14 +153,14 @@ def is_derivation(self) -> bool: """Return if an ``AbstractPlan`` has correct derived_from.""" raise NotImplementedError() - def delete(self, *, when: datetime = local_now()): + def delete(self): """Mark a plan as deleted. NOTE: Don't call this function for deleting plans since it doesn't delete the whole plan derivatives chain. Use renku.core.workflow.plan::remove_plan instead. """ self.unfreeze() - self.invalidated_at = when + self.date_removed = local_now() self.freeze() @@ -177,12 +179,13 @@ def __init__( command: str, creators: Optional[List[Person]] = None, date_created: Optional[datetime] = None, + date_modified: Optional[datetime] = None, derived_from: Optional[str] = None, description: Optional[str] = None, hidden_inputs: List[HiddenInput] = None, id: str, inputs: Optional[List[CommandInput]] = None, - invalidated_at: Optional[datetime] = None, + date_removed: Optional[datetime] = None, keywords: Optional[List[str]] = None, name: Optional[str] = None, outputs: Optional[List[CommandOutput]] = None, @@ -201,7 +204,8 @@ def __init__( id=id, description=description, date_created=date_created, - invalidated_at=invalidated_at, + date_modified=date_modified, + date_removed=date_removed, keywords=keywords, name=name, project_id=project_id, diff --git a/renku/infrastructure/database.py b/renku/infrastructure/database.py index 9fa91fc540..de125611ff 100644 --- 
a/renku/infrastructure/database.py +++ b/renku/infrastructure/database.py @@ -351,16 +351,25 @@ def get(self, oid: OID_TYPE) -> persistent.Persistent: return object - def get_from_path(self, path: str, absolute: bool = False) -> persistent.Persistent: + def get_from_path( + self, path: str, absolute: bool = False, override_type: Optional[str] = None + ) -> persistent.Persistent: """Load a database object from a path. Args: path(str): Path of the database object. - absolute(bool): Whether the path is absolute or a filename inside the database (Default value: False). + absolute(bool): Whether the path is absolute or a filename inside the database (Default value = False). + override_type(Optional[str]): load object as a different type than what is set inside `renku_data_type` + (Default value = None). Returns: persistent.Persistent: The object. """ data = self._storage.load(filename=path, absolute=absolute) + if override_type is not None: + if "@renku_data_type" not in data: + raise errors.IncompatibleParametersError("Cannot override type on found data.") + + data["@renku_data_type"] = override_type object = self._reader.deserialize(data) object._p_changed = 0 object._p_serial = PERSISTED @@ -779,12 +788,12 @@ def store(self, filename: str, data: Union[Dict, List], compress=False, absolute self.path.mkdir(parents=True, exist_ok=True) if compress: - with open(path, "wb") as f, self.zstd_compressor.stream_writer(f) as compressor: + with open(path, "wb") as fb, self.zstd_compressor.stream_writer(fb) as compressor: with io.TextIOWrapper(compressor) as out: json.dump(data, out, ensure_ascii=False) else: - with open(path, "wt") as f: # type: ignore - json.dump(data, f, ensure_ascii=False, sort_keys=True, indent=2) # type: ignore + with open(path, "wt") as ft: + json.dump(data, ft, ensure_ascii=False, sort_keys=True, indent=2) def load(self, filename: str, absolute: bool = False): """Load data for object with object id oid. 
diff --git a/renku/infrastructure/git_merger.py b/renku/infrastructure/git_merger.py index 27331f71de..73c7a0377c 100644 --- a/renku/infrastructure/git_merger.py +++ b/renku/infrastructure/git_merger.py @@ -268,26 +268,14 @@ def merge_projects(self, local: Project, remote: Project, base: Optional[Project or local.description != base.description or local.annotations != base.annotations ) - local_template_changed = ( - base is None - or local.template_id != base.template_id - or local.template_ref != base.template_ref - or local.template_source != base.template_source - or local.template_version != base.template_version - ) + local_template_changed = base is None or local.template_metadata != base.template_metadata remote_changed = ( base is None or remote.keywords != base.keywords or remote.description != base.description or remote.annotations != base.annotations ) - remote_template_changed = ( - base is None - or remote.template_id != base.template_id - or remote.template_ref != base.template_ref - or remote.template_source != base.template_source - or remote.template_version != base.template_version - ) + remote_template_changed = base is None or remote.template_metadata != base.template_metadata if (local_changed or local_template_changed) and not remote_changed and not remote_template_changed: return local @@ -333,34 +321,24 @@ def merge_projects(self, local: Project, remote: Project, base: Optional[Project # NOTE: Merge conflicts! 
         action = communication.prompt(
             "Merge conflict detected:\n Project template modified/update in both remote and local branch.\n"
-            f"local: {local.template_source}@{local.template_ref}:{local.template_id}, "
-            "version {local.template_version}\n"
-            f"remote: {remote.template_source}@{remote.template_ref}:{remote.template_id}, "
-            "version {remote.template_version}\n"
+            f"local: {local.template_metadata.template_source}@{local.template_metadata.template_ref}:"
+            f"{local.template_metadata.template_id}, "
+            f"version {local.template_metadata.template_version}\n"
+            f"remote: {remote.template_metadata.template_source}@{remote.template_metadata.template_ref}:"
+            f"{remote.template_metadata.template_id}, "
+            f"version {remote.template_metadata.template_version}\n"
             "Which do you want to keep?\n[l]ocal, [r]emote, [a]bort:",
             default="a",
         )
         if action == "r":
-            local.template_id = remote.template_id
-            local.template_ref = remote.template_ref
-            local.template_source = remote.template_source
-            local.template_version = remote.template_version
             local.template_metadata = remote.template_metadata
-            local.immutable_template_files = remote.immutable_template_files
-            local.automated_update = remote.automated_update
         elif action == "a":
             raise errors.MetadataMergeError("Merge aborted")
         elif action != "l":
             raise errors.MetadataMergeError(f"Invalid merge option selected: {action}")
     elif remote_template_changed:
-        local.template_id = remote.template_id
-        local.template_ref = remote.template_ref
-        local.template_source = remote.template_source
-        local.template_version = remote.template_version
         local.template_metadata = remote.template_metadata
-        local.immutable_template_files = remote.immutable_template_files
-        local.automated_update = remote.automated_update
 
     return local
 
diff --git a/renku/ui/api/models/plan.py b/renku/ui/api/models/plan.py
index 2c19aa0f98..0673f499d6 100644
--- a/renku/ui/api/models/plan.py
+++ b/renku/ui/api/models/plan.py
@@ -84,7 +84,7 @@ def from_plan(cls, plan:
core_plan.Plan) -> "Plan": return cls( command=plan.command, date_created=plan.date_created, - deleted=plan.invalidated_at is not None, + deleted=plan.date_removed is not None, description=plan.description, id=plan.id, inputs=[Input.from_parameter(i) for i in plan.inputs], @@ -160,7 +160,7 @@ def from_composite_plan(cls, composite_plan: core_composite_plan.CompositePlan) """ return cls( date_created=composite_plan.date_created, - deleted=composite_plan.invalidated_at is not None, + deleted=composite_plan.date_removed is not None, description=composite_plan.description, id=composite_plan.id, keywords=composite_plan.keywords, diff --git a/tests/cli/fixtures/cli_old_projects.py b/tests/cli/fixtures/cli_old_projects.py index 6353f1ef79..15ee661485 100644 --- a/tests/cli/fixtures/cli_old_projects.py +++ b/tests/cli/fixtures/cli_old_projects.py @@ -29,7 +29,9 @@ from tests.utils import clone_compressed_repository -@pytest.fixture(params=["old-datasets-v0.3.0.git", "old-datasets-v0.5.1.git", "test-renku-v0.3.0.git"]) +@pytest.fixture( + params=["old-datasets-v0.3.0.git", "old-datasets-v0.5.1.git", "test-renku-v0.3.0.git", "pre-2.0-project.git"] +) def old_project(request, tmp_path) -> Generator[RenkuProject, None, None]: """Prepares a testing repo created by old version of renku.""" from renku.core.util.contexts import chdir diff --git a/tests/cli/test_init.py b/tests/cli/test_init.py index 5be8fb7a51..f40f1aa10c 100644 --- a/tests/cli/test_init.py +++ b/tests/cli/test_init.py @@ -301,7 +301,7 @@ def test_init_new_metadata_defaults(isolated_runner, project_init): assert 0 == result.exit_code, format_result_exception(result) project = Database.from_path(Path(data["test_project"]) / ".renku" / "metadata").get("project") - metadata = json.loads(project.template_metadata) + metadata = json.loads(project.template_metadata.metadata) assert True is metadata["bool_var"] assert "ask again" == metadata["enum_var"] assert "some description" == metadata["description"] @@ -324,7 
+324,7 @@ def test_init_new_metadata_defaults_is_overwritten(isolated_runner, project_init assert 0 == result.exit_code, format_result_exception(result) project = Database.from_path(Path(data["test_project"]) / ".renku" / "metadata").get("project") - metadata = json.loads(project.template_metadata) + metadata = json.loads(project.template_metadata.metadata) assert metadata["bool_var"] is False assert "maybe" == metadata["enum_var"] assert "some description" == metadata["description"] @@ -459,7 +459,7 @@ def test_init_with_description(isolated_runner, template): assert "new project" == project.name assert project.id.endswith("new-project") # make sure id uses slug version of name without space - assert "my project description" in project.template_metadata + assert "my project description" in project.template_metadata.metadata assert "my project description" == project.description readme_content = (Path("new project") / "README.md").read_text() diff --git a/tests/cli/test_migrate.py b/tests/cli/test_migrate.py index a5858f24ea..2a44a3d10b 100644 --- a/tests/cli/test_migrate.py +++ b/tests/cli/test_migrate.py @@ -124,24 +124,6 @@ def test_correct_relative_path(isolated_runner, old_project, with_injection): assert len(list(datasets_provenance.datasets)) > 0 -@pytest.mark.migration -def test_remove_committed_lock_file(isolated_runner, old_project): - """Check that renku lock file has been successfully removed from git.""" - (old_project.path / ".renku.lock").write_text("lock") - - old_project.repository.add(".renku.lock", force=True) - old_project.repository.commit("locked") - - result = isolated_runner.invoke(cli, ["migrate", "--strict"]) - assert 0 == result.exit_code, format_result_exception(result) - - assert not (old_project.path / ".renku.lock").exists() - assert not old_project.repository.is_dirty(untracked_files=True) - - ignored = (old_project.path / ".gitignore").read_text() - assert ".renku.lock" in ignored - - @pytest.mark.migration def 
test_graph_building_after_migration(isolated_runner, old_project): """Check that structural migration did not break graph building.""" @@ -417,31 +399,34 @@ def test_migrate_can_preserve_dataset_ids(isolated_runner, old_dataset_project, @pytest.mark.migration -def test_migrate_preserves_creation_date_when_preserving_ids(isolated_runner, old_dataset_project): - """Test migrate doesn't change dataset's dateCreated when --preserve-identifiers is passed.""" +def test_migrate_preserves_date_when_preserving_ids(isolated_runner, old_dataset_project): + """Test migrate doesn't change dataset's dateCreated/Modified when --preserve-identifiers is passed.""" assert 0 == isolated_runner.invoke(cli, ["migrate", "--strict", "--preserve-identifiers"]).exit_code dataset = get_dataset_with_injection("mixed") assert "2020-08-10 21:35:20+00:00" == dataset.date_created.isoformat(" ") + assert "2020-08-10 21:35:20+00:00" == dataset.date_modified.isoformat(" ") @pytest.mark.migration @pytest.mark.parametrize("old_dataset_project", ["old-datasets-v0.16.0.git"], indirect=True) -def test_migrate_preserves_creation_date_for_mutated_datasets(isolated_runner, old_dataset_project): - """Test migration of datasets that were mutated keeps original dateCreated.""" +def test_migrate_preserves_date_for_mutated_datasets(isolated_runner, old_dataset_project): + """Test migration of datasets that were mutated keeps original dateCreated/Modified.""" assert 0 == isolated_runner.invoke(cli, ["migrate", "--strict"]).exit_code dataset = get_dataset_with_injection("local") - assert "2021-07-23 14:34:58+00:00" == dataset.date_created.isoformat(" ") + assert "2021-07-23 14:34:24+00:00" == dataset.date_created.isoformat(" ") + assert "2021-07-23 14:34:58+00:00" == dataset.date_modified.isoformat(" ") @pytest.mark.migration -def test_migrate_sets_correct_creation_date_for_non_mutated_datasets(isolated_runner, old_dataset_project): - """Test migration of datasets that weren't mutated uses commit date as 
dateCreated.""" +def test_migrate_sets_correct_date_for_non_mutated_datasets(isolated_runner, old_dataset_project): + """Test migration of datasets that weren't mutated uses commit date as dateCreated/Modified.""" assert 0 == isolated_runner.invoke(cli, ["migrate", "--strict"]).exit_code dataset = get_dataset_with_injection("mixed") - assert "2020-08-10 23:35:56+02:00" == dataset.date_created.isoformat(" ") + assert "2020-08-10 21:35:20+00:00" == dataset.date_created.isoformat(" ") + assert "2020-08-10 23:35:56+02:00" == dataset.date_modified.isoformat(" ") diff --git a/tests/cli/test_template.py b/tests/cli/test_template.py index de69cb306d..2f629ab51a 100644 --- a/tests/cli/test_template.py +++ b/tests/cli/test_template.py @@ -135,7 +135,7 @@ def test_template_set_failure(runner, project, with_injection): assert 1 == result.exit_code, format_result_exception(result) assert "Project already has a template" in result.output with with_injection(): - assert "python-minimal" == project_context.project.template_id + assert "python-minimal" == project_context.project.template_metadata.template_id def test_template_set(runner, project, with_injection): @@ -146,9 +146,9 @@ def test_template_set(runner, project, with_injection): assert 0 == result.exit_code, format_result_exception(result) with with_injection(): - assert "R-minimal" == project_context.project.template_id - assert __template_version__ == project_context.project.template_version - assert __template_version__ == project_context.project.template_ref + assert "R-minimal" == project_context.project.template_metadata.template_id + assert __template_version__ == project_context.project.template_metadata.template_version + assert __template_version__ == project_context.project.template_metadata.template_ref result = runner.invoke(cli, ["graph", "export", "--format", "json-ld", "--strict"]) assert 0 == result.exit_code, format_result_exception(result) @@ -162,7 +162,7 @@ def 
test_template_set_overwrites_modified(runner, project, with_injection): assert 0 == result.exit_code, format_result_exception(result) with with_injection(): - assert "R-minimal" == project_context.project.template_id + assert "R-minimal" == project_context.project.template_metadata.template_id assert "my-modifications" not in (project.path / "Dockerfile").read_text() assert not project.repository.is_dirty(untracked_files=True) @@ -176,7 +176,7 @@ def test_template_set_interactive(runner, project, with_injection, overwrite, fo assert 0 == result.exit_code, format_result_exception(result) with with_injection(): - assert "R-minimal" == project_context.project.template_id + assert "R-minimal" == project_context.project.template_metadata.template_id assert ("my-modifications" in (project.path / "Dockerfile").read_text()) is found assert not project.repository.is_dirty(untracked_files=True) @@ -239,18 +239,18 @@ def test_template_update(runner, project, with_injection): assert 0 == result.exit_code, format_result_exception(result) with with_injection(): - assert "python-minimal" == project_context.project.template_id - assert "0.3.2" == project_context.project.template_ref - assert "b9ab266fba136bdecfa91dc8d7b6d36b9d427012" == project_context.project.template_version + assert "python-minimal" == project_context.project.template_metadata.template_id + assert "0.3.2" == project_context.project.template_metadata.template_ref + assert "b9ab266fba136bdecfa91dc8d7b6d36b9d427012" == project_context.project.template_metadata.template_version result = runner.invoke(cli, ["template", "update"]) assert 0 == result.exit_code, format_result_exception(result) assert "Template is up-to-date" not in result.output with with_injection(): - assert "python-minimal" == project_context.project.template_id - assert Version(project_context.project.template_ref) > Version("0.3.2") - assert "6c59d8863841baeca8f30062fd16c650cf67da3b" != project_context.project.template_version + assert 
"python-minimal" == project_context.project.template_metadata.template_id + assert Version(project_context.project.template_metadata.template_ref) > Version("0.3.2") + assert "6c59d8863841baeca8f30062fd16c650cf67da3b" != project_context.project.template_metadata.template_version result = runner.invoke(cli, ["template", "update"]) diff --git a/tests/core/commands/test_graph.py b/tests/core/commands/test_graph.py index 9c882fc5e9..ff8dcfbf88 100644 --- a/tests/core/commands/test_graph.py +++ b/tests/core/commands/test_graph.py @@ -87,6 +87,7 @@ def input_objects(request, dataset_model, activity_model, plan_model): "@type": ["http://schema.org/Dataset", "http://www.w3.org/ns/prov#Entity"], "http://schema.org/creator": [], "http://schema.org/dateCreated": [{"@value": "2022-07-12T16:29:14+02:00"}], + "http://schema.org/dateModified": [{"@value": "2022-07-12T16:29:14+02:00"}], "http://schema.org/hasPart": [{"@id": "/dataset-files/2ac728d65fec48afbfa5e027eb3abd71"}], "http://schema.org/identifier": [{"@value": "14249f1571fb4a2786ddeb7f706b9833"}], "http://schema.org/image": [], @@ -172,6 +173,7 @@ def input_objects(request, dataset_model, activity_model, plan_model): ], "http://schema.org/creator": [{"@id": "mailto:john.doe@example.com"}], "http://schema.org/dateCreated": [{"@value": "2022-07-12T16:29:14+02:00"}], + "http://schema.org/dateModified": [{"@value": "2022-07-12T16:29:14+02:00"}], "http://schema.org/keywords": [], "http://schema.org/name": [{"@value": "my-plan"}], "https://swissdatasciencecenter.github.io/renku-ontology#command": [{"@value": "echo"}], @@ -210,6 +212,7 @@ def input_objects(request, dataset_model, activity_model, plan_model): ], "http://schema.org/creator": [{"@id": "mailto:john.doe@example.com"}], "http://schema.org/dateCreated": [{"@value": "2022-07-12T16:29:14+02:00"}], + "http://schema.org/dateModified": [{"@value": "2022-07-12T16:29:14+02:00"}], "http://schema.org/keywords": [], "http://schema.org/name": [{"@value": "my-plan"}], 
"https://swissdatasciencecenter.github.io/renku-ontology#command": [{"@value": "echo"}], @@ -268,12 +271,15 @@ def test_graph_export_full(): ], derived_from=Url(url_id="/datasets/0000000aaaaaaa"), initial_identifier="abcdefg", + date_created=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), + date_modified=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), ) ] dataset_gateway.get_by_id.return_value = Dataset( id="/datasets/0000000aaaaaaa", name="my-dataset", date_created=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), + date_modified=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), identifier="abcdefg", initial_identifier="abcdefg", ) @@ -282,6 +288,7 @@ def test_graph_export_full(): spec=DatasetTag, dataset_id=Url(url="/datasets/abcdefg12345"), date_created=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), + date_modified=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), id="/dataset-tags/my-tag", name="my-tag", ) @@ -292,6 +299,7 @@ def test_graph_export_full(): command="echo", name="echo", date_created=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), + date_modified=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), creators=[Person(email="test@example.com", name="John Doe")], ) @@ -314,6 +322,7 @@ def test_graph_export_full(): id="/plans/composite1", name="composite", date_created=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), + date_modified=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), creators=[Person(email="test@example.com", name="John Doe")], plans=[plan], ), @@ -352,6 +361,7 @@ def test_graph_export_full(): ], "http://schema.org/creator": [{"@id": "mailto:test@example.com"}], "http://schema.org/dateCreated": [{"@value": "2022-07-12T16:29:14+02:00"}], + "http://schema.org/dateModified": [{"@value": "2022-07-12T16:29:14+02:00"}], "http://schema.org/keywords": [], "http://schema.org/name": [{"@value": "echo"}], "https://swissdatasciencecenter.github.io/renku-ontology#command": [{"@value": "echo"}], @@ -370,6 +380,7 @@ 
def test_graph_export_full(): ], "http://schema.org/creator": [{"@id": "mailto:test@example.com"}], "http://schema.org/dateCreated": [{"@value": "2022-07-12T16:29:14+02:00"}], + "http://schema.org/dateModified": [{"@value": "2022-07-12T16:29:14+02:00"}], "http://schema.org/keywords": [], "http://schema.org/name": [{"@value": "composite"}], "https://swissdatasciencecenter.github.io/renku-ontology#hasMappings": [], @@ -417,6 +428,8 @@ def test_graph_export_full(): "http://schema.org/hasPart": [{"@id": "/dataset-files/abcdefg123456789"}], "http://www.w3.org/ns/prov#wasDerivedFrom": [{"@id": "/urls/datasets/0000000aaaaaaa"}], "https://swissdatasciencecenter.github.io/renku-ontology#originalIdentifier": [{"@value": "abcdefg"}], + "http://schema.org/dateCreated": [{"@value": "2022-07-12T16:29:14+02:00"}], + "http://schema.org/dateModified": [{"@value": "2022-07-12T16:29:14+02:00"}], }, { "@id": "/entities/1234567890/data/my-dataset", @@ -449,6 +462,7 @@ def test_graph_export_full(): "@type": ["http://schema.org/Dataset", "http://www.w3.org/ns/prov#Entity"], "http://schema.org/creator": [], "http://schema.org/dateCreated": [{"@value": "2022-07-12T16:29:14+02:00"}], + "http://schema.org/dateModified": [{"@value": "2022-07-12T16:29:14+02:00"}], "http://schema.org/hasPart": [], "http://schema.org/identifier": [{"@value": "abcdefg"}], "http://schema.org/image": [], diff --git a/tests/core/commands/test_log.py b/tests/core/commands/test_log.py index 9003826c10..dfc6762ddf 100644 --- a/tests/core/commands/test_log.py +++ b/tests/core/commands/test_log.py @@ -265,7 +265,7 @@ def test_log_dataset_add_create(mocker): assert entry.details.created assert not entry.details.imported assert not entry.details.migrated - assert entry.details.modified + assert not entry.details.modified def test_log_dataset_import(mocker): @@ -323,7 +323,7 @@ def test_log_dataset_import(mocker): assert not entry.details.created assert entry.details.imported assert not entry.details.migrated - assert 
entry.details.modified + assert not entry.details.modified def test_log_dataset_deleted(mocker): diff --git a/tests/core/commands/test_merge.py b/tests/core/commands/test_merge.py index 97b037a053..2ca86bc8d6 100644 --- a/tests/core/commands/test_merge.py +++ b/tests/core/commands/test_merge.py @@ -22,7 +22,7 @@ from BTrees.OOBTree import BTree from renku.domain_model.dataset import Dataset -from renku.domain_model.project import Project +from renku.domain_model.project import Project, ProjectTemplateMetadata from renku.domain_model.provenance.agent import Person from renku.domain_model.workflow.plan import Plan from renku.infrastructure.database import Index @@ -309,45 +309,45 @@ def test_merge_project_both_template_changed(mocker): base_project = Project( creator=Person.from_string("John Doe "), name="my-project", - template_id="python-minimal", - template_source="renku", - template_ref="master", - template_version="abcdef", + template_metadata=ProjectTemplateMetadata( + template_id="python-minimal", + template_source="renku", + template_ref="master", + template_version="abcdef", + ), ) local_project = Project( creator=Person.from_string("John Doe "), name="my-project", - template_id="python-minimal1", - template_source="renku1", - template_ref="master1", - template_version="12345", + template_metadata=ProjectTemplateMetadata( + template_id="python-minimal1", + template_source="renku1", + template_ref="master1", + template_version="12345", + ), ) remote_project = Project( creator=Person.from_string("John Doe "), name="my-project", - template_id="python-minimal2", - template_source="renku2", - template_ref="master2", - template_version="78910", + template_metadata=ProjectTemplateMetadata( + template_id="python-minimal2", + template_source="renku2", + template_ref="master2", + template_version="78910", + ), ) mocker.patch("renku.infrastructure.git_merger.communication.prompt", mocker.MagicMock(return_value="l")) result = GitMerger().merge_projects(local_project, 
remote_project, base_project) - assert result.template_version == local_project.template_version - assert result.template_source == local_project.template_source - assert result.template_ref == local_project.template_ref - assert result.template_id == local_project.template_id + assert result.template_metadata == local_project.template_metadata mocker.patch("renku.infrastructure.git_merger.communication.prompt", mocker.MagicMock(return_value="r")) result = GitMerger().merge_projects(local_project, remote_project, base_project) - assert result.template_version == remote_project.template_version - assert result.template_source == remote_project.template_source - assert result.template_ref == remote_project.template_ref - assert result.template_id == remote_project.template_id + assert result.template_metadata == remote_project.template_metadata def test_merge_project_local_template_changed(): @@ -355,34 +355,37 @@ def test_merge_project_local_template_changed(): base_project = Project( creator=Person.from_string("John Doe "), name="my-project", - template_id="python-minimal", - template_source="renku", - template_ref="master", - template_version="abcdef", + template_metadata=ProjectTemplateMetadata( + template_id="python-minimal", + template_source="renku", + template_ref="master", + template_version="abcdef", + ), ) local_project = Project( creator=Person.from_string("John Doe "), name="my-project", - template_id="python-minimal1", - template_source="renku1", - template_ref="master1", - template_version="12345", + template_metadata=ProjectTemplateMetadata( + template_id="python-minimal1", + template_source="renku1", + template_ref="master1", + template_version="12345", + ), ) remote_project = Project( creator=Person.from_string("John Doe "), name="my-project", - template_id="python-minimal", - template_source="renku", - template_ref="master", - template_version="abcdef", + template_metadata=ProjectTemplateMetadata( + template_id="python-minimal", + 
template_source="renku", + template_ref="master", + template_version="abcdef", + ), ) result = GitMerger().merge_projects(local_project, remote_project, base_project) - assert result.template_version == local_project.template_version - assert result.template_source == local_project.template_source - assert result.template_ref == local_project.template_ref - assert result.template_id == local_project.template_id + assert result.template_metadata == local_project.template_metadata def test_merge_project_remote_template_changed(): @@ -390,31 +393,34 @@ def test_merge_project_remote_template_changed(): base_project = Project( creator=Person.from_string("John Doe "), name="my-project", - template_id="python-minimal", - template_source="renku", - template_ref="master", - template_version="abcdef", + template_metadata=ProjectTemplateMetadata( + template_id="python-minimal", + template_source="renku", + template_ref="master", + template_version="abcdef", + ), ) local_project = Project( creator=Person.from_string("John Doe "), name="my-project", - template_id="python-minimal", - template_source="renku", - template_ref="master", - template_version="abcdef", + template_metadata=ProjectTemplateMetadata( + template_id="python-minimal", + template_source="renku", + template_ref="master", + template_version="abcdef", + ), ) remote_project = Project( creator=Person.from_string("John Doe "), name="my-project", - template_id="python-minimal1", - template_source="renku1", - template_ref="master1", - template_version="12345", + template_metadata=ProjectTemplateMetadata( + template_id="python-minimal1", + template_source="renku1", + template_ref="master1", + template_version="12345", + ), ) result = GitMerger().merge_projects(local_project, remote_project, base_project) - assert result.template_version == remote_project.template_version - assert result.template_source == remote_project.template_source - assert result.template_ref == remote_project.template_ref - assert 
result.template_id == remote_project.template_id + assert result.template_metadata == remote_project.template_metadata diff --git a/tests/core/commands/test_workflow.py b/tests/core/commands/test_workflow.py index ee5c17132a..2cba9e4f6a 100644 --- a/tests/core/commands/test_workflow.py +++ b/tests/core/commands/test_workflow.py @@ -19,16 +19,13 @@ from contextlib import nullcontext -from datetime import datetime import pytest -from renku.command.schema.plan import PlanSchema from renku.core import errors from renku.core.workflow.model.concrete_execution_graph import ExecutionGraph from renku.core.workflow.value_resolution import CompositePlanValueResolver from renku.domain_model.workflow.composite_plan import CompositePlan -from renku.domain_model.workflow.plan import Plan def _get_nested_actual_values(run): @@ -504,14 +501,3 @@ def test_composite_plan_auto_links(composite_plan, mappings, defaults, links, ra with maybe_raises: for virtual_link in graph.virtual_links: grouped.add_link(virtual_link[0], [virtual_link[1]]) - - -def test_plan_invalidated_at_datetime_export(): - """The invalidated_at has a timezone on export.""" - plan = Plan(id=Plan.generate_id(), name="p1", command="/bin/sh") - plan.invalidated_at = datetime.utcnow() - - dumped = PlanSchema().dump(plan) - - date = datetime.fromisoformat(dumped["http://www.w3.org/ns/prov#invalidatedAtTime"]) - assert date.tzinfo is not None diff --git a/tests/core/metadata/test_plan_gateway.py b/tests/core/metadata/test_plan_gateway.py index b5c7dc8f9f..f6369203fb 100644 --- a/tests/core/metadata/test_plan_gateway.py +++ b/tests/core/metadata/test_plan_gateway.py @@ -52,12 +52,8 @@ def test_plan_gateway_newest_plans(project_with_injection): """Test getting newest plans.""" plan = Plan(id=Plan.generate_id(), name="plan", command="") plan2 = Plan(id=Plan.generate_id(), name="plan", command="") - invalidated_plan = Plan( - id=Plan.generate_id(), name="invalidated_plan", command="", invalidated_at=datetime.utcnow() - ) - 
invalidated_plan2 = Plan( - id=Plan.generate_id(), name="invalidated_plan", command="", invalidated_at=datetime.utcnow() - ) + invalidated_plan = Plan(id=Plan.generate_id(), name="invalidated_plan", command="", date_removed=datetime.utcnow()) + invalidated_plan2 = Plan(id=Plan.generate_id(), name="invalidated_plan", command="", date_removed=datetime.utcnow()) plan_gateway = PlanGateway() diff --git a/tests/data/pre-2.0-project.git.tar.gz b/tests/data/pre-2.0-project.git.tar.gz new file mode 100644 index 0000000000..7cfc99ebbe Binary files /dev/null and b/tests/data/pre-2.0-project.git.tar.gz differ diff --git a/tests/fixtures/domain_models.py b/tests/fixtures/domain_models.py index 789b739380..0072ae7236 100644 --- a/tests/fixtures/domain_models.py +++ b/tests/fixtures/domain_models.py @@ -64,6 +64,7 @@ def _create_dataset(name="my-dataset", num_files=2, identifier="14249f1571fb4a27 name=name, dataset_files=files, date_created=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), + date_modified=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), ) yield _create_dataset @@ -110,6 +111,7 @@ def _create_plan(name="my-plan", command="echo", identifier="7f8bcaa36ef844528b8 name=name, command=command, date_created=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), + date_modified=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), creators=[ Person( id=Person.generate_id(email="john.doe@example.com", full_identity="john.doe"), @@ -137,6 +139,7 @@ def _create_composite(name="my-composite", num_steps=2): name=name, plans=steps, date_created=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), + date_modified=datetime.fromisoformat("2022-07-12T16:29:14+02:00"), ) yield _create_composite diff --git a/tests/utils.py b/tests/utils.py index c25b7c275e..505ec2d058 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -89,10 +89,11 @@ def assert_dataset_is_mutated(old: "Dataset", new: "Dataset", mutator=None): assert old.identifier != new.identifier assert new.derived_from is 
not None assert old.id == new.derived_from.url_id - if old.date_created and new.date_created: - assert old.date_created <= new.date_created + if old.date_created: + assert old.date_created == new.date_created + if old.date_published: + assert old.date_published == new.date_published assert new.same_as is None - assert new.date_published is None assert new.identifier in new.id if mutator: @@ -115,10 +116,10 @@ def modified_environ(*remove, **update): """ env = os.environ update = update or {} - remove = remove or [] + remove_list = list(remove) or [] # List of environment variables being updated or removed. - stomped = (set(update.keys()) | set(remove)) & set(env.keys()) + stomped = (set(update.keys()) | set(remove_list)) & set(env.keys()) # Environment variables and values to restore on exit. update_after = {k: env[k] for k in stomped} # Environment variables and values to remove on exit. @@ -126,7 +127,7 @@ def modified_environ(*remove, **update): try: env.update(update) - [env.pop(k, None) for k in remove] + [env.pop(k, None) for k in remove_list] yield finally: env.update(update_after)