Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(dataset): unset date_created after import #2373

Merged
merged 2 commits into from
Sep 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion renku/core/commands/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def _add_to_dataset(
dataset.update_metadata_from(with_metadata)

# TODO: Remove this once we have a proper database dispatcher for injection
# we need to commit because "project clone" changes injection, so tha database instance here
# we need to commit because "project clone" changes injection, so the database instance here
# is not the same as the one in CommandBuilder
database_gateway.commit()

Expand Down
12 changes: 6 additions & 6 deletions renku/core/management/migrations/models/v9.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
from renku.core.models.git import get_user_info
from renku.core.models.provenance.annotation import AnnotationSchema
from renku.core.models.refs import LinkReference
from renku.core.utils.datetime8601 import parse_date
from renku.core.utils.datetime8601 import fix_datetime, parse_date
from renku.core.utils.doi import extract_doi, is_doi
from renku.core.utils.urls import get_host, get_slug
from renku.version import __version__, version_url
Expand Down Expand Up @@ -1875,7 +1875,7 @@ def fix_datetimes(self, obj, many=False, **kwargs):
"""Pre dump hook."""
if many:
return [self.fix_datetimes(o, many=False, **kwargs) for o in obj]
obj.created = self._fix_timezone(obj.created)
obj.created = fix_datetime(obj.created)
return obj


Expand Down Expand Up @@ -1978,7 +1978,7 @@ def fix_datetimes(self, obj, many=False, **kwargs):
"""Pre dump hook."""
if many:
return [self.fix_datetimes(o, many=False, **kwargs) for o in obj]
object.__setattr__(obj, "created", self._fix_timezone(obj.created))
object.__setattr__(obj, "created", fix_datetime(obj.created))
return obj


Expand Down Expand Up @@ -2018,7 +2018,7 @@ def fix_datetimes(self, obj, many=False, **kwargs):
"""Pre dump hook."""
if many:
return [self.fix_datetimes(o, many=False, **kwargs) for o in obj]
obj.added = self._fix_timezone(obj.added)
obj.added = fix_datetime(obj.added)
return obj


Expand Down Expand Up @@ -2079,8 +2079,8 @@ def fix_datetimes(self, obj, many=False, **kwargs):
"""Pre dump hook."""
if many:
return [self.fix_datetimes(o, many=False, **kwargs) for o in obj]
obj.date_published = self._fix_timezone(obj.date_published)
obj.date_created = self._fix_timezone(obj.date_created)
obj.date_published = fix_datetime(obj.date_published)
obj.date_created = fix_datetime(obj.date_created)
return obj


Expand Down
6 changes: 0 additions & 6 deletions renku/core/models/calamus.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
from calamus.utils import normalize_type, normalize_value
from marshmallow.base import SchemaABC

from renku.core.utils.datetime8601 import fix_timezone

prov = fields.Namespace("http://www.w3.org/ns/prov#")
rdfs = fields.Namespace("http://www.w3.org/2000/01/rdf-schema#")
renku = fields.Namespace("https://swissdatasciencecenter.github.io/renku-ontology#")
Expand Down Expand Up @@ -80,10 +78,6 @@ def _add_field_to_data(self, data, name, value):
raise ValueError(f"Field {name} is already in data {data}")
data[name] = value

def _fix_timezone(self, value):
"""Fix timezone of non-aware datetime objects."""
return fix_timezone(value)


class Uri(fields._JsonLDField, marshmallow.fields.String, marshmallow.fields.Dict):
"""A Dict/String field."""
Expand Down
21 changes: 13 additions & 8 deletions renku/core/models/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from renku.core.models.entity import CollectionSchema, Entity, EntitySchema
from renku.core.models.provenance.agent import Person, PersonSchema, SoftwareAgent
from renku.core.models.provenance.annotation import Annotation, AnnotationSchema
from renku.core.utils.datetime8601 import fix_timezone, local_now, parse_date
from renku.core.utils.datetime8601 import fix_datetime, local_now, parse_date
from renku.core.utils.git import get_path
from renku.core.utils.urls import get_slug

Expand Down Expand Up @@ -237,8 +237,8 @@ def __init__(
super().__init__()

self.based_on: RemoteEntity = based_on
self.date_added: datetime = fix_timezone(date_added) or local_now()
self.date_removed: datetime = fix_timezone(date_removed)
self.date_added: datetime = fix_datetime(date_added) or local_now()
self.date_removed: datetime = fix_datetime(date_removed)
self.entity: Entity = entity
self.id: str = id or DatasetFile.generate_id()
self.is_external: bool = is_external
Expand Down Expand Up @@ -292,7 +292,7 @@ def is_equal_to(self, other: "DatasetFile"):

def remove(self, date: datetime = None):
    """Mark this file as removed, stamping the removal time.

    Uses ``date`` (normalized via ``fix_datetime``) when given, otherwise
    the current local time.
    """
    removal_time = fix_datetime(date) or local_now()
    self.date_removed = removal_time

def is_removed(self) -> bool:
Expand Down Expand Up @@ -341,6 +341,8 @@ def __init__(
# if `date_published` is set, we are probably dealing with an imported dataset so `date_created` is not needed
if date_published:
date_created = None
else:
date_created = fix_datetime(date_created) or local_now()
if initial_identifier is None:
assert identifier is None, "Initial identifier can be None only when creating a new Dataset."
initial_identifier = identifier = uuid4().hex
Expand All @@ -352,9 +354,9 @@ def __init__(
self.creators: List[Person] = creators or []
# `dataset_files` includes existing files and those that have been removed in the previous version
self.dataset_files: List[DatasetFile] = dataset_files or []
self.date_created: datetime = fix_timezone(date_created) or local_now()
self.date_published: datetime = fix_timezone(date_published)
self.date_removed: datetime = fix_timezone(date_removed)
self.date_created: datetime = date_created
self.date_published: datetime = fix_datetime(date_published)
self.date_removed: datetime = fix_datetime(date_removed)
self.derived_from: Url = derived_from
self.description: str = description
self.images: List[ImageObject] = images or []
Expand Down Expand Up @@ -478,7 +480,7 @@ def _assign_new_identifier(self, identifier: str):

def remove(self, date: datetime = None):
    """Mark the dataset as removed at ``date`` (default: current local time)."""
    when = fix_datetime(date)
    self.date_removed = when if when else local_now()

def is_removed(self) -> bool:
"""Return true if dataset is removed."""
Expand Down Expand Up @@ -534,6 +536,9 @@ def update_metadata_from(self, other: "Dataset"):
value = getattr(other, name)
setattr(self, name, value)

if self.date_published is not None:
self.date_created = None

def update_metadata(self, **kwargs):
"""Updates metadata."""
editable_attributes = ["creators", "description", "keywords", "title"]
Expand Down
4 changes: 2 additions & 2 deletions renku/core/models/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from renku.core.models.calamus import DateTimeList, JsonLDSchema, Nested, StringList, fields, oa, prov, renku, schema
from renku.core.models.provenance.agent import Person, PersonSchema
from renku.core.models.provenance.annotation import Annotation, AnnotationSchema
from renku.core.utils.datetime8601 import fix_timezone, local_now, parse_date
from renku.core.utils.datetime8601 import fix_datetime, local_now, parse_date
from renku.core.utils.scm import normalize_to_ascii


Expand Down Expand Up @@ -67,7 +67,7 @@ def __init__(
self.annotations: List[Annotation] = annotations or []
self.automated_update: bool = automated_update
self.creator: Person = creator
self.date_created: datetime = fix_timezone(date_created) or local_now()
self.date_created: datetime = fix_datetime(date_created) or local_now()
self.description: str = description
self.id: str = id
self.immutable_template_files: List[str] = immutable_template_files
Expand Down
15 changes: 10 additions & 5 deletions renku/core/utils/datetime8601.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,17 @@ def parse_date(value):
return date


def fix_timezone(value):
"""Fix timezone of non-aware datetime objects."""
def fix_datetime(value):
    """Fix timezone of non-aware datetime objects and remove microseconds.

    ``None`` and non-datetime values are passed through unchanged.
    """
    if value is None:
        return None

    if not isinstance(value, datetime):
        return value

    # Naive datetimes are assumed to be in the local timezone.
    result = value if value.tzinfo else _set_to_local_timezone(value)
    if result.microsecond:
        result = result.replace(microsecond=0)
    return result


Expand All @@ -70,4 +75,4 @@ def _set_to_local_timezone(value):

def local_now():
    """Return current datetime in local timezone (second precision)."""
    utc_now = datetime.now(timezone.utc).replace(microsecond=0)
    return utc_now.astimezone()
6 changes: 5 additions & 1 deletion tests/cli/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import os
import shutil
import textwrap
import time
from pathlib import Path

import git
Expand Down Expand Up @@ -1680,20 +1681,23 @@ def test_immutability_for_files(directory_tree, runner, client, load_dataset_wit

old_dataset = load_dataset_with_injection("my-data", client)

time.sleep(1)
# Add some files
assert 0 == runner.invoke(cli, ["dataset", "add", "my-data", str(directory_tree)]).exit_code

dataset = load_dataset_with_injection("my-data", client)
assert_dataset_is_mutated(old=old_dataset, new=dataset)
old_dataset = dataset

time.sleep(1)
# Add the same files again; it should mutate because files addition dates change
assert 0 == runner.invoke(cli, ["dataset", "add", "my-data", "--overwrite", str(directory_tree)]).exit_code

dataset = load_dataset_with_injection("my-data", client)
assert_dataset_is_mutated(old=old_dataset, new=dataset)
old_dataset = dataset

time.sleep(1)
# Remove some files
assert 0 == runner.invoke(cli, ["dataset", "unlink", "my-data", "-I", "file1", "--yes"]).exit_code

Expand Down Expand Up @@ -1856,7 +1860,7 @@ def test_datasets_provenance_after_add_with_overwrite(
):
"""Test datasets provenance is updated if adding and overwriting same files."""
assert 0 == runner.invoke(cli, ["dataset", "add", "my-data", "--create", str(directory_tree)]).exit_code

time.sleep(1)
assert 0 == runner.invoke(cli, ["dataset", "add", "my-data", "--overwrite", str(directory_tree)]).exit_code

with get_datasets_provenance_with_injection(client) as datasets_provenance:
Expand Down
5 changes: 4 additions & 1 deletion tests/cli/test_integration_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
from tests.utils import assert_dataset_is_mutated, format_result_exception, retry_failed, with_dataset


@pytest.mark.skip("Add dataset doesn't store the dataset, investigate why this fails")
@pytest.mark.integration
@retry_failed
@pytest.mark.parametrize(
Expand Down Expand Up @@ -87,6 +86,8 @@ def test_dataset_import_real_doi(runner, client, doi, prefix, sleep_after, load_

dataset = load_dataset_with_injection(doi["name"], client)
assert doi["doi"] in dataset.same_as.url
assert dataset.date_created is None
assert dataset.date_published is not None


@pytest.mark.parametrize(
Expand Down Expand Up @@ -1013,6 +1014,8 @@ def test_dataset_update_zenodo(client, runner, doi, load_dataset_with_injection)
assert after_dataset.derived_from is None
assert after_dataset.same_as is not None
assert after_dataset.same_as != before_dataset.same_as
assert after_dataset.date_created is None
assert after_dataset.date_published is not None


@pytest.mark.integration
Expand Down
4 changes: 2 additions & 2 deletions tests/cli/test_migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def test_comprehensive_dataset_migration(
assert "Cornell University" == dataset.creators[0].affiliation
assert "Rooth, Mats" == dataset.creators[0].name
assert dataset.date_published is None
assert "2020-08-10T21:35:05.115412+00:00" == dataset.date_created.isoformat("T")
assert "2020-08-10T21:35:05+00:00" == dataset.date_created.isoformat("T")
assert "Replication material for a paper to be presented" in dataset.description
assert "https://doi.org/10.7910/DVN/EV6KLF" == dataset.same_as.url
assert "1" == tags[0].name
Expand All @@ -203,7 +203,7 @@ def test_comprehensive_dataset_migration(

file_ = dataset.find_file("data/dataverse/copy.sh")
assert "https://dataverse.harvard.edu/api/access/datafile/3050656" == file_.source
assert "2020-08-10T21:35:10.877832+00:00" == file_.date_added.isoformat("T")
assert "2020-08-10T21:35:10+00:00" == file_.date_added.isoformat("T")
assert file_.based_on is None
assert not hasattr(file_, "creators")

Expand Down
8 changes: 8 additions & 0 deletions tests/cli/test_rerun.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import os
import subprocess
import time
from pathlib import Path

import git
Expand Down Expand Up @@ -247,7 +248,9 @@ def test_rerun_overridden_output(project, renku_cli, runner):
write_and_commit_file(repo, a, "content")

assert 0 == runner.invoke(cli, ["run", "--name", "r1", "cp", a, b]).exit_code
time.sleep(1)
assert 0 == runner.invoke(cli, ["run", "--name", "r2", "cp", b, c]).exit_code
time.sleep(1)
assert 0 == renku_cli("run", "--name", "r3", "wc", a, stdout=c).exit_code

result = runner.invoke(cli, ["rerun", "--dry-run", c])
Expand All @@ -269,7 +272,9 @@ def test_rerun_overridden_outputs_partially(project, renku_cli, runner):
write_and_commit_file(repo, a, "content")

assert 0 == runner.invoke(cli, ["run", "--name", "r1", "cp", a, b]).exit_code
time.sleep(1)
assert 0 == renku_cli("run", "--name", "r2", "tee", c, d, stdin=b).exit_code
time.sleep(1)
assert 0 == renku_cli("run", "--name", "r3", "wc", a, stdout=c).exit_code

result = runner.invoke(cli, ["rerun", "--dry-run", c])
Expand Down Expand Up @@ -299,8 +304,11 @@ def test_rerun_multiple_paths_common_output(project, renku_cli, runner):
write_and_commit_file(repo, a, "content")

assert 0 == runner.invoke(cli, ["run", "--name", "r1", "cp", a, b]).exit_code
time.sleep(1)
assert 0 == runner.invoke(cli, ["run", "--name", "r2", "cp", b, d]).exit_code
time.sleep(1)
assert 0 == runner.invoke(cli, ["run", "--name", "r3", "cp", a, c]).exit_code
time.sleep(1)
assert 0 == renku_cli("run", "--name", "r4", "wc", c, stdout=d).exit_code

result = runner.invoke(cli, ["rerun", "--dry-run", d])
Expand Down
8 changes: 8 additions & 0 deletions tests/cli/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"""Test ``update`` command."""

import os
import time
from pathlib import Path

import git
Expand Down Expand Up @@ -348,7 +349,9 @@ def test_update_overridden_output(project, renku_cli, runner):
write_and_commit_file(repo, a, "content")

assert 0 == runner.invoke(cli, ["run", "--name", "r1", "cp", a, b]).exit_code
time.sleep(1)
assert 0 == runner.invoke(cli, ["run", "--name", "r2", "cp", b, c]).exit_code
time.sleep(1)
assert 0 == renku_cli("run", "--name", "r3", "wc", a, stdout=c).exit_code

write_and_commit_file(repo, a, "new content")
Expand All @@ -372,7 +375,9 @@ def test_update_overridden_outputs_partially(project, renku_cli, runner):
write_and_commit_file(repo, a, "content")

assert 0 == runner.invoke(cli, ["run", "--name", "r1", "cp", a, b]).exit_code
time.sleep(1)
assert 0 == renku_cli("run", "--name", "r2", "tee", c, d, stdin=b).exit_code
time.sleep(1)
assert 0 == renku_cli("run", "--name", "r3", "wc", a, stdout=c).exit_code

write_and_commit_file(repo, a, "new content")
Expand All @@ -397,8 +402,11 @@ def test_update_multiple_paths_common_output(project, renku_cli, runner):
write_and_commit_file(repo, a, "content")

assert 0 == runner.invoke(cli, ["run", "--name", "r1", "cp", a, b]).exit_code
time.sleep(1)
assert 0 == runner.invoke(cli, ["run", "--name", "r2", "cp", b, d]).exit_code
time.sleep(1)
assert 0 == runner.invoke(cli, ["run", "--name", "r3", "cp", a, c]).exit_code
time.sleep(1)
assert 0 == renku_cli("run", "--name", "r4", "wc", c, stdout=d).exit_code

write_and_commit_file(repo, a, "new content")
Expand Down
3 changes: 2 additions & 1 deletion tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def assert_dataset_is_mutated(old: Dataset, new: Dataset, mutator=None):
assert old.id != new.id
assert old.identifier != new.identifier
assert old.id == new.derived_from.url_id
assert old.date_created != new.date_created
if old.date_created and new.date_created:
assert old.date_created <= new.date_created
assert new.same_as is None
assert new.date_published is None
assert new.identifier in new.id
Expand Down