Skip to content

Commit 4d4d7d2

Browse files
authored
fix: migration of old datasets (#639)
1 parent 3323c76 commit 4d4d7d2

21 files changed

+637
-241
lines changed

conftest.py

Lines changed: 41 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -251,7 +251,24 @@ def data_repository(directory_tree):
251251

252252

253253
@pytest.fixture(
254-
params=['test-renku-v0.3.0.git', 'old-datasets-v0.3.0.git'],
254+
params=[
255+
{
256+
'name': 'old-datasets-v0.3.0.git',
257+
'exit_code': 1
258+
},
259+
{
260+
'name': 'old-datasets-v0.5.0.git',
261+
'exit_code': 1
262+
},
263+
{
264+
'name': 'old-datasets-v0.5.1.git',
265+
'exit_code': 0
266+
},
267+
{
268+
'name': 'test-renku-v0.3.0.git',
269+
'exit_code': 1
270+
},
271+
],
255272
scope='module',
256273
)
257274
def old_bare_repository(request, tmpdir_factory):
@@ -261,13 +278,19 @@ def old_bare_repository(request, tmpdir_factory):
261278

262279
compressed_repo_path = Path(
263280
__file__
264-
).parent / 'tests' / 'fixtures' / '{0}.tar.gz'.format(request.param)
265-
working_dir_path = tmpdir_factory.mktemp(request.param)
281+
).parent / 'tests' / 'fixtures' / '{0}.tar.gz'.format(
282+
request.param['name']
283+
)
284+
285+
working_dir_path = tmpdir_factory.mktemp(request.param['name'])
266286

267287
with tarfile.open(str(compressed_repo_path), 'r') as fixture:
268288
fixture.extractall(working_dir_path.strpath)
269289

270-
yield working_dir_path / request.param
290+
yield {
291+
'path': working_dir_path / request.param['name'],
292+
'exit_code': request.param['exit_code']
293+
}
271294

272295
shutil.rmtree(working_dir_path.strpath)
273296

@@ -279,21 +302,29 @@ def old_repository(tmpdir_factory, old_bare_repository):
279302
from git import Repo
280303

281304
repo_path = tmpdir_factory.mktemp('repo')
282-
yield Repo(old_bare_repository.strpath).clone(repo_path.strpath)
305+
yield {
306+
'repo':
307+
Repo(old_bare_repository['path'].strpath).clone(repo_path.strpath),
308+
'exit_code': old_bare_repository['exit_code']
309+
}
283310
shutil.rmtree(repo_path.strpath)
284311

285312

286313
@pytest.fixture
287314
def old_project(old_repository):
288315
"""Create a test project."""
289-
repo = old_repository
290-
repository = repo.working_dir
316+
repo = old_repository['repo']
317+
repository_path = repo.working_dir
291318

292319
commit = repo.head.commit
293320

294-
os.chdir(repository)
295-
yield repository
296-
os.chdir(repository)
321+
os.chdir(repository_path)
322+
yield {
323+
'repo': repo,
324+
'path': repository_path,
325+
'exit_code': old_repository['exit_code']
326+
}
327+
os.chdir(repository_path)
297328
repo.head.reset(commit, index=True, working_tree=True)
298329
# remove any extra non-tracked files (.pyc, etc)
299330
repo.git.clean('-xdff')

renku/api/_git.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434

3535
from renku import errors
3636
from renku._compat import Path
37+
from renku.errors import NothingToCommit
3738

3839
COMMIT_DIFF_STRATEGY = 'DIFF'
3940
STARTED_AT = int(time.time() * 1e3)
@@ -229,7 +230,7 @@ def ensure_unstaged(self, path):
229230
pass
230231

231232
@contextmanager
232-
def commit(self, author_date=None, commit_only=None):
233+
def commit(self, author_date=None, commit_only=None, allow_empty=True):
233234
"""Automatic commit."""
234235
from git import Actor
235236
from renku.version import __version__, version_url
@@ -286,6 +287,9 @@ def commit(self, author_date=None, commit_only=None):
286287
if not commit_only:
287288
self.repo.git.add('--all')
288289

290+
if not allow_empty and not self.repo.index.diff('HEAD'):
291+
raise NothingToCommit()
292+
289293
argv = [os.path.basename(sys.argv[0])] + sys.argv[1:]
290294

291295
# Ignore pre-commit hooks since we have already done everything.
@@ -303,7 +307,8 @@ def transaction(
303307
up_to_date=False,
304308
commit=True,
305309
commit_only=None,
306-
ignore_std_streams=False
310+
ignore_std_streams=False,
311+
allow_empty=True,
307312
):
308313
"""Perform Git checks and operations."""
309314
if clean:
@@ -316,7 +321,7 @@ def transaction(
316321
pass
317322

318323
if commit:
319-
with self.commit(commit_only=commit_only):
324+
with self.commit(commit_only=commit_only, allow_empty=allow_empty):
320325
yield self
321326
else:
322327
yield self

renku/api/repository.py

Lines changed: 39 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
# See the License for the specific language governing permissions and
1717
# limitations under the License.
1818
"""Client for handling a local repository."""
19-
19+
import subprocess
2020
import uuid
2121
from collections import defaultdict
2222
from contextlib import contextmanager
@@ -99,11 +99,13 @@ def __attrs_post_init__(self):
9999

100100
# initialize submodules
101101
if self.repo:
102-
check_output([
103-
'git', 'submodule', 'update', '--init', '--recursive'
104-
],
105-
cwd=str(self.path))
106-
# TODO except
102+
try:
103+
check_output([
104+
'git', 'submodule', 'update', '--init', '--recursive'
105+
],
106+
cwd=str(self.path))
107+
except subprocess.CalledProcessError:
108+
pass
107109

108110
@property
109111
def lock(self):
@@ -251,42 +253,49 @@ def subclients(self, parent_commit):
251253
Submodule.iter_items(self.repo, parent_commit=parent_commit)
252254
]
253255
except (RuntimeError, ValueError):
254-
# There are no submodules assiciated with the given commit.
256+
# There are no submodules associated with the given commit.
255257
submodules = []
256258

257-
return self._subclients.setdefault(
258-
parent_commit, {
259-
submodule: self.__class__(
260-
path=(self.path / submodule.path).resolve(),
259+
subclients = {}
260+
for submodule in submodules:
261+
subpath = (self.path / submodule.path).resolve()
262+
is_renku = subpath / Path(self.renku_home)
263+
264+
if subpath.exists() and is_renku.exists():
265+
subclients[submodule] = self.__class__(
266+
path=subpath,
261267
parent=(self, submodule),
262268
)
263-
for submodule in submodules
264-
}
265-
)
269+
270+
return subclients
266271

267272
def resolve_in_submodules(self, commit, path):
268273
"""Resolve filename in submodules."""
269274
original_path = self.path / path
270-
if original_path.is_symlink() or str(path
271-
).startswith('.renku/vendors'):
275+
in_vendor = str(path).startswith('.renku/vendors')
276+
277+
if original_path.is_symlink() or in_vendor:
272278
original_path = original_path.resolve()
279+
273280
for submodule, subclient in self.subclients(commit).items():
274-
try:
275-
subpath = original_path.relative_to(subclient.path)
276-
return (
277-
subclient,
278-
subclient.find_previous_commit(
279-
subpath, revision=submodule.hexsha
280-
),
281-
subpath,
282-
)
283-
except ValueError:
284-
pass
281+
if (Path(submodule.path) / Path('.git')).exists():
282+
283+
try:
284+
subpath = original_path.relative_to(subclient.path)
285+
return (
286+
subclient,
287+
subclient.find_previous_commit(
288+
subpath, revision=submodule.hexsha
289+
),
290+
subpath,
291+
)
292+
except ValueError:
293+
pass
285294

286295
return self, commit, path
287296

288297
@contextmanager
289-
def with_metadata(self):
298+
def with_metadata(self, read_only=False):
290299
"""Yield an editable metadata object."""
291300
from renku.models.projects import Project
292301

@@ -299,7 +308,8 @@ def with_metadata(self):
299308

300309
yield metadata
301310

302-
metadata.to_yaml()
311+
if not read_only:
312+
metadata.to_yaml()
303313

304314
@contextmanager
305315
def with_workflow_storage(self):

renku/cli/_checks/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,11 +17,11 @@
1717
# limitations under the License.
1818
"""Define repository checks for :program:`renku doctor`."""
1919

20-
from .files_in_datasets import check_missing_files
21-
from .location_datasets import check_dataset_metadata
20+
from .migrate_datasets import (check_dataset_metadata, check_missing_files)
2221
from .references import check_missing_references
2322

24-
# Checks will be executed in the order as they are listed in __all__
23+
# Checks will be executed in the order as they are listed in __all__.
24+
# They are mostly used in ``doctor`` command to inspect broken things.
2525
__all__ = (
2626
'check_dataset_metadata',
2727
'check_missing_files',

renku/cli/_checks/files_in_datasets.py

Lines changed: 0 additions & 54 deletions
This file was deleted.

renku/cli/_checks/location_datasets.py

Lines changed: 0 additions & 46 deletions
This file was deleted.

0 commit comments

Comments
 (0)