Skip to content

Commit 7938ac4

Browse files
authored
fix: ensure external storage is handled correctly (#592)
* fix: ensure external storage is handled correctly * fix: yapf pinned to 0.27 * fix: fixed wording * fix: missing tests
1 parent 80b9839 commit 7938ac4

File tree

9 files changed

+188
-120
lines changed

9 files changed

+188
-120
lines changed

renku/api/storage.py

Lines changed: 62 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,43 @@
1616
# See the License for the specific language governing permissions and
1717
# limitations under the License.
1818
"""Client for handling a data storage."""
19-
19+
import functools
2020
import shlex
2121
from collections import defaultdict
22+
from shutil import which
2223
from subprocess import PIPE, STDOUT, call, run
2324

2425
import attr
26+
from werkzeug.utils import cached_property
2527

2628
from renku import errors
2729
from renku._compat import Path
2830

2931
from ._git import _expand_directories
3032
from .repository import RepositoryApiMixin
3133

32-
HAS_LFS = call(['git', 'lfs'], stdout=PIPE, stderr=STDOUT) == 0
33-
3434
# Batch size for when renku is expanding a large list
3535
# of files into an argument string.
3636
ARGUMENT_BATCH_SIZE = 100
3737

3838

39+
def ensure_external_storage(fn):
40+
"""Ensure management of external storage on methods which depend on it.
41+
42+
:raises: ``errors.ExternalStorageNotInstalled``
43+
:raises: ``errors.ExternalStorageDisabled``
44+
"""
45+
# noqa
46+
@functools.wraps(fn)
47+
def wrapper(self, *args, **kwargs):
48+
if not self.has_external_storage:
49+
pass
50+
else:
51+
return fn(self, *args, **kwargs)
52+
53+
return wrapper
54+
55+
3956
@attr.s
4057
class StorageApiMixin(RepositoryApiMixin):
4158
"""Client for handling a data storage."""
@@ -53,6 +70,30 @@ class StorageApiMixin(RepositoryApiMixin):
5370

5471
_CMD_STORAGE_PULL = ['git', 'lfs', 'pull', '-I']
5572

73+
@cached_property
74+
def storage_installed(self):
75+
"""Verify that git-lfs is installed and on system PATH."""
76+
return bool(which('git-lfs'))
77+
78+
@cached_property
79+
def has_external_storage(self):
80+
"""Check if repository has external storage enabled.
81+
82+
:raises: ``errors.ExternalStorageNotInstalled``
83+
:raises: ``errors.ExternalStorageDisabled``
84+
"""
85+
repo_config = self.repo.config_reader(config_level='repository')
86+
lfs_enabled = repo_config.has_section('filter "lfs"')
87+
88+
storage_enabled = lfs_enabled and self.storage_installed
89+
if self.use_external_storage and not storage_enabled:
90+
raise errors.ExternalStorageDisabled(self.repo)
91+
92+
if lfs_enabled and not self.storage_installed:
93+
raise errors.ExternalStorageNotInstalled(self.repo)
94+
95+
return lfs_enabled and self.storage_installed
96+
5697
def init_external_storage(self, force=False):
5798
"""Initialize the external storage for data."""
5899
call(
@@ -62,19 +103,20 @@ def init_external_storage(self, force=False):
62103
cwd=str(self.path.absolute()),
63104
)
64105

65-
@property
66-
def external_storage_installed(self):
67-
"""Check that Large File Storage is installed."""
68-
return HAS_LFS
106+
def init_repository(self, name=None, force=False):
107+
"""Initialize a local Renku repository."""
108+
result = super().init_repository(name=name, force=force)
69109

70-
def track_paths_in_storage(self, *paths):
71-
"""Track paths in the external storage."""
72-
if not self._use_lfs():
73-
return
110+
# initialize LFS if it is requested and installed
111+
if self.use_external_storage and self.storage_installed:
112+
self.init_external_storage(force=force)
74113

75-
if not self.external_storage_installed:
76-
raise errors.ExternalStorageNotInstalled(self.repo)
114+
return result
77115

116+
@ensure_external_storage
117+
def track_paths_in_storage(self, *paths):
118+
"""Track paths in the external storage."""
119+
# Calculate which paths can be tracked in lfs
78120
track_paths = []
79121
attrs = self.find_attr(*paths)
80122

@@ -97,31 +139,20 @@ def track_paths_in_storage(self, *paths):
97139
cwd=str(self.path),
98140
)
99141

142+
@ensure_external_storage
100143
def untrack_paths_from_storage(self, *paths):
101144
"""Untrack paths from the external storage."""
102-
if not self._use_lfs():
103-
return
104-
105-
if not self.external_storage_installed:
106-
raise errors.ExternalStorageNotInstalled(self.repo)
107-
108145
call(
109146
self._CMD_STORAGE_UNTRACK + list(paths),
110147
stdout=PIPE,
111148
stderr=STDOUT,
112149
cwd=str(self.path),
113150
)
114151

152+
@ensure_external_storage
115153
def pull_paths_from_storage(self, *paths):
116154
"""Pull paths from LFS."""
117155
import math
118-
119-
if not self._use_lfs():
120-
return
121-
122-
if not self.external_storage_installed:
123-
raise errors.ExternalStorageNotInstalled(self.repo)
124-
125156
client_dict = defaultdict(list)
126157

127158
for path in _expand_directories(paths):
@@ -131,13 +162,14 @@ def pull_paths_from_storage(self, *paths):
131162
client_dict[client.path].append(str(path))
132163

133164
for client_path, paths in client_dict.items():
134-
for ibatch in range(math.ceil(len(paths) / ARGUMENT_BATCH_SIZE)):
165+
batch_size = math.ceil(len(paths) / ARGUMENT_BATCH_SIZE)
166+
for index in range(batch_size):
135167
run(
136168
self._CMD_STORAGE_PULL + [
137169
shlex.quote(
138170
','.join(
139-
paths[ibatch * ARGUMENT_BATCH_SIZE:
140-
(ibatch + 1) * ARGUMENT_BATCH_SIZE]
171+
paths[index * ARGUMENT_BATCH_SIZE:(index + 1) *
172+
ARGUMENT_BATCH_SIZE]
141173
)
142174
)
143175
],
@@ -146,35 +178,13 @@ def pull_paths_from_storage(self, *paths):
146178
stderr=STDOUT,
147179
)
148180

181+
@ensure_external_storage
149182
def checkout_paths_from_storage(self, *paths):
150183
"""Checkout a paths from LFS."""
151-
if not self._use_lfs():
152-
return
153-
154-
if not self.external_storage_installed:
155-
raise errors.ExternalStorageNotInstalled(self.repo)
156-
157184
run(
158185
self._CMD_STORAGE_CHECKOUT + list(paths),
159186
cwd=str(self.path.absolute()),
160187
stdout=PIPE,
161188
stderr=STDOUT,
162189
check=True,
163190
)
164-
165-
def init_repository(self, name=None, force=False):
166-
"""Initialize a local Renku repository."""
167-
result = super().init_repository(name=name, force=force)
168-
169-
# initialize LFS if it is requested and installed
170-
if self.use_external_storage and self.external_storage_installed:
171-
self.init_external_storage(force=force)
172-
173-
return result
174-
175-
def _use_lfs(self):
176-
renku_initialized_to_use_lfs = self.repo.config_reader(
177-
config_level='repository'
178-
).has_section('filter "lfs"')
179-
180-
return renku_initialized_to_use_lfs and self.use_external_storage

renku/cli/move.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -98,20 +98,25 @@ def fmt_dst(path):
9898
dataset.to_yaml()
9999

100100
# 3. Manage .gitattributes for external storage.
101-
tracked = tuple(
102-
path for path, attr in client.find_attr(*files).items()
103-
if attr.get('filter') == 'lfs'
104-
)
105-
client.untrack_paths_from_storage(*tracked)
106-
existing = client.find_attr(*tracked)
107-
if existing:
108-
click.echo(WARNING + 'There are custom .gitattributes.\n')
109-
if click.confirm(
110-
'Do you want to edit ".gitattributes" now?', default=False
111-
):
112-
click.edit(filename=str(client.path / '.gitattributes'))
113-
114-
client.track_paths_in_storage(*(destinations[path] for path in tracked))
101+
tracked = tuple()
102+
if client.has_external_storage:
103+
tracked = tuple(
104+
path for path, attr in client.find_attr(*files).items()
105+
if attr.get('filter') == 'lfs'
106+
)
107+
client.untrack_paths_from_storage(*tracked)
108+
109+
if client.find_attr(*tracked):
110+
click.echo(WARNING + 'There are custom .gitattributes.\n')
111+
if click.confirm(
112+
'Do you want to edit ".gitattributes" now?', default=False
113+
):
114+
click.edit(filename=str(client.path / '.gitattributes'))
115+
116+
if tracked and client.has_external_storage:
117+
client.track_paths_in_storage(
118+
*(destinations[path] for path in tracked)
119+
)
115120

116121
# 4. Handle symlinks.
117122
dst.parent.mkdir(parents=True, exist_ok=True)

renku/cli/remove.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -72,18 +72,19 @@ def fmt_path(path):
7272
dataset.to_yaml()
7373

7474
# 2. Manage .gitattributes for external storage.
75-
tracked = tuple(
76-
path for path, attr in client.find_attr(*files).items()
77-
if attr.get('filter') == 'lfs'
78-
)
79-
client.untrack_paths_from_storage(*tracked)
80-
existing = client.find_attr(*tracked)
81-
if existing:
82-
click.echo(WARNING + 'There are custom .gitattributes.\n')
83-
if click.confirm(
84-
'Do you want to edit ".gitattributes" now?', default=False
85-
):
86-
click.edit(filename=str(client.path / '.gitattributes'))
75+
if client.has_external_storage:
76+
tracked = tuple(
77+
path for path, attr in client.find_attr(*files).items()
78+
if attr.get('filter') == 'lfs'
79+
)
80+
client.untrack_paths_from_storage(*tracked)
81+
existing = client.find_attr(*tracked)
82+
if existing:
83+
click.echo(WARNING + 'There are custom .gitattributes.\n')
84+
if click.confirm(
85+
'Do you want to edit ".gitattributes" now?', default=False
86+
):
87+
click.edit(filename=str(client.path / '.gitattributes'))
8788

8889
# Finally remove the files.
8990
final_sources = list(set(files.values()))

renku/cli/rerun.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,10 +155,14 @@ def rerun(client, revision, roots, siblings, inputs, paths):
155155
)
156156
)
157157

158-
# Make sure all inputs are pulled from a storage.
159-
client.pull_paths_from_storage(
160-
*(path for _, path in workflow.iter_input_files(client.workflow_path))
161-
)
158+
# Don't compute paths if storage is disabled.
159+
if client.has_external_storage:
160+
# Make sure all inputs are pulled from a storage.
161+
paths_ = (
162+
path
163+
for _, path in workflow.iter_input_files(client.workflow_path)
164+
)
165+
client.pull_paths_from_storage(*paths_)
162166

163167
# Store the generated workflow used for updating paths.
164168
import yaml

renku/cli/run.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,9 @@
5656
output.
5757
5858
Because the output path detection is based on the Git repository state after
59-
the execution of ``renku run`` command, it is good to have a basic understading
60-
of the underlying principles and limitations of tracking files in Git.
59+
the execution of ``renku run`` command, it is good to have a basic
60+
understanding of the underlying principles and limitations of tracking
61+
files in Git.
6162
6263
Git tracks not only the paths in a repository, but also the content stored in
6364
those paths. Therefore:
@@ -180,13 +181,14 @@ def run(client, outputs, no_output, success_codes, isolation, command_line):
180181
with factory.watch(
181182
client, no_output=no_output, outputs=outputs
182183
) as tool:
183-
# Make sure all inputs are pulled from a storage.
184-
client.pull_paths_from_storage(
185-
*(
184+
# Don't compute paths if storage is disabled.
185+
if client.has_external_storage:
186+
# Make sure all inputs are pulled from a storage.
187+
paths_ = (
186188
path
187189
for _, path in tool.iter_input_files(client.workflow_path)
188190
)
189-
)
191+
client.pull_paths_from_storage(*paths_)
190192

191193
returncode = call(
192194
factory.command_line,

renku/cli/update.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,10 +169,14 @@ def update(client, revision, no_output, siblings, paths):
169169
outputs=outputs,
170170
)
171171

172-
# Make sure all inputs are pulled from a storage.
173-
client.pull_paths_from_storage(
174-
*(path for _, path in workflow.iter_input_files(client.workflow_path))
175-
)
172+
# Don't compute paths if storage is disabled.
173+
if client.has_external_storage:
174+
# Make sure all inputs are pulled from a storage.
175+
paths_ = (
176+
path
177+
for _, path in workflow.iter_input_files(client.workflow_path)
178+
)
179+
client.pull_paths_from_storage(*paths_)
176180

177181
with output_file.open('w') as f:
178182
f.write(

renku/errors.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -251,9 +251,10 @@ class ExternalStorageNotInstalled(RenkuException, click.ClickException):
251251
def __init__(self, repo):
252252
"""Build a custom message."""
253253
msg = (
254-
'Git-LFS is either not installed or not configured '
255-
'for this repo.\n'
256-
'By running this command without LFS you could be committing\n'
254+
'External storage is not installed, '
255+
'but this repository depends on it. \n'
256+
'By running this command without storage installed '
257+
'you could be committing\n'
257258
'large files directly to the git repository.\n\n'
258259
'If this is your intention, please repeat the command with '
259260
'the -S flag (e.g. renku -S run <cmd>), \n'
@@ -263,6 +264,25 @@ def __init__(self, repo):
263264
super(ExternalStorageNotInstalled, self).__init__(msg)
264265

265266

267+
class ExternalStorageDisabled(RenkuException, click.ClickException):
268+
"""Raise when disabled repository storage API is trying to be used."""
269+
270+
def __init__(self, repo):
271+
"""Build a custom message."""
272+
msg = (
273+
'External storage is not configured, '
274+
'but this action is trying to use it.\n'
275+
'By running this command without storage enabled '
276+
'you could be committing\n'
277+
'large files directly to the git repository.\n\n'
278+
'If this is your intention, please repeat the command with '
279+
'the -S flag (e.g. renku -S run <cmd>), \n'
280+
'otherwise install e.g. git-LFS with "git lfs install --local".'
281+
)
282+
283+
super(ExternalStorageDisabled, self).__init__(msg)
284+
285+
266286
class UninitializedProject(RenkuException, click.ClickException):
267287
"""Raise when a project does not seem to have been initialized yet."""
268288

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
'pytest>=4.0.0',
4141
'responses>=0.7.0',
4242
'unify>=0.4',
43-
'yapf>=0.27.0',
43+
'yapf==0.27.0',
4444
]
4545

4646
extras_require = {

0 commit comments

Comments
 (0)