
Commit

feat: better UX when adding to a dataset (#911)
* feat: show progressbar for URL imports

* chore: round file size to 2 decimal digits

* feat: check disk size when adding
m-alisafaee committed Jan 28, 2020
1 parent 7e9e647 commit c6ac967
Showing 4 changed files with 73 additions and 5 deletions.
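
The central change is a pre-flight disk-space check: before anything is downloaded, the size of the remote files is estimated from their HTTP Content-Length headers and compared against the free space that shutil.disk_usage reports for the repository path. A minimal standalone sketch of that idea follows; the function names and the plain OSError are illustrative only, since renku raises its own OperationError, as the diff below shows.

import shutil

import requests


def estimate_required_bytes(urls):
    """Sum Content-Length headers; servers that omit the header contribute 0."""
    total = 0
    for url in urls:
        try:
            with requests.get(url, stream=True) as response:
                total += int(response.headers.get('content-length', 0))
        except requests.exceptions.RequestException:
            pass  # unreachable URLs fail later, during the actual download
    return total


def check_disk_space(urls, path='.'):
    """Abort early if the estimated download size exceeds free space at path."""
    required = estimate_required_bytes(urls)
    free = shutil.disk_usage(path).free
    if required > free:
        mb = 2 ** 20
        raise OSError(
            'Insufficient disk space (required: {:.2f} MB'
            '/available: {:.2f} MB).'.format(required / mb, free / mb)
        )
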
4 changes: 3 additions & 1 deletion renku/cli/dataset.py
@@ -455,7 +455,9 @@ def add(short_name, urls, link, force, create, sources, destination, ref):
         sources=sources,
         destination=destination,
         ref=ref,
-        urlscontext=progress
+        urlscontext=progress,
+        progress=_DownloadProgressbar,
+        interactive=True,
     )


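The CLI change above wires a _DownloadProgressbar and interactive=True into add_file. The progress class itself lives elsewhere in renku/cli/dataset.py and is not part of this diff; as a rough sketch of the general pattern (not renku's actual implementation), a streamed download can drive a click.progressbar like this:

import click
import requests


def download_with_progress(url, path, chunk_size=16384):
    """Stream url to path while updating a click progress bar.

    Assumes the server sends a Content-Length header; without it the bar
    has zero length and the displayed progress is not meaningful.
    """
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        total = int(response.headers.get('content-length', 0))
        with open(path, 'wb') as out, \
                click.progressbar(length=total, label='Downloading') as bar:
            for chunk in response.iter_content(chunk_size=chunk_size):
                out.write(chunk)
                bar.update(len(chunk))
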
42 changes: 39 additions & 3 deletions renku/core/commands/dataset.py
@@ -18,12 +18,14 @@
"""Repository datasets management."""

import re
import shutil
import urllib
from collections import OrderedDict
from contextlib import contextmanager

import click
import git
import requests
import yaml

from renku.core import errors
@@ -33,7 +35,7 @@
 from renku.core.commands.providers import ProviderFactory
 from renku.core.compat import contextlib
 from renku.core.errors import DatasetNotFound, InvalidAccessToken, \
-    MigrationRequired, ParameterError, UsageError
+    MigrationRequired, OperationError, ParameterError, UsageError
 from renku.core.management.datasets import DATASET_METADATA_PATHS
 from renku.core.management.git import COMMIT_DIFF_STRATEGY
 from renku.core.models.datasets import Dataset, generate_default_short_name
@@ -140,6 +142,8 @@ def add_file(
     with_metadata=None,
     urlscontext=contextlib.nullcontext,
     commit_message=None,
+    progress=None,
+    interactive=False,
 ):
     """Add data file to a dataset."""
     add_to_dataset(
@@ -153,7 +157,9 @@ def add_file(
         destination=destination,
         ref=ref,
         with_metadata=with_metadata,
-        urlscontext=urlscontext
+        urlscontext=urlscontext,
+        progress=progress,
+        interactive=interactive,
     )


@@ -174,6 +180,8 @@ def add_to_dataset(
     all_at_once=False,
     destination_names=None,
     progress=None,
+    interactive=False,
+    total_size=None,
 ):
     """Add data to a dataset."""
     if len(urls) == 0:
@@ -183,6 +191,25 @@
             'Cannot add multiple URLs with --source or --destination'
         )

+    if interactive:
+        if total_size is None:
+            total_size = 0
+            for url in urls:
+                try:
+                    with requests.get(url, stream=True) as r:
+                        total_size += int(r.headers.get('content-length', 0))
+                except requests.exceptions.RequestException:
+                    pass
+        usage = shutil.disk_usage(client.path)
+
+        if total_size > usage.free:
+            mb = 2**20
+            message = 'Insufficient disk space (required: {:.2f} MB' \
+                '/available: {:.2f} MB). '.format(
+                    total_size/mb, usage.free/mb
+                )
+            raise OperationError(message)
+
     try:
         with client.with_dataset(
             short_name=short_name, create=create
Expand Down Expand Up @@ -467,6 +494,7 @@ def import_dataset(
         record = provider.find_record(uri)
         dataset = record.as_dataset(client)
         files = dataset.files
+        total_size = 0

         if with_prompt:
             click.echo(
@@ -477,7 +505,8 @@
                         ('filename', 'name'),
                         ('size_in_mb', 'size (mb)'),
                         ('filetype', 'type'),
-                    ))
+                    )),
+                    floatfmt='.2f'
                 )
             )

@@ -489,6 +518,11 @@

             click.confirm(text_prompt, abort=True)

+            for file_ in files:
+                total_size += file_.size_in_mb
+
+            total_size *= 2**20
+
     except KeyError as e:
         raise ParameterError((
             'Could not process {0}.\n'
@@ -522,6 +556,8 @@
         all_at_once=True,
         destination_names=names,
         progress=progress,
+        interactive=with_prompt,
+        total_size=total_size,
     )

     if dataset.version:
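Two unit details in the command layer above are easy to miss: shutil.disk_usage reports bytes, while import_dataset sums the provider's size_in_mb values and multiplies by 2**20 before passing total_size on; the same 2**20 divisor turns bytes back into the megabytes shown in the error message and, via floatfmt='.2f', in the file listing. A quick illustration of the conversions:

import shutil

MB = 2 ** 20

usage = shutil.disk_usage('.')   # named tuple of total, used, free, all in bytes
print('free: {:.2f} MB'.format(usage.free / MB))

sizes_in_mb = [1.25, 0.50, 3.00]      # per-file sizes as a provider reports them
total_size = sum(sizes_in_mb) * MB    # bytes, directly comparable to usage.free
print('required: {:.2f} MB'.format(total_size / MB))
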
6 changes: 5 additions & 1 deletion renku/core/management/datasets.py
@@ -265,7 +265,11 @@ def add_data_to_dataset(
                 )
             else:  # Remote URL
                 new_files = self._add_from_url(
-                    dataset, url, destination, extract
+                    dataset,
+                    url,
+                    destination,
+                    extract,
+                    progress=progress
                 )

         files.extend(new_files)
26 changes: 26 additions & 0 deletions tests/cli/test_integration_datasets.py
@@ -17,7 +17,9 @@
 # limitations under the License.
 """Integration tests for dataset command."""
 import os
+import shutil
 import subprocess
+from collections import namedtuple
 from pathlib import Path

 import git
@@ -1170,3 +1172,27 @@ def test_add_removes_credentials(runner, client):
     with client.with_dataset('my-dataset') as dataset:
         file_ = dataset.files[0]
         assert file_.url == 'https://example.com/index.html'
+
+
+@pytest.mark.integration
+def test_check_disk_space(runner, client, monkeypatch):
+    """Check adding to dataset prompts if disk space is not enough."""
+    url = 'https://example.com/index.html'
+
+    def disk_usage(_):
+        """Mocked response."""
+        Usage = namedtuple('Usage', 'free')
+        return Usage(free=0)
+
+    monkeypatch.setattr(shutil, 'disk_usage', disk_usage)
+
+    result = runner.invoke(
+        cli,
+        ['dataset', 'add', '-c', 'my-data', url],
+        catch_exceptions=False,
+    )
+    assert 1 == result.exit_code
+    assert 'Insufficient disk space' in result.output
+
+    result = runner.invoke(cli, ['dataset', 'ls-files'])
+    assert 'index.html' not in result.output
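
The new test keeps the scenario cheap to exercise: instead of filling a disk, it monkeypatches shutil.disk_usage with a namedtuple stub that reports zero free bytes, so the pre-flight check must fail. The add invocation then exits with code 1 and the 'Insufficient disk space' message, and the follow-up ls-files call confirms that index.html was never added to the dataset.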
