Skip to content

Commit

Permalink
Added updates for PH2 and PAD datasets.
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidWalshe93 committed Apr 15, 2021
1 parent cfbd288 commit 02e8e3a
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 8 deletions.
6 changes: 5 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,11 @@
"packaging",
"pandas",
"PyYAML",
"tabulate"
"tabulate",
"alive-progress",
"requests",
"patool",
"fuzzywuzzy"
]

CWD = os.path.abspath(os.path.dirname(__file__))
Expand Down
2 changes: 1 addition & 1 deletion sla_cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
Date: 08 April 2021
"""

__version__ = '0.2.1'
__version__ = '0.2.2'
2 changes: 1 addition & 1 deletion sla_cli/entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,4 @@ def cli(ctx: Context, params: CliParameters):
cli.add_command(command)

if __name__ == '__main__':
cli()
cli(prog_name="sla-cli")
24 changes: 24 additions & 0 deletions sla_cli/src/cli/commands/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,30 @@ class DownloadParameters:
@kwargs_to_dataclass(DownloadParameters)
@click.pass_context
def download(ctx: Context, params: DownloadParameters):
"""Download
Responsible for downloading datasets from various endpoints using the dataset name as
a key.
\b
Command flow:
1) Dataset archives are downloaded from a data repository endpoint specific to a given dataset.
2) Once downloaded, archives are extracted and the contents are moved to the 'data' directory specified.
3) A custom wrapper, specific to the given dataset, is used to restructure and format the dataset content into a uniform layout.
\b
Available datasets:
bcn_20000 | bcn_2020_challenge | brisbane_isic_challenge_2020 | dermoscopedia_cc_by | ham10000
isic_2020_challenge_mskcc_contribution | isic_2020_vienna_part_1 | isic_2020_vienna_part_2
jid_editorial_images_2018 | mclass_d | mclass_nd | mednode | msk_1 | msk_2 | msk_3 | msk_4
msk_5 | pad_ufes_20 | ph2 | sonic | sydney_mia_smdc_2020_isic_challenge_contribution
uda_1 | uda_2
\b
Example:
$ sla-cli download -d ./data/ mednode ph2 uda_1
"""
datasets = AccessorFactory.create_datasets()

# Remove datasets that don't exist in the tool before continuing.
Expand Down
3 changes: 2 additions & 1 deletion sla_cli/src/common/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ def init_progress_bars_wrapper(*args, **kwargs):
title_length=40,
spinner="classic",
unknown="classic",
bar="classic"
bar="classic",
enrich_print=False
)

return func(*args, **kwargs)
Expand Down
5 changes: 1 addition & 4 deletions sla_cli/src/download/pad_ufes_20/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,6 @@ def _extract(self):
with ZipFile(archive, "r") as fh:
fh.extractall(images_dir)

# Remove archive after extraction.
os.remove(archive)

def _format_metadata(self):
pass

Expand Down Expand Up @@ -73,4 +70,4 @@ def _move_images(self):
def _clean_up(self):
"""Clean up any stray files."""
for i in range(1, 4):
os.remove(os.path.join(self.images_path, f"imgs_part_{i}"))
shutil.rmtree(os.path.join(self.images_path, f"imgs_part_{i}"))
2 changes: 2 additions & 0 deletions sla_cli/src/download/ph2/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ def _download(self):
def _extract(self):
"""Extracts the downloaded archive."""
try:
if not os.path.exists(self.extracted_path):
os.makedirs(self.extracted_path)
patoolib.extract_archive(self.archive_path, outdir=self.extracted_path, verbosity=-1)
except Exception as err:
logger.error(f"You may have to install a 3rd-party application to unpack '.rar' files.")
Expand Down

0 comments on commit 02e8e3a

Please sign in to comment.