Skip to content

Commit

Permalink
Merge pull request #47 from CAREamics/remove_files
Browse files Browse the repository at this point in the history
Remove file lists
  • Loading branch information
jdeschamps committed Apr 27, 2024
2 parents da83ec7 + a989e1e commit 8ee2416
Show file tree
Hide file tree
Showing 9 changed files with 3 additions and 223 deletions.
15 changes: 0 additions & 15 deletions src/careamics_portfolio/denoiseg_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ class DSB2018(PortfolioEntry, NoisyObject):
citation (str): Citation to use when referring to the dataset.
file_name (str): Name of the downloaded file.
hash (str): SHA256 hash of the downloaded file.
files (list[str]): List of files in the dataset.
size (int): Size of the dataset in MB.
tags (list[str]): List of tags associated to the dataset.
is_zip (bool): Whether the dataset is a zip file.
Expand Down Expand Up @@ -83,10 +82,6 @@ def __init__(self, noise_level: NoiseLevel = NoiseLevel.N0) -> None:
"segmentation across imaging experiments: the 2018 Data Science "
"Bowl. Nat Methods 16, 1247-1253 (2019). "
"https://doi.org/10.1038/s41592-019-0612-7",
files=[
f"DSB2018_n{noise_level.value}/train/train_data.npz",
f"DSB2018_n{noise_level.value}/test/test_data.npz",
],
size=self._get_size(noise_level),
tags=["denoising", "segmentation", "nuclei", "fluorescence"],
)
Expand Down Expand Up @@ -136,7 +131,6 @@ class SegFlywing(PortfolioEntry, NoisyObject):
citation (str): Citation to use when referring to the dataset.
file_name (str): Name of the downloaded file.
hash (str): SHA256 hash of the downloaded file.
files (list[str]): List of files in the dataset.
size (int): Size of the dataset in MB.
tags (list[str]): List of tags associated to the dataset.
is_zip (bool): Whether the dataset is a zip file.
Expand Down Expand Up @@ -164,10 +158,6 @@ def __init__(self, noise_level: NoiseLevel = NoiseLevel.N0) -> None:
citation="Buchholz, T.O., Prakash, M., Schmidt, D., Krull, A., Jug, "
"F.: Denoiseg: joint denoising and segmentation. In: European "
"Conference on Computer Vision (ECCV). pp. 324-337. Springer (2020) 8, 9",
files=[
f"Flywing_n{noise_level.value}/train/train_data.npz",
f"Flywing_n{noise_level.value}/test/test_data.npz",
],
size=self._get_size(noise_level),
tags=["denoising", "segmentation", "membrane", "fluorescence"],
)
Expand Down Expand Up @@ -217,7 +207,6 @@ class MouseNuclei(PortfolioEntry, NoisyObject):
citation (str): Citation to use when referring to the dataset.
file_name (str): Name of the downloaded file.
hash (str): SHA256 hash of the downloaded file.
files (list[str]): List of files in the dataset.
size (int): Size of the dataset in MB.
tags (list[str]): List of tags associated to the dataset.
is_zip (bool): Whether the dataset is a zip file.
Expand Down Expand Up @@ -245,10 +234,6 @@ def __init__(self, noise_level: NoiseLevel = NoiseLevel.N0) -> None:
citation="Buchholz, T.O., Prakash, M., Schmidt, D., Krull, A., Jug, "
"F.: Denoiseg: joint denoising and segmentation. In: European "
"Conference on Computer Vision (ECCV). pp. 324-337. Springer (2020) 8, 9",
files=[
f"Mouse_n{noise_level.value}/train/train_data.npz",
f"Mouse_n{noise_level.value}/test/test_data.npz",
],
size=self._get_size(noise_level),
tags=["denoising", "segmentation", "nuclei", "fluorescence"],
)
Expand Down
158 changes: 0 additions & 158 deletions src/careamics_portfolio/denoising_datasets.py

Large diffs are not rendered by default.

15 changes: 0 additions & 15 deletions src/careamics_portfolio/portfolio_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ class PortfolioEntry:
citation (str): Citation to use when referring to the dataset.
file_name (str): Name of the downloaded file.
hash (str): SHA256 hash of the downloaded file.
files (list[str]): List of files in the dataset.
size (float): Size of the dataset in MB.
tags (list[str]): List of tags associated to the dataset.
is_zip (bool): Whether the dataset is a zip file.
Expand All @@ -35,7 +34,6 @@ def __init__(
citation: str,
file_name: str,
sha256: str,
files: List[str],
size: float,
tags: List[str],
is_zip: bool = True,
Expand All @@ -53,7 +51,6 @@ def __init__(
self._citation = citation
self._file_name = file_name
self._hash = sha256
self._files = files
self._size = size
self._tags = tags
self._is_zip = is_zip
Expand Down Expand Up @@ -146,17 +143,6 @@ def hash(self) -> str:
"""
return self._hash

@property
def files(self) -> List[str]:
"""List of files in the dataset.
Returns
-------
List[str]
List of files in the dataset.
"""
return self._files

@property
def size(self) -> float:
"""Size of the dataset in MB.
Expand Down Expand Up @@ -224,7 +210,6 @@ def to_dict(self) -> dict:
"citation": self.citation,
"file_name": self.file_name,
"hash": self.hash,
"files": self.files,
"size": self.size,
"tags": self.tags,
}
Expand Down
4 changes: 0 additions & 4 deletions src/careamics_portfolio/utils/pale_blue_dot.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ class PaleBlueDot(PortfolioEntry):
citation (str): Citation to use when referring to the dataset.
file_name (str): Name of the downloaded file.
hash (str): SHA256 hash of the downloaded file.
files (list[str]): List of files in the dataset.
size (float): Size of the dataset in MB.
tags (list[str]): List of tags associated to the dataset.
is_zip (bool): Whether the dataset is a zip file.
Expand Down Expand Up @@ -47,9 +46,6 @@ def __init__(self) -> None:
"artifacts resulting from the magnification.",
citation="NASA/JPL-Caltech",
license="Public domain",
files=[
"P36254.jpg",
],
size=0.4,
tags=["pale blue dot", "voyager", "nasa", "jpl"],
is_zip=False,
Expand Down
4 changes: 0 additions & 4 deletions src/careamics_portfolio/utils/pale_blue_dot_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ class PaleBlueDotZip(PortfolioEntry):
citation (str): Citation to use when referring to the dataset.
file_name (str): Name of the downloaded file.
hash (str): SHA256 hash of the downloaded file.
files (list[str]): List of files in the dataset.
size (int): Size of the dataset in MB.
tags (list[str]): List of tags associated to the dataset.
is_zip (bool): Whether the dataset is a zip file.
Expand Down Expand Up @@ -47,9 +46,6 @@ def __init__(self) -> None:
"artifacts resulting from the magnification.",
citation="NASA/JPL-Caltech",
license="Public domain",
files=[
"P36254.jpg",
],
size=0.4,
tags=["pale blue dot", "voyager", "nasa", "jpl"],
is_zip=True,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_denoiseg_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_all_datasets_getters(portfolio: PortfolioManager):
def test_datasets(tmp_path, dataset: PortfolioEntry):
"""Test that all DenoiSeg datasets download properly.
This test also checks the files and size.
This test also checks the size.
Parameters
----------
Expand Down
2 changes: 1 addition & 1 deletion tests/test_denoising_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_all_datasets_getters(portfolio: PortfolioManager):
def test_datasets(tmp_path, dataset: PortfolioEntry):
"""Test that all denoising datasets download properly.
This test also checks the files and size.
This test also checks the size.
Parameters
----------
Expand Down
5 changes: 0 additions & 5 deletions tests/test_portfolio_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ def test_change_entry(pale_blue_dot: PortfolioEntry):
_ = pale_blue_dot.citation
_ = pale_blue_dot.file_name
_ = pale_blue_dot.hash
_ = pale_blue_dot.files

# Check that changing members raises errors
with pytest.raises(AttributeError):
Expand All @@ -66,9 +65,6 @@ def test_change_entry(pale_blue_dot: PortfolioEntry):
with pytest.raises(AttributeError):
pale_blue_dot.hash = ""

with pytest.raises(AttributeError):
pale_blue_dot.files = {}


def test_registry_name(pale_blue_dot: PortfolioEntry):
"""Test that the registry name is correct."""
Expand All @@ -89,7 +85,6 @@ def test_name_with_space():
citation="citation",
file_name="file name",
sha256="34973248736ygdw3",
files=["dsada"],
size=1,
tags=["dsadas"],
)
Expand Down
21 changes: 1 addition & 20 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,40 +47,21 @@ def portoflio_entry_checker(entry: PortfolioEntry) -> None:
assert (
entry.file_name is not None and entry.file_name != ""
), f"Invalid file name in {entry}"
assert entry.files is not None and len(entry.files) > 0, f"Invalid files in {entry}"
assert entry.size is not None and entry.size > 0, f"Invalid size in {entry}"


def download_checker(path: Path, dataset: PortfolioEntry) -> None:
"""Test that the file can be downloaded and that all fields
correspond to reality."""
# download dataset
files = dataset.download(path)
_ = dataset.download(path)

# check that the zip file exists
path_to_zip = path / dataset.get_registry_name()
assert (
path_to_zip.exists()
), f"{dataset.get_registry_name()} does not exist after download."

# root folder where the downloaded files are
if dataset.is_zip:
folder_root = path / (dataset.get_registry_name() + ".unzip")
else:
folder_root = path

# check that the files exist and are in the returned list
# TODO: currently some files have hidden macOS files that need to be removed in the
# future
files_portfolio = [str(Path(folder_root, s)) for s in dataset.files]
for file in files_portfolio:
assert Path(file).name in [
f.name for f in list(Path(file).parent.rglob("*"))
], f"{file} does not exist."
assert Path(file).name in [
Path(f).name for f in files
], f"{file} not in downloaded files."

# check file size with a tolerance of 5% or 3MB
file_size = os.path.getsize(path_to_zip) / 1024 / 1024 # MB
abs_tolerance = max(0.05 * dataset.size, 3)
Expand Down

0 comments on commit 8ee2416

Please sign in to comment.