Skip to content

Commit

Permalink
speed up validation by 100x for large nodes (#173)
Browse files: browse the repository at this point in the history
  • Loading branch information
mgeplf committed Nov 7, 2022
1 parent 47e1b06 commit bc950ad
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 30 deletions.
33 changes: 22 additions & 11 deletions bluepysnap/circuit_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
The idea here is to not depend on libsonata if possible, so we can use this in all situations
"""
import itertools as it
import logging
from pathlib import Path

Expand Down Expand Up @@ -94,14 +93,21 @@ def _check_files(name, files, level):
Returns:
list: List of errors, empty if no errors
"""
missing = sorted({f for f in files if not f.is_file()})
files = set(files)
missing = []
for f in sorted(files):
if not f.is_file():
missing.append(f)

if len(missing) >= MAX_MISSING_FILES_DISPLAY:
break

if missing:
examples = [e.name for e in it.islice(missing, MAX_MISSING_FILES_DISPLAY)]
if len(missing) > MAX_MISSING_FILES_DISPLAY:
examples.append("...")
filenames = "".join(f"\t{m.name}\n" for m in missing)

filenames = "".join(f"\t{e}\n" for e in examples)
return [Error(level, f"missing {len(missing)} files in group {name}:\n{filenames}")]
return [
Error(level, f"missing at least {len(missing)} files in group {name}:\n{filenames}")
]

return []

Expand Down Expand Up @@ -177,7 +183,7 @@ def _nodes_group_to_dataframe(group, population):
pd.DataFrame: dataframe with all group attributes
"""
# TODO: remove multi-indexing (BBP only supports group '0')
df = pd.DataFrame(population["node_type_id"], columns=["type_id"])
df = pd.DataFrame(population["node_type_id"][:], columns=["type_id"])
size = df.size
df["id"] = population["node_id"] if "node_id" in population else np.arange(size)
df["group_id"] = population["node_group_id"] if "node_group_id" in population else 0
Expand All @@ -187,6 +193,8 @@ def _nodes_group_to_dataframe(group, population):
df = df[df["group_id"] == int(str(_get_group_name(group)))]

for k, v in group.items():
if k == "@library":
continue
if isinstance(v, h5py.Dataset):
if v.dtype == h5py.string_dtype():
df[k] = v.asstr()[:]
Expand All @@ -196,7 +204,7 @@ def _nodes_group_to_dataframe(group, population):
if "@library" in group:
for k, v in group["@library"].items():
if isinstance(v, h5py.Dataset):
df[k] = v.asstr()[:][df[k].to_numpy(dtype=int)]
df[k] = pd.Categorical.from_codes(df[k], categories=v.asstr()[:])

return df

Expand Down Expand Up @@ -249,7 +257,7 @@ def _check_bio_nodes_group(group_df, group, population, population_name):

errors += _check_files(
f"morphology: {group_name}[{group.file.filename}]",
(Path(morph_path, m + "." + extension) for m in group_df["morphology"]),
(Path(morph_path, m + "." + extension) for m in group_df["morphology"].unique()),
Error.WARNING,
)

Expand All @@ -260,7 +268,10 @@ def _check_bio_nodes_group(group_df, group, population, population_name):
L.debug("Checking neuron model files: %s", bio_path)
errors += _check_files(
f"model_template: {group_name}[{group.file.filename}]",
(bio_path / _get_model_template_file(m) for m in group_df.get("model_template", [])),
(
bio_path / _get_model_template_file(m)
for m in group_df.get("model_template", pd.Series(dtype="object")).unique()
),
Error.WARNING,
)
else:
Expand Down
26 changes: 7 additions & 19 deletions tests/test_circuit_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,19 +327,7 @@ def test_no_morph_files():
assert errors == {
Error(
Error.WARNING,
f"missing 1 files in group morphology: default/0[{nodes_file}]:\n\tnoname.swc\n",
)
}

with h5py.File(nodes_file, "r+") as h5f:
morph = h5f["nodes/default/0/morphology"]
morph[:] = ["noname" + str(i) for i in range(len(morph))]
errors = validate(str(config_copy_path))
assert errors == {
Error(
Error.WARNING,
f"missing 3 files in group morphology: default/0[{nodes_file}]:"
"\n\tnoname0.swc\n\t...\n",
f"missing at least 1 files in group morphology: default/0[{nodes_file}]:\n\tnoname.swc\n",
)
}

Expand All @@ -355,7 +343,7 @@ def test_no_alternate_morph_files():
assert errors == {
Error(
Error.WARNING,
f"missing 1 files in group morphology: default/0[{nodes_file}]:\n\tmorph-A.asc\n",
f"missing at least 1 files in group morphology: default/0[{nodes_file}]:\n\tmorph-A.asc\n",
)
}

Expand All @@ -371,12 +359,12 @@ def test_no_morph_library_files():
grp["@library/morphology"][:] = "noname"
shape = grp["morphology"].shape
del grp["morphology"]
grp.create_dataset("morphology", shape=shape, fillvalue=0)
grp.create_dataset("morphology", shape=shape, fillvalue=0, dtype=int)
errors = validate(str(config_copy_path))
assert errors == {
Error(
Error.WARNING,
f"missing 1 files in group morphology: default/0[{nodes_file}]:\n\tnoname.swc\n",
f"missing at least 1 files in group morphology: default/0[{nodes_file}]:\n\tnoname.swc\n",
)
}

Expand All @@ -390,7 +378,7 @@ def test_no_template_files():
assert errors == {
Error(
Error.WARNING,
f"missing 1 files in group model_template: default/0[{nodes_file}]:\n\tnoname.hoc\n",
f"missing at least 1 files in group model_template: default/0[{nodes_file}]:\n\tnoname.hoc\n",
)
}

Expand All @@ -406,12 +394,12 @@ def test_no_template_library_files():
grp["@library/model_template"][:] = "hoc:noname"
shape = grp["model_template"].shape
del grp["model_template"]
grp.create_dataset("model_template", shape=shape, fillvalue=0)
grp.create_dataset("model_template", shape=shape, fillvalue=0, dtype=int)
errors = validate(str(config_copy_path))
assert errors == {
Error(
Error.WARNING,
f"missing 1 files in group model_template: default/0[{nodes_file}]:\n\tnoname.hoc\n",
f"missing at least 1 files in group model_template: default/0[{nodes_file}]:\n\tnoname.hoc\n",
)
}

Expand Down

0 comments on commit bc950ad

Please sign in to comment.