Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

drop '.dat' support #252

Merged
merged 8 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ Improvements

- the types conform to `node types <https://sonata-extension.readthedocs.io/en/latest/sonata_config.html#populations>`_ and `edge types <https://sonata-extension.readthedocs.io/en/latest/sonata_config.html#id4>`_ defined in the sonata specification
- teach the `bluepysnap validate-circuit` and `bluepysnap validate-simulation` the ability to `--ignore-datatype-errors` so that mismatches of datatypes to the specification are ignored
- Update simulation validation to conform to the SONATA spec

- ``synapse_replay.source`` and ``.dat`` spike input files are no longer supported


Version v3.0.1
Expand Down
2 changes: 0 additions & 2 deletions bluepysnap/schemas/definitions/simulation_input.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,6 @@ $input_defs:
type: number
spike_file:
type: string
source:
type: string
tau:
type: number
variance:
Expand Down
52 changes: 25 additions & 27 deletions bluepysnap/simulation_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import h5py
import libsonata
import numpy as np
import pandas as pd

from bluepysnap import schemas
from bluepysnap.circuit_ids import CircuitNodeIds
Expand Down Expand Up @@ -219,15 +218,12 @@ def _get_ids_from_spike_file(file_):
"""Get unique gids from an input spikes file."""
file_ = Path(file_)
suffix = file_.suffix
if suffix == ".dat":
spikes = pd.read_csv(file_, delimiter=r"\s+", skiprows=1, header=None, names=["t", "id"])
return set(spikes["id"].values - 1)
elif suffix == ".h5":
if suffix == ".h5":
spikes = libsonata.SpikeReader(file_)
populations = spikes.get_population_names()
return {pop: set(spikes[pop].get_dict()["node_ids"]) for pop in populations}

raise IOError(f"Unknown file type: '{suffix}' (supported: '.h5', '.dat')")
raise IOError(f"Unsupported file type: '{suffix}' (supported: '.h5')")


def _get_ids_from_node_set(node_set, config):
Expand All @@ -244,6 +240,19 @@ def _get_ids_from_node_set(node_set, config):
return ids_per_population


def _get_ids_from_populations(config, only_non_virtual=False):
"""Get node ids of populations."""
circuit = libsonata.CircuitConfig.from_file(config["_circuit_config"])
populations = circuit.node_populations

if only_non_virtual:
populations = [
pop for pop in populations if circuit.node_population_properties(pop).type != "virtual"
]

return {pop: circuit.node_population(pop).select_all().flatten() for pop in populations}


def _get_missing_ids(sub_ids, super_ids):
"""Get `sub_ids` ids missing from `super_ids`."""
if isinstance(sub_ids, set):
Expand Down Expand Up @@ -277,26 +286,26 @@ def _validate_spike_file_contents(input_, config, prefix):
except IOError as e:
return [BluepySnapValidationError.fatal(f"{prefix}: {' '.join(map(str,e.args))}")]

nodeset_ids = _get_ids_from_node_set(input_["source"], config)
source = f"node set '{input_['source']}'"
if nodeset := input_.get("source"):
sim_ids = _get_ids_from_node_set(nodeset, config)
source = f"node set '{nodeset}'"
else:
sim_ids = _get_ids_from_populations(config)
source = "node populations"

return _compare_ids(spike_ids, nodeset_ids, source, prefix)
return _compare_ids(spike_ids, sim_ids, source, prefix)


def _validate_spike_input(name, input_, config):
errors = []

if (key := "source") in input_:
prefix = f"inputs.{name}.{key}"
errors += _validate_node_set_exists(config, input_[key], prefix=prefix)

if (key := "spike_file") in input_:
spike_path = _resolve_path(input_[key], config)

prefix = f"inputs.{name}.{key}"
errors += _validate_file_exists(spike_path, prefix=prefix)

if len(errors) > 0 or "source" not in input_ or not _file_exists(config["_circuit_config"]):
if len(errors) > 0 or not _file_exists(config["_circuit_config"]):
errors += [BluepySnapValidationError.fatal(f"{prefix}: Can not validate file contents")]
else:
errors += _validate_spike_file_contents(input_, config, prefix)
Expand Down Expand Up @@ -429,17 +438,6 @@ def validate_reports(config):
return errors


def _get_ids_from_non_virtual_pops(config):
"""Get ids of all non-virtual populations."""
circuit = libsonata.CircuitConfig.from_file(config["_circuit_config"])

return {
pop: circuit.node_population(pop).select_all().flatten()
for pop in circuit.node_populations
if circuit.node_population_properties(pop).type != "virtual"
}


def _validate_electrodes_file(path, config):
"""Validate the ids for each of the populations in `electrodes_file` can be found."""
prefix = "run.electrodes_file"
Expand All @@ -458,8 +456,8 @@ def _validate_electrodes_file(path, config):
source = f"node set '{node_set}'"
sim_ids = _get_ids_from_node_set(node_set, config)
else:
source = "non-virtual populations"
sim_ids = _get_ids_from_non_virtual_pops(config)
source = "non-virtual node populations"
sim_ids = _get_ids_from_populations(config, only_non_virtual=True)

return _compare_ids(elec_ids, sim_ids, source, prefix)

Expand Down
145 changes: 79 additions & 66 deletions tests/test_simulation_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from unittest.mock import MagicMock, Mock, call, patch

import numpy.testing as npt
import pandas as pd
import pytest

import bluepysnap.simulation_validation as test_module
Expand Down Expand Up @@ -294,16 +293,11 @@ def test_validate_connection_overrides(mock_validate_override):


def test__get_ids_from_spike_file(tmp_path):
spike_path = tmp_path / "spikes.dat"
pd.DataFrame({"/scatter": [1]}).to_csv(spike_path, sep="\t")

assert test_module._get_ids_from_spike_file(spike_path) == {0}

spike_path = TEST_DATA_DIR / "input_spikes.h5"
assert test_module._get_ids_from_spike_file(spike_path) == {"default": {0}}

with pytest.raises(IOError, match=r"Unknown file type: '.fake' \(supported: '.h5', '.dat'\)"):
test_module._get_ids_from_spike_file("fake_spikes.fake")
with pytest.raises(IOError, match=r"Unsupported file type: '.dat' \(supported: '.h5'\)"):
test_module._get_ids_from_spike_file(tmp_path / "spikes.dat")


def test__get_ids_from_node_set():
Expand All @@ -323,6 +317,22 @@ def test__get_ids_from_node_set():
assert test_module._get_ids_from_node_set("fake_node_set", config) == {}


def test__get_ids_from_populations():
with copy_test_data() as (_, config_path):
with edit_config(config_path) as circuit_config:
circuit_config["networks"]["nodes"][0]["populations"]["default2"]["type"] = "virtual"

config = {"_circuit_config": TEST_DATA_DIR / "circuit_config.json"}
res = test_module._get_ids_from_populations(config)
expected = {"default": [0, 1, 2], "default2": [0, 1, 2, 3]}
npt.assert_equal(res, expected)

config = {"_circuit_config": config_path}
res = test_module._get_ids_from_populations(config, only_non_virtual=True)
expected = {"default": [0, 1, 2]}
npt.assert_equal(res, expected)


def test__get_missing_ids():
nodeset_ids = {"test": [1, 2, 3], "test2": [4, 5]}
spike_ids_from_dat = {1, 2, 3, 4, 5}
Expand Down Expand Up @@ -367,45 +377,66 @@ def test__compare_ids(mock_missing_ids):
assert test_module._compare_ids(None, None, source, prefix) == expected


@patch.object(test_module, "_get_ids_from_node_set", new=Mock())
@patch.object(test_module, "_resolve_path", new=Mock())
@patch.object(test_module, "_get_ids_from_spike_file")
@patch.object(test_module, "_get_missing_ids")
def test__validate_spike_file_contents(mock_missing_ids, mock_ids_from_spikes):
input_config = {"source": "fake_node_set", "spike_file": "fake_spikes.h5"}

mock_missing_ids.return_value = []
res = test_module._validate_spike_file_contents(input_config, config=None, prefix="")
expected = []
assert res == expected

mock_missing_ids.return_value = [0, 1, 2]
res = test_module._validate_spike_file_contents(input_config, config=None, prefix="fake_prefix")
msg = "fake_prefix: 3 id(s) not found in node set 'fake_node_set': 0, 1, 2"
expected = [BluepySnapValidationError.fatal(msg)]
assert res == expected

mock_missing_ids.return_value = [("fake_population", id_) for id_ in [0, 1, 2]]
res = test_module._validate_spike_file_contents(input_config, config=None, prefix="fake_prefix")
msg = (
"fake_prefix: 3 id(s) not found in node set 'fake_node_set': "
"('fake_population', 0), ('fake_population', 1), ('fake_population', 2)"
)
expected = [BluepySnapValidationError.fatal(msg)]
assert res == expected
@pytest.mark.parametrize(
"input_config,expected_message,spike_ids_side_effect",
[
[{"spike_file": "fake_spikes.h5"}, None, lambda *_: {0, 1}],
[
{"source": "fake_node_set", "spike_file": "fake_spikes.h5"},
"3 id(s) not found in node set 'fake_node_set': 5, 6, 7",
lambda *_: {5, 6, 7},
],
[
{"spike_file": "fake_spikes.h5"},
(
"3 id(s) not found in node populations: "
"('fake_population', 0), ('fake_population', 1), ('fake_population', 2)"
),
lambda *_: {"fake_population": [0, 1, 2]},
],
[{"source": "fake_node_set", "spike_file": "fake_spikes.h5"}, None, lambda *_: {0, 1}],
[
{"spike_file": "fake_spikes.h5"},
"3 id(s) not found in node populations: 5, 6, 7",
lambda *_: {5, 6, 7},
],
[
{"source": "fake_node_set", "spike_file": "fake_spikes.h5"},
(
"3 id(s) not found in node set 'fake_node_set': "
"('fake_population', 0), ('fake_population', 1), ('fake_population', 2)"
),
lambda *_: {"fake_population": [0, 1, 2]},
],
[{"spike_file": "fake_spikes.h5"}, "Unknown IOError", IOError("Unknown", "IOError")],
],
)
def test__validate_spike_file_contents(
mock_ids_from_spikes, input_config, expected_message, spike_ids_side_effect
):
config = {
"_config_dir": TEST_DATA_DIR,
"_circuit_config": TEST_DATA_DIR / "circuit_config.json",
"_node_sets_instance": NodeSets.from_dict(
{"fake_node_set": {"population": ["default"], "node_id": [0, 1]}}
),
}
prefix = "fake"
mock_ids_from_spikes.side_effect = spike_ids_side_effect
if expected_message is not None:
expected = [BluepySnapValidationError.fatal(f"{prefix}: {expected_message}")]
else:
expected = []
res = test_module._validate_spike_file_contents(input_config, config, prefix)

mock_ids_from_spikes.side_effect = IOError("Unknown", "IOError")
res = test_module._validate_spike_file_contents(input_config, config=None, prefix="fake_prefix")
msg = "fake_prefix: Unknown IOError"
expected = [BluepySnapValidationError.fatal(msg)]
assert res == expected


def test__validate_spike_input():
node_sets = NodeSets.from_dict({"fake_node_set": {"node_id": [0]}})

input_config = {
"source": "fake_node_set",
"spike_file": TEST_DATA_DIR / "input_spikes.h5",
}
config = {
Expand All @@ -421,7 +452,6 @@ def test__validate_spike_input():
}

expected_error_messages = [
"inputs.test.source: Unknown node set: 'fail_node_set'",
f"inputs.test.spike_file: No such file: {input_config['spike_file']}",
"inputs.test.spike_file: Can not validate file contents",
]
Expand Down Expand Up @@ -535,16 +565,14 @@ def test_validate_inputs():
"pass_3": {"module": "not_synapse_replay", "source": "fail_node_set"},
"pass_4": {"module": "not_synapse_replay", "spike_file": fail_spike_file},
"fail_0": {"module": "test_module", "node_set": "fail_node_set"},
"fail_1": {"module": "synapse_replay", "source": "fail_node_set"},
"fail_2": {"module": "synapse_replay", "spike_file": fail_spike_file},
"fail_1": {"module": "synapse_replay", "spike_file": fail_spike_file},
},
}

expected_error_messages = [
"inputs.fail_0.node_set: Unknown node set: 'fail_node_set'",
"inputs.fail_1.source: Unknown node set: 'fail_node_set'",
f"inputs.fail_2.spike_file: No such file: {fail_spike_file}",
f"inputs.fail_2.spike_file: Can not validate file contents",
f"inputs.fail_1.spike_file: No such file: {fail_spike_file}",
f"inputs.fail_1.spike_file: Can not validate file contents",
]

expected = [BluepySnapValidationError.fatal(msg) for msg in expected_error_messages]
Expand Down Expand Up @@ -760,27 +788,12 @@ def test_validate_reports(tmp_path):
assert test_module.validate_reports(config) == expected


def test__get_ids_from_non_virtual_pops():
config = {"_circuit_config": TEST_DATA_DIR / "circuit_config.json"}
res = test_module._get_ids_from_non_virtual_pops(config)
expected = {"default": [0, 1, 2], "default2": [0, 1, 2, 3]}
npt.assert_equal(res, expected)

with copy_test_data() as (_, config_path):
with edit_config(config_path) as circuit_config:
circuit_config["networks"]["nodes"][0]["populations"]["default2"]["type"] = "virtual"

config = {"_circuit_config": config_path}
res = test_module._get_ids_from_non_virtual_pops(config)
expected = {"default": [0, 1, 2]}
npt.assert_equal(res, expected)


def test__validate_electrodes_file():
path = "./fake_path"
prefix = "run.electrodes_file"
expected = [
BluepySnapValidationError.fatal(f"run.electrodes_file: No such file: {TEST_DATA_DIR/path}"),
BluepySnapValidationError.fatal(f"run.electrodes_file: Can not validate file contents"),
BluepySnapValidationError.fatal(f"{prefix}: No such file: {TEST_DATA_DIR/path}"),
BluepySnapValidationError.fatal(f"{prefix}: Can not validate file contents"),
]
config = {"run": {"electrodes_file": path}, "_config_dir": TEST_DATA_DIR}
assert test_module.validate_run(config) == expected
Expand All @@ -793,9 +806,9 @@ def test__validate_electrodes_file():
}
assert test_module.validate_run(config) == []

with patch.object(test_module, "_get_ids_from_non_virtual_pops") as patched:
with patch.object(test_module, "_get_ids_from_populations") as patched:
patched.return_value = {"default": {0}}
msg = "run.electrodes_file: 1 id(s) not found in non-virtual populations: ('default', 1)"
msg = f"{prefix}: 1 id(s) not found in non-virtual node populations: ('default', 1)"
expected = [BluepySnapValidationError.fatal(msg)]
assert test_module.validate_run(config) == expected

Expand All @@ -804,12 +817,12 @@ def test__validate_electrodes_file():
assert test_module.validate_run(config) == []

config["node_set"] = "Layer23"
msg = "run.electrodes_file: 1 id(s) not found in node set 'Layer23': ('default', 1)"
msg = f"{prefix}: 1 id(s) not found in node set 'Layer23': ('default', 1)"
expected = [BluepySnapValidationError.fatal(msg)]
assert test_module.validate_run(config) == expected

config["_circuit_config"] = ""
msg = "run.electrodes_file: Can not validate file contents"
msg = f"{prefix}: Can not validate file contents"
expected = [BluepySnapValidationError.fatal(msg)]
assert test_module.validate_run(config) == expected

Expand Down