Skip to content

Commit

Permalink
fix dtypes in CircuitIds.index_schema and several deprecation warnings (
Browse files Browse the repository at this point in the history
#232)

* fix dtypes in CircuitIds.index_schema for pandas 2.1.0 (caused all dtypes to be objects)

* is_categorical_dtype -> isinstance(..., pd.CategoricalDtype)

* stop using jsonschema.validators.RefResolver

* fix future warning on concatenating empty dataframes

* fix deprecationwarning on pkg_resources

* Fix snap's own deprecation warnings

* require jsonschema>=4.18.0 and referencing>=0.28.4
  • Loading branch information
joni-herttuainen committed Sep 8, 2023
1 parent 28a8860 commit 38e1d28
Show file tree
Hide file tree
Showing 16 changed files with 67 additions and 99 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,14 @@ Improvements
~~~~~~~~~~~~
- Node set resolution is done by libsonata
- Added kwarg: `raise_missing_property` to `NodePopulation.get`
- Undeprecated calling ``Edges.get`` and ``EdgePopulation.get`` with ``properties=None``

Breaking Changes
~~~~~~~~~~~~~~~~
- ``Circuit.node_sets``, ``Simulation.node_sets`` returns ``NodeSets`` object initialized with empty dict when node sets file is not present
- ``NodeSet.resolved`` is no longer available
- ``FrameReport.node_set`` returns node_set name instead of resolved node set query
- Removed ``Edges.properties``, ``EdgePopulation.properties`` that were already supposed to be removed in v1.0.0


Version v1.0.7
Expand Down
4 changes: 2 additions & 2 deletions bluepysnap/_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _update_raster_properties():
if y_axis is None:
props["node_id_offset"] += spikes.nodes.size
props["pop_separators"].append(props["node_id_offset"])
elif pd.api.types.is_categorical_dtype(spikes.nodes.property_dtypes[y_axis]):
elif isinstance(spikes.nodes.property_dtypes[y_axis], pd.CategoricalDtype):
props["categorical_values"].update(spikes.nodes.property_values(y_axis))
else:
props["ymin"] = min(props["ymin"], spikes.nodes.get(properties=y_axis).min())
Expand All @@ -133,7 +133,7 @@ def _update_raster_properties():

# use np.int64 if displaying node_ids
dtype = spike_report[population_names[0]].nodes.property_dtypes[y_axis] if y_axis else IDS_DTYPE
if pd.api.types.is_categorical_dtype(dtype):
if isinstance(dtype, pd.CategoricalDtype):
# this is to prevent the problems when concatenating categoricals with unknown categories
dtype = str
data = pd.Series(index=report.index, dtype=dtype)
Expand Down
7 changes: 5 additions & 2 deletions bluepysnap/circuit_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,11 @@ def __init__(self, index, sort_index=True):

@property
def index_schema(self):
"""Return an empty index with the same names of the wrapped index."""
return pd.MultiIndex.from_tuples([], names=self.index.names)
"""Return an empty index with the same names and dtypes of the wrapped index."""
# NOTE: Since pandas 2.1.0, the index needs to contain the explicit dtypes. In pd.concat,
# the dtypes of multi-index are coerced to 'object' if any dataframe has indices with
# dtype='object'
return self.index[:0]

@classmethod
def _instance(cls, index, sort_index=True):
Expand Down
16 changes: 1 addition & 15 deletions bluepysnap/edges/edge_population.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from bluepysnap.circuit_ids import CircuitEdgeId, CircuitEdgeIds
from bluepysnap.exceptions import BluepySnapError
from bluepysnap.sonata_constants import DYNAMICS_PREFIX, ConstContainer, Edge
from bluepysnap.utils import IDS_DTYPE, Deprecate
from bluepysnap.utils import IDS_DTYPE


def _is_empty(xs):
Expand Down Expand Up @@ -186,10 +186,6 @@ def _get(self, selection, properties=None):
"""Get an array of edge IDs or DataFrame with edge properties."""
edge_ids = utils.ensure_ids(selection.flatten())
if properties is None:
Deprecate.warn(
"Returning ids with get/properties is deprecated and will be removed in 1.0.0. "
"Please use EdgePopulation.ids instead."
)
return edge_ids

if utils.is_iterable(properties):
Expand Down Expand Up @@ -312,16 +308,6 @@ def get(self, edge_ids, properties):
selection = libsonata.Selection(edge_ids)
return self._get(selection, properties)

def properties(self, edge_ids, properties):
"""Doc is overridden below."""
Deprecate.warn(
"EdgePopulation.properties function is deprecated and will be removed in 1.0.0. "
"Please use EdgePopulation.get instead."
)
return self.get(edge_ids, properties)

properties.__doc__ = get.__doc__

def positions(self, edge_ids, side, kind):
"""Edge positions as a pandas DataFrame.
Expand Down
15 changes: 0 additions & 15 deletions bluepysnap/edges/edges.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from bluepysnap.edges import EdgePopulation
from bluepysnap.exceptions import BluepySnapError
from bluepysnap.network import NetworkObject
from bluepysnap.utils import Deprecate


class Edges(
Expand Down Expand Up @@ -102,23 +101,9 @@ def get(self, edge_ids=None, properties=None): # pylint: disable=arguments-rena
if edge_ids is None:
raise BluepySnapError("You need to set edge_ids in get.")
if properties is None:
Deprecate.warn(
"Returning ids with get/properties is deprecated and will be removed in 1.0.0. "
"Please use Edges.ids instead."
)
return edge_ids
return super().get(edge_ids, properties)

def properties(self, edge_ids, properties):
"""Doc is overridden below."""
Deprecate.warn(
"Edges.properties function is deprecated and will be removed in 1.0.0. "
"Please use Edges.get instead."
)
return self.get(edge_ids, properties)

properties.__doc__ = get.__doc__

def afferent_nodes(self, target, unique=True):
"""Get afferent CircuitNodeIDs for given target ``node_id``.
Expand Down
2 changes: 1 addition & 1 deletion bluepysnap/nodes/node_population.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def property_values(self, prop, is_present=False):
The is_present argument forces the unique() even on the categorical fields.
"""
res = self.get(properties=prop)
if pd.api.types.is_categorical_dtype(res) and not is_present:
if isinstance(res.dtype, pd.CategoricalDtype) and not is_present:
return set(res.cat.categories)
return set(res.unique())

Expand Down
29 changes: 19 additions & 10 deletions bluepysnap/schemas/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
from pathlib import Path

import h5py
import importlib_resources
import jsonschema
import numpy as np
import pkg_resources
import referencing
import yaml

DEFINITIONS = "definitions"
Expand All @@ -13,11 +14,10 @@
def _load_schema_file(*args):
"""Load one of the predefined YAML schema files."""
filename = str(Path(*args).with_suffix(".yaml"))
filepath = pkg_resources.resource_filename(__name__, filename)
if not Path(filepath).is_file():
filepath = importlib_resources.files(__package__) / filename
if not filepath.is_file():
raise FileNotFoundError(f"Schema file {filepath} not found")
with open(filepath, encoding="utf-8") as fd:
return yaml.safe_load(fd)
return yaml.safe_load(filepath.read_text())


def _parse_path(path, join_str):
Expand Down Expand Up @@ -216,7 +216,7 @@ def _resolve_types(resolver, types):

def _resolve_type(type_):
if type_ not in cache:
type_ = resolver.resolve(type_)[1]["properties"]["datatype"]["const"]
type_ = resolver.lookup(type_).contents["properties"]["datatype"]["const"]
if hasattr(np, type_):
cache[type_] = getattr(np, type_)
elif type_ == "utf-8":
Expand All @@ -227,8 +227,17 @@ def _resolve_type(type_):
return {k: _resolve_type(v["$ref"]) for k, v in types.items()}


def _get_reference_resolver(schema):
"""Get reference resolver for the given schema."""
resource = referencing.Resource(
contents=schema,
specification=referencing.jsonschema.DRAFT202012,
)
return referencing.Registry().with_resource("", resource=resource).resolver()


def nodes_schema_types(nodes_type):
"""Get the datatypes of the attribute for nodes.
"""Get the datatypes of the attributes for nodes.
Args:
nodes_type (str): node type (e.g., "biophysical")
Expand All @@ -237,7 +246,7 @@ def nodes_schema_types(nodes_type):
dict: name -> type of column
"""
schema = _parse_schema("node", nodes_type)
resolver = jsonschema.validators.RefResolver("", schema)
resolver = _get_reference_resolver(schema)

schema = schema["$node_file_defs"]["nodes_file_root"]["properties"]["nodes"]
schema = schema["patternProperties"][""]["properties"]["0"]["properties"]
Expand All @@ -249,7 +258,7 @@ def nodes_schema_types(nodes_type):


def edges_schema_types(edges_type, virtual):
"""Get the datatypes of the attribute for nodes.
"""Get the datatypes of the attributes for edges.
Args:
edges_type (str): edges type (e.g., "chemical")
Expand All @@ -262,7 +271,7 @@ def edges_schema_types(edges_type, virtual):
edges_type += "_virtual"

schema = _parse_schema("edge", edges_type)
resolver = jsonschema.validators.RefResolver("", schema)
resolver = _get_reference_resolver(schema)

schema = schema["$edge_file_defs"]["edges_file_root"]["properties"]["edges"]
schema = schema["patternProperties"][""]["properties"]["0"]["properties"]
Expand Down
20 changes: 12 additions & 8 deletions bluepysnap/spike_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,18 +158,22 @@ def report(self):
pandas.DataFrame: A DataFrame containing the data from the report. Row's indices are the
different timestamps and the columns are ids and population names.
"""
res = pd.DataFrame()
dfs = []
for population in self.spike_report.population_names:
spikes = self.spike_report[population]
try:
ids = spikes.nodes.ids(group=self.group)
except BluepySnapError:
continue
ids = spikes.nodes.ids(group=self.group, raise_missing_property=False)
data = spikes.get(group=ids, t_start=self.t_start, t_stop=self.t_stop).to_frame()
data["population"] = np.full(len(data), population)
res = pd.concat([res, data])
if not res.empty:
res["population"] = res["population"].astype("category")

dfs.append(data)

if all(df.empty for df in dfs):
res = dfs[0][:0]
else:
res = pd.concat([df for df in dfs if not df.empty])

res["population"] = res["population"].astype("category")

return res.sort_index()

# pylint: disable=protected-access
Expand Down
4 changes: 2 additions & 2 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@

# -- Path setup --------------------------------------------------------------

from pkg_resources import get_distribution
import importlib

# -- Project information -----------------------------------------------------

project = "Blue Brain SNAP"
author = "Blue Brain Project, EPFL"

release = get_distribution("bluepysnap").version
release = importlib.metadata.distribution("bluepysnap").version
version = release

# -- General configuration ---------------------------------------------------
Expand Down
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ def __init__(self, *args, **kwargs):
install_requires=[
"cached_property>=1.0",
"h5py>=3.0.1,<4.0.0",
"jsonschema>=4.0.0,<5.0.0",
"importlib_resources>=5.0.0",
"jsonschema>=4.18.0,<5.0.0",
"referencing>=0.28.4",
"libsonata>=0.1.21,<1.0.0",
"morphio>=3.0.0,<4.0.0",
"morph-tool>=2.4.3,<3.0.0",
Expand Down
6 changes: 6 additions & 0 deletions tests/test_circuit_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ def test_init(self):

assert isinstance(self.test_obj_sorted, self.ids_cls)

def test_index_schema(self):
schema = self.test_obj_unsorted.index_schema
index = self.test_obj_unsorted.index
npt.assert_array_equal(schema.dtypes, index.dtypes)
npt.assert_array_equal(schema.names, index.names)

def test_from_arrays(self):
tested = self.ids_cls.from_arrays(["a", "b"], [0, 1])
pdt.assert_index_equal(tested.index, self._circuit_ids(["a", "b"], [0, 1]))
Expand Down
21 changes: 2 additions & 19 deletions tests/test_edge_population.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,25 +244,12 @@ def test_get_4(self):
with pytest.raises(BluepySnapError):
self.test_obj.get([0], "no-such-property")

def test_get_without_properties_deprecated(self):
def test_get_without_properties(self):
edge_ids = [0, 1]
with pytest.deprecated_call(
match="Returning ids with get/properties is deprecated and will be removed in 1.0.0"
):
actual = self.test_obj.get(edge_ids, None)
actual = self.test_obj.get(edge_ids, None)
expected = np.asarray(edge_ids, dtype=np.int64)
npt.assert_equal(actual, expected)

def test_properties_deprecated(self):
ids = [0, 1, 2, 3]
properties = ["@target_node", "@source_node"]
with pytest.deprecated_call(
match="EdgePopulation.properties function is deprecated and will be removed in 1.0.0"
):
actual = self.test_obj.properties(ids, properties=properties)
expected = self.test_obj.get(ids, properties=properties)
pdt.assert_frame_equal(actual, expected, check_exact=False)

def test_get_all_edge_ids_types(self):
assert self.test_obj.get(0, Synapse.PRE_GID).tolist() == [2]
assert self.test_obj.get(np.int64(0), Synapse.PRE_GID).tolist() == [2]
Expand Down Expand Up @@ -290,10 +277,6 @@ def test_get_all_edge_ids_types(self):
== []
)

def test_get_no_properties(self):
with pytest.deprecated_call():
self.test_obj.get(0, properties=None)

def test_positions_1(self):
actual = self.test_obj.positions([0], "afferent", "center")
expected = pd.DataFrame(
Expand Down
22 changes: 1 addition & 21 deletions tests/test_edges.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,7 @@ def test_get(self):
self.test_obj.get(properties=["other2"])

ids = CircuitEdgeIds.from_dict({"default": [0, 1, 2, 3], "default2": [0, 1, 2, 3]})
with pytest.deprecated_call(
match="Returning ids with get/properties is deprecated and will be removed in 1.0.0"
):
tested = self.test_obj.get(ids, None)
tested = self.test_obj.get(ids, None)
assert tested == ids

tested = self.test_obj.get(ids, properties=self.test_obj.property_names)
Expand Down Expand Up @@ -403,23 +400,6 @@ def test_get(self):
with pytest.raises(BluepySnapError, match="Unknown properties required: {'unknown'}"):
self.test_obj.get(ids, properties="unknown")

with pytest.deprecated_call(
match=(
"Returning ids with get/properties is deprecated and will be removed in 1.0.0. "
"Please use Edges.ids instead."
)
):
self.test_obj.get(ids)

def test_properties_deprecated(self):
ids = CircuitEdgeIds.from_dict({"default": [0, 1, 2, 3], "default2": [0, 1, 2, 3]})
with pytest.deprecated_call(
match="Edges.properties function is deprecated and will be removed in 1.0.0"
):
tested = self.test_obj.properties(ids, properties=["other2", "@source_node"])
expected = self.test_obj.get(ids, properties=["other2", "@source_node"])
pdt.assert_frame_equal(tested, expected, check_exact=False)

def test_afferent_nodes(self):
assert self.test_obj.afferent_nodes(0) == CircuitNodeIds.from_arrays(["default"], [2])
assert self.test_obj.afferent_nodes(np.int64(0)) == CircuitNodeIds.from_arrays(
Expand Down
5 changes: 2 additions & 3 deletions tests/test_node_population.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import pandas.testing as pdt
import pytest
from numpy import dtype
from pandas.api.types import is_categorical_dtype

from bluepysnap.bbp import Cell
from bluepysnap.circuit import Circuit
Expand Down Expand Up @@ -420,7 +419,7 @@ def test_get_with_library_small_number_of_values(self):
)
assert test_obj.property_names == {"categorical", "string", "int", "float"}
res = test_obj.get(properties=["categorical", "string", "int", "float"])
assert is_categorical_dtype(res["categorical"])
assert isinstance(res["categorical"].dtype, pd.CategoricalDtype)
assert res["categorical"].tolist() == ["A", "A", "B", "A", "A", "A", "A"]
assert res["categorical"].cat.categories.tolist() == ["A", "B", "C"]
assert res["categorical"].cat.codes.tolist() == [0, 0, 1, 0, 0, 0, 0]
Expand All @@ -434,7 +433,7 @@ def test_get_with_library_large_number_of_values(self):
)
assert test_obj.property_names == {"categorical", "string", "int", "float"}
res = test_obj.get(properties=["categorical", "string", "int", "float"])
assert not is_categorical_dtype(res["categorical"])
assert not isinstance(res["categorical"].dtype, pd.CategoricalDtype)
assert res["categorical"].tolist() == ["A", "A", "B", "A"]
assert res["string"].tolist() == ["AA", "BB", "CC", "DD"]
assert res["int"].tolist() == [0, 0, 1, 0]
Expand Down
7 changes: 7 additions & 0 deletions tests/test_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,13 @@ def test_wrong_datatype(field):
assert f"incorrect datatype 'int16' for '{field}'" in errors[0].message


def test__get_reference_resolver():
expected = {"const": "test_value"}
schema = {"$mock_reference": expected}
resolver = test_module._get_reference_resolver(schema)
assert resolver.lookup("#/$mock_reference").contents == expected


def test_nodes_schema_types():
property_types, dynamics_params = test_module.nodes_schema_types("biophysical")
assert "x" in property_types
Expand Down

0 comments on commit 38e1d28

Please sign in to comment.