fix dtypes in CircuitIds.index_schema and several deprecation warnings (

#232) * fix dtypes in CircuitIds.index_schema for pandas 2.1.0 (caused all dtypes to be objects) * is_categorical_dtype -> isinstance(..., pd.CategoricalDtype) * stop using jsonschema.validators.RefResolver * fix future warning on concatenating empty dataframes * fix deprecationwarning on pkg_resources * Fix snap's own deprecation warnings * require jsonschema>=4.18.0 and referencing>=0.28.4
BlueBrain · Sep 8, 2023 · 38e1d28 · 38e1d28
1 parent 28a8860
commit 38e1d28
Show file tree

Hide file tree

Showing 16 changed files with 67 additions and 99 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -12,12 +12,14 @@ Improvements
 ~~~~~~~~~~~~
 - Node set resolution is done by libsonata
 - Added kwarg: `raise_missing_property` to `NodePopulation.get`
+- Undeprecated calling ``Edges.get`` and ``EdgePopulation.get`` with ``properties=None``
 
 Breaking Changes
 ~~~~~~~~~~~~~~~~
 - ``Circuit.node_sets``, ``Simulation.node_sets`` returns ``NodeSets`` object initialized with empty dict when node sets file is not present
 - ``NodeSet.resolved`` is no longer available
 - ``FrameReport.node_set`` returns node_set name instead of resolved node set query
+- Removed ``Edges.properties``, ``EdgePopulation.properties`` that were already supposed to be removed in v1.0.0
 
 
 Version v1.0.7

diff --git a/bluepysnap/_plotting.py b/bluepysnap/_plotting.py
@@ -123,7 +123,7 @@ def _update_raster_properties():
         if y_axis is None:
             props["node_id_offset"] += spikes.nodes.size
             props["pop_separators"].append(props["node_id_offset"])
-        elif pd.api.types.is_categorical_dtype(spikes.nodes.property_dtypes[y_axis]):
+        elif isinstance(spikes.nodes.property_dtypes[y_axis], pd.CategoricalDtype):
             props["categorical_values"].update(spikes.nodes.property_values(y_axis))
         else:
             props["ymin"] = min(props["ymin"], spikes.nodes.get(properties=y_axis).min())
@@ -133,7 +133,7 @@ def _update_raster_properties():
 
     # use np.int64 if displaying node_ids
     dtype = spike_report[population_names[0]].nodes.property_dtypes[y_axis] if y_axis else IDS_DTYPE
-    if pd.api.types.is_categorical_dtype(dtype):
+    if isinstance(dtype, pd.CategoricalDtype):
         # this is to prevent the problems when concatenating categoricals with unknown categories
         dtype = str
     data = pd.Series(index=report.index, dtype=dtype)

diff --git a/bluepysnap/circuit_ids.py b/bluepysnap/circuit_ids.py
@@ -57,8 +57,11 @@ def __init__(self, index, sort_index=True):
 
     @property
     def index_schema(self):
-        """Return an empty index with the same names of the wrapped index."""
-        return pd.MultiIndex.from_tuples([], names=self.index.names)
+        """Return an empty index with the same names and dtypes of the wrapped index."""
+        # NOTE: Since pandas 2.1.0, the index needs to contain the explicit dtypes. In pd.concat,
+        # the dtypes of multi-index are coerced to 'object' if any dataframe has indices with
+        # dtype='object'
+        return self.index[:0]
 
     @classmethod
     def _instance(cls, index, sort_index=True):

diff --git a/bluepysnap/edges/edge_population.py b/bluepysnap/edges/edge_population.py
@@ -29,7 +29,7 @@
 from bluepysnap.circuit_ids import CircuitEdgeId, CircuitEdgeIds
 from bluepysnap.exceptions import BluepySnapError
 from bluepysnap.sonata_constants import DYNAMICS_PREFIX, ConstContainer, Edge
-from bluepysnap.utils import IDS_DTYPE, Deprecate
+from bluepysnap.utils import IDS_DTYPE
 
 
 def _is_empty(xs):
@@ -186,10 +186,6 @@ def _get(self, selection, properties=None):
         """Get an array of edge IDs or DataFrame with edge properties."""
         edge_ids = utils.ensure_ids(selection.flatten())
         if properties is None:
-            Deprecate.warn(
-                "Returning ids with get/properties is deprecated and will be removed in 1.0.0. "
-                "Please use EdgePopulation.ids instead."
-            )
             return edge_ids
 
         if utils.is_iterable(properties):
@@ -312,16 +308,6 @@ def get(self, edge_ids, properties):
         selection = libsonata.Selection(edge_ids)
         return self._get(selection, properties)
 
-    def properties(self, edge_ids, properties):
-        """Doc is overridden below."""
-        Deprecate.warn(
-            "EdgePopulation.properties function is deprecated and will be removed in 1.0.0. "
-            "Please use EdgePopulation.get instead."
-        )
-        return self.get(edge_ids, properties)
-
-    properties.__doc__ = get.__doc__
-
     def positions(self, edge_ids, side, kind):
         """Edge positions as a pandas DataFrame.
 

diff --git a/bluepysnap/edges/edges.py b/bluepysnap/edges/edges.py
@@ -24,7 +24,6 @@
 from bluepysnap.edges import EdgePopulation
 from bluepysnap.exceptions import BluepySnapError
 from bluepysnap.network import NetworkObject
-from bluepysnap.utils import Deprecate
 
 
 class Edges(
@@ -102,23 +101,9 @@ def get(self, edge_ids=None, properties=None):  # pylint: disable=arguments-rena
         if edge_ids is None:
             raise BluepySnapError("You need to set edge_ids in get.")
         if properties is None:
-            Deprecate.warn(
-                "Returning ids with get/properties is deprecated and will be removed in 1.0.0. "
-                "Please use Edges.ids instead."
-            )
             return edge_ids
         return super().get(edge_ids, properties)
 
-    def properties(self, edge_ids, properties):
-        """Doc is overridden below."""
-        Deprecate.warn(
-            "Edges.properties function is deprecated and will be removed in 1.0.0. "
-            "Please use Edges.get instead."
-        )
-        return self.get(edge_ids, properties)
-
-    properties.__doc__ = get.__doc__
-
     def afferent_nodes(self, target, unique=True):
         """Get afferent CircuitNodeIDs for given target ``node_id``.
 

diff --git a/bluepysnap/nodes/node_population.py b/bluepysnap/nodes/node_population.py
@@ -307,7 +307,7 @@ def property_values(self, prop, is_present=False):
             The is_present argument forces the unique() even on the categorical fields.
         """
         res = self.get(properties=prop)
-        if pd.api.types.is_categorical_dtype(res) and not is_present:
+        if isinstance(res.dtype, pd.CategoricalDtype) and not is_present:
             return set(res.cat.categories)
         return set(res.unique())
 

diff --git a/bluepysnap/schemas/schemas.py b/bluepysnap/schemas/schemas.py
@@ -2,9 +2,10 @@
 from pathlib import Path
 
 import h5py
+import importlib_resources
 import jsonschema
 import numpy as np
-import pkg_resources
+import referencing
 import yaml
 
 DEFINITIONS = "definitions"
@@ -13,11 +14,10 @@
 def _load_schema_file(*args):
     """Load one of the predefined YAML schema files."""
     filename = str(Path(*args).with_suffix(".yaml"))
-    filepath = pkg_resources.resource_filename(__name__, filename)
-    if not Path(filepath).is_file():
+    filepath = importlib_resources.files(__package__) / filename
+    if not filepath.is_file():
         raise FileNotFoundError(f"Schema file {filepath} not found")
-    with open(filepath, encoding="utf-8") as fd:
-        return yaml.safe_load(fd)
+    return yaml.safe_load(filepath.read_text())
 
 
 def _parse_path(path, join_str):
@@ -216,7 +216,7 @@ def _resolve_types(resolver, types):
 
     def _resolve_type(type_):
         if type_ not in cache:
-            type_ = resolver.resolve(type_)[1]["properties"]["datatype"]["const"]
+            type_ = resolver.lookup(type_).contents["properties"]["datatype"]["const"]
             if hasattr(np, type_):
                 cache[type_] = getattr(np, type_)
             elif type_ == "utf-8":
@@ -227,8 +227,17 @@ def _resolve_type(type_):
     return {k: _resolve_type(v["$ref"]) for k, v in types.items()}
 
 
+def _get_reference_resolver(schema):
+    """Get reference resolver for the given schema."""
+    resource = referencing.Resource(
+        contents=schema,
+        specification=referencing.jsonschema.DRAFT202012,
+    )
+    return referencing.Registry().with_resource("", resource=resource).resolver()
+
+
 def nodes_schema_types(nodes_type):
-    """Get the datatypes of the attribute for nodes.
+    """Get the datatypes of the attributes for nodes.
 
     Args:
         nodes_type (str): node type (e.g., "biophysical")
@@ -237,7 +246,7 @@ def nodes_schema_types(nodes_type):
         dict: name -> type of column
     """
     schema = _parse_schema("node", nodes_type)
-    resolver = jsonschema.validators.RefResolver("", schema)
+    resolver = _get_reference_resolver(schema)
 
     schema = schema["$node_file_defs"]["nodes_file_root"]["properties"]["nodes"]
     schema = schema["patternProperties"][""]["properties"]["0"]["properties"]
@@ -249,7 +258,7 @@ def nodes_schema_types(nodes_type):
 
 
 def edges_schema_types(edges_type, virtual):
-    """Get the datatypes of the attribute for nodes.
+    """Get the datatypes of the attributes for edges.
 
     Args:
         edges_type (str): edges type (e.g., "chemical")
@@ -262,7 +271,7 @@ def edges_schema_types(edges_type, virtual):
         edges_type += "_virtual"
 
     schema = _parse_schema("edge", edges_type)
-    resolver = jsonschema.validators.RefResolver("", schema)
+    resolver = _get_reference_resolver(schema)
 
     schema = schema["$edge_file_defs"]["edges_file_root"]["properties"]["edges"]
     schema = schema["patternProperties"][""]["properties"]["0"]["properties"]

diff --git a/bluepysnap/spike_report.py b/bluepysnap/spike_report.py
@@ -158,18 +158,22 @@ def report(self):
             pandas.DataFrame: A DataFrame containing the data from the report. Row's indices are the
             different timestamps and the columns are ids and population names.
         """
-        res = pd.DataFrame()
+        dfs = []
         for population in self.spike_report.population_names:
             spikes = self.spike_report[population]
-            try:
-                ids = spikes.nodes.ids(group=self.group)
-            except BluepySnapError:
-                continue
+            ids = spikes.nodes.ids(group=self.group, raise_missing_property=False)
             data = spikes.get(group=ids, t_start=self.t_start, t_stop=self.t_stop).to_frame()
             data["population"] = np.full(len(data), population)
-            res = pd.concat([res, data])
-        if not res.empty:
-            res["population"] = res["population"].astype("category")
+
+            dfs.append(data)
+
+        if all(df.empty for df in dfs):
+            res = dfs[0][:0]
+        else:
+            res = pd.concat([df for df in dfs if not df.empty])
+
+        res["population"] = res["population"].astype("category")
+
         return res.sort_index()
 
     # pylint: disable=protected-access

diff --git a/doc/source/conf.py b/doc/source/conf.py
@@ -6,14 +6,14 @@
 
 # -- Path setup --------------------------------------------------------------
 
-from pkg_resources import get_distribution
+import importlib
 
 # -- Project information -----------------------------------------------------
 
 project = "Blue Brain SNAP"
 author = "Blue Brain Project, EPFL"
 
-release = get_distribution("bluepysnap").version
+release = importlib.metadata.distribution("bluepysnap").version
 version = release
 
 # -- General configuration ---------------------------------------------------

diff --git a/setup.py b/setup.py
@@ -45,7 +45,9 @@ def __init__(self, *args, **kwargs):
     install_requires=[
         "cached_property>=1.0",
         "h5py>=3.0.1,<4.0.0",
-        "jsonschema>=4.0.0,<5.0.0",
+        "importlib_resources>=5.0.0",
+        "jsonschema>=4.18.0,<5.0.0",
+        "referencing>=0.28.4",
         "libsonata>=0.1.21,<1.0.0",
         "morphio>=3.0.0,<4.0.0",
         "morph-tool>=2.4.3,<3.0.0",

diff --git a/tests/test_circuit_ids.py b/tests/test_circuit_ids.py
@@ -81,6 +81,12 @@ def test_init(self):
 
         assert isinstance(self.test_obj_sorted, self.ids_cls)
 
+    def test_index_schema(self):
+        schema = self.test_obj_unsorted.index_schema
+        index = self.test_obj_unsorted.index
+        npt.assert_array_equal(schema.dtypes, index.dtypes)
+        npt.assert_array_equal(schema.names, index.names)
+
     def test_from_arrays(self):
         tested = self.ids_cls.from_arrays(["a", "b"], [0, 1])
         pdt.assert_index_equal(tested.index, self._circuit_ids(["a", "b"], [0, 1]))

diff --git a/tests/test_edge_population.py b/tests/test_edge_population.py
@@ -244,25 +244,12 @@ def test_get_4(self):
         with pytest.raises(BluepySnapError):
             self.test_obj.get([0], "no-such-property")
 
-    def test_get_without_properties_deprecated(self):
+    def test_get_without_properties(self):
         edge_ids = [0, 1]
-        with pytest.deprecated_call(
-            match="Returning ids with get/properties is deprecated and will be removed in 1.0.0"
-        ):
-            actual = self.test_obj.get(edge_ids, None)
+        actual = self.test_obj.get(edge_ids, None)
         expected = np.asarray(edge_ids, dtype=np.int64)
         npt.assert_equal(actual, expected)
 
-    def test_properties_deprecated(self):
-        ids = [0, 1, 2, 3]
-        properties = ["@target_node", "@source_node"]
-        with pytest.deprecated_call(
-            match="EdgePopulation.properties function is deprecated and will be removed in 1.0.0"
-        ):
-            actual = self.test_obj.properties(ids, properties=properties)
-        expected = self.test_obj.get(ids, properties=properties)
-        pdt.assert_frame_equal(actual, expected, check_exact=False)
-
     def test_get_all_edge_ids_types(self):
         assert self.test_obj.get(0, Synapse.PRE_GID).tolist() == [2]
         assert self.test_obj.get(np.int64(0), Synapse.PRE_GID).tolist() == [2]
@@ -290,10 +277,6 @@ def test_get_all_edge_ids_types(self):
             == []
         )
 
-    def test_get_no_properties(self):
-        with pytest.deprecated_call():
-            self.test_obj.get(0, properties=None)
-
     def test_positions_1(self):
         actual = self.test_obj.positions([0], "afferent", "center")
         expected = pd.DataFrame(

diff --git a/tests/test_edges.py b/tests/test_edges.py
@@ -262,10 +262,7 @@ def test_get(self):
             self.test_obj.get(properties=["other2"])
 
         ids = CircuitEdgeIds.from_dict({"default": [0, 1, 2, 3], "default2": [0, 1, 2, 3]})
-        with pytest.deprecated_call(
-            match="Returning ids with get/properties is deprecated and will be removed in 1.0.0"
-        ):
-            tested = self.test_obj.get(ids, None)
+        tested = self.test_obj.get(ids, None)
         assert tested == ids
 
         tested = self.test_obj.get(ids, properties=self.test_obj.property_names)
@@ -403,23 +400,6 @@ def test_get(self):
         with pytest.raises(BluepySnapError, match="Unknown properties required: {'unknown'}"):
             self.test_obj.get(ids, properties="unknown")
 
-        with pytest.deprecated_call(
-            match=(
-                "Returning ids with get/properties is deprecated and will be removed in 1.0.0. "
-                "Please use Edges.ids instead."
-            )
-        ):
-            self.test_obj.get(ids)
-
-    def test_properties_deprecated(self):
-        ids = CircuitEdgeIds.from_dict({"default": [0, 1, 2, 3], "default2": [0, 1, 2, 3]})
-        with pytest.deprecated_call(
-            match="Edges.properties function is deprecated and will be removed in 1.0.0"
-        ):
-            tested = self.test_obj.properties(ids, properties=["other2", "@source_node"])
-        expected = self.test_obj.get(ids, properties=["other2", "@source_node"])
-        pdt.assert_frame_equal(tested, expected, check_exact=False)
-
     def test_afferent_nodes(self):
         assert self.test_obj.afferent_nodes(0) == CircuitNodeIds.from_arrays(["default"], [2])
         assert self.test_obj.afferent_nodes(np.int64(0)) == CircuitNodeIds.from_arrays(

diff --git a/tests/test_node_population.py b/tests/test_node_population.py
@@ -11,7 +11,6 @@
 import pandas.testing as pdt
 import pytest
 from numpy import dtype
-from pandas.api.types import is_categorical_dtype
 
 from bluepysnap.bbp import Cell
 from bluepysnap.circuit import Circuit
@@ -420,7 +419,7 @@ def test_get_with_library_small_number_of_values(self):
         )
         assert test_obj.property_names == {"categorical", "string", "int", "float"}
         res = test_obj.get(properties=["categorical", "string", "int", "float"])
-        assert is_categorical_dtype(res["categorical"])
+        assert isinstance(res["categorical"].dtype, pd.CategoricalDtype)
         assert res["categorical"].tolist() == ["A", "A", "B", "A", "A", "A", "A"]
         assert res["categorical"].cat.categories.tolist() == ["A", "B", "C"]
         assert res["categorical"].cat.codes.tolist() == [0, 0, 1, 0, 0, 0, 0]
@@ -434,7 +433,7 @@ def test_get_with_library_large_number_of_values(self):
         )
         assert test_obj.property_names == {"categorical", "string", "int", "float"}
         res = test_obj.get(properties=["categorical", "string", "int", "float"])
-        assert not is_categorical_dtype(res["categorical"])
+        assert not isinstance(res["categorical"].dtype, pd.CategoricalDtype)
         assert res["categorical"].tolist() == ["A", "A", "B", "A"]
         assert res["string"].tolist() == ["AA", "BB", "CC", "DD"]
         assert res["int"].tolist() == [0, 0, 1, 0]

diff --git a/tests/test_schemas.py b/tests/test_schemas.py
@@ -432,6 +432,13 @@ def test_wrong_datatype(field):
         assert f"incorrect datatype 'int16' for '{field}'" in errors[0].message
 
 
+def test__get_reference_resolver():
+    expected = {"const": "test_value"}
+    schema = {"$mock_reference": expected}
+    resolver = test_module._get_reference_resolver(schema)
+    assert resolver.lookup("#/$mock_reference").contents == expected
+
+
 def test_nodes_schema_types():
     property_types, dynamics_params = test_module.nodes_schema_types("biophysical")
     assert "x" in property_types