Skip to content

Commit

Permalink
Ensure the dtype for the ids (#121)
Browse files Browse the repository at this point in the history
Ensure the dtype for the ids due to numpy/numpy#15084 issue.
This commit adds:
- a IDS_DTYPE to contain the ids dtype
- a ensure_ids function used where ids are returned
  • Loading branch information
tomdele committed Jan 28, 2021
1 parent 275b997 commit 601aacc
Show file tree
Hide file tree
Showing 14 changed files with 109 additions and 53 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
Changelog
=========

Version v0.9.1
---------------

Bug Fixes
~~~~~~~~~
- Ensure the dtypes as int64 for the node/edge ids (#121).


Version v0.9.0
---------------
Expand Down
10 changes: 5 additions & 5 deletions bluepysnap/circuit_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import numpy as np
import pandas as pd

import bluepysnap.utils
from bluepysnap import utils
from bluepysnap.exceptions import BluepySnapError
from bluepysnap._doctools import AbstractDocSubstitutionMeta

Expand Down Expand Up @@ -76,7 +76,7 @@ def from_arrays(cls, populations, population_ids, sort_index=True):
provided.
"""
populations = np.asarray(populations)
population_ids = np.asarray(population_ids)
population_ids = utils.ensure_ids(population_ids)

if len(populations) != len(population_ids):
raise BluepySnapError("populations and population_ids must have the same size. "
Expand Down Expand Up @@ -104,9 +104,9 @@ def from_dict(cls, ids_dictionary):
>>> CircuitIds.from_dict({"pop1": [0,2,4], "pop2": [1,2,5]})
"""
populations = np.empty((0,), dtype=str)
population_ids = np.empty((0,), dtype=np.int64)
population_ids = np.empty((0,), dtype=utils.IDS_DTYPE)
for population, ids in ids_dictionary.items():
ids = np.asarray(ids)
ids = utils.ensure_ids(ids)
population_ids = np.append(population_ids, ids)
populations = np.append(populations, np.full(ids.shape, fill_value=population))
index = pd.MultiIndex.from_arrays([populations, population_ids])
Expand Down Expand Up @@ -141,7 +141,7 @@ def _locate(self, population):
numpy.array: indices corresponding to the population.
"""
try:
return self.index.get_locs(bluepysnap.utils.ensure_list(population))
return self.index.get_locs(utils.ensure_list(population))
except KeyError:
return []

Expand Down
14 changes: 7 additions & 7 deletions bluepysnap/edges.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,9 +441,9 @@ def container_property_names(self, container):

def _get_property(self, prop, selection):
if prop == Edge.SOURCE_NODE_ID:
result = self._population.source_nodes(selection)
result = utils.ensure_ids(self._population.source_nodes(selection))
elif prop == Edge.TARGET_NODE_ID:
result = self._population.target_nodes(selection)
result = utils.ensure_ids(self._population.target_nodes(selection))
elif prop in self._attribute_names:
result = self._population.get_attribute(prop, selection)
elif prop in self._dynamics_params_names:
Expand All @@ -455,7 +455,7 @@ def _get_property(self, prop, selection):

def _get(self, selection, properties=None):
"""Get an array of edge IDs or DataFrame with edge properties."""
edge_ids = np.asarray(selection.flatten(), dtype=np.int64)
edge_ids = utils.ensure_ids(selection.flatten())
if properties is None:
Deprecate.warn("Returning ids with get/properties will be removed in 1.0.0."
"Please use EdgePopulation.ids() instead.")
Expand Down Expand Up @@ -522,7 +522,7 @@ def ids(self, group=None, limit=None, sample=None):
result = np.random.choice(result, min(sample, len(result)), replace=False)
if limit is not None:
result = result[:limit]
return np.asarray(result)
return utils.ensure_ids(result)

def get(self, edge_ids, properties):
"""Edge properties as pandas DataFrame.
Expand Down Expand Up @@ -597,7 +597,7 @@ def afferent_nodes(self, target, unique=True):
result = self._population.source_nodes(selection)
if unique:
result = np.unique(result)
return result
return utils.ensure_ids(result)

def efferent_nodes(self, source, unique=True):
"""Get efferent node IDs for given source ``node_id``.
Expand All @@ -622,7 +622,7 @@ def efferent_nodes(self, source, unique=True):
result = self._population.target_nodes(selection)
if unique:
result = np.unique(result)
return result
return utils.ensure_ids(result)

def pathway_edges(self, source=None, target=None, properties=None):
"""Get edges corresponding to ``source`` -> ``target`` connections.
Expand Down Expand Up @@ -652,7 +652,7 @@ def pathway_edges(self, source=None, target=None, properties=None):

if properties:
return self._get(selection, properties)
return np.asarray(selection.flatten(), np.int64)
return utils.ensure_ids(selection.flatten())

def afferent_edges(self, node_id, properties=None):
"""Get afferent edges for given ``node_id``.
Expand Down
4 changes: 2 additions & 2 deletions bluepysnap/frame_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

import bluepysnap._plotting
from bluepysnap.exceptions import BluepySnapError
from bluepysnap.utils import ensure_list
from bluepysnap.utils import ensure_list, ensure_ids

L = logging.getLogger(__name__)

Expand Down Expand Up @@ -121,7 +121,7 @@ def node_ids(self):
Returns:
np.Array: Numpy array containing the node_ids included in the report
"""
return np.sort(np.asarray(self._frame_population.get_node_ids(), dtype=np.int64))
return np.sort(ensure_ids(self._frame_population.get_node_ids()))


class FilteredFrameReport:
Expand Down
2 changes: 1 addition & 1 deletion bluepysnap/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def _get_ids_from_pop(self, fun_to_apply, returned_ids_cls, sample=None, limit=N
pops = np.full_like(pop_ids, fill_value=name_ids, dtype=str_type)
ids.append(pop_ids)
populations.append(pops)
ids = np.concatenate(ids).astype(np.int64)
ids = utils.ensure_ids(np.concatenate(ids))
populations = np.concatenate(populations).astype(str_type)
res = returned_ids_cls.from_arrays(populations, ids)
if sample:
Expand Down
2 changes: 1 addition & 1 deletion bluepysnap/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ def ids(self, group=None, limit=None, sample=None, raise_missing_property=True):
if limit is not None:
result = result[:limit]

result = np.asarray(result, dtype=np.int64)
result = utils.ensure_ids(result)
if preserve_order:
return result
else:
Expand Down
3 changes: 2 additions & 1 deletion bluepysnap/spike_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from libsonata import SpikeReader, SonataError

from bluepysnap.exceptions import BluepySnapError
from bluepysnap.utils import IDS_DTYPE
import bluepysnap._plotting


Expand Down Expand Up @@ -110,7 +111,7 @@ def get(self, group=None, t_start=None, t_stop=None):
res = pd.DataFrame(data=res, columns=[series_name, "times"]).set_index("times")[series_name]
if self._sorted_by != "by_time":
res.sort_index(inplace=True)
return res.astype(np.int64)
return res.astype(IDS_DTYPE)

@cached_property
def node_ids(self):
Expand Down
18 changes: 18 additions & 0 deletions bluepysnap/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
from bluepysnap.circuit_ids import CircuitNodeId, CircuitEdgeId
from bluepysnap.sonata_constants import DYNAMICS_PREFIX

# dtypes for the different node and edge ids. We are using np.int64 to avoid the infamous
# https://github.com/numpy/numpy/issues/15084 numpy problem. This type needs to be used for
# all returned node or edge ids.
IDS_DTYPE = np.int64


class Deprecate:
"""Class for the deprecations in BluepySnap."""
Expand Down Expand Up @@ -61,6 +66,19 @@ def ensure_list(v):
return [v]


def ensure_ids(a):
"""Convert a numpy array dtype into IDS_DTYPE.
This function is here due to the https://github.com/numpy/numpy/issues/15084 numpy issue.
It is quite unsafe to the use uint64 for the ids due to this problem where :
numpy.uint64 + int --> float64
numpy.uint64 += int --> float64
This function needs to be used everywhere node_ids or edge_ids are returned.
"""
return np.asarray(a, IDS_DTYPE)


def add_dynamic_prefix(properties):
"""Add the dynamic prefix to a list of properties."""
return [DYNAMICS_PREFIX + name for name in list(properties)]
Expand Down
3 changes: 3 additions & 0 deletions tests/test_circuit_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from bluepysnap.exceptions import BluepySnapError
import bluepysnap.circuit_ids as test_module
from bluepysnap.utils import IDS_DTYPE

from utils import setup_tempdir

Expand Down Expand Up @@ -174,8 +175,10 @@ def test_get_populations(self):
def test_get_ids(self):
tested = self.test_obj_sorted.get_ids()
npt.assert_equal(tested, [0, 1, 2, 0])
npt.assert_equal(tested.dtype, IDS_DTYPE)
tested = self.test_obj_unsorted.get_ids()
npt.assert_equal(tested, [0, 1, 0, 2])
npt.assert_equal(tested.dtype, IDS_DTYPE)

tested = self.test_obj_sorted.get_ids(unique=True)
npt.assert_equal(tested, [0, 1, 2])
Expand Down

0 comments on commit 601aacc

Please sign in to comment.