Skip to content

Commit

Permalink
Use RangeIndex in nodes dataframe and improve _check_id (#207)
Browse files Browse the repository at this point in the history
If the index is a regular (non-range) index, the first call to df.loc[x] with x being an array rather than a single integer makes Pandas evaluate the cached property Index.is_unique, which can take a few seconds with big dataframes.

Using a RangeIndex solves the issue, because for a RangeIndex is_unique is always True.
  • Loading branch information
GianlucaFicarelli committed May 9, 2023
1 parent 95e6006 commit c2d11e4
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions bluepysnap/nodes/node_population.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def _data(self):
categoricals = nodes.enumeration_names

_all = nodes.select_all()
result = pd.DataFrame(index=np.arange(_all.flat_size))
result = pd.DataFrame(index=pd.RangeIndex(_all.flat_size))

for attr in sorted(nodes.attribute_names):
if attr in categoricals:
Expand Down Expand Up @@ -240,7 +240,7 @@ def property_dtypes(self):

def _check_id(self, node_id):
"""Check that single node ID belongs to the circuit."""
if node_id not in self._data.index:
if node_id < 0 or node_id >= len(self._data.index):
raise BluepySnapError(f"node ID not found: {node_id} in population '{self.name}'")

def _check_ids(self, node_ids):
Expand All @@ -254,9 +254,9 @@ def _check_ids(self, node_ids):
else:
max_id = max(node_ids)
min_id = min(node_ids)
if min_id < 0 or max_id >= self._data.index.shape[0]:
if min_id < 0 or max_id >= len(self._data.index):
raise BluepySnapError(
f"All node IDs must be >= 0 and < {self._data.index.shape[0]} "
f"All node IDs must be >= 0 and < {len(self._data.index)} "
f"for population '{self.name}'"
)

Expand Down

0 comments on commit c2d11e4

Please sign in to comment.