Skip to content

Commit

Permalink
Reducing memory usage for fields from @library (#73)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomdele committed Jul 8, 2020
1 parent 35d6458 commit 29a957c
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 7 deletions.
10 changes: 8 additions & 2 deletions bluepysnap/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,14 @@ def load_population_data(self, population):
_all = libsonata.Selection([(0, node_count)])
for attr in sorted(nodes.attribute_names):
if attr in categoricals:
result[attr] = pd.Categorical.from_codes(nodes.get_enumeration(attr, _all),
categories=nodes.enumeration_values(attr))
enumeration = np.asarray(nodes.get_enumeration(attr, _all))
values = np.asarray(nodes.enumeration_values(attr))
# Use a categorical only when there are fewer than half as many distinct `values` as
# `enumeration` entries; otherwise storing the plain values reduces the memory usage.
if values.shape[0] < 0.5 * enumeration.shape[0]:
result[attr] = pd.Categorical.from_codes(enumeration, categories=values)
else:
result[attr] = values[enumeration]
else:
result[attr] = nodes.get_attribute(attr, _all)
for attr in sorted(utils.add_dynamic_prefix(nodes.dynamics_attribute_names)):
Expand Down
File renamed without changes.
Binary file added tests/data/nodes_with_library_small.h5
Binary file not shown.
22 changes: 17 additions & 5 deletions tests/test_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def test_property_values(self):
{'morph-A', 'morph-B', 'morph-C'}
)
test_obj_library = create_node_population(
str(TEST_DATA_DIR / 'nodes_with_library.h5'),
str(TEST_DATA_DIR / 'nodes_with_library_small.h5'),
"default")
assert test_obj_library.property_values("categorical") == {"A", "B", "C"}
assert test_obj_library.property_values("categorical", is_present=True) == {"A", "B"}
Expand Down Expand Up @@ -317,16 +317,28 @@ def test_get(self):
with pytest.raises(BluepySnapError):
_call([0, 999]) # one of node ids is invalid

def test_get_with_library(self):
def test_get_with_library_small_number_of_values(self):
test_obj = create_node_population(
str(TEST_DATA_DIR / 'nodes_with_library.h5'),
str(TEST_DATA_DIR / 'nodes_with_library_small.h5'),
"default")
assert test_obj.property_names == {"categorical", "string", "int", "float"}
res = test_obj.get(properties=["categorical", "string", "int", "float"])
assert is_categorical(res["categorical"])
assert res["categorical"].tolist() == ['A', 'A', 'B', 'A']
assert res["categorical"].tolist() == ['A', 'A', 'B', 'A', 'A', 'A', 'A']
assert res["categorical"].cat.categories.tolist() == ['A', 'B', 'C']
assert res["categorical"].cat.codes.tolist() == [0, 0, 1, 0]
assert res["categorical"].cat.codes.tolist() == [0, 0, 1, 0, 0, 0, 0]
assert res["string"].tolist() == ["AA", "BB", "CC", "DD", "EE", "FF", "GG"]
assert res["int"].tolist() == [0, 0, 1, 0, 0, 0, 0]
npt.assert_allclose(res["float"].tolist(), [0., 0., 1.1, 0., 0., 0., 0.])

def test_get_with_library_large_number_of_values(self):
test_obj = create_node_population(
str(TEST_DATA_DIR / 'nodes_with_library_large.h5'),
"default")
assert test_obj.property_names == {"categorical", "string", "int", "float"}
res = test_obj.get(properties=["categorical", "string", "int", "float"])
assert not is_categorical(res["categorical"])
assert res["categorical"].tolist() == ['A', 'A', 'B', 'A']
assert res["string"].tolist() == ["AA", "BB", "CC", "DD"]
assert res["int"].tolist() == [0, 0, 1, 0]
npt.assert_allclose(res["float"].tolist(), [0., 0., 1.1, 0.])
Expand Down

0 comments on commit 29a957c

Please sign in to comment.