Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,7 @@ def concat(
col_arr[cumulative_elements : cumulative_elements + mmap.number_nonzero_values()] = mmap.col_index.data
# Fill the row array for the span of this scmmap
row_arr[cumulative_rows : cumulative_rows + mmap.number_of_rows() + 1] = (
mmap.row_index + int(cumulative_rows)
mmap.row_index + int(cumulative_elements)
).data

self._feature_index.concat(mmap._feature_index)
Expand Down
30 changes: 30 additions & 0 deletions sub-packages/bionemo-scdl/tests/bionemo/scdl/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# limitations under the License.


import shutil
from pathlib import Path

import pytest
Expand All @@ -29,3 +30,32 @@ def test_directory() -> Path:
A Path object that is the directory with test data.
"""
return load("scdl/sample") / "scdl_data"


@pytest.fixture
def create_cellx_val_data(tmpdir) -> Path:
    """Stage two cellxgene validation h5ad files in a fresh temporary directory.

    Both files are resolved beneath the ``single_cell/testdata-20240506``
    resource returned by ``load`` and copied into a newly created
    ``collated_val`` directory under ``tmpdir``.

    Args:
        tmpdir: pytest's built-in per-test temporary directory fixture.

    Returns:
        A Path to the ``collated_val`` directory containing copies of the two
        h5ad files.
    """
    cellx_input_val_path = (
        load("single_cell/testdata-20240506")
        / "cellxgene_2023-12-15_small"
        / "input_data"
        / "val"
        / "assay__10x_3_v2/"
    )
    file1 = (
        cellx_input_val_path
        / "sex__female/development_stage__74-year-old_human_stage/self_reported_ethnicity__Asian/tissue_general__lung/dataset_id__f64e1be1-de15-4d27-8da4-82225cd4c035/sidx_40575621_2_0.h5ad"
    )
    file2 = (
        cellx_input_val_path
        / "sex__male/development_stage__82-year-old_human_stage/self_reported_ethnicity__European/tissue_general__lung/dataset_id__f64e1be1-de15-4d27-8da4-82225cd4c035/sidx_40596188_1_0.h5ad"
    )
    # ``tmpdir`` is a legacy ``py.path.local``; convert it so the value handed
    # to tests really is the ``pathlib.Path`` promised by the annotation.
    collated_dir = Path(tmpdir) / "collated_val"
    collated_dir.mkdir()
    for h5ad_file in (file1, file2):
        shutil.copy(h5ad_file, collated_dir)
    return collated_dir
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# limitations under the License.

import os
import tempfile
from pathlib import Path

import numpy as np
Expand Down Expand Up @@ -80,3 +81,15 @@ def test_sccollection_serialization(tmp_path, test_directory):
assert np.isclose(coll.sparsity(), 0.972753346080306, rtol=1e-6)
for fn in ["col_ptr.npy", "data.npy", "features", "metadata.json", "row_ptr.npy", "version.json"]:
assert os.path.exists(tmp_path / "flattened" / fn)


def test_sc_concat_in_flatten_cellxval(tmp_path, create_cellx_val_data):
    """Regression test for the row index produced by flattening a collection.

    Loads the h5ad files staged by ``create_cellx_val_data`` into a
    ``SingleCellCollection``, flattens them into ``tmp_path / "out"``, reopens
    the result as a ``SingleCellMemMapDataset`` and checks its ``row_index``.

    Args:
        tmp_path: pytest's per-test temporary directory.
        create_cellx_val_data: directory holding the h5ad files to load.
    """
    memmap_data = tmp_path / "out"
    with tempfile.TemporaryDirectory() as temp_dir:
        coll = SingleCellCollection(temp_dir)
        coll.load_h5ad_multi(create_cellx_val_data, max_workers=4, use_processes=False)
        coll.flatten(memmap_data, destroy_on_copy=True)
    # The flattened output was written under tmp_path, so it is reopened from
    # there rather than from the collection's scratch directory.
    data = SingleCellMemMapDataset(memmap_data)
    # Materialise once so every assertion inspects the same in-memory array.
    row_index = np.array(data.row_index)
    # NOTE(review): 2 and 1149 are presumably the values the old bug produced
    # at these positions - confirm against the original bug report.
    assert row_index[2] != 2  # regression test for bug
    assert row_index[3] != 1149  # regression test for bug
    # array_equal also checks the shape, so a wrong-length row index fails the
    # assertion cleanly instead of erroring inside an elementwise comparison.
    assert np.array_equal(row_index, np.array([0, 440, 972, 2119]))