diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fda1210..9809afe5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## [v0.5.9] + +### Fix +- Fix AnnData reading over HTTP when directory listing is disabled: skip optional Zarr groups (`uns`, `obsm`, `varm`, etc.) that cannot be discovered without listing. + ## [v0.5.8] ### Fix diff --git a/pixi.lock b/pixi.lock index d0d55be5..c401da1f 100644 --- a/pixi.lock +++ b/pixi.lock @@ -7961,7 +7961,7 @@ packages: requires_python: '>=3.11,!=3.14.1' - pypi: ./ name: ngio - version: 0.5.8.dev31+g56ae64be3 + version: 0.5.9.dev1+gf371c0950 sha256: 312a795bde3799d89350f99cf0796c741385c7a0dd865afebd40327c9ce495c1 requires_dist: - aiohttp diff --git a/src/ngio/tables/backends/_anndata_utils.py b/src/ngio/tables/backends/_anndata_utils.py index 997dda4e..1ac3e6b9 100644 --- a/src/ngio/tables/backends/_anndata_utils.py +++ b/src/ngio/tables/backends/_anndata_utils.py @@ -15,6 +15,7 @@ StoreOrGroup, open_group_wrapper, ) +from ngio.utils._zarr_utils import is_group_listable if TYPE_CHECKING: from collections.abc import Callable, Sequence @@ -47,6 +48,11 @@ def custom_anndata_read_zarr( "layers", ] + if not is_group_listable(group): + # If not listable we filter some elements + non_listable_elems = ["uns", "obsm", "varm", "obsp", "varp", "layers"] + elem_to_read = [elem for elem in elem_to_read if elem not in non_listable_elems] + # Read with handling for backwards compat def callback(func: Callable, elem_name: str, elem: Any, iospec: Any) -> Any: if iospec.encoding_type == "anndata" or elem_name.endswith("/"): diff --git a/tests/stores/conftest.py b/tests/stores/conftest.py index 362aaeac..076196b9 100644 --- a/tests/stores/conftest.py +++ b/tests/stores/conftest.py @@ -3,13 +3,23 @@ import socket import subprocess import sys +import threading import time -from pathlib import Path +from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer import boto3 import pytest +class _NoListingHTTPHandler(SimpleHTTPRequestHandler): + def list_directory(self, path): + self.send_error(403, "Directory listing not allowed") + return None + + def log_message(self, format, *args): + pass + + def _running_on_github_ci() -> bool: return os.getenv("GITHUB_ACTIONS") == "true" or os.getenv("CI") == "true" @@ -111,50 +121,24 @@ def _find_free_port(host="127.0.0.1"): @pytest.fixture(scope="session") def http_static_server(tmp_path_factory): """ - Serve a temporary directory via `python -m http.server`. + Serve a temporary directory via a non-listable HTTP server. - From the test code's perspective this is read-only: you write files - directly into `root` on disk, and then access them via HTTP. + Directory listing is disabled (403) to match production HTTP stores that + do not support listing. Individual file GETs work normally. """ + root = tmp_path_factory.mktemp("http_static_root") host = "127.0.0.1" port = _find_free_port(host) - root = tmp_path_factory.mktemp("http_static_root") - - cmd = [ - sys.executable, - "-m", - "http.server", - str(port), - "--bind", - host, - ] - - env = os.environ.copy() - env["PYTHONUNBUFFERED"] = "1" - - proc = subprocess.Popen( - cmd, - cwd=str(root), # serve this directory - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - env=env, + server = ThreadingHTTPServer( + (host, port), + lambda *a, **kw: _NoListingHTTPHandler(*a, directory=str(root), **kw), ) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() - try: - _wait_for_port(proc, host, port, timeout=10) - except Exception as e: - proc.terminate() - try: - proc.wait(timeout=5) - except subprocess.TimeoutExpired: - proc.kill() - raise RuntimeError(f"Failed to start http server: {e}") from e - - yield {"url": f"http://{host}:{port}", "root": Path(root)} + yield {"url": f"http://{host}:{port}", "root": root} - proc.terminate() - try: - proc.wait(timeout=5) - except subprocess.TimeoutExpired: - proc.kill() + server.shutdown() + server.server_close() + thread.join(timeout=5) diff --git a/tests/stores/test_http_store.py b/tests/stores/test_http_store.py index 79f0a2fe..9a041acf 100644 --- a/tests/stores/test_http_store.py +++ b/tests/stores/test_http_store.py @@ -1,5 +1,6 @@ from pathlib import Path +import pytest from utils import ( check_ome_zarr, create_sample_ome_zarr, @@ -10,6 +11,7 @@ ) from ngio import open_ome_zarr_container +from ngio.utils import NgioValueError HTTP_STORE_SUPPORTED_BACKENDS = ["anndata", "json", "csv", "parquet"] @@ -44,8 +46,8 @@ def test_http_store_derive_to_s3_store( bucket_name=moto_s3_server["bucket_name"], zarr_path=random_zarr_path(), ) - derived_ome_zarr = derive_image(ome_zarr, other_store=other_store) - check_ome_zarr(derived_ome_zarr, supported_backends=HTTP_STORE_SUPPORTED_BACKENDS) + with pytest.raises(NgioValueError, match="not listable"): + derive_image(ome_zarr, other_store=other_store) def test_http_store_derive_to_local_store( @@ -61,8 +63,8 @@ def test_http_store_derive_to_local_store( ome_zarr = open_ome_zarr_container(store=http_mapper) other_store = tmp_path / "http_local_store_test" / random_zarr_path() - derived_ome_zarr = derive_image(ome_zarr, other_store=other_store) - check_ome_zarr(derived_ome_zarr, supported_backends=HTTP_STORE_SUPPORTED_BACKENDS) + with pytest.raises(NgioValueError, match="not listable"): + derive_image(ome_zarr, other_store=other_store) def test_http_store_derive_to_memory_store(http_static_server: dict) -> None: @@ -76,5 +78,5 @@ def test_http_store_derive_to_memory_store(http_static_server: dict) -> None: ome_zarr = open_ome_zarr_container(store=http_mapper) other_store = {} - derived_ome_zarr = derive_image(ome_zarr, other_store=other_store) - check_ome_zarr(derived_ome_zarr, supported_backends=["anndata", "json"]) + with pytest.raises(NgioValueError, match="not listable"): + derive_image(ome_zarr, other_store=other_store)