diff --git a/src/mdio/segy/geometry.py b/src/mdio/segy/geometry.py index 02a32be2..16ec5cd1 100644 --- a/src/mdio/segy/geometry.py +++ b/src/mdio/segy/geometry.py @@ -185,6 +185,10 @@ def analyze_lines_for_guns( num_guns = unique_guns_in_line.shape[0] unique_guns_per_line[str(line_val)] = list(unique_guns_in_line) + # Skip geometry detection if we already know it's Type A + if geom_type == ShotGunGeometryType.A: + continue + for gun in unique_guns_in_line: gun_mask = gun_current == gun shots_for_gun = shot_current[gun_mask] @@ -194,7 +198,7 @@ def analyze_lines_for_guns( msg = "%s %s has %s shots; div by %s guns gives %s unique mod shots." logger.info(msg, line_field, line_val, num_shots, num_guns, len(np.unique(mod_shots))) geom_type = ShotGunGeometryType.A - return unique_lines, unique_guns_per_line, geom_type + break # No need to check more guns for this line return unique_lines, unique_guns_per_line, geom_type @@ -611,18 +615,27 @@ def transform( logger.info("%s: %s has guns: %s", line_field, line_val, guns) max_num_guns = max(len(guns), max_num_guns) - # Only calculate shot_index when shot points are interleaved across guns (Type B) - if geom_type == ShotGunGeometryType.B: - shot_index = np.empty(len(index_headers), dtype="uint32") - # Use .base if available (view of another array), otherwise use the array directly - base_array = index_headers.base if index_headers.base is not None else index_headers - index_headers = rfn.append_fields(base_array, "shot_index", shot_index) + # Always calculate shot_index - the OBN template requires it + shot_index = np.empty(len(index_headers), dtype="uint32") + # Use .base if available (view of another array), otherwise use the array directly + base_array = index_headers.base if index_headers.base is not None else index_headers + index_headers = rfn.append_fields(base_array, "shot_index", shot_index) + if geom_type == ShotGunGeometryType.B: + # Type B: shot points are interleaved across guns, divide to get dense index for line_val in unique_lines: line_idxs = np.where(index_headers[line_field][:] == line_val) index_headers["shot_index"][line_idxs] = np.floor(index_headers["shot_point"][line_idxs] / max_num_guns) # Make shot index zero-based PER line index_headers["shot_index"][line_idxs] -= index_headers["shot_index"][line_idxs].min() + else: + # Type A: shot points are already unique per gun, create 0-based index from unique values + for line_val in unique_lines: + line_idxs = np.where(index_headers[line_field][:] == line_val) + shot_points = index_headers["shot_point"][line_idxs] + # np.unique returns sorted values; searchsorted maps each shot_point to its 0-based index + unique_shots = np.unique(shot_points) + index_headers["shot_index"][line_idxs] = np.searchsorted(unique_shots, shot_points) return index_headers diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 74cba0c8..5d14fc0d 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -342,3 +342,76 @@ def segy_mock_obn_no_component(fake_segy_tmp: Path) -> Path: components=None, # No component header filename_suffix="no_component", ) + + +@pytest.fixture(scope="module") +def segy_mock_obn_multiline_type_a(fake_segy_tmp: Path) -> Path: + """Generate mock OBN SEG-Y file with multiple shot lines and Type A geometry. + + This fixture tests the scenario where: + - Multiple shot lines exist (3 lines) + - Shot points are NOT interleaved across guns (Type A geometry) + - Each gun has the same dense shot point values + + This specifically tests the fix for the bug where an early return in + analyze_lines_for_guns() would skip populating unique_guns_per_line for + lines after the first Type A detection, causing KeyError when later + code tried to access those lines. + """ + num_samples = 25 + receivers = [101, 102] + shot_lines = [1, 2, 3] # Multiple lines to test all are processed + guns = [1, 2] + components = [1] # Single component for simplicity + + # Non-interleaved (Type A): both guns have the same shot point values + # This triggers Type A detection because floor(shot_point / num_guns) + # produces duplicates when shot points are dense per gun + shot_points_per_gun = { + 1: [1, 2, 3], # gun 1: dense shot points + 2: [1, 2, 3], # gun 2: same dense shot points (Type A) + } + + return create_segy_mock_obn( + fake_segy_tmp, + num_samples=num_samples, + receivers=receivers, + shot_lines=shot_lines, + guns=guns, + shot_points_per_gun=shot_points_per_gun, + components=components, + filename_suffix="multiline_type_a", + ) + + +@pytest.fixture(scope="module") +def segy_mock_obn_multiline_type_a_sparse(fake_segy_tmp: Path) -> Path: + """Generate mock OBN SEG-Y file with Type A geometry and sparse shot points. + + Variant of segy_mock_obn_multiline_type_a using non-contiguous shot point + values per gun. Exercises the vectorized Type A shot_index mapping + (np.searchsorted over np.unique) on sparse values to ensure indices are + assigned positionally within the sorted unique set, not by value. + """ + num_samples = 25 + receivers = [101, 102] + shot_lines = [1, 2] + guns = [1, 2] + components = [1] + + # Sparse, non-contiguous shot points; same set per gun keeps geometry Type A + shot_points_per_gun = { + 1: [10, 50, 100], + 2: [10, 50, 100], + } + + return create_segy_mock_obn( + fake_segy_tmp, + num_samples=num_samples, + receivers=receivers, + shot_lines=shot_lines, + guns=guns, + shot_points_per_gun=shot_points_per_gun, + components=components, + filename_suffix="multiline_type_a_sparse", + ) diff --git a/tests/integration/test_import_obn_grid_overrides.py b/tests/integration/test_import_obn_grid_overrides.py index d65c7ef4..d1f4a6c1 100644 --- a/tests/integration/test_import_obn_grid_overrides.py +++ b/tests/integration/test_import_obn_grid_overrides.py @@ -6,6 +6,7 @@ from typing import TYPE_CHECKING import dask +import numpy as np import pytest import xarray.testing as xrt from tests.integration.conftest import get_segy_mock_obn_spec @@ -158,3 +159,104 @@ def test_import_obn_without_calculate_shot_index_raises( error_message = str(exc_info.value) assert "shot_index" in error_message assert "ObnReceiverGathers3D" in error_message + + +class TestImportObnMultilineTypeA: + """Test OBN SEG-Y import with multiple shot lines and Type A geometry. + + This test class verifies the fix for a bug where analyze_lines_for_guns() + would return early upon detecting Type A geometry, leaving the + unique_guns_per_line dictionary incomplete. This caused KeyError when + CalculateShotIndex.transform() tried to access shot lines that weren't + in the dictionary. + + Regression test for: KeyError when ingesting OBN data with multiple shot + lines where Type A geometry is detected on an earlier line. + """ + + def test_import_obn_multiline_type_a_all_lines_processed( + self, + segy_mock_obn_multiline_type_a: Path, + zarr_tmp: Path, + ) -> None: + """Test that all shot lines are processed with Type A geometry. + + This test verifies that: + 1. CalculateShotIndex works with Type A geometry (non-interleaved shots) + 2. All shot lines are included in the output, not just the first one + 3. shot_index is correctly calculated for Type A (0-based from unique values) + """ + segy_spec = get_segy_mock_obn_spec(include_component=True) + grid_override = {"CalculateShotIndex": True} + + segy_to_mdio( + segy_spec=segy_spec, + mdio_template=TemplateRegistry().get("ObnReceiverGathers3D"), + input_path=segy_mock_obn_multiline_type_a, + output_path=zarr_tmp, + overwrite=True, + grid_overrides=grid_override, + ) + + ds = open_mdio(zarr_tmp) + + # Verify ALL shot lines are present (the bug would cause lines to be missing) + expected_shot_lines = [1, 2, 3] + xrt.assert_duckarray_equal(ds["shot_line"], expected_shot_lines) + + # Verify guns are present + expected_guns = [1, 2] + xrt.assert_duckarray_equal(ds["gun"], expected_guns) + + # Verify shot_index is calculated correctly for Type A geometry + # Type A: shot points [1, 2, 3] are already unique per gun + # shot_index should be 0-based indices: [0, 1, 2] + expected_shot_index = [0, 1, 2] + xrt.assert_duckarray_equal(ds["shot_index"], expected_shot_index) + + # Verify other dimensions + expected_receivers = [101, 102] + xrt.assert_duckarray_equal(ds["receiver"], expected_receivers) + + expected_components = [1] + xrt.assert_duckarray_equal(ds["component"], expected_components) + + # Verify shot_point is preserved as a coordinate + assert "shot_point" in ds.coords + assert ds["shot_point"].dims == ("shot_line", "gun", "shot_index") + + def test_import_obn_multiline_type_a_sparse_shot_points( + self, + segy_mock_obn_multiline_type_a_sparse: Path, + zarr_tmp: Path, + ) -> None: + """Test Type A shot_index mapping with sparse, non-contiguous shot points. + + Guards the vectorized Type A path (np.searchsorted over np.unique) by + using shot points that are not 0/1-based or contiguous. shot_index must + be a dense 0-based sequence over the sorted unique shot points, and the + original shot_point values must be preserved as a coordinate. + """ + segy_spec = get_segy_mock_obn_spec(include_component=True) + grid_override = {"CalculateShotIndex": True} + + segy_to_mdio( + segy_spec=segy_spec, + mdio_template=TemplateRegistry().get("ObnReceiverGathers3D"), + input_path=segy_mock_obn_multiline_type_a_sparse, + output_path=zarr_tmp, + overwrite=True, + grid_overrides=grid_override, + ) + + ds = open_mdio(zarr_tmp) + + # Sparse shot points [10, 50, 100] map to dense 0-based indices [0, 1, 2] + expected_shot_index = [0, 1, 2] + xrt.assert_duckarray_equal(ds["shot_index"], expected_shot_index) + + # Original shot_point values preserved as coordinate, not remapped + assert "shot_point" in ds.coords + assert ds["shot_point"].dims == ("shot_line", "gun", "shot_index") + unique_shot_points = np.unique(ds["shot_point"].values) + xrt.assert_duckarray_equal(unique_shot_points, [10, 50, 100])