Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 20 additions & 7 deletions src/mdio/segy/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,10 @@ def analyze_lines_for_guns(
num_guns = unique_guns_in_line.shape[0]
unique_guns_per_line[str(line_val)] = list(unique_guns_in_line)

# Skip geometry detection if we already know it's Type A
if geom_type == ShotGunGeometryType.A:
continue

for gun in unique_guns_in_line:
gun_mask = gun_current == gun
shots_for_gun = shot_current[gun_mask]
Expand All @@ -194,7 +198,7 @@ def analyze_lines_for_guns(
msg = "%s %s has %s shots; div by %s guns gives %s unique mod shots."
logger.info(msg, line_field, line_val, num_shots, num_guns, len(np.unique(mod_shots)))
geom_type = ShotGunGeometryType.A
return unique_lines, unique_guns_per_line, geom_type
break # No need to check more guns for this line

return unique_lines, unique_guns_per_line, geom_type

Expand Down Expand Up @@ -611,18 +615,27 @@ def transform(
logger.info("%s: %s has guns: %s", line_field, line_val, guns)
max_num_guns = max(len(guns), max_num_guns)

# Only calculate shot_index when shot points are interleaved across guns (Type B)
if geom_type == ShotGunGeometryType.B:
shot_index = np.empty(len(index_headers), dtype="uint32")
# Use .base if available (view of another array), otherwise use the array directly
base_array = index_headers.base if index_headers.base is not None else index_headers
index_headers = rfn.append_fields(base_array, "shot_index", shot_index)
# Always calculate shot_index - the OBN template requires it
shot_index = np.empty(len(index_headers), dtype="uint32")
# Use .base if available (view of another array), otherwise use the array directly
base_array = index_headers.base if index_headers.base is not None else index_headers
index_headers = rfn.append_fields(base_array, "shot_index", shot_index)

if geom_type == ShotGunGeometryType.B:
# Type B: shot points are interleaved across guns, divide to get dense index
for line_val in unique_lines:
line_idxs = np.where(index_headers[line_field][:] == line_val)
index_headers["shot_index"][line_idxs] = np.floor(index_headers["shot_point"][line_idxs] / max_num_guns)
# Make shot index zero-based PER line
index_headers["shot_index"][line_idxs] -= index_headers["shot_index"][line_idxs].min()
else:
# Type A: shot points are already unique per gun, create 0-based index from unique values
for line_val in unique_lines:
line_idxs = np.where(index_headers[line_field][:] == line_val)
shot_points = index_headers["shot_point"][line_idxs]
# np.unique returns sorted values; searchsorted maps each shot_point to its 0-based index
unique_shots = np.unique(shot_points)
index_headers["shot_index"][line_idxs] = np.searchsorted(unique_shots, shot_points)

return index_headers

Expand Down
73 changes: 73 additions & 0 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,3 +342,76 @@ def segy_mock_obn_no_component(fake_segy_tmp: Path) -> Path:
components=None, # No component header
filename_suffix="no_component",
)


@pytest.fixture(scope="module")
def segy_mock_obn_multiline_type_a(fake_segy_tmp: Path) -> Path:
"""Generate mock OBN SEG-Y file with multiple shot lines and Type A geometry.

This fixture tests the scenario where:
- Multiple shot lines exist (3 lines)
- Shot points are NOT interleaved across guns (Type A geometry)
- Each gun has the same dense shot point values

This specifically tests the fix for the bug where an early return in
analyze_lines_for_guns() would skip populating unique_guns_per_line for
lines after the first Type A detection, causing KeyError when later
code tried to access those lines.
"""
num_samples = 25
receivers = [101, 102]
shot_lines = [1, 2, 3] # Multiple lines to test all are processed
guns = [1, 2]
components = [1] # Single component for simplicity

# Non-interleaved (Type A): both guns have the same shot point values
# This triggers Type A detection because floor(shot_point / num_guns)
# produces duplicates when shot points are dense per gun
shot_points_per_gun = {
1: [1, 2, 3], # gun 1: dense shot points
2: [1, 2, 3], # gun 2: same dense shot points (Type A)
}

return create_segy_mock_obn(
fake_segy_tmp,
num_samples=num_samples,
receivers=receivers,
shot_lines=shot_lines,
guns=guns,
shot_points_per_gun=shot_points_per_gun,
components=components,
filename_suffix="multiline_type_a",
)


@pytest.fixture(scope="module")
def segy_mock_obn_multiline_type_a_sparse(fake_segy_tmp: Path) -> Path:
"""Generate mock OBN SEG-Y file with Type A geometry and sparse shot points.

Variant of segy_mock_obn_multiline_type_a using non-contiguous shot point
values per gun. Exercises the vectorized Type A shot_index mapping
(np.searchsorted over np.unique) on sparse values to ensure indices are
assigned positionally within the sorted unique set, not by value.
"""
num_samples = 25
receivers = [101, 102]
shot_lines = [1, 2]
guns = [1, 2]
components = [1]

# Sparse, non-contiguous shot points; same set per gun keeps geometry Type A
shot_points_per_gun = {
1: [10, 50, 100],
2: [10, 50, 100],
}

return create_segy_mock_obn(
fake_segy_tmp,
num_samples=num_samples,
receivers=receivers,
shot_lines=shot_lines,
guns=guns,
shot_points_per_gun=shot_points_per_gun,
components=components,
filename_suffix="multiline_type_a_sparse",
)
102 changes: 102 additions & 0 deletions tests/integration/test_import_obn_grid_overrides.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import TYPE_CHECKING

import dask
import numpy as np
import pytest
import xarray.testing as xrt
from tests.integration.conftest import get_segy_mock_obn_spec
Expand Down Expand Up @@ -158,3 +159,104 @@ def test_import_obn_without_calculate_shot_index_raises(
error_message = str(exc_info.value)
assert "shot_index" in error_message
assert "ObnReceiverGathers3D" in error_message


class TestImportObnMultilineTypeA:
"""Test OBN SEG-Y import with multiple shot lines and Type A geometry.

This test class verifies the fix for a bug where analyze_lines_for_guns()
would return early upon detecting Type A geometry, leaving the
unique_guns_per_line dictionary incomplete. This caused KeyError when
CalculateShotIndex.transform() tried to access shot lines that weren't
in the dictionary.

Regression test for: KeyError when ingesting OBN data with multiple shot
lines where Type A geometry is detected on an earlier line.
"""

def test_import_obn_multiline_type_a_all_lines_processed(
self,
segy_mock_obn_multiline_type_a: Path,
zarr_tmp: Path,
) -> None:
"""Test that all shot lines are processed with Type A geometry.

This test verifies that:
1. CalculateShotIndex works with Type A geometry (non-interleaved shots)
2. All shot lines are included in the output, not just the first one
3. shot_index is correctly calculated for Type A (0-based from unique values)
"""
segy_spec = get_segy_mock_obn_spec(include_component=True)
grid_override = {"CalculateShotIndex": True}

segy_to_mdio(
segy_spec=segy_spec,
mdio_template=TemplateRegistry().get("ObnReceiverGathers3D"),
input_path=segy_mock_obn_multiline_type_a,
output_path=zarr_tmp,
overwrite=True,
grid_overrides=grid_override,
)

ds = open_mdio(zarr_tmp)

# Verify ALL shot lines are present (the bug would cause lines to be missing)
expected_shot_lines = [1, 2, 3]
xrt.assert_duckarray_equal(ds["shot_line"], expected_shot_lines)

# Verify guns are present
expected_guns = [1, 2]
xrt.assert_duckarray_equal(ds["gun"], expected_guns)

# Verify shot_index is calculated correctly for Type A geometry
# Type A: shot points [1, 2, 3] are already unique per gun
# shot_index should be 0-based indices: [0, 1, 2]
expected_shot_index = [0, 1, 2]
xrt.assert_duckarray_equal(ds["shot_index"], expected_shot_index)

# Verify other dimensions
expected_receivers = [101, 102]
xrt.assert_duckarray_equal(ds["receiver"], expected_receivers)

expected_components = [1]
xrt.assert_duckarray_equal(ds["component"], expected_components)

# Verify shot_point is preserved as a coordinate
assert "shot_point" in ds.coords
assert ds["shot_point"].dims == ("shot_line", "gun", "shot_index")

def test_import_obn_multiline_type_a_sparse_shot_points(
self,
segy_mock_obn_multiline_type_a_sparse: Path,
zarr_tmp: Path,
) -> None:
"""Test Type A shot_index mapping with sparse, non-contiguous shot points.

Guards the vectorized Type A path (np.searchsorted over np.unique) by
using shot points that are not 0/1-based or contiguous. shot_index must
be a dense 0-based sequence over the sorted unique shot points, and the
original shot_point values must be preserved as a coordinate.
"""
segy_spec = get_segy_mock_obn_spec(include_component=True)
grid_override = {"CalculateShotIndex": True}

segy_to_mdio(
segy_spec=segy_spec,
mdio_template=TemplateRegistry().get("ObnReceiverGathers3D"),
input_path=segy_mock_obn_multiline_type_a_sparse,
output_path=zarr_tmp,
overwrite=True,
grid_overrides=grid_override,
)

ds = open_mdio(zarr_tmp)

# Sparse shot points [10, 50, 100] map to dense 0-based indices [0, 1, 2]
expected_shot_index = [0, 1, 2]
xrt.assert_duckarray_equal(ds["shot_index"], expected_shot_index)

# Original shot_point values preserved as coordinate, not remapped
assert "shot_point" in ds.coords
assert ds["shot_point"].dims == ("shot_line", "gun", "shot_index")
unique_shot_points = np.unique(ds["shot_point"].values)
xrt.assert_duckarray_equal(unique_shot_points, [10, 50, 100])
Loading