From e7a1a3dfb91e97d46969883781b338527c601766 Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Wed, 10 Sep 2025 09:56:20 -0500 Subject: [PATCH 01/31] Add template for efficient storage of pre-stack data. --- .../builder/templates/seismic_prestack.py | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 src/mdio/builder/templates/seismic_prestack.py diff --git a/src/mdio/builder/templates/seismic_prestack.py b/src/mdio/builder/templates/seismic_prestack.py new file mode 100644 index 000000000..2788eb2c8 --- /dev/null +++ b/src/mdio/builder/templates/seismic_prestack.py @@ -0,0 +1,81 @@ +"""SeismicPreStackTemplate MDIO v1 dataset templates.""" + +from mdio.schemas.dtype import ScalarType +from mdio.schemas.metadata import UserAttributes +from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate + + +class SeismicPreStackTemplate(AbstractDatasetTemplate): + """ + Seismic pre-stack time Dataset template. + + This should be used for both 2D and 3D datasets. Common-shot or common-channel datasets + + Args: + domain: The domain of the dataset. + + Raises: + ValueError: If the domain is not 'time' or 'depth'. + """ + + def __init__(self, domain: str = "time"): + super().__init__(domain=domain) + + self._coord_dim_names = [ + "shot_line", + "gun", + "shot_point", + "cable", + "channel", + ] # Custom coordinates for shot gathers + self._dim_names = [*self._coord_dim_names, self._trace_domain] + self._coord_names = ["source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"] + self._var_chunk_shape = [1, 1, 16, 1, 32, -1] + + @property + def _name(self) -> str: + return f"PreStackGathers3D{self._trace_domain.capitalize()}" + + def _load_dataset_attributes(self) -> UserAttributes: + return UserAttributes( + attributes={ + "surveyDimensionality": "3D", + "ensembleType": "shot_point", + "processingStage": "pre-stack", + } + ) + + def _add_coordinates(self) -> None: + # Add dimension coordinates + for name in self._dim_names: + self._builder.add_coordinate( + name, + dimensions=[name], + data_type=ScalarType.INT32, + metadata_info=None, + ) + + self._builder.add_coordinate( + "source_coord_x", + dimensions=["shot_line", "gun", "shot_point", "cable", "channel"], + data_type=ScalarType.FLOAT64, + metadata_info=[self._horizontal_coord_unit], + ) + self._builder.add_coordinate( + "source_coord_y", + dimensions=["shot_line", "gun", "shot_point", "cable", "channel"], + data_type=ScalarType.FLOAT64, + metadata_info=[self._horizontal_coord_unit], + ) + self._builder.add_coordinate( + "group_coord_x", + dimensions=["shot_line", "gun", "shot_point", "cable", "channel"], + data_type=ScalarType.FLOAT64, + metadata_info=[self._horizontal_coord_unit], + ) + self._builder.add_coordinate( + "group_coord_y", + dimensions=["shot_line", "gun", "shot_point", "cable", "channel"], + data_type=ScalarType.FLOAT64, + metadata_info=[self._horizontal_coord_unit], + ) From 810ad684477eb7e6a8efc66fede2f82d89a7c0d2 Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Wed, 10 Sep 2025 10:00:55 -0500 Subject: [PATCH 02/31] Resolve linting issues in seismic_prestack.py --- src/mdio/builder/templates/seismic_prestack.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/mdio/builder/templates/seismic_prestack.py b/src/mdio/builder/templates/seismic_prestack.py index 2788eb2c8..56fa545dd 100644 --- a/src/mdio/builder/templates/seismic_prestack.py +++ b/src/mdio/builder/templates/seismic_prestack.py @@ -6,16 +6,12 @@ class SeismicPreStackTemplate(AbstractDatasetTemplate): - """ - Seismic pre-stack time Dataset template. - + """Seismic pre-stack time Dataset template. + This should be used for both 2D and 3D datasets. Common-shot or common-channel datasets Args: domain: The domain of the dataset. - - Raises: - ValueError: If the domain is not 'time' or 'depth'. """ def __init__(self, domain: str = "time"): From a114ba4f1ca4fdaa9acce3a739ee6d1209d4ccdf Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Wed, 24 Sep 2025 11:29:32 -0500 Subject: [PATCH 03/31] Add test to cover prestack template. --- src/mdio/builder/template_registry.py | 3 + .../builder/templates/seismic_prestack.py | 68 +++--- .../v1/templates/test_seismic_prestack.py | 207 ++++++++++++++++++ .../v1/templates/test_template_registry.py | 13 +- 4 files changed, 255 insertions(+), 36 deletions(-) create mode 100644 tests/unit/v1/templates/test_seismic_prestack.py diff --git a/src/mdio/builder/template_registry.py b/src/mdio/builder/template_registry.py index 60b7f0620..e25b4b43c 100644 --- a/src/mdio/builder/template_registry.py +++ b/src/mdio/builder/template_registry.py @@ -27,6 +27,8 @@ from mdio.builder.templates.seismic_3d_coca import Seismic3DCocaGathersTemplate from mdio.builder.templates.seismic_3d_poststack import Seismic3DPostStackTemplate from mdio.builder.templates.seismic_3d_streamer_shot import Seismic3DStreamerShotGathersTemplate +from mdio.builder.templates.seismic_3d_streamer_field import Seismic3DStreamerFieldRecords + if TYPE_CHECKING: from mdio.builder.templates.base import AbstractDatasetTemplate @@ -135,6 +137,7 @@ def _register_default_templates(self) -> None: # Field (shot) data self.register(Seismic2DStreamerShotGathersTemplate()) self.register(Seismic3DStreamerShotGathersTemplate()) + self.register(Seismic3DStreamerFieldRecords()) def get(self, template_name: str) -> AbstractDatasetTemplate: """Get an instance of a template from the registry by its name. diff --git a/src/mdio/builder/templates/seismic_prestack.py b/src/mdio/builder/templates/seismic_prestack.py index 56fa545dd..041785e7f 100644 --- a/src/mdio/builder/templates/seismic_prestack.py +++ b/src/mdio/builder/templates/seismic_prestack.py @@ -1,8 +1,11 @@ """SeismicPreStackTemplate MDIO v1 dataset templates.""" -from mdio.schemas.dtype import ScalarType -from mdio.schemas.metadata import UserAttributes -from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate +from typing import Any + +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.v1.variable import CoordinateMetadata +from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate +from mdio.builder.templates.types import SeismicDataDomain class SeismicPreStackTemplate(AbstractDatasetTemplate): @@ -14,8 +17,8 @@ class SeismicPreStackTemplate(AbstractDatasetTemplate): domain: The domain of the dataset. """ - def __init__(self, domain: str = "time"): - super().__init__(domain=domain) + def __init__(self, data_domain: SeismicDataDomain): + super().__init__(data_domain=data_domain) self._coord_dim_names = [ "shot_line", @@ -24,54 +27,59 @@ def __init__(self, domain: str = "time"): "cable", "channel", ] # Custom coordinates for shot gathers - self._dim_names = [*self._coord_dim_names, self._trace_domain] - self._coord_names = ["source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"] + self._dim_names = [*self._coord_dim_names, self._data_domain] + self._coord_names = [ + "energy_source_point_number", + "source_coord_x", + "source_coord_y", + "group_coord_x", + "group_coord_y", + ] self._var_chunk_shape = [1, 1, 16, 1, 32, -1] @property def _name(self) -> str: - return f"PreStackGathers3D{self._trace_domain.capitalize()}" + return f"PreStackGathers3D{self._data_domain.capitalize()}" - def _load_dataset_attributes(self) -> UserAttributes: - return UserAttributes( - attributes={ - "surveyDimensionality": "3D", - "ensembleType": "shot_point", - "processingStage": "pre-stack", - } - ) + def _load_dataset_attributes(self) -> dict[str, Any]: + return { + "surveyDimensionality": "3D", + "ensembleType": "shot_point", + "processingStage": "pre-stack", + } def _add_coordinates(self) -> None: # Add dimension coordinates for name in self._dim_names: - self._builder.add_coordinate( - name, - dimensions=[name], - data_type=ScalarType.INT32, - metadata_info=None, - ) + self._builder.add_coordinate(name, dimensions=(name,), data_type=ScalarType.INT32) + # Add non-dimension coordinates + self._builder.add_coordinate( + "energy_source_point_number", + dimensions=("shot_line", "gun", "shot_point"), + data_type=ScalarType.INT32, + ) self._builder.add_coordinate( "source_coord_x", - dimensions=["shot_line", "gun", "shot_point", "cable", "channel"], + dimensions=("shot_line", "gun", "shot_point"), data_type=ScalarType.FLOAT64, - metadata_info=[self._horizontal_coord_unit], + metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), ) self._builder.add_coordinate( "source_coord_y", - dimensions=["shot_line", "gun", "shot_point", "cable", "channel"], + dimensions=("shot_line", "gun", "shot_point"), data_type=ScalarType.FLOAT64, - metadata_info=[self._horizontal_coord_unit], + metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), ) self._builder.add_coordinate( "group_coord_x", - dimensions=["shot_line", "gun", "shot_point", "cable", "channel"], + dimensions=("shot_line", "gun", "shot_point", "cable", "channel"), data_type=ScalarType.FLOAT64, - metadata_info=[self._horizontal_coord_unit], + metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), ) self._builder.add_coordinate( "group_coord_y", - dimensions=["shot_line", "gun", "shot_point", "cable", "channel"], + dimensions=("shot_line", "gun", "shot_point", "cable", "channel"), data_type=ScalarType.FLOAT64, - metadata_info=[self._horizontal_coord_unit], + metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), ) diff --git a/tests/unit/v1/templates/test_seismic_prestack.py b/tests/unit/v1/templates/test_seismic_prestack.py new file mode 100644 index 000000000..ad18ab752 --- /dev/null +++ b/tests/unit/v1/templates/test_seismic_prestack.py @@ -0,0 +1,207 @@ +"""Unit tests for SeismicPreStackTemplate.""" + +import pytest +from tests.unit.v1.helpers import validate_variable + +from mdio.builder.schemas.chunk_grid import RegularChunkGrid +from mdio.builder.schemas.compressors import Blosc +from mdio.builder.schemas.compressors import BloscCname +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.units import LengthUnitEnum +from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.templates.seismic_prestack import SeismicPreStackTemplate + +UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) + + +def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: StructuredType, domain: str) -> None: + """Validate the coordinate, headers, trace_mask variables in the dataset.""" + # Verify variables + # 6 dim coords + 5 non-dim coords + 1 data + 1 trace mask + 1 headers = 14 variables + assert len(dataset.variables) == 14 + + # Verify trace headers + validate_variable( + dataset, + name="headers", + dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256), ("cable", 512), ("channel", 24)], + coords=["energy_source_point_number", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"], + dtype=headers, + ) + + validate_variable( + dataset, + name="trace_mask", + dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256), ("cable", 512), ("channel", 24)], + coords=["energy_source_point_number", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"], + dtype=ScalarType.BOOL, + ) + + # Verify dimension coordinate variables + shot_line = validate_variable( + dataset, + name="shot_line", + dims=[("shot_line", 1)], + coords=["shot_line"], + dtype=ScalarType.INT32, + ) + assert shot_line.metadata is None + + gun = validate_variable( + dataset, + name="gun", + dims=[("gun", 3)], + coords=["gun"], + dtype=ScalarType.INT32, + ) + assert gun.metadata is None + + shot_point = validate_variable( + dataset, + name="shot_point", + dims=[("shot_point", 256)], + coords=["shot_point"], + dtype=ScalarType.INT32, + ) + assert shot_point.metadata is None + + cable = validate_variable( + dataset, + name="cable", + dims=[("cable", 512)], + coords=["cable"], + dtype=ScalarType.INT32, + ) + assert cable.metadata is None + + channel = validate_variable( + dataset, + name="channel", + dims=[("channel", 24)], + coords=["channel"], + dtype=ScalarType.INT32, + ) + assert channel.metadata is None + + domain_var = validate_variable( + dataset, + name=domain, + dims=[(domain, 2048)], + coords=[domain], + dtype=ScalarType.INT32, + ) + assert domain_var.metadata is None + + # Verify non-dimension coordinate variables + validate_variable( + dataset, + name="energy_source_point_number", + dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256)], + coords=["energy_source_point_number"], + dtype=ScalarType.INT32, + ) + + source_coord_x = validate_variable( + dataset, + name="source_coord_x", + dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256)], + coords=["source_coord_x"], + dtype=ScalarType.FLOAT64, + ) + assert source_coord_x.metadata.units_v1.length == LengthUnitEnum.METER + + source_coord_y = validate_variable( + dataset, + name="source_coord_y", + dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256)], + coords=["source_coord_y"], + dtype=ScalarType.FLOAT64, + ) + assert source_coord_y.metadata.units_v1.length == LengthUnitEnum.METER + + group_coord_x = validate_variable( + dataset, + name="group_coord_x", + dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256), ("cable", 512), ("channel", 24)], + coords=["group_coord_x"], + dtype=ScalarType.FLOAT64, + ) + assert group_coord_x.metadata.units_v1.length == LengthUnitEnum.METER + + group_coord_y = validate_variable( + dataset, + name="group_coord_y", + dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256), ("cable", 512), ("channel", 24)], + coords=["group_coord_y"], + dtype=ScalarType.FLOAT64, + ) + assert group_coord_y.metadata.units_v1.length == LengthUnitEnum.METER + + +class TestSeismic3DPreStackShotTemplate: + """Unit tests for SeismicPreStackTemplate.""" + + def test_configuration(self) -> None: + """Unit tests for SeismicPreStackTemplate in time domain.""" + t = SeismicPreStackTemplate(data_domain="time") + + # Template attributes for prestack shot + assert t._data_domain == "time" + assert t._coord_dim_names == ["shot_line", "gun", "shot_point", "cable", "channel"] + assert t._dim_names == ["shot_line", "gun", "shot_point", "cable", "channel", "time"] + assert t._coord_names == ["energy_source_point_number", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"] + assert t._var_chunk_shape == [1, 1, 16, 1, 32, -1] + + # Variables instantiated when build_dataset() is called + assert t._builder is None + assert t._dim_sizes == () + assert t._horizontal_coord_unit is None + + # Verify prestack shot attributes + attrs = t._load_dataset_attributes() + assert attrs == {"surveyDimensionality": "3D", "ensembleType": "shot_point", "processingStage": "pre-stack"} + + assert t.name == "PreStackGathers3DTime" + + def test_build_dataset(self, structured_headers: StructuredType) -> None: + """Unit tests for SeismicPreStackTemplate build in time domain.""" + t = SeismicPreStackTemplate(data_domain="time") + + assert t.name == "PreStackGathers3DTime" + dataset = t.build_dataset( + "North Sea 3D Shot Time", + sizes=(1, 3, 256, 512, 24, 2048), + horizontal_coord_unit=UNITS_METER, + header_dtype=structured_headers, + ) + + assert dataset.metadata.name == "North Sea 3D Shot Time" + assert dataset.metadata.attributes["surveyDimensionality"] == "3D" + assert dataset.metadata.attributes["ensembleType"] == "shot_point" + assert dataset.metadata.attributes["processingStage"] == "pre-stack" + + _validate_coordinates_headers_trace_mask(dataset, structured_headers, "time") + + # Verify seismic variable (prestack shot time data) + seismic = validate_variable( + dataset, + name="amplitude", + dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256), ("cable", 512), ("channel", 24), ("time", 2048)], + coords=["energy_source_point_number", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"], + dtype=ScalarType.FLOAT32, + ) + assert isinstance(seismic.compressor, Blosc) + assert seismic.compressor.cname == BloscCname.zstd + assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) + assert seismic.metadata.chunk_grid.configuration.chunk_shape == (1, 1, 16, 1, 32, -1) + assert seismic.metadata.stats_v1 is None + + +@pytest.mark.parametrize("data_domain", ["Time", "TiME"]) +def test_domain_case_handling(data_domain: str) -> None: + """Test that domain parameter handles different cases correctly.""" + template = SeismicPreStackTemplate(data_domain=data_domain) + assert template._data_domain == data_domain.lower() + assert template.name.endswith(data_domain.capitalize()) diff --git a/tests/unit/v1/templates/test_template_registry.py b/tests/unit/v1/templates/test_template_registry.py index 743c17650..29cd83429 100644 --- a/tests/unit/v1/templates/test_template_registry.py +++ b/tests/unit/v1/templates/test_template_registry.py @@ -33,6 +33,7 @@ "CocaGathers3DDepth", "StreamerShotGathers2D", "StreamerShotGathers3D", + "StreamerFieldRecords3D" ] @@ -239,7 +240,7 @@ def test_list_all_templates(self) -> None: registry.register(template2) templates = registry.list_all_templates() - assert len(templates) == 16 + 2 # 16 default + 2 custom + assert len(templates) == 17 + 2 # 17 default + 2 custom assert "Template_One" in templates assert "Template_Two" in templates @@ -249,7 +250,7 @@ def test_clear_templates(self) -> None: # Default templates are always installed templates = list_templates() - assert len(templates) == 16 + assert len(templates) == 17 # Add some templates template1 = MockDatasetTemplate("Template1") @@ -258,7 +259,7 @@ def test_clear_templates(self) -> None: registry.register(template1) registry.register(template2) - assert len(registry.list_all_templates()) == 16 + 2 # 16 default + 2 custom + assert len(registry.list_all_templates()) == 17 + 2 # 17 default + 2 custom # Clear all registry.clear() @@ -391,7 +392,7 @@ def test_list_templates_global(self) -> None: register_template(template2) templates = list_templates() - assert len(templates) == 18 # 16 default + 2 custom + assert len(templates) == 19 # 17 default + 2 custom assert "template1" in templates assert "template2" in templates @@ -433,8 +434,8 @@ def register_template_worker(template_id: int) -> None: # All registrations should succeed assert len(errors) == 0 assert len(results) == 10 - # Including 8 default templates - assert len(registry.list_all_templates()) == 26 # 16 default + 10 registered + # Including default templates + assert len(registry.list_all_templates()) == 27 # 17 default + 10 registered # Check all templates are registered for i in range(10): From 65f4562cecb7fb0a9b87fbdb33cb1a63d379bf1e Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Wed, 24 Sep 2025 11:32:57 -0500 Subject: [PATCH 04/31] Linting update. --- tests/unit/v1/templates/test_seismic_prestack.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/unit/v1/templates/test_seismic_prestack.py b/tests/unit/v1/templates/test_seismic_prestack.py index ad18ab752..3f7e01b8e 100644 --- a/tests/unit/v1/templates/test_seismic_prestack.py +++ b/tests/unit/v1/templates/test_seismic_prestack.py @@ -151,7 +151,13 @@ def test_configuration(self) -> None: assert t._data_domain == "time" assert t._coord_dim_names == ["shot_line", "gun", "shot_point", "cable", "channel"] assert t._dim_names == ["shot_line", "gun", "shot_point", "cable", "channel", "time"] - assert t._coord_names == ["energy_source_point_number", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"] + assert t._coord_names == [ + "energy_source_point_number", + "source_coord_x", + "source_coord_y", + "group_coord_x", + "group_coord_y", + ] assert t._var_chunk_shape == [1, 1, 16, 1, 32, -1] # Variables instantiated when build_dataset() is called From 0dbd334b73cf4b63b1bbb83e7c51823f47ec4ba3 Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Wed, 24 Sep 2025 11:39:56 -0500 Subject: [PATCH 05/31] Fix docsting for SeismicPreStackTemplate. --- src/mdio/builder/templates/seismic_prestack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mdio/builder/templates/seismic_prestack.py b/src/mdio/builder/templates/seismic_prestack.py index 041785e7f..33e313c24 100644 --- a/src/mdio/builder/templates/seismic_prestack.py +++ b/src/mdio/builder/templates/seismic_prestack.py @@ -14,7 +14,7 @@ class SeismicPreStackTemplate(AbstractDatasetTemplate): This should be used for both 2D and 3D datasets. Common-shot or common-channel datasets Args: - domain: The domain of the dataset. + data_domain: The domain of the dataset. """ def __init__(self, data_domain: SeismicDataDomain): From 9bb161dd2fdb7526fd1a1b28bc1af047307cfa41 Mon Sep 17 00:00:00 2001 From: Dmitriy Repin Date: Thu, 16 Oct 2025 21:30:10 +0000 Subject: [PATCH 06/31] Adjust PreStackGathers3DTime template --- .../builder/templates/seismic_prestack.py | 37 +++++++--------- tests/integration/conftest.py | 4 +- .../test_import_streamer_grid_overrides.py | 15 ++++--- .../v1/templates/test_seismic_prestack.py | 43 +++++++++++-------- 4 files changed, 51 insertions(+), 48 deletions(-) diff --git a/src/mdio/builder/templates/seismic_prestack.py b/src/mdio/builder/templates/seismic_prestack.py index 33e313c24..7eb23e9d4 100644 --- a/src/mdio/builder/templates/seismic_prestack.py +++ b/src/mdio/builder/templates/seismic_prestack.py @@ -20,22 +20,17 @@ class SeismicPreStackTemplate(AbstractDatasetTemplate): def __init__(self, data_domain: SeismicDataDomain): super().__init__(data_domain=data_domain) - self._coord_dim_names = [ - "shot_line", - "gun", - "shot_point", - "cable", - "channel", - ] # Custom coordinates for shot gathers - self._dim_names = [*self._coord_dim_names, self._data_domain] - self._coord_names = [ - "energy_source_point_number", - "source_coord_x", - "source_coord_y", - "group_coord_x", - "group_coord_y", - ] - self._var_chunk_shape = [1, 1, 16, 1, 32, -1] + self._spatial_dim_names = ("shot_line", "gun", "shot_point", "cable", "channel") + self._dim_names = (*self._spatial_dim_names, self._data_domain) + self._physical_coord_names = ("source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y") + self._logical_coord_names = ("orig_field_record_num",) + # TODO(Dmitriy Repin): Allow specifying full-dimension-extent chunk size in templates. + # https://github.com/TGSAI/mdio-python/issues/720 + # When implemented, the following will be requesting the chunk size of the last dimension + # to be equal to the size of the dimension. + # self._var_chunk_shape = (1, 1, 16, 1, 32, -1) + # For now, we are hardcoding the chunk size to 1024. + self._var_chunk_shape = (1, 1, 16, 1, 32, 1024) @property def _name(self) -> str: @@ -55,7 +50,7 @@ def _add_coordinates(self) -> None: # Add non-dimension coordinates self._builder.add_coordinate( - "energy_source_point_number", + "orig_field_record_num", dimensions=("shot_line", "gun", "shot_point"), data_type=ScalarType.INT32, ) @@ -63,23 +58,23 @@ def _add_coordinates(self) -> None: "source_coord_x", dimensions=("shot_line", "gun", "shot_point"), data_type=ScalarType.FLOAT64, - metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_x")), ) self._builder.add_coordinate( "source_coord_y", dimensions=("shot_line", "gun", "shot_point"), data_type=ScalarType.FLOAT64, - metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_y")), ) self._builder.add_coordinate( "group_coord_x", dimensions=("shot_line", "gun", "shot_point", "cable", "channel"), data_type=ScalarType.FLOAT64, - metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("group_coord_x")), ) self._builder.add_coordinate( "group_coord_y", dimensions=("shot_line", "gun", "shot_point", "cable", "channel"), data_type=ScalarType.FLOAT64, - metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("group_coord_y")), ) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index d91a83832..224e2898a 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -22,7 +22,7 @@ def get_segy_mock_4d_spec() -> SegySpec: """Create a mock 4D SEG-Y specification.""" trace_header_fields = [ - HeaderField(name="field_rec_no", byte=9, format="int32"), + HeaderField(name="orig_field_record_num", byte=9, format="int32"), HeaderField(name="channel", byte=13, format="int32"), HeaderField(name="shot_point", byte=17, format="int32"), HeaderField(name="offset", byte=37, format="int32"), @@ -118,7 +118,7 @@ def create_segy_mock_4d( # noqa: PLR0913 channel, gun, shot_line = 0, 0, 0 # Assign dimension coordinate fields with calculated mock data - header_fields = ["field_rec_no", "channel", "shot_point", "offset", "shot_line", "cable", "gun"] + header_fields = ["orig_field_record_num", "channel", "shot_point", "offset", "shot_line", "cable", "gun"] headers[header_fields][trc_idx] = (shot, channel, shot, offset, shot_line, cable, gun) # Assign coordinate fields with mock data diff --git a/tests/integration/test_import_streamer_grid_overrides.py b/tests/integration/test_import_streamer_grid_overrides.py index 7ff89fb03..0618e6209 100644 --- a/tests/integration/test_import_streamer_grid_overrides.py +++ b/tests/integration/test_import_streamer_grid_overrides.py @@ -31,7 +31,9 @@ # TODO(Altay): Finish implementing these grid overrides. # https://github.com/TGSAI/mdio-python/issues/612 @pytest.mark.skip(reason="NonBinned and HasDuplicates haven't been properly implemented yet.") -@pytest.mark.parametrize("grid_override", [{"NonBinned": True}, {"HasDuplicates": True}]) +@pytest.mark.parametrize( + "grid_override", [{"NonBinned": True}, {"HasDuplicates": True}], ids=["NonBinned", "HasDuplicates"] +) @pytest.mark.parametrize("chan_header_type", [StreamerShotGeometryType.C]) class TestImport4DNonReg: # pragma: no cover - tests is skipped """Test for 4D segy import with grid overrides.""" @@ -78,7 +80,7 @@ def test_import_4d_segy( # noqa: PLR0913 xrt.assert_duckarray_equal(ds["time"], times_expected) -@pytest.mark.parametrize("grid_override", [{"AutoChannelWrap": True}, None]) +@pytest.mark.parametrize("grid_override", [{"AutoChannelWrap": True}, None], ids=["AutoChannelWrap", "None"]) @pytest.mark.parametrize("chan_header_type", [StreamerShotGeometryType.A, StreamerShotGeometryType.B]) class TestImport4D: """Test for 4D segy import with grid overrides.""" @@ -156,10 +158,9 @@ def test_import_4d_segy( # noqa: PLR0913 assert "This grid is very sparse and most likely user error with indexing." in str(execinfo.value) -# TODO(Altay): Finish implementing these grid overrides. -# https://github.com/TGSAI/mdio-python/issues/612 -@pytest.mark.skip(reason="AutoShotWrap requires a template that is not implemented yet.") -@pytest.mark.parametrize("grid_override", [{"AutoChannelWrap": True}, {"AutoShotWrap": True}, None]) +@pytest.mark.parametrize( + "grid_override", [{"AutoChannelWrap": True, "AutoShotWrap": True}, None], ids=["Channel&ShotWrap", "None"] +) @pytest.mark.parametrize("chan_header_type", [StreamerShotGeometryType.A, StreamerShotGeometryType.B]) class TestImport6D: # pragma: no cover - tests is skipped """Test for 6D segy import with grid overrides.""" @@ -177,7 +178,7 @@ def test_import_6d_segy( # noqa: PLR0913 segy_to_mdio( segy_spec=segy_spec, - mdio_template=TemplateRegistry().get("XYZ"), # Placeholder for the template + mdio_template=TemplateRegistry().get("PreStackGathers3DTime"), # Placeholder for the template input_path=segy_path, output_path=zarr_tmp, overwrite=True, diff --git a/tests/unit/v1/templates/test_seismic_prestack.py b/tests/unit/v1/templates/test_seismic_prestack.py index 3f7e01b8e..0f8a858d1 100644 --- a/tests/unit/v1/templates/test_seismic_prestack.py +++ b/tests/unit/v1/templates/test_seismic_prestack.py @@ -11,9 +11,12 @@ from mdio.builder.schemas.v1.dataset import Dataset from mdio.builder.schemas.v1.units import LengthUnitEnum from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.schemas.v1.units import TimeUnitEnum +from mdio.builder.schemas.v1.units import TimeUnitModel from mdio.builder.templates.seismic_prestack import SeismicPreStackTemplate UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) +UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: StructuredType, domain: str) -> None: @@ -27,7 +30,7 @@ def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: Structur dataset, name="headers", dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256), ("cable", 512), ("channel", 24)], - coords=["energy_source_point_number", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"], + coords=["orig_field_record_num", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"], dtype=headers, ) @@ -35,7 +38,7 @@ def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: Structur dataset, name="trace_mask", dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256), ("cable", 512), ("channel", 24)], - coords=["energy_source_point_number", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"], + coords=["orig_field_record_num", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"], dtype=ScalarType.BOOL, ) @@ -97,9 +100,9 @@ def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: Structur # Verify non-dimension coordinate variables validate_variable( dataset, - name="energy_source_point_number", + name="orig_field_record_num", dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256)], - coords=["energy_source_point_number"], + coords=["orig_field_record_num"], dtype=ScalarType.INT32, ) @@ -148,39 +151,43 @@ def test_configuration(self) -> None: t = SeismicPreStackTemplate(data_domain="time") # Template attributes for prestack shot - assert t._data_domain == "time" - assert t._coord_dim_names == ["shot_line", "gun", "shot_point", "cable", "channel"] - assert t._dim_names == ["shot_line", "gun", "shot_point", "cable", "channel", "time"] - assert t._coord_names == [ - "energy_source_point_number", + assert t.name == "PreStackGathers3DTime" + assert t.default_variable_name == "amplitude" + assert t.trace_domain == "time" + assert t.spatial_dimension_names == ("shot_line", "gun", "shot_point", "cable", "channel") + assert t.dimension_names == ("shot_line", "gun", "shot_point", "cable", "channel", "time") + assert t.physical_coordinate_names == ("source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y") + assert t.logical_coordinate_names == ("orig_field_record_num",) + assert t.coordinate_names == ( "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y", - ] - assert t._var_chunk_shape == [1, 1, 16, 1, 32, -1] + "orig_field_record_num", + ) + assert t.full_chunk_size == (1, 1, 16, 1, 32, -1) # Variables instantiated when build_dataset() is called assert t._builder is None assert t._dim_sizes == () - assert t._horizontal_coord_unit is None + assert t._units == {} # Verify prestack shot attributes attrs = t._load_dataset_attributes() assert attrs == {"surveyDimensionality": "3D", "ensembleType": "shot_point", "processingStage": "pre-stack"} + assert t.default_variable_name == "amplitude" assert t.name == "PreStackGathers3DTime" def test_build_dataset(self, structured_headers: StructuredType) -> None: """Unit tests for SeismicPreStackTemplate build in time domain.""" t = SeismicPreStackTemplate(data_domain="time") + t.add_units({"source_coord_x": UNITS_METER, "source_coord_y": UNITS_METER}) # spatial domain units + t.add_units({"group_coord_x": UNITS_METER, "group_coord_y": UNITS_METER}) # spatial domain units + t.add_units({"time": UNITS_SECOND}) # data domain units - assert t.name == "PreStackGathers3DTime" dataset = t.build_dataset( - "North Sea 3D Shot Time", - sizes=(1, 3, 256, 512, 24, 2048), - horizontal_coord_unit=UNITS_METER, - header_dtype=structured_headers, + "North Sea 3D Shot Time", sizes=(1, 3, 256, 512, 24, 2048), header_dtype=structured_headers ) assert dataset.metadata.name == "North Sea 3D Shot Time" @@ -195,7 +202,7 @@ def test_build_dataset(self, structured_headers: StructuredType) -> None: dataset, name="amplitude", dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256), ("cable", 512), ("channel", 24), ("time", 2048)], - coords=["energy_source_point_number", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"], + coords=["orig_field_record_num", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"], dtype=ScalarType.FLOAT32, ) assert isinstance(seismic.compressor, Blosc) From b9a36c79b85587d7eb6708df1eed08b93e424b51 Mon Sep 17 00:00:00 2001 From: Dmitriy Repin Date: Thu, 16 Oct 2025 21:47:18 +0000 Subject: [PATCH 07/31] touch --- tests/integration/testing_helpers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/testing_helpers.py b/tests/integration/testing_helpers.py index c871ba6db..7e362075e 100644 --- a/tests/integration/testing_helpers.py +++ b/tests/integration/testing_helpers.py @@ -49,3 +49,4 @@ def validate_variable( # noqa PLR0913 if expected_values is not None and actual_value_generator is not None: actual_values = actual_value_generator(arr) assert np.array_equal(expected_values, actual_values) + From 437333016df11f8a014dd3943130d3b4dfcb36bd Mon Sep 17 00:00:00 2001 From: Dmitriy Repin Date: Thu, 16 Oct 2025 21:49:50 +0000 Subject: [PATCH 08/31] Precomit --- tests/integration/testing_helpers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/testing_helpers.py b/tests/integration/testing_helpers.py index 7e362075e..c871ba6db 100644 --- a/tests/integration/testing_helpers.py +++ b/tests/integration/testing_helpers.py @@ -49,4 +49,3 @@ def validate_variable( # noqa PLR0913 if expected_values is not None and actual_value_generator is not None: actual_values = actual_value_generator(arr) assert np.array_equal(expected_values, actual_values) - From 6d88a2b77962af6c216131c082dd3e653938b9d6 Mon Sep 17 00:00:00 2001 From: Dmitriy Repin Date: Fri, 17 Oct 2025 11:58:05 +0000 Subject: [PATCH 09/31] Fix unit tests --- tests/unit/v1/templates/test_seismic_prestack.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/v1/templates/test_seismic_prestack.py b/tests/unit/v1/templates/test_seismic_prestack.py index 0f8a858d1..35188fa33 100644 --- a/tests/unit/v1/templates/test_seismic_prestack.py +++ b/tests/unit/v1/templates/test_seismic_prestack.py @@ -165,7 +165,7 @@ def test_configuration(self) -> None: "group_coord_y", "orig_field_record_num", ) - assert t.full_chunk_size == (1, 1, 16, 1, 32, -1) + assert t.full_chunk_size == (1, 1, 16, 1, 32, 1024) # Variables instantiated when build_dataset() is called assert t._builder is None @@ -208,7 +208,7 @@ def test_build_dataset(self, structured_headers: StructuredType) -> None: assert isinstance(seismic.compressor, Blosc) assert seismic.compressor.cname == BloscCname.zstd assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) - assert seismic.metadata.chunk_grid.configuration.chunk_shape == (1, 1, 16, 1, 32, -1) + assert seismic.metadata.chunk_grid.configuration.chunk_shape == (1, 1, 16, 1, 32, 1024) assert seismic.metadata.stats_v1 is None From 85af57eceeaf8b0182680d3f8550e39386a3aa54 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 21 Oct 2025 15:27:46 +0000 Subject: [PATCH 10/31] Fix import --- src/mdio/builder/templates/seismic_prestack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mdio/builder/templates/seismic_prestack.py b/src/mdio/builder/templates/seismic_prestack.py index 7eb23e9d4..410729da1 100644 --- a/src/mdio/builder/templates/seismic_prestack.py +++ b/src/mdio/builder/templates/seismic_prestack.py @@ -4,7 +4,7 @@ from mdio.builder.schemas.dtype import ScalarType from mdio.builder.schemas.v1.variable import CoordinateMetadata -from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate +from mdio.builder.templates.base import AbstractDatasetTemplate from mdio.builder.templates.types import SeismicDataDomain From c10f6ad634bbc73f8d643a49645047e628219aeb Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 21 Oct 2025 15:29:25 +0000 Subject: [PATCH 11/31] Update attribute name --- tests/unit/v1/templates/test_seismic_prestack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/v1/templates/test_seismic_prestack.py b/tests/unit/v1/templates/test_seismic_prestack.py index 35188fa33..5ddf0e430 100644 --- a/tests/unit/v1/templates/test_seismic_prestack.py +++ b/tests/unit/v1/templates/test_seismic_prestack.py @@ -165,7 +165,7 @@ def test_configuration(self) -> None: "group_coord_y", "orig_field_record_num", ) - assert t.full_chunk_size == (1, 1, 16, 1, 32, 1024) + assert t.full_chunk_shape == (1, 1, 16, 1, 32, 1024) # Variables instantiated when build_dataset() is called assert t._builder is None From f89de1dd1f8f42438705327be2e7ff54f3297c1b Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 21 Oct 2025 15:35:36 +0000 Subject: [PATCH 12/31] Rename source files --- src/mdio/builder/template_registry.py | 1 - ...seismic_prestack.py => seismic_3d_prestack_field_records.py} | 0 ...ic_prestack.py => test_seismic_3d_prestack_field_records.py} | 2 +- 3 files changed, 1 insertion(+), 2 deletions(-) rename src/mdio/builder/templates/{seismic_prestack.py => seismic_3d_prestack_field_records.py} (100%) rename tests/unit/v1/templates/{test_seismic_prestack.py => test_seismic_3d_prestack_field_records.py} (98%) diff --git a/src/mdio/builder/template_registry.py b/src/mdio/builder/template_registry.py index e25b4b43c..c4d319bf4 100644 --- a/src/mdio/builder/template_registry.py +++ b/src/mdio/builder/template_registry.py @@ -29,7 +29,6 @@ from mdio.builder.templates.seismic_3d_streamer_shot import Seismic3DStreamerShotGathersTemplate from mdio.builder.templates.seismic_3d_streamer_field import Seismic3DStreamerFieldRecords - if TYPE_CHECKING: from mdio.builder.templates.base import AbstractDatasetTemplate diff --git a/src/mdio/builder/templates/seismic_prestack.py b/src/mdio/builder/templates/seismic_3d_prestack_field_records.py similarity index 100% rename from src/mdio/builder/templates/seismic_prestack.py rename to src/mdio/builder/templates/seismic_3d_prestack_field_records.py diff --git a/tests/unit/v1/templates/test_seismic_prestack.py b/tests/unit/v1/templates/test_seismic_3d_prestack_field_records.py similarity index 98% rename from tests/unit/v1/templates/test_seismic_prestack.py rename to tests/unit/v1/templates/test_seismic_3d_prestack_field_records.py index 5ddf0e430..6b764abe1 100644 --- a/tests/unit/v1/templates/test_seismic_prestack.py +++ b/tests/unit/v1/templates/test_seismic_3d_prestack_field_records.py @@ -13,7 +13,7 @@ from mdio.builder.schemas.v1.units import LengthUnitModel from mdio.builder.schemas.v1.units import TimeUnitEnum from mdio.builder.schemas.v1.units import TimeUnitModel -from mdio.builder.templates.seismic_prestack import SeismicPreStackTemplate +from mdio.builder.templates.seismic_3d_prestack_field_records import SeismicPreStackTemplate UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) From c6b99365328c26eb33f9292baf94847926442a93 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 21 Oct 2025 15:47:10 +0000 Subject: [PATCH 13/31] Rename template name --- .../templates/seismic_3d_prestack_field_records.py | 4 ++-- .../test_import_streamer_grid_overrides.py | 2 +- .../test_seismic_3d_prestack_field_records.py | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/mdio/builder/templates/seismic_3d_prestack_field_records.py b/src/mdio/builder/templates/seismic_3d_prestack_field_records.py index 410729da1..5f61b78bf 100644 --- a/src/mdio/builder/templates/seismic_3d_prestack_field_records.py +++ b/src/mdio/builder/templates/seismic_3d_prestack_field_records.py @@ -8,7 +8,7 @@ from mdio.builder.templates.types import SeismicDataDomain -class SeismicPreStackTemplate(AbstractDatasetTemplate): +class Seismic3DPreStackFieldRecordsTemplate(AbstractDatasetTemplate): """Seismic pre-stack time Dataset template. This should be used for both 2D and 3D datasets. Common-shot or common-channel datasets @@ -34,7 +34,7 @@ def __init__(self, data_domain: SeismicDataDomain): @property def _name(self) -> str: - return f"PreStackGathers3D{self._data_domain.capitalize()}" + return f"PreStackFieldRecords3D{self._data_domain.capitalize()}" def _load_dataset_attributes(self) -> dict[str, Any]: return { diff --git a/tests/integration/test_import_streamer_grid_overrides.py b/tests/integration/test_import_streamer_grid_overrides.py index 0618e6209..2662733af 100644 --- a/tests/integration/test_import_streamer_grid_overrides.py +++ b/tests/integration/test_import_streamer_grid_overrides.py @@ -178,7 +178,7 @@ def test_import_6d_segy( # noqa: PLR0913 segy_to_mdio( segy_spec=segy_spec, - mdio_template=TemplateRegistry().get("PreStackGathers3DTime"), # Placeholder for the template + mdio_template=TemplateRegistry().get("PreStackFieldRecords3DTime"), input_path=segy_path, output_path=zarr_tmp, overwrite=True, diff --git a/tests/unit/v1/templates/test_seismic_3d_prestack_field_records.py b/tests/unit/v1/templates/test_seismic_3d_prestack_field_records.py index 6b764abe1..c3b07d70e 100644 --- a/tests/unit/v1/templates/test_seismic_3d_prestack_field_records.py +++ b/tests/unit/v1/templates/test_seismic_3d_prestack_field_records.py @@ -13,7 +13,7 @@ from mdio.builder.schemas.v1.units import LengthUnitModel from mdio.builder.schemas.v1.units import TimeUnitEnum from mdio.builder.schemas.v1.units import TimeUnitModel -from mdio.builder.templates.seismic_3d_prestack_field_records import SeismicPreStackTemplate +from mdio.builder.templates.seismic_3d_prestack_field_records import Seismic3DPreStackFieldRecordsTemplate UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) @@ -143,15 +143,15 @@ def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: Structur assert group_coord_y.metadata.units_v1.length == LengthUnitEnum.METER -class TestSeismic3DPreStackShotTemplate: +class TestSeismic3DPreStackFieldRecordsTemplate: """Unit tests for SeismicPreStackTemplate.""" def test_configuration(self) -> None: """Unit tests for SeismicPreStackTemplate in time domain.""" - t = SeismicPreStackTemplate(data_domain="time") + t = Seismic3DPreStackFieldRecordsTemplate(data_domain="time") # Template attributes for prestack shot - assert t.name == "PreStackGathers3DTime" + assert t.name == "PreStackFieldRecords3DTime" assert t.default_variable_name == "amplitude" assert t.trace_domain == "time" assert t.spatial_dimension_names == ("shot_line", "gun", "shot_point", "cable", "channel") @@ -177,11 +177,11 @@ def test_configuration(self) -> None: assert attrs == {"surveyDimensionality": "3D", "ensembleType": "shot_point", "processingStage": "pre-stack"} assert t.default_variable_name == "amplitude" - assert t.name == "PreStackGathers3DTime" + assert t.name == "PreStackFieldRecords3DTime" def test_build_dataset(self, structured_headers: StructuredType) -> None: """Unit tests for SeismicPreStackTemplate build in time domain.""" - t = SeismicPreStackTemplate(data_domain="time") + t = Seismic3DPreStackFieldRecordsTemplate(data_domain="time") t.add_units({"source_coord_x": UNITS_METER, "source_coord_y": UNITS_METER}) # spatial domain units t.add_units({"group_coord_x": UNITS_METER, "group_coord_y": UNITS_METER}) # spatial domain units t.add_units({"time": UNITS_SECOND}) # data domain units @@ -215,6 +215,6 @@ def test_build_dataset(self, structured_headers: StructuredType) -> None: @pytest.mark.parametrize("data_domain", ["Time", "TiME"]) def test_domain_case_handling(data_domain: str) -> None: """Test that domain parameter handles different cases correctly.""" - template = SeismicPreStackTemplate(data_domain=data_domain) + template = Seismic3DPreStackFieldRecordsTemplate(data_domain=data_domain) assert template._data_domain == data_domain.lower() assert template.name.endswith(data_domain.capitalize()) From a6a67b090ea169fee82cc6dcd8008166cfef3d30 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 21 Oct 2025 15:50:07 +0000 Subject: [PATCH 14/31] Update todo message --- .../builder/templates/seismic_3d_prestack_field_records.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/mdio/builder/templates/seismic_3d_prestack_field_records.py b/src/mdio/builder/templates/seismic_3d_prestack_field_records.py index 5f61b78bf..b01691a1f 100644 --- a/src/mdio/builder/templates/seismic_3d_prestack_field_records.py +++ b/src/mdio/builder/templates/seismic_3d_prestack_field_records.py @@ -24,12 +24,9 @@ def __init__(self, data_domain: SeismicDataDomain): self._dim_names = (*self._spatial_dim_names, self._data_domain) self._physical_coord_names = ("source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y") self._logical_coord_names = ("orig_field_record_num",) - # TODO(Dmitriy Repin): Allow specifying full-dimension-extent chunk size in templates. - # https://github.com/TGSAI/mdio-python/issues/720 - # When implemented, the following will be requesting the chunk size of the last dimension - # to be equal to the size of the dimension. + # TODO(Anyone): Disable chunking in time domain when support is merged. + # https://github.com/TGSAI/mdio-python/pull/723 # self._var_chunk_shape = (1, 1, 16, 1, 32, -1) - # For now, we are hardcoding the chunk size to 1024. self._var_chunk_shape = (1, 1, 16, 1, 32, 1024) @property From 70ee9b28ada10191db22c7c27fdb0e3aad808148 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 21 Oct 2025 16:17:25 +0000 Subject: [PATCH 15/31] Alignment with current unit testing standards --- .../test_seismic_3d_prestack_field_records.py | 148 +++++------------- 1 file changed, 36 insertions(+), 112 deletions(-) diff --git a/tests/unit/v1/templates/test_seismic_3d_prestack_field_records.py b/tests/unit/v1/templates/test_seismic_3d_prestack_field_records.py index c3b07d70e..2213cedc1 100644 --- a/tests/unit/v1/templates/test_seismic_3d_prestack_field_records.py +++ b/tests/unit/v1/templates/test_seismic_3d_prestack_field_records.py @@ -43,59 +43,19 @@ def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: Structur ) # Verify dimension coordinate variables - shot_line = validate_variable( - dataset, - name="shot_line", - dims=[("shot_line", 1)], - coords=["shot_line"], - dtype=ScalarType.INT32, - ) - assert shot_line.metadata is None - - gun = validate_variable( - dataset, - name="gun", - dims=[("gun", 3)], - coords=["gun"], - dtype=ScalarType.INT32, - ) - assert gun.metadata is None - - shot_point = validate_variable( - dataset, - name="shot_point", - dims=[("shot_point", 256)], - coords=["shot_point"], - dtype=ScalarType.INT32, - ) - assert shot_point.metadata is None - - cable = validate_variable( - dataset, - name="cable", - dims=[("cable", 512)], - coords=["cable"], - dtype=ScalarType.INT32, - ) - assert cable.metadata is None - - channel = validate_variable( - dataset, - name="channel", - dims=[("channel", 24)], - coords=["channel"], - dtype=ScalarType.INT32, - ) - assert channel.metadata is None - - domain_var = validate_variable( - dataset, - name=domain, - dims=[(domain, 2048)], - coords=[domain], - dtype=ScalarType.INT32, - ) - assert domain_var.metadata is None + for dim_name in ["shot_line", "gun", "shot_point", "cable", "channel", domain]: + validate_variable( + dataset, + name=dim_name, + dims=[ + ( + dim_name, + {"shot_line": 1, "gun": 3, "shot_point": 256, "cable": 512, "channel": 24, domain: 2048}[dim_name], + ) + ], + coords=[dim_name], + dtype=ScalarType.INT32, + ) # Verify non-dimension coordinate variables validate_variable( @@ -106,98 +66,62 @@ def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: Structur dtype=ScalarType.INT32, ) - source_coord_x = validate_variable( - dataset, - name="source_coord_x", - dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256)], - coords=["source_coord_x"], - dtype=ScalarType.FLOAT64, - ) - assert source_coord_x.metadata.units_v1.length == LengthUnitEnum.METER - - source_coord_y = validate_variable( - dataset, - name="source_coord_y", - dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256)], - coords=["source_coord_y"], - dtype=ScalarType.FLOAT64, - ) - assert source_coord_y.metadata.units_v1.length == LengthUnitEnum.METER - - group_coord_x = validate_variable( - dataset, - name="group_coord_x", - dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256), ("cable", 512), ("channel", 24)], - coords=["group_coord_x"], - dtype=ScalarType.FLOAT64, - ) - assert group_coord_x.metadata.units_v1.length == LengthUnitEnum.METER - - group_coord_y = validate_variable( - dataset, - name="group_coord_y", - dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256), ("cable", 512), ("channel", 24)], - coords=["group_coord_y"], - dtype=ScalarType.FLOAT64, - ) - assert group_coord_y.metadata.units_v1.length == LengthUnitEnum.METER + # Verify coordinate variables with units + for coord_name in ["source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"]: + coord = validate_variable( + dataset, + name=coord_name, + dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256)] + + ([("cable", 512), ("channel", 24)] if "group" in coord_name else []), + coords=[coord_name], + dtype=ScalarType.FLOAT64, + ) + assert coord.metadata.units_v1.length == LengthUnitEnum.METER class TestSeismic3DPreStackFieldRecordsTemplate: - """Unit tests for SeismicPreStackTemplate.""" + """Unit tests for Seismic3DPreStackFieldRecordsTemplate.""" def test_configuration(self) -> None: - """Unit tests for SeismicPreStackTemplate in time domain.""" + """Unit tests for Seismic3DPreStackFieldRecordsTemplate.""" t = Seismic3DPreStackFieldRecordsTemplate(data_domain="time") - # Template attributes for prestack shot + # Template attributes assert t.name == "PreStackFieldRecords3DTime" - assert t.default_variable_name == "amplitude" - assert t.trace_domain == "time" - assert t.spatial_dimension_names == ("shot_line", "gun", "shot_point", "cable", "channel") - assert t.dimension_names == ("shot_line", "gun", "shot_point", "cable", "channel", "time") - assert t.physical_coordinate_names == ("source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y") - assert t.logical_coordinate_names == ("orig_field_record_num",) - assert t.coordinate_names == ( - "source_coord_x", - "source_coord_y", - "group_coord_x", - "group_coord_y", - "orig_field_record_num", - ) + assert t._dim_names == ("shot_line", "gun", "shot_point", "cable", "channel", "time") + assert t._physical_coord_names == ("source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y") + # TODO(Anyone): Disable chunking in time domain when support is merged. + # https://github.com/TGSAI/mdio-python/pull/723 assert t.full_chunk_shape == (1, 1, 16, 1, 32, 1024) # Variables instantiated when build_dataset() is called assert t._builder is None assert t._dim_sizes == () - assert t._units == {} - # Verify prestack shot attributes + # Verify dataset attributes attrs = t._load_dataset_attributes() assert attrs == {"surveyDimensionality": "3D", "ensembleType": "shot_point", "processingStage": "pre-stack"} assert t.default_variable_name == "amplitude" - assert t.name == "PreStackFieldRecords3DTime" - def test_build_dataset(self, structured_headers: StructuredType) -> None: - """Unit tests for SeismicPreStackTemplate build in time domain.""" + """Unit tests for Seismic3DPreStackFieldRecordsTemplate build.""" t = Seismic3DPreStackFieldRecordsTemplate(data_domain="time") t.add_units({"source_coord_x": UNITS_METER, "source_coord_y": UNITS_METER}) # spatial domain units t.add_units({"group_coord_x": UNITS_METER, "group_coord_y": UNITS_METER}) # spatial domain units t.add_units({"time": UNITS_SECOND}) # data domain units dataset = t.build_dataset( - "North Sea 3D Shot Time", sizes=(1, 3, 256, 512, 24, 2048), header_dtype=structured_headers + "North Sea 3D Field Records", sizes=(1, 3, 256, 512, 24, 2048), header_dtype=structured_headers ) - assert dataset.metadata.name == "North Sea 3D Shot Time" + assert dataset.metadata.name == "North Sea 3D Field Records" assert dataset.metadata.attributes["surveyDimensionality"] == "3D" assert dataset.metadata.attributes["ensembleType"] == "shot_point" assert dataset.metadata.attributes["processingStage"] == "pre-stack" _validate_coordinates_headers_trace_mask(dataset, structured_headers, "time") - # Verify seismic variable (prestack shot time data) + # Verify seismic variable seismic = validate_variable( dataset, name="amplitude", From a7803f70bce6420ff7ed97413bd1bacd1ad53617 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 21 Oct 2025 16:29:11 +0000 Subject: [PATCH 16/31] Use more correct template name --- ...mic_3d_prestack_streamer_field_records.py} | 8 ++++--- .../test_import_streamer_grid_overrides.py | 2 +- ...mic_3d_prestack_streamer_field_records.py} | 22 +++++++++---------- 3 files changed, 17 insertions(+), 15 deletions(-) rename src/mdio/builder/templates/{seismic_3d_prestack_field_records.py => seismic_3d_prestack_streamer_field_records.py} (90%) rename tests/unit/v1/templates/{test_seismic_3d_prestack_field_records.py => test_seismic_3d_prestack_streamer_field_records.py} (86%) diff --git a/src/mdio/builder/templates/seismic_3d_prestack_field_records.py b/src/mdio/builder/templates/seismic_3d_prestack_streamer_field_records.py similarity index 90% rename from src/mdio/builder/templates/seismic_3d_prestack_field_records.py rename to src/mdio/builder/templates/seismic_3d_prestack_streamer_field_records.py index b01691a1f..b3b425125 100644 --- a/src/mdio/builder/templates/seismic_3d_prestack_field_records.py +++ b/src/mdio/builder/templates/seismic_3d_prestack_streamer_field_records.py @@ -8,10 +8,12 @@ from mdio.builder.templates.types import SeismicDataDomain -class Seismic3DPreStackFieldRecordsTemplate(AbstractDatasetTemplate): +class Seismic3DPreStackStreamerFieldRecordsTemplate(AbstractDatasetTemplate): """Seismic pre-stack time Dataset template. - This should be used for both 2D and 3D datasets. Common-shot or common-channel datasets + A generalized template for pre-stack field records in either 2D or 3D. + - Common-shot dataset + - Common-channel dataset Args: data_domain: The domain of the dataset. @@ -31,7 +33,7 @@ def __init__(self, data_domain: SeismicDataDomain): @property def _name(self) -> str: - return f"PreStackFieldRecords3D{self._data_domain.capitalize()}" + return f"PreStackStreamerFieldRecords3D{self._data_domain.capitalize()}" def _load_dataset_attributes(self) -> dict[str, Any]: return { diff --git a/tests/integration/test_import_streamer_grid_overrides.py b/tests/integration/test_import_streamer_grid_overrides.py index 2662733af..9e8f9eb5b 100644 --- a/tests/integration/test_import_streamer_grid_overrides.py +++ b/tests/integration/test_import_streamer_grid_overrides.py @@ -178,7 +178,7 @@ def test_import_6d_segy( # noqa: PLR0913 segy_to_mdio( segy_spec=segy_spec, - mdio_template=TemplateRegistry().get("PreStackFieldRecords3DTime"), + mdio_template=TemplateRegistry().get("PreStackStreamerFieldRecords3DTime"), input_path=segy_path, output_path=zarr_tmp, overwrite=True, diff --git a/tests/unit/v1/templates/test_seismic_3d_prestack_field_records.py b/tests/unit/v1/templates/test_seismic_3d_prestack_streamer_field_records.py similarity index 86% rename from tests/unit/v1/templates/test_seismic_3d_prestack_field_records.py rename to tests/unit/v1/templates/test_seismic_3d_prestack_streamer_field_records.py index 2213cedc1..74a5cd4ec 100644 --- a/tests/unit/v1/templates/test_seismic_3d_prestack_field_records.py +++ b/tests/unit/v1/templates/test_seismic_3d_prestack_streamer_field_records.py @@ -13,7 +13,7 @@ from mdio.builder.schemas.v1.units import LengthUnitModel from mdio.builder.schemas.v1.units import TimeUnitEnum from mdio.builder.schemas.v1.units import TimeUnitModel -from mdio.builder.templates.seismic_3d_prestack_field_records import Seismic3DPreStackFieldRecordsTemplate +from mdio.builder.templates.seismic_3d_prestack_streamer_field_records import Seismic3DPreStackStreamerFieldRecordsTemplate UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) @@ -79,15 +79,15 @@ def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: Structur assert coord.metadata.units_v1.length == LengthUnitEnum.METER -class TestSeismic3DPreStackFieldRecordsTemplate: - """Unit tests for Seismic3DPreStackFieldRecordsTemplate.""" +class TestSeismic3DPreStackStreamerFieldRecordsTemplate: + """Unit tests for Seismic3DPreStackStreamerFieldRecordsTemplate.""" def test_configuration(self) -> None: - """Unit tests for Seismic3DPreStackFieldRecordsTemplate.""" - t = Seismic3DPreStackFieldRecordsTemplate(data_domain="time") + """Unit tests for Seismic3DPreStackStreamerFieldRecordsTemplate.""" + t = Seismic3DPreStackStreamerFieldRecordsTemplate(data_domain="time") # Template attributes - assert t.name == "PreStackFieldRecords3DTime" + assert t.name == "PreStackStreamerFieldRecords3DTime" assert t._dim_names == ("shot_line", "gun", "shot_point", "cable", "channel", "time") assert t._physical_coord_names == ("source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y") # TODO(Anyone): Disable chunking in time domain when support is merged. @@ -104,17 +104,17 @@ def test_configuration(self) -> None: assert t.default_variable_name == "amplitude" def test_build_dataset(self, structured_headers: StructuredType) -> None: - """Unit tests for Seismic3DPreStackFieldRecordsTemplate build.""" - t = Seismic3DPreStackFieldRecordsTemplate(data_domain="time") + """Unit tests for Seismic3DPreStackStreamerFieldRecordsTemplate build.""" + t = Seismic3DPreStackStreamerFieldRecordsTemplate(data_domain="time") t.add_units({"source_coord_x": UNITS_METER, "source_coord_y": UNITS_METER}) # spatial domain units t.add_units({"group_coord_x": UNITS_METER, "group_coord_y": UNITS_METER}) # spatial domain units t.add_units({"time": UNITS_SECOND}) # data domain units dataset = t.build_dataset( - "North Sea 3D Field Records", sizes=(1, 3, 256, 512, 24, 2048), header_dtype=structured_headers + "North Sea 3D Streamer Field Records", sizes=(1, 3, 256, 512, 24, 2048), header_dtype=structured_headers ) - assert dataset.metadata.name == "North Sea 3D Field Records" + assert dataset.metadata.name == "North Sea 3D Streamer Field Records" assert dataset.metadata.attributes["surveyDimensionality"] == "3D" assert dataset.metadata.attributes["ensembleType"] == "shot_point" assert dataset.metadata.attributes["processingStage"] == "pre-stack" @@ -139,6 +139,6 @@ def test_build_dataset(self, structured_headers: StructuredType) -> None: @pytest.mark.parametrize("data_domain", ["Time", "TiME"]) def test_domain_case_handling(data_domain: str) -> None: """Test that domain parameter handles different cases correctly.""" - template = Seismic3DPreStackFieldRecordsTemplate(data_domain=data_domain) + template = Seismic3DPreStackStreamerFieldRecordsTemplate(data_domain=data_domain) assert template._data_domain == data_domain.lower() assert template.name.endswith(data_domain.capitalize()) From 7b3eb5e7fa3c2bc2be6a796ba89226450974cfc4 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 21 Oct 2025 16:38:52 +0000 Subject: [PATCH 17/31] pre-commit --- .../test_seismic_3d_prestack_streamer_field_records.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/unit/v1/templates/test_seismic_3d_prestack_streamer_field_records.py b/tests/unit/v1/templates/test_seismic_3d_prestack_streamer_field_records.py index 74a5cd4ec..6fb343c08 100644 --- a/tests/unit/v1/templates/test_seismic_3d_prestack_streamer_field_records.py +++ b/tests/unit/v1/templates/test_seismic_3d_prestack_streamer_field_records.py @@ -13,7 +13,9 @@ from mdio.builder.schemas.v1.units import LengthUnitModel from mdio.builder.schemas.v1.units import TimeUnitEnum from mdio.builder.schemas.v1.units import TimeUnitModel -from mdio.builder.templates.seismic_3d_prestack_streamer_field_records import Seismic3DPreStackStreamerFieldRecordsTemplate +from mdio.builder.templates.seismic_3d_prestack_streamer_field_records import ( + Seismic3DPreStackStreamerFieldRecordsTemplate, +) UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) From fe02c3821d52c1133e05f1256f439c010e394df6 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 21 Oct 2025 16:41:15 +0000 Subject: [PATCH 18/31] Update doc string --- .../test_seismic_3d_prestack_streamer_field_records.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/v1/templates/test_seismic_3d_prestack_streamer_field_records.py b/tests/unit/v1/templates/test_seismic_3d_prestack_streamer_field_records.py index 6fb343c08..d07bd03d8 100644 --- a/tests/unit/v1/templates/test_seismic_3d_prestack_streamer_field_records.py +++ b/tests/unit/v1/templates/test_seismic_3d_prestack_streamer_field_records.py @@ -1,4 +1,4 @@ -"""Unit tests for SeismicPreStackTemplate.""" +"""Unit tests for Seismic3DPreStackStreamerFieldRecordsTemplate.""" import pytest from tests.unit.v1.helpers import validate_variable From 28ee56b2ec067c1f79628c933b2f54e09b9ebd9e Mon Sep 17 00:00:00 2001 From: Altay Sansal <13684161+tasansal@users.noreply.github.com> Date: Wed, 29 Oct 2025 14:28:37 -0500 Subject: [PATCH 19/31] rename streamer field data template --- src/mdio/builder/template_registry.py | 4 ++-- ...ecords.py => seismic_3d_streamer_field.py} | 15 +++++-------- ...s.py => test_seismic_3d_streamer_field.py} | 22 +++++++++---------- .../v1/templates/test_template_registry.py | 2 +- 4 files changed, 19 insertions(+), 24 deletions(-) rename src/mdio/builder/templates/{seismic_3d_prestack_streamer_field_records.py => seismic_3d_streamer_field.py} (82%) rename tests/unit/v1/templates/{test_seismic_3d_prestack_streamer_field_records.py => test_seismic_3d_streamer_field.py} (88%) diff --git a/src/mdio/builder/template_registry.py b/src/mdio/builder/template_registry.py index c4d319bf4..d90d5cfd5 100644 --- a/src/mdio/builder/template_registry.py +++ b/src/mdio/builder/template_registry.py @@ -26,8 +26,8 @@ from mdio.builder.templates.seismic_3d_cdp import Seismic3DCdpGathersTemplate from mdio.builder.templates.seismic_3d_coca import Seismic3DCocaGathersTemplate from mdio.builder.templates.seismic_3d_poststack import Seismic3DPostStackTemplate +from mdio.builder.templates.seismic_3d_streamer_field import Seismic3DStreamerFieldRecordsTemplate from mdio.builder.templates.seismic_3d_streamer_shot import Seismic3DStreamerShotGathersTemplate -from mdio.builder.templates.seismic_3d_streamer_field import Seismic3DStreamerFieldRecords if TYPE_CHECKING: from mdio.builder.templates.base import AbstractDatasetTemplate @@ -136,7 +136,7 @@ def _register_default_templates(self) -> None: # Field (shot) data self.register(Seismic2DStreamerShotGathersTemplate()) self.register(Seismic3DStreamerShotGathersTemplate()) - self.register(Seismic3DStreamerFieldRecords()) + self.register(Seismic3DStreamerFieldRecordsTemplate()) def get(self, template_name: str) -> AbstractDatasetTemplate: """Get an instance of a template from the registry by its name. diff --git a/src/mdio/builder/templates/seismic_3d_prestack_streamer_field_records.py b/src/mdio/builder/templates/seismic_3d_streamer_field.py similarity index 82% rename from src/mdio/builder/templates/seismic_3d_prestack_streamer_field_records.py rename to src/mdio/builder/templates/seismic_3d_streamer_field.py index b3b425125..eeb88fe86 100644 --- a/src/mdio/builder/templates/seismic_3d_prestack_streamer_field_records.py +++ b/src/mdio/builder/templates/seismic_3d_streamer_field.py @@ -1,4 +1,4 @@ -"""SeismicPreStackTemplate MDIO v1 dataset templates.""" +"""Seismic3DStreamerFieldRecordsTemplate MDIO v1 dataset templates.""" from typing import Any @@ -8,7 +8,7 @@ from mdio.builder.templates.types import SeismicDataDomain -class Seismic3DPreStackStreamerFieldRecordsTemplate(AbstractDatasetTemplate): +class Seismic3DStreamerFieldRecordsTemplate(AbstractDatasetTemplate): """Seismic pre-stack time Dataset template. A generalized template for pre-stack field records in either 2D or 3D. @@ -19,26 +19,23 @@ class Seismic3DPreStackStreamerFieldRecordsTemplate(AbstractDatasetTemplate): data_domain: The domain of the dataset. """ - def __init__(self, data_domain: SeismicDataDomain): + def __init__(self, data_domain: SeismicDataDomain = "time"): super().__init__(data_domain=data_domain) self._spatial_dim_names = ("shot_line", "gun", "shot_point", "cable", "channel") self._dim_names = (*self._spatial_dim_names, self._data_domain) self._physical_coord_names = ("source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y") - self._logical_coord_names = ("orig_field_record_num",) - # TODO(Anyone): Disable chunking in time domain when support is merged. - # https://github.com/TGSAI/mdio-python/pull/723 - # self._var_chunk_shape = (1, 1, 16, 1, 32, -1) + self._logical_coord_names = ("orig_field_record_num",) # ffid self._var_chunk_shape = (1, 1, 16, 1, 32, 1024) @property def _name(self) -> str: - return f"PreStackStreamerFieldRecords3D{self._data_domain.capitalize()}" + return "StreamerFieldRecords3D" def _load_dataset_attributes(self) -> dict[str, Any]: return { "surveyDimensionality": "3D", - "ensembleType": "shot_point", + "ensembleType": "track", "processingStage": "pre-stack", } diff --git a/tests/unit/v1/templates/test_seismic_3d_prestack_streamer_field_records.py b/tests/unit/v1/templates/test_seismic_3d_streamer_field.py similarity index 88% rename from tests/unit/v1/templates/test_seismic_3d_prestack_streamer_field_records.py rename to tests/unit/v1/templates/test_seismic_3d_streamer_field.py index d07bd03d8..ca96ff37e 100644 --- a/tests/unit/v1/templates/test_seismic_3d_prestack_streamer_field_records.py +++ b/tests/unit/v1/templates/test_seismic_3d_streamer_field.py @@ -1,4 +1,4 @@ -"""Unit tests for Seismic3DPreStackStreamerFieldRecordsTemplate.""" +"""Unit tests for Seismic3DStreamerFieldRecordsTemplate.""" import pytest from tests.unit.v1.helpers import validate_variable @@ -13,9 +13,7 @@ from mdio.builder.schemas.v1.units import LengthUnitModel from mdio.builder.schemas.v1.units import TimeUnitEnum from mdio.builder.schemas.v1.units import TimeUnitModel -from mdio.builder.templates.seismic_3d_prestack_streamer_field_records import ( - Seismic3DPreStackStreamerFieldRecordsTemplate, -) +from mdio.builder.templates.seismic_3d_streamer_field import Seismic3DStreamerFieldRecordsTemplate UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) @@ -81,15 +79,15 @@ def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: Structur assert coord.metadata.units_v1.length == LengthUnitEnum.METER -class TestSeismic3DPreStackStreamerFieldRecordsTemplate: - """Unit tests for Seismic3DPreStackStreamerFieldRecordsTemplate.""" +class TestSeismic3DStreamerFieldRecordsTemplate: + """Unit tests for Seismic3DStreamerFieldRecordsTemplate.""" def test_configuration(self) -> None: - """Unit tests for Seismic3DPreStackStreamerFieldRecordsTemplate.""" - t = Seismic3DPreStackStreamerFieldRecordsTemplate(data_domain="time") + """Unit tests for Seismic3DStreamerFieldRecordsTemplate.""" + t = Seismic3DStreamerFieldRecordsTemplate(data_domain="time") # Template attributes - assert t.name == "PreStackStreamerFieldRecords3DTime" + assert t.name == "StreamerFieldRecords3D" assert t._dim_names == ("shot_line", "gun", "shot_point", "cable", "channel", "time") assert t._physical_coord_names == ("source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y") # TODO(Anyone): Disable chunking in time domain when support is merged. @@ -106,8 +104,8 @@ def test_configuration(self) -> None: assert t.default_variable_name == "amplitude" def test_build_dataset(self, structured_headers: StructuredType) -> None: - """Unit tests for Seismic3DPreStackStreamerFieldRecordsTemplate build.""" - t = Seismic3DPreStackStreamerFieldRecordsTemplate(data_domain="time") + """Unit tests for Seismic3DStreamerFieldRecordsTemplate build.""" + t = Seismic3DStreamerFieldRecordsTemplate(data_domain="time") t.add_units({"source_coord_x": UNITS_METER, "source_coord_y": UNITS_METER}) # spatial domain units t.add_units({"group_coord_x": UNITS_METER, "group_coord_y": UNITS_METER}) # spatial domain units t.add_units({"time": UNITS_SECOND}) # data domain units @@ -141,6 +139,6 @@ def test_build_dataset(self, structured_headers: StructuredType) -> None: @pytest.mark.parametrize("data_domain", ["Time", "TiME"]) def test_domain_case_handling(data_domain: str) -> None: """Test that domain parameter handles different cases correctly.""" - template = Seismic3DPreStackStreamerFieldRecordsTemplate(data_domain=data_domain) + template = Seismic3DStreamerFieldRecordsTemplate(data_domain=data_domain) assert template._data_domain == data_domain.lower() assert template.name.endswith(data_domain.capitalize()) diff --git a/tests/unit/v1/templates/test_template_registry.py b/tests/unit/v1/templates/test_template_registry.py index 29cd83429..e0b641f04 100644 --- a/tests/unit/v1/templates/test_template_registry.py +++ b/tests/unit/v1/templates/test_template_registry.py @@ -33,7 +33,7 @@ "CocaGathers3DDepth", "StreamerShotGathers2D", "StreamerShotGathers3D", - "StreamerFieldRecords3D" + "StreamerFieldRecords3D", ] From adcd7c9b1ee3ca56d48efa111bdc245000da798f Mon Sep 17 00:00:00 2001 From: Altay Sansal <13684161+tasansal@users.noreply.github.com> Date: Wed, 29 Oct 2025 15:18:23 -0500 Subject: [PATCH 20/31] fix broken template tests --- .../v1/templates/test_seismic_3d_streamer_field.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/tests/unit/v1/templates/test_seismic_3d_streamer_field.py b/tests/unit/v1/templates/test_seismic_3d_streamer_field.py index ca96ff37e..044be69e6 100644 --- a/tests/unit/v1/templates/test_seismic_3d_streamer_field.py +++ b/tests/unit/v1/templates/test_seismic_3d_streamer_field.py @@ -1,6 +1,5 @@ """Unit tests for Seismic3DStreamerFieldRecordsTemplate.""" -import pytest from tests.unit.v1.helpers import validate_variable from mdio.builder.schemas.chunk_grid import RegularChunkGrid @@ -100,7 +99,7 @@ def test_configuration(self) -> None: # Verify dataset attributes attrs = t._load_dataset_attributes() - assert attrs == {"surveyDimensionality": "3D", "ensembleType": "shot_point", "processingStage": "pre-stack"} + assert attrs == {"surveyDimensionality": "3D", "ensembleType": "track", "processingStage": "pre-stack"} assert t.default_variable_name == "amplitude" def test_build_dataset(self, structured_headers: StructuredType) -> None: @@ -116,7 +115,7 @@ def test_build_dataset(self, structured_headers: StructuredType) -> None: assert dataset.metadata.name == "North Sea 3D Streamer Field Records" assert dataset.metadata.attributes["surveyDimensionality"] == "3D" - assert dataset.metadata.attributes["ensembleType"] == "shot_point" + assert dataset.metadata.attributes["ensembleType"] == "track" assert dataset.metadata.attributes["processingStage"] == "pre-stack" _validate_coordinates_headers_trace_mask(dataset, structured_headers, "time") @@ -134,11 +133,3 @@ def test_build_dataset(self, structured_headers: StructuredType) -> None: assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) assert seismic.metadata.chunk_grid.configuration.chunk_shape == (1, 1, 16, 1, 32, 1024) assert seismic.metadata.stats_v1 is None - - -@pytest.mark.parametrize("data_domain", ["Time", "TiME"]) -def test_domain_case_handling(data_domain: str) -> None: - """Test that domain parameter handles different cases correctly.""" - template = Seismic3DStreamerFieldRecordsTemplate(data_domain=data_domain) - assert template._data_domain == data_domain.lower() - assert template.name.endswith(data_domain.capitalize()) From 893f5d27a2222cfaef164fe7ae337fa8619f3ab7 Mon Sep 17 00:00:00 2001 From: Altay Sansal <13684161+tasansal@users.noreply.github.com> Date: Wed, 29 Oct 2025 15:57:00 -0500 Subject: [PATCH 21/31] fix broken test --- .../test_import_streamer_grid_overrides.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_import_streamer_grid_overrides.py b/tests/integration/test_import_streamer_grid_overrides.py index 9e8f9eb5b..986d15b38 100644 --- a/tests/integration/test_import_streamer_grid_overrides.py +++ b/tests/integration/test_import_streamer_grid_overrides.py @@ -31,9 +31,7 @@ # TODO(Altay): Finish implementing these grid overrides. # https://github.com/TGSAI/mdio-python/issues/612 @pytest.mark.skip(reason="NonBinned and HasDuplicates haven't been properly implemented yet.") -@pytest.mark.parametrize( - "grid_override", [{"NonBinned": True}, {"HasDuplicates": True}], ids=["NonBinned", "HasDuplicates"] -) +@pytest.mark.parametrize("grid_override", [{"NonBinned": True}, {"HasDuplicates": True}]) @pytest.mark.parametrize("chan_header_type", [StreamerShotGeometryType.C]) class TestImport4DNonReg: # pragma: no cover - tests is skipped """Test for 4D segy import with grid overrides.""" @@ -80,7 +78,7 @@ def test_import_4d_segy( # noqa: PLR0913 xrt.assert_duckarray_equal(ds["time"], times_expected) -@pytest.mark.parametrize("grid_override", [{"AutoChannelWrap": True}, None], ids=["AutoChannelWrap", "None"]) +@pytest.mark.parametrize("grid_override", [{"AutoChannelWrap": True}, None]) @pytest.mark.parametrize("chan_header_type", [StreamerShotGeometryType.A, StreamerShotGeometryType.B]) class TestImport4D: """Test for 4D segy import with grid overrides.""" @@ -158,11 +156,9 @@ def test_import_4d_segy( # noqa: PLR0913 assert "This grid is very sparse and most likely user error with indexing." in str(execinfo.value) -@pytest.mark.parametrize( - "grid_override", [{"AutoChannelWrap": True, "AutoShotWrap": True}, None], ids=["Channel&ShotWrap", "None"] -) +@pytest.mark.parametrize("grid_override", [{"AutoChannelWrap": True, "AutoShotWrap": True}, None]) @pytest.mark.parametrize("chan_header_type", [StreamerShotGeometryType.A, StreamerShotGeometryType.B]) -class TestImport6D: # pragma: no cover - tests is skipped +class TestImport6D: """Test for 6D segy import with grid overrides.""" def test_import_6d_segy( # noqa: PLR0913 @@ -178,7 +174,7 @@ def test_import_6d_segy( # noqa: PLR0913 segy_to_mdio( segy_spec=segy_spec, - mdio_template=TemplateRegistry().get("PreStackStreamerFieldRecords3DTime"), + mdio_template=TemplateRegistry().get("StreamerFieldRecords3D"), input_path=segy_path, output_path=zarr_tmp, overwrite=True, From 5e5bd5abc5e76b02a584c5cfcebeb0a68eba6bee Mon Sep 17 00:00:00 2001 From: Altay Sansal <13684161+tasansal@users.noreply.github.com> Date: Wed, 29 Oct 2025 15:57:09 -0500 Subject: [PATCH 22/31] update docstring --- src/mdio/builder/templates/seismic_3d_streamer_field.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mdio/builder/templates/seismic_3d_streamer_field.py b/src/mdio/builder/templates/seismic_3d_streamer_field.py index eeb88fe86..4a2e4b25e 100644 --- a/src/mdio/builder/templates/seismic_3d_streamer_field.py +++ b/src/mdio/builder/templates/seismic_3d_streamer_field.py @@ -9,9 +9,9 @@ class Seismic3DStreamerFieldRecordsTemplate(AbstractDatasetTemplate): - """Seismic pre-stack time Dataset template. + """Seismic 3D streamer shot field records template. - A generalized template for pre-stack field records in either 2D or 3D. + A generalized template for streamer field records that are optimized for: - Common-shot dataset - Common-channel dataset From 690e41e25c94f22d0ab95e39fe0ea11ba6a6c97b Mon Sep 17 00:00:00 2001 From: Altay Sansal <13684161+tasansal@users.noreply.github.com> Date: Wed, 29 Oct 2025 16:03:41 -0500 Subject: [PATCH 23/31] add whole survey to docstring --- src/mdio/builder/templates/seismic_3d_streamer_field.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mdio/builder/templates/seismic_3d_streamer_field.py b/src/mdio/builder/templates/seismic_3d_streamer_field.py index 4a2e4b25e..f05f6f621 100644 --- a/src/mdio/builder/templates/seismic_3d_streamer_field.py +++ b/src/mdio/builder/templates/seismic_3d_streamer_field.py @@ -12,8 +12,10 @@ class Seismic3DStreamerFieldRecordsTemplate(AbstractDatasetTemplate): """Seismic 3D streamer shot field records template. A generalized template for streamer field records that are optimized for: - - Common-shot dataset - - Common-channel dataset + - Common-shot access + - Common-channel access + + It can also store all the shot-lines of a survey in one MDIO if needed. Args: data_domain: The domain of the dataset. From b28aafdb9895bae9ed9f4758ab44bf5609d0d33d Mon Sep 17 00:00:00 2001 From: Altay Sansal <13684161+tasansal@users.noreply.github.com> Date: Fri, 31 Oct 2025 16:42:12 -0500 Subject: [PATCH 24/31] modify for new dim names --- .../templates/seismic_3d_streamer_field.py | 58 +++++++++---- src/mdio/segy/geometry.py | 18 ++-- tests/integration/conftest.py | 10 +-- .../test_seismic_3d_streamer_field.py | 87 ++++++++++++------- 4 files changed, 115 insertions(+), 58 deletions(-) diff --git a/src/mdio/builder/templates/seismic_3d_streamer_field.py b/src/mdio/builder/templates/seismic_3d_streamer_field.py index f05f6f621..834544eab 100644 --- a/src/mdio/builder/templates/seismic_3d_streamer_field.py +++ b/src/mdio/builder/templates/seismic_3d_streamer_field.py @@ -24,10 +24,13 @@ class Seismic3DStreamerFieldRecordsTemplate(AbstractDatasetTemplate): def __init__(self, data_domain: SeismicDataDomain = "time"): super().__init__(data_domain=data_domain) - self._spatial_dim_names = ("shot_line", "gun", "shot_point", "cable", "channel") + self._spatial_dim_names = ("sail_line", "gun", "shot_index", "cable", "channel") self._dim_names = (*self._spatial_dim_names, self._data_domain) self._physical_coord_names = ("source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y") - self._logical_coord_names = ("orig_field_record_num",) # ffid + self._logical_coord_names = ( + "shot_point", + "orig_field_record_num", + ) # ffid self._var_chunk_shape = (1, 1, 16, 1, 32, 1024) @property @@ -35,44 +38,69 @@ def _name(self) -> str: return "StreamerFieldRecords3D" def _load_dataset_attributes(self) -> dict[str, Any]: - return { - "surveyDimensionality": "3D", - "ensembleType": "track", - "processingStage": "pre-stack", - } + return {"surveyDimensionality": "3D", "ensembleType": "common_source_by_gun"} def _add_coordinates(self) -> None: # Add dimension coordinates - for name in self._dim_names: - self._builder.add_coordinate(name, dimensions=(name,), data_type=ScalarType.INT32) + # EXCLUDE: `shot_index` since its 0-N + self._builder.add_coordinate( + "sail_line", + dimensions=("sail_line",), + data_type=ScalarType.UINT32, + ) + self._builder.add_coordinate( + "gun", + dimensions=("gun",), + data_type=ScalarType.UINT8, + ) + self._builder.add_coordinate( + "cable", + dimensions=("cable",), + data_type=ScalarType.UINT8, + ) + self._builder.add_coordinate( + "channel", + dimensions=("channel",), + data_type=ScalarType.UINT16, + ) + self._builder.add_coordinate( + self._data_domain, + dimensions=(self._data_domain,), + data_type=ScalarType.INT32, + ) # Add non-dimension coordinates self._builder.add_coordinate( "orig_field_record_num", - dimensions=("shot_line", "gun", "shot_point"), - data_type=ScalarType.INT32, + dimensions=("sail_line", "gun", "shot_index"), + data_type=ScalarType.UINT32, + ) + self._builder.add_coordinate( + "shot_point", + dimensions=("sail_line", "gun", "shot_index"), + data_type=ScalarType.UINT32, ) self._builder.add_coordinate( "source_coord_x", - dimensions=("shot_line", "gun", "shot_point"), + dimensions=("sail_line", "gun", "shot_index"), data_type=ScalarType.FLOAT64, metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_x")), ) self._builder.add_coordinate( "source_coord_y", - dimensions=("shot_line", "gun", "shot_point"), + dimensions=("sail_line", "gun", "shot_index"), data_type=ScalarType.FLOAT64, metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_y")), ) self._builder.add_coordinate( "group_coord_x", - dimensions=("shot_line", "gun", "shot_point", "cable", "channel"), + dimensions=("sail_line", "gun", "shot_index", "cable", "channel"), data_type=ScalarType.FLOAT64, metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("group_coord_x")), ) self._builder.add_coordinate( "group_coord_y", - dimensions=("shot_line", "gun", "shot_point", "cable", "channel"), + dimensions=("sail_line", "gun", "shot_index", "cable", "channel"), data_type=ScalarType.FLOAT64, metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("group_coord_y")), ) diff --git a/src/mdio/segy/geometry.py b/src/mdio/segy/geometry.py index ed41e42e8..e0b5e7857 100644 --- a/src/mdio/segy/geometry.py +++ b/src/mdio/segy/geometry.py @@ -459,7 +459,7 @@ def transform( class AutoShotWrap(GridOverrideCommand): """Automatically determine ShotGun acquisition type.""" - required_keys = {"shot_line", "gun", "shot_point", "cable", "channel"} + required_keys = {"sail_line", "gun", "shot_point", "cable", "channel"} required_parameters = None def validate(self, index_headers: HeaderArray, grid_overrides: dict[str, bool | int]) -> None: @@ -476,22 +476,22 @@ def transform( self.validate(index_headers, grid_overrides) result = analyze_shotlines_for_guns(index_headers) - unique_shot_lines, unique_guns_in_shot_line, geom_type = result + unique_sail_lines, unique_guns_in_sail_line, geom_type = result logger.info("Ingesting dataset as shot type: %s", geom_type.name) max_num_guns = 1 - for shot_line in unique_shot_lines: - logger.info("shot_line: %s has guns: %s", shot_line, unique_guns_in_shot_line[str(shot_line)]) - num_guns = len(unique_guns_in_shot_line[str(shot_line)]) + for sail_line in unique_sail_lines: + logger.info("sail_line: %s has guns: %s", sail_line, unique_guns_in_sail_line[str(sail_line)]) + num_guns = len(unique_guns_in_sail_line[str(sail_line)]) max_num_guns = max(num_guns, max_num_guns) # This might be slow and potentially could be improved with a rewrite # to prevent so many lookups if geom_type == ShotGunGeometryType.B: - for shot_line in unique_shot_lines: - shot_line_idxs = np.where(index_headers["shot_line"][:] == shot_line) - index_headers["shot_point"][shot_line_idxs] = np.floor( - index_headers["shot_point"][shot_line_idxs] / max_num_guns + for sail_line in unique_sail_lines: + sail_line_idxs = np.where(index_headers["sail_line"][:] == sail_line) + index_headers["shot_point"][sail_line_idxs] = np.floor( + index_headers["shot_point"][sail_line_idxs] / max_num_guns ) return index_headers diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 224e2898a..e3f8ca623 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -28,7 +28,7 @@ def get_segy_mock_4d_spec() -> SegySpec: HeaderField(name="offset", byte=37, format="int32"), HeaderField(name="samples_per_trace", byte=115, format="int16"), HeaderField(name="sample_interval", byte=117, format="int16"), - HeaderField(name="shot_line", byte=133, format="int16"), + HeaderField(name="sail_line", byte=133, format="int16"), HeaderField(name="cable", byte=137, format="int16"), HeaderField(name="gun", byte=171, format="int16"), HeaderField(name="coordinate_scalar", byte=71, format="int16"), @@ -111,15 +111,15 @@ def create_segy_mock_4d( # noqa: PLR0913 gun = gun_headers[trc_idx] cable = cable_headers[trc_idx] channel = channel_headers[trc_idx] - shot_line = 1 + sail_line = 1 offset = 0 if index_receivers is False: - channel, gun, shot_line = 0, 0, 0 + channel, gun, sail_line = 0, 0, 0 # Assign dimension coordinate fields with calculated mock data - header_fields = ["orig_field_record_num", "channel", "shot_point", "offset", "shot_line", "cable", "gun"] - headers[header_fields][trc_idx] = (shot, channel, shot, offset, shot_line, cable, gun) + header_fields = ["orig_field_record_num", "channel", "shot_point", "offset", "sail_line", "cable", "gun"] + headers[header_fields][trc_idx] = (shot, channel, shot, offset, sail_line, cable, gun) # Assign coordinate fields with mock data x = start_x + step_x * trc_shot_idx diff --git a/tests/unit/v1/templates/test_seismic_3d_streamer_field.py b/tests/unit/v1/templates/test_seismic_3d_streamer_field.py index 044be69e6..9ea837939 100644 --- a/tests/unit/v1/templates/test_seismic_3d_streamer_field.py +++ b/tests/unit/v1/templates/test_seismic_3d_streamer_field.py @@ -18,6 +18,18 @@ UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) +DATASET_SIZE_MAP = {"sail_line": 1, "gun": 2, "shot_index": 128, "cable": 256, "channel": 12, "time": 1024} +DATASET_DTYPE_MAP = {"sail_line": "uint32", "gun": "uint8", "cable": "uint8", "channel": "uint16", "time": "int32"} +EXPECTED_COORDINATES = [ + "shot_point", + "orig_field_record_num", + "source_coord_x", + "source_coord_y", + "group_coord_x", + "group_coord_y", +] + + def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: StructuredType, domain: str) -> None: """Validate the coordinate, headers, trace_mask variables in the dataset.""" # Verify variables @@ -28,50 +40,65 @@ def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: Structur validate_variable( dataset, name="headers", - dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256), ("cable", 512), ("channel", 24)], - coords=["orig_field_record_num", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"], + dims=[(k, v) for k, v in DATASET_SIZE_MAP.items() if k != domain], + coords=EXPECTED_COORDINATES, dtype=headers, ) validate_variable( dataset, name="trace_mask", - dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256), ("cable", 512), ("channel", 24)], - coords=["orig_field_record_num", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"], + dims=[(k, v) for k, v in DATASET_SIZE_MAP.items() if k != domain], + coords=EXPECTED_COORDINATES, dtype=ScalarType.BOOL, ) # Verify dimension coordinate variables - for dim_name in ["shot_line", "gun", "shot_point", "cable", "channel", domain]: + for dim_name, dim_size in DATASET_SIZE_MAP.items(): + if dim_name == "shot_index": + continue + validate_variable( dataset, name=dim_name, - dims=[ - ( - dim_name, - {"shot_line": 1, "gun": 3, "shot_point": 256, "cable": 512, "channel": 24, domain: 2048}[dim_name], - ) - ], + dims=[(dim_name, dim_size)], coords=[dim_name], - dtype=ScalarType.INT32, + dtype=ScalarType(DATASET_DTYPE_MAP[dim_name]), ) # Verify non-dimension coordinate variables validate_variable( dataset, name="orig_field_record_num", - dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256)], + dims=[(k, v) for k, v in DATASET_SIZE_MAP.items() if k in ["sail_line", "gun", "shot_index"]], coords=["orig_field_record_num"], - dtype=ScalarType.INT32, + dtype=ScalarType.UINT32, + ) + + validate_variable( + dataset, + name="shot_point", + dims=[(k, v) for k, v in DATASET_SIZE_MAP.items() if k in ["sail_line", "gun", "shot_index"]], + coords=["shot_point"], + dtype=ScalarType.UINT32, ) # Verify coordinate variables with units - for coord_name in ["source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"]: + for coord_name in ["source_coord_x", "source_coord_y"]: + coord = validate_variable( + dataset, + name=coord_name, + dims=[(k, v) for k, v in DATASET_SIZE_MAP.items() if k in ["sail_line", "gun", "shot_index"]], + coords=[coord_name], + dtype=ScalarType.FLOAT64, + ) + assert coord.metadata.units_v1.length == LengthUnitEnum.METER + + for coord_name in ["group_coord_x", "group_coord_y"]: coord = validate_variable( dataset, name=coord_name, - dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256)] - + ([("cable", 512), ("channel", 24)] if "group" in coord_name else []), + dims=[(k, v) for k, v in DATASET_SIZE_MAP.items() if k != domain], coords=[coord_name], dtype=ScalarType.FLOAT64, ) @@ -87,10 +114,8 @@ def test_configuration(self) -> None: # Template attributes assert t.name == "StreamerFieldRecords3D" - assert t._dim_names == ("shot_line", "gun", "shot_point", "cable", "channel", "time") + assert t._dim_names == ("sail_line", "gun", "shot_index", "cable", "channel", "time") assert t._physical_coord_names == ("source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y") - # TODO(Anyone): Disable chunking in time domain when support is merged. - # https://github.com/TGSAI/mdio-python/pull/723 assert t.full_chunk_shape == (1, 1, 16, 1, 32, 1024) # Variables instantiated when build_dataset() is called @@ -99,7 +124,7 @@ def test_configuration(self) -> None: # Verify dataset attributes attrs = t._load_dataset_attributes() - assert attrs == {"surveyDimensionality": "3D", "ensembleType": "track", "processingStage": "pre-stack"} + assert attrs == {"surveyDimensionality": "3D", "ensembleType": "common_source_by_gun"} assert t.default_variable_name == "amplitude" def test_build_dataset(self, structured_headers: StructuredType) -> None: @@ -109,14 +134,11 @@ def test_build_dataset(self, structured_headers: StructuredType) -> None: t.add_units({"group_coord_x": UNITS_METER, "group_coord_y": UNITS_METER}) # spatial domain units t.add_units({"time": UNITS_SECOND}) # data domain units - dataset = t.build_dataset( - "North Sea 3D Streamer Field Records", sizes=(1, 3, 256, 512, 24, 2048), header_dtype=structured_headers - ) + dataset = t.build_dataset("Survey3D", sizes=(1, 2, 128, 256, 12, 1024), header_dtype=structured_headers) - assert dataset.metadata.name == "North Sea 3D Streamer Field Records" + assert dataset.metadata.name == "Survey3D" assert dataset.metadata.attributes["surveyDimensionality"] == "3D" - assert dataset.metadata.attributes["ensembleType"] == "track" - assert dataset.metadata.attributes["processingStage"] == "pre-stack" + assert dataset.metadata.attributes["ensembleType"] == "common_source_by_gun" _validate_coordinates_headers_trace_mask(dataset, structured_headers, "time") @@ -124,8 +146,15 @@ def test_build_dataset(self, structured_headers: StructuredType) -> None: seismic = validate_variable( dataset, name="amplitude", - dims=[("shot_line", 1), ("gun", 3), ("shot_point", 256), ("cable", 512), ("channel", 24), ("time", 2048)], - coords=["orig_field_record_num", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y"], + dims=[("sail_line", 1), ("gun", 2), ("shot_index", 128), ("cable", 256), ("channel", 12), ("time", 1024)], + coords=[ + "shot_point", + "orig_field_record_num", + "source_coord_x", + "source_coord_y", + "group_coord_x", + "group_coord_y", + ], dtype=ScalarType.FLOAT32, ) assert isinstance(seismic.compressor, Blosc) From 8a4787dc1f11acf7ee0e1162bb1e994056928197 Mon Sep 17 00:00:00 2001 From: Altay Sansal <13684161+tasansal@users.noreply.github.com> Date: Mon, 3 Nov 2025 08:32:02 -0600 Subject: [PATCH 25/31] fix geometry calculation by adding the new `shot_index` field --- src/mdio/segy/geometry.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mdio/segy/geometry.py b/src/mdio/segy/geometry.py index e0b5e7857..8c37da97f 100644 --- a/src/mdio/segy/geometry.py +++ b/src/mdio/segy/geometry.py @@ -487,11 +487,13 @@ def transform( # This might be slow and potentially could be improved with a rewrite # to prevent so many lookups + shot_index = np.empty(len(index_headers), dtype="uint8") + index_headers = rfn.append_fields(index_headers, "shot_index", shot_index) if geom_type == ShotGunGeometryType.B: for sail_line in unique_sail_lines: sail_line_idxs = np.where(index_headers["sail_line"][:] == sail_line) - index_headers["shot_point"][sail_line_idxs] = np.floor( - index_headers["shot_point"][sail_line_idxs] / max_num_guns + index_headers["shot_index"][sail_line_idxs] = np.floor( + index_headers["shot_index"][sail_line_idxs] / max_num_guns ) return index_headers From e4790dd2f8bc57877e16fcfed8d98d51dce4358b Mon Sep 17 00:00:00 2001 From: Altay Sansal <13684161+tasansal@users.noreply.github.com> Date: Mon, 3 Nov 2025 08:36:36 -0600 Subject: [PATCH 26/31] lint --- src/mdio/builder/templates/seismic_3d_streamer_field.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/mdio/builder/templates/seismic_3d_streamer_field.py b/src/mdio/builder/templates/seismic_3d_streamer_field.py index 834544eab..1763861b0 100644 --- a/src/mdio/builder/templates/seismic_3d_streamer_field.py +++ b/src/mdio/builder/templates/seismic_3d_streamer_field.py @@ -27,10 +27,7 @@ def __init__(self, data_domain: SeismicDataDomain = "time"): self._spatial_dim_names = ("sail_line", "gun", "shot_index", "cable", "channel") self._dim_names = (*self._spatial_dim_names, self._data_domain) self._physical_coord_names = ("source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y") - self._logical_coord_names = ( - "shot_point", - "orig_field_record_num", - ) # ffid + self._logical_coord_names = ("shot_point", "orig_field_record_num") # ffid self._var_chunk_shape = (1, 1, 16, 1, 32, 1024) @property From 233afefc1d2e9a6a45d4094f454d4cdbc97a79b3 Mon Sep 17 00:00:00 2001 From: Altay Sansal <13684161+tasansal@users.noreply.github.com> Date: Mon, 3 Nov 2025 08:42:45 -0600 Subject: [PATCH 27/31] update `shot_index` dtype to uint32 and add `calculated_dims` property --- src/mdio/builder/templates/base.py | 6 ++++++ src/mdio/builder/templates/seismic_3d_streamer_field.py | 1 + src/mdio/converters/segy.py | 1 + src/mdio/segy/geometry.py | 2 +- 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/mdio/builder/templates/base.py b/src/mdio/builder/templates/base.py index 879f1fd6f..50f775dc7 100644 --- a/src/mdio/builder/templates/base.py +++ b/src/mdio/builder/templates/base.py @@ -39,6 +39,7 @@ def __init__(self, data_domain: SeismicDataDomain) -> None: raise ValueError(msg) self._dim_names: tuple[str, ...] = () + self._calculated_dims: tuple[str, ...] = () self._physical_coord_names: tuple[str, ...] = () self._logical_coord_names: tuple[str, ...] = () self._var_chunk_shape: tuple[int, ...] = () @@ -130,6 +131,11 @@ def dimension_names(self) -> tuple[str, ...]: """Returns the names of the dimensions.""" return copy.deepcopy(self._dim_names) + @property + def calculated_dimension_names(self) -> tuple[str, ...]: + """Returns the names of the dimensions.""" + return copy.deepcopy(self._calculated_dims) + @property def physical_coordinate_names(self) -> tuple[str, ...]: """Returns the names of the physical (world) coordinates.""" diff --git a/src/mdio/builder/templates/seismic_3d_streamer_field.py b/src/mdio/builder/templates/seismic_3d_streamer_field.py index 1763861b0..c550ca7d2 100644 --- a/src/mdio/builder/templates/seismic_3d_streamer_field.py +++ b/src/mdio/builder/templates/seismic_3d_streamer_field.py @@ -25,6 +25,7 @@ def __init__(self, data_domain: SeismicDataDomain = "time"): super().__init__(data_domain=data_domain) self._spatial_dim_names = ("sail_line", "gun", "shot_index", "cable", "channel") + self._calculated_dims = ("shot_index",) self._dim_names = (*self._spatial_dim_names, self._data_domain) self._physical_coord_names = ("source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y") self._logical_coord_names = ("shot_point", "orig_field_record_num") # ffid diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py index 708d4aed1..f19aacd3f 100644 --- a/src/mdio/converters/segy.py +++ b/src/mdio/converters/segy.py @@ -488,6 +488,7 @@ def _validate_spec_in_template(segy_spec: SegySpec, mdio_template: AbstractDatas header_fields = {field.name for field in segy_spec.trace.header.fields} required_fields = set(mdio_template.spatial_dimension_names) | set(mdio_template.coordinate_names) + required_fields = required_fields - set(mdio_template.calculated_dimension_names) # remove to be calculated required_fields = required_fields | {"coordinate_scalar"} # ensure coordinate scalar is always present missing_fields = required_fields - header_fields diff --git a/src/mdio/segy/geometry.py b/src/mdio/segy/geometry.py index 8c37da97f..bd451f714 100644 --- a/src/mdio/segy/geometry.py +++ b/src/mdio/segy/geometry.py @@ -487,7 +487,7 @@ def transform( # This might be slow and potentially could be improved with a rewrite # to prevent so many lookups - shot_index = np.empty(len(index_headers), dtype="uint8") + shot_index = np.empty(len(index_headers), dtype="uint32") index_headers = rfn.append_fields(index_headers, "shot_index", shot_index) if geom_type == ShotGunGeometryType.B: for sail_line in unique_sail_lines: From dd99f20edbf150d6d72a8dd37be732d5dd9af37b Mon Sep 17 00:00:00 2001 From: Altay Sansal <13684161+tasansal@users.noreply.github.com> Date: Mon, 3 Nov 2025 08:54:39 -0600 Subject: [PATCH 28/31] exclude `calculated_dimension_names` from `horizontal_coordinates` --- src/mdio/segy/utilities.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mdio/segy/utilities.py b/src/mdio/segy/utilities.py index f6d76bc6a..8988ab1f9 100644 --- a/src/mdio/segy/utilities.py +++ b/src/mdio/segy/utilities.py @@ -58,6 +58,10 @@ def get_grid_plan( # noqa: C901, PLR0913 # Keep only dimension and non-dimension coordinates excluding the vertical axis horizontal_dimensions = template.spatial_dimension_names horizontal_coordinates = horizontal_dimensions + template.coordinate_names + + # Remove any to be computed fields + horizontal_coordinates = tuple(set(horizontal_coordinates) - set(template.calculated_dimension_names)) + headers_subset = parse_headers( segy_file_kwargs=segy_file_kwargs, num_traces=segy_file_info.num_traces, From 8e3a1c9b10c460eae09d67dc3537295792a38b14 Mon Sep 17 00:00:00 2001 From: Altay Sansal <13684161+tasansal@users.noreply.github.com> Date: Mon, 3 Nov 2025 19:09:24 -0600 Subject: [PATCH 29/31] update test cases and geometry calculation to reflect changes in cable and shot indexing --- src/mdio/converters/segy.py | 7 ++-- src/mdio/segy/geometry.py | 34 ++++++++++--------- src/mdio/segy/utilities.py | 13 ++++++- tests/integration/conftest.py | 2 +- .../test_import_streamer_grid_overrides.py | 29 ++++++++++------ 5 files changed, 54 insertions(+), 31 deletions(-) diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py index f19aacd3f..e2fd6b357 100644 --- a/src/mdio/converters/segy.py +++ b/src/mdio/converters/segy.py @@ -345,10 +345,10 @@ def _populate_coordinates( """ drop_vars_delayed = [] # Populate the dimension coordinate variables (1-D arrays) - dataset, vars_to_drop_later = populate_dim_coordinates(dataset, grid, drop_vars_delayed=drop_vars_delayed) + dataset, drop_vars_delayed = populate_dim_coordinates(dataset, grid, drop_vars_delayed=drop_vars_delayed) # Populate the non-dimension coordinate variables (N-dim arrays) - dataset, vars_to_drop_later = populate_non_dim_coordinates( + dataset, drop_vars_delayed = populate_non_dim_coordinates( dataset, grid, coordinates=coords, @@ -593,6 +593,9 @@ def segy_to_mdio( # noqa PLR0913 to_mdio(xr_dataset, output_path=output_path, mode="w", compute=False) # This will write the non-dimension coordinates and trace mask + # We also remove dimensions that don't have associated coordinates + unindexed_dims = [d for d in xr_dataset.dims if d not in xr_dataset.coords] + [drop_vars_delayed.remove(d) for d in unindexed_dims] meta_ds = xr_dataset[drop_vars_delayed + ["trace_mask"]] to_mdio(meta_ds, output_path=output_path, mode="r+", compute=True) diff --git a/src/mdio/segy/geometry.py b/src/mdio/segy/geometry.py index bd451f714..259b342c0 100644 --- a/src/mdio/segy/geometry.py +++ b/src/mdio/segy/geometry.py @@ -149,7 +149,7 @@ def analyze_streamer_headers( return unique_cables, cable_chan_min, cable_chan_max, geom_type -def analyze_shotlines_for_guns( +def analyze_saillines_for_guns( index_headers: HeaderArray, ) -> tuple[NDArray, dict[str, list], ShotGunGeometryType]: """Check input headers for SEG-Y input to help determine geometry of shots and guns. @@ -161,27 +161,27 @@ def analyze_shotlines_for_guns( index_headers: numpy array with index headers Returns: - tuple of unique_shot_lines, unique_guns_in_shot_line, geom_type + tuple of unique_sail_lines, unique_guns_in_sail_line, geom_type """ # Find unique cable ids - unique_shot_lines = np.sort(np.unique(index_headers["shot_line"])) + unique_sail_lines = np.sort(np.unique(index_headers["sail_line"])) unique_guns = np.sort(np.unique(index_headers["gun"])) - logger.info("unique_shot_lines: %s", unique_shot_lines) + logger.info("unique_sail_lines: %s", unique_sail_lines) logger.info("unique_guns: %s", unique_guns) # Find channel min and max values for each cable - unique_guns_in_shot_line = {} + unique_guns_in_sail_line = {} geom_type = ShotGunGeometryType.B # Check shot numbers are still unique if div/num_guns - for shot_line in unique_shot_lines: - shot_line_mask = index_headers["shot_line"] == shot_line - shot_current_sl = index_headers["shot_point"][shot_line_mask] - gun_current_sl = index_headers["gun"][shot_line_mask] + for sail_line in unique_sail_lines: + sail_line_mask = index_headers["sail_line"] == sail_line + shot_current_sl = index_headers["shot_point"][sail_line_mask] + gun_current_sl = index_headers["gun"][sail_line_mask] unique_guns_sl = np.sort(np.unique(gun_current_sl)) num_guns_sl = unique_guns_sl.shape[0] - unique_guns_in_shot_line[str(shot_line)] = list(unique_guns_sl) + unique_guns_in_sail_line[str(sail_line)] = list(unique_guns_sl) for gun in unique_guns_sl: gun_mask = gun_current_sl == gun @@ -190,10 +190,10 @@ def analyze_shotlines_for_guns( mod_shots = np.floor(shots_current_sl_gun / num_guns_sl) if len(np.unique(mod_shots)) != num_shots_sl: msg = "Shot line %s has %s when using div by %s %s has %s unique mod shots." - logger.info(msg, shot_line, num_shots_sl, num_guns_sl, np.unique(mod_shots)) + logger.info(msg, sail_line, num_shots_sl, num_guns_sl, np.unique(mod_shots)) geom_type = ShotGunGeometryType.A - return unique_shot_lines, unique_guns_in_shot_line, geom_type - return unique_shot_lines, unique_guns_in_shot_line, geom_type + return unique_sail_lines, unique_guns_in_sail_line, geom_type + return unique_sail_lines, unique_guns_in_sail_line, geom_type def create_counter( @@ -475,7 +475,7 @@ def transform( """Perform the grid transform.""" self.validate(index_headers, grid_overrides) - result = analyze_shotlines_for_guns(index_headers) + result = analyze_saillines_for_guns(index_headers) unique_sail_lines, unique_guns_in_sail_line, geom_type = result logger.info("Ingesting dataset as shot type: %s", geom_type.name) @@ -487,14 +487,16 @@ def transform( # This might be slow and potentially could be improved with a rewrite # to prevent so many lookups - shot_index = np.empty(len(index_headers), dtype="uint32") - index_headers = rfn.append_fields(index_headers, "shot_index", shot_index) if geom_type == ShotGunGeometryType.B: + shot_index = np.empty(len(index_headers), dtype="uint32") + index_headers = rfn.append_fields(index_headers.base, "shot_index", shot_index) for sail_line in unique_sail_lines: sail_line_idxs = np.where(index_headers["sail_line"][:] == sail_line) index_headers["shot_index"][sail_line_idxs] = np.floor( index_headers["shot_index"][sail_line_idxs] / max_num_guns ) + # Make shot index zero-based PER sail line + index_headers["shot_index"][sail_line_idxs] -= index_headers["shot_index"][sail_line_idxs].min() return index_headers diff --git a/src/mdio/segy/utilities.py b/src/mdio/segy/utilities.py index 8988ab1f9..3362f0d82 100644 --- a/src/mdio/segy/utilities.py +++ b/src/mdio/segy/utilities.py @@ -51,6 +51,9 @@ def get_grid_plan( # noqa: C901, PLR0913 Returns: All index dimensions and chunksize or dimensions and chunksize together with header values. + + Raises: + ValueError: If computed fields are not found after header overrides. """ if grid_overrides is None: grid_overrides = {} @@ -60,7 +63,8 @@ def get_grid_plan( # noqa: C901, PLR0913 horizontal_coordinates = horizontal_dimensions + template.coordinate_names # Remove any to be computed fields - horizontal_coordinates = tuple(set(horizontal_coordinates) - set(template.calculated_dimension_names)) + computed_fields = set(template.calculated_dimension_names) + horizontal_coordinates = tuple(set(horizontal_coordinates) - computed_fields) headers_subset = parse_headers( segy_file_kwargs=segy_file_kwargs, @@ -77,6 +81,13 @@ def get_grid_plan( # noqa: C901, PLR0913 grid_overrides=grid_overrides, ) + if len(computed_fields) > 0 and not computed_fields.issubset(headers_subset.dtype.names): + err = ( + f"Required computed fields {sorted(computed_fields)} for template {template.name} " + f"not found after header overrides. Please ensure correct overrides are applied." + ) + raise ValueError(err) + dimensions = [] for dim_name in horizontal_dimensions: dim_unique = np.unique(headers_subset[dim_name]) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index e3f8ca623..5fe7aed16 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -144,7 +144,7 @@ def segy_mock_4d_shots(fake_segy_tmp: Path) -> dict[StreamerShotGeometryType, Pa num_samples = 25 shots = [2, 3, 5, 6, 7, 8, 9] guns = [1, 2] - cables = [0, 101, 201, 301] + cables = [0, 3, 5, 7] receivers_per_cable = [1, 5, 7, 5] segy_paths = {} diff --git a/tests/integration/test_import_streamer_grid_overrides.py b/tests/integration/test_import_streamer_grid_overrides.py index 986d15b38..c90d8c8c8 100644 --- a/tests/integration/test_import_streamer_grid_overrides.py +++ b/tests/integration/test_import_streamer_grid_overrides.py @@ -59,7 +59,7 @@ def test_import_4d_segy( # noqa: PLR0913 # Expected values num_samples = 25 shots = [2, 3, 5, 6, 7, 8, 9] - cables = [0, 101, 201, 301] + cables = [0, 3, 5, 7] receivers_per_cable = [1, 5, 7, 5] ds = open_mdio(zarr_tmp) @@ -106,7 +106,7 @@ def test_import_4d_segy( # noqa: PLR0913 # Expected values num_samples = 25 shots = [2, 3, 5, 6, 7, 8, 9] - cables = [0, 101, 201, 301] + cables = [0, 3, 5, 7] receivers_per_cable = [1, 5, 7, 5] ds = open_mdio(zarr_tmp) @@ -156,7 +156,7 @@ def test_import_4d_segy( # noqa: PLR0913 assert "This grid is very sparse and most likely user error with indexing." in str(execinfo.value) -@pytest.mark.parametrize("grid_override", [{"AutoChannelWrap": True, "AutoShotWrap": True}, None]) +@pytest.mark.parametrize("grid_override", [{"AutoChannelWrap": True, "AutoShotWrap": True}, {"AutoShotWrap": True}]) @pytest.mark.parametrize("chan_header_type", [StreamerShotGeometryType.A, StreamerShotGeometryType.B]) class TestImport6D: """Test for 6D segy import with grid overrides.""" @@ -183,26 +183,33 @@ def test_import_6d_segy( # noqa: PLR0913 # Expected values num_samples = 25 - shots = [2, 3, 5, 6, 7, 8, 9] # original shot list - if grid_override is not None and "AutoShotWrap" in grid_override: - shots_new = [int(shot / 2) for shot in shots] # Updated shot index when ingesting with 2 guns - shots_set = set(shots_new) # remove duplicates - shots = list(shots_set) # Unique shot points for 6D indexed with gun - cables = [0, 101, 201, 301] + shot_points = [2, 3, 5, 6, 7, 8, 9] # original shot list, missing shot ~ 4. + + shot_index = [int(sp / 2) for sp in shot_points] # Updated shot index when ingesting with 2 guns + shot_index = np.unique(shot_index) - 1 # Unique shot point indices for 6D indexed with gun + cables = [0, 3, 5, 7] guns = [1, 2] receivers_per_cable = [1, 5, 7, 5] ds = open_mdio(zarr_tmp) xrt.assert_duckarray_equal(ds["gun"], guns) - xrt.assert_duckarray_equal(ds["shot_point"], shots) + xrt.assert_duckarray_equal(ds["shot_index"], shot_index) xrt.assert_duckarray_equal(ds["cable"], cables) - if chan_header_type == StreamerShotGeometryType.B and grid_override is None: + if chan_header_type == StreamerShotGeometryType.B and "AutoChannelWrap" not in grid_override: expected = list(range(1, np.sum(receivers_per_cable) + 1)) else: expected = list(range(1, np.amax(receivers_per_cable) + 1)) xrt.assert_duckarray_equal(ds["channel"], expected) + expected_shot_points = [ + [ + [2, 4294967295, 6, 8], # gun = 1 + [3, 5, 7, 9], # gun = 2 + ], # sail_line = 1 + ] + xrt.assert_duckarray_equal(ds["shot_point"], expected_shot_points) + times_expected = list(range(0, num_samples, 1)) xrt.assert_duckarray_equal(ds["time"], times_expected) From e9bcf40325465e3396b58438781faecac6f9c9d5 Mon Sep 17 00:00:00 2001 From: Altay Sansal <13684161+tasansal@users.noreply.github.com> Date: Mon, 3 Nov 2025 19:12:28 -0600 Subject: [PATCH 30/31] header -> grid --- src/mdio/segy/utilities.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mdio/segy/utilities.py b/src/mdio/segy/utilities.py index 3362f0d82..195a02c8a 100644 --- a/src/mdio/segy/utilities.py +++ b/src/mdio/segy/utilities.py @@ -53,7 +53,7 @@ def get_grid_plan( # noqa: C901, PLR0913 All index dimensions and chunksize or dimensions and chunksize together with header values. Raises: - ValueError: If computed fields are not found after header overrides. + ValueError: If computed fields are not found after grid overrides. """ if grid_overrides is None: grid_overrides = {} @@ -84,7 +84,7 @@ def get_grid_plan( # noqa: C901, PLR0913 if len(computed_fields) > 0 and not computed_fields.issubset(headers_subset.dtype.names): err = ( f"Required computed fields {sorted(computed_fields)} for template {template.name} " - f"not found after header overrides. Please ensure correct overrides are applied." + f"not found after grid overrides. Please ensure correct overrides are applied." ) raise ValueError(err) From 0d77df4717666c7b9654a82ff36cf1b435430aa0 Mon Sep 17 00:00:00 2001 From: Altay Sansal Date: Thu, 6 Nov 2025 11:44:51 -0600 Subject: [PATCH 31/31] fix shot index calculation --- src/mdio/segy/geometry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mdio/segy/geometry.py b/src/mdio/segy/geometry.py index 259b342c0..bdb0b81b5 100644 --- a/src/mdio/segy/geometry.py +++ b/src/mdio/segy/geometry.py @@ -493,7 +493,7 @@ def transform( for sail_line in unique_sail_lines: sail_line_idxs = np.where(index_headers["sail_line"][:] == sail_line) index_headers["shot_index"][sail_line_idxs] = np.floor( - index_headers["shot_index"][sail_line_idxs] / max_num_guns + index_headers["shot_point"][sail_line_idxs] / max_num_guns ) # Make shot index zero-based PER sail line index_headers["shot_index"][sail_line_idxs] -= index_headers["shot_index"][sail_line_idxs].min()