From 697ed6132a9ca68430473154d13374d86a3703b7 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 8 Oct 2025 14:44:58 +0000 Subject: [PATCH 1/6] Implement validation for all required fields --- src/mdio/converters/segy.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py index 425afb16c..64829e7d0 100644 --- a/src/mdio/converters/segy.py +++ b/src/mdio/converters/segy.py @@ -482,6 +482,21 @@ def determine_target_size(var_type: str) -> int: ds.variables[index].metadata.chunk_grid = chunk_grid +def _validate_spec_in_template(segy_spec: SegySpec, mdio_template: AbstractDatasetTemplate) -> None: + """Validate that the SegySpec has all required fields in the MDIO template.""" + # TODO(BrianMichell): Implement a simple test for this + # https://github.com/TGSAI/mdio-python/issues/703 + header_fields = [field.name for field in segy_spec.trace.header.fields] + for i in range(len(mdio_template._dim_names) - 1): + if mdio_template._dim_names[i] not in header_fields: + err = f"Dimension '{mdio_template._dim_names[i]}' not found in SegySpec.trace.header.fields." + raise ValueError(err) + for coord in mdio_template._coord_names: + if coord not in header_fields: + err = f"Coordinate '{coord}' not found in SegySpec.trace.header.fields." + raise ValueError(err) + + def segy_to_mdio( # noqa PLR0913 segy_spec: SegySpec, mdio_template: AbstractDatasetTemplate, @@ -507,6 +522,8 @@ def segy_to_mdio( # noqa PLR0913 Raises: FileExistsError: If the output location already exists and overwrite is False. """ + _validate_spec_in_template(segy_spec, mdio_template) + input_path = _normalize_path(input_path) output_path = _normalize_path(output_path) From 2f4d681931ff13d7de4510aa291650ccd8d28b69 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 8 Oct 2025 15:13:56 +0000 Subject: [PATCH 2/6] Update logic to use sets --- src/mdio/converters/segy.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py index 64829e7d0..dd9fd666f 100644 --- a/src/mdio/converters/segy.py +++ b/src/mdio/converters/segy.py @@ -486,15 +486,14 @@ def _validate_spec_in_template(segy_spec: SegySpec, mdio_template: AbstractDatas """Validate that the SegySpec has all required fields in the MDIO template.""" # TODO(BrianMichell): Implement a simple test for this # https://github.com/TGSAI/mdio-python/issues/703 - header_fields = [field.name for field in segy_spec.trace.header.fields] - for i in range(len(mdio_template._dim_names) - 1): - if mdio_template._dim_names[i] not in header_fields: - err = f"Dimension '{mdio_template._dim_names[i]}' not found in SegySpec.trace.header.fields." - raise ValueError(err) - for coord in mdio_template._coord_names: - if coord not in header_fields: - err = f"Coordinate '{coord}' not found in SegySpec.trace.header.fields." - raise ValueError(err) + header_fields = {field.name for field in segy_spec.trace.header.fields} + + required_fields = set(mdio_template._dim_names[:-1]) | set(mdio_template._coord_names) + missing_fields = required_fields - header_fields + + if missing_fields: + err = f"Required fields {sorted(missing_fields)} not found in the provided segy_spec" + raise ValueError(err) def segy_to_mdio( # noqa PLR0913 From 21164cecebc9725b722ac5a5f53b78f22b0e1cd0 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 8 Oct 2025 18:34:20 +0000 Subject: [PATCH 3/6] Add requested unit tests --- src/mdio/converters/segy.py | 2 - tests/unit/test_segy_spec_validation.py | 86 +++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 2 deletions(-) create mode 100644 tests/unit/test_segy_spec_validation.py diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py index dd9fd666f..23f413bf5 100644 --- a/src/mdio/converters/segy.py +++ b/src/mdio/converters/segy.py @@ -484,8 +484,6 @@ def determine_target_size(var_type: str) -> int: def _validate_spec_in_template(segy_spec: SegySpec, mdio_template: AbstractDatasetTemplate) -> None: """Validate that the SegySpec has all required fields in the MDIO template.""" - # TODO(BrianMichell): Implement a simple test for this - # https://github.com/TGSAI/mdio-python/issues/703 header_fields = {field.name for field in segy_spec.trace.header.fields} required_fields = set(mdio_template._dim_names[:-1]) | set(mdio_template._coord_names) diff --git a/tests/unit/test_segy_spec_validation.py b/tests/unit/test_segy_spec_validation.py new file mode 100644 index 000000000..2284e7079 --- /dev/null +++ b/tests/unit/test_segy_spec_validation.py @@ -0,0 +1,86 @@ +"""Tests for SEG-Y spec validation against MDIO templates.""" + +from __future__ import annotations + +from typing import Any + +import pytest +from segy.schema import HeaderField +from segy.standards import get_segy_standard + +from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate +from mdio.builder.templates.types import SeismicDataDomain +from mdio.converters.segy import _validate_spec_in_template + + +class MockTemplate(AbstractDatasetTemplate): + """Mock template for testing validation.""" + + def __init__( + self, + data_domain: SeismicDataDomain, + dim_names: tuple[str, ...], + coord_names: tuple[str, ...], + ) -> None: + super().__init__(data_domain=data_domain) + self._dim_names = dim_names + self._coord_names = coord_names + + @property + def _name(self) -> str: + return "MockTemplate" + + def _load_dataset_attributes(self) -> dict[str, Any]: + return {} + + +class TestValidateSpecInTemplate: + """Test cases for _validate_spec_in_template function.""" + + def test_validation_passes_with_all_required_fields(self) -> None: + """Test that validation passes when all required fields are present.""" + # Template requiring standard SEG-Y fields + template = MockTemplate( + data_domain="time", + dim_names=("inline", "crossline", "time"), + coord_names=("cdp_x", "cdp_y"), + ) + + # SegySpec with all required fields + spec = get_segy_standard(1.0) + header_fields = [ + HeaderField(name="inline", byte=189, format="int32"), + HeaderField(name="crossline", byte=193, format="int32"), + HeaderField(name="cdp_x", byte=181, format="int32"), + HeaderField(name="cdp_y", byte=185, format="int32"), + ] + segy_spec = spec.customize(trace_header_fields=header_fields) + + # Should not raise any exception + _validate_spec_in_template(segy_spec, template) + + def test_validation_fails_with_missing_fields(self) -> None: + """Test that validation fails when required fields are missing.""" + # Template requiring custom fields not in standard spec + template = MockTemplate( + data_domain="time", + dim_names=("custom_dim1", "custom_dim2", "time"), + coord_names=("custom_coord_x", "custom_coord_y"), + ) + + # SegySpec with only one of the required custom fields + spec = get_segy_standard(1.0) + header_fields = [ + HeaderField(name="custom_dim1", byte=189, format="int32"), + ] + segy_spec = spec.customize(trace_header_fields=header_fields) + + # Should raise ValueError listing the missing fields + with pytest.raises(ValueError, match=r"Required fields.*not found in the provided segy_spec") as exc_info: + _validate_spec_in_template(segy_spec, template) + + error_message = str(exc_info.value) + assert "custom_dim2" in error_message + assert "custom_coord_x" in error_message + assert "custom_coord_y" in error_message + From 35bb44919de5cbe974392ed07a0da72dd3bea2ec Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 8 Oct 2025 19:00:47 +0000 Subject: [PATCH 4/6] Pre-commit --- tests/unit/test_segy_spec_validation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_segy_spec_validation.py b/tests/unit/test_segy_spec_validation.py index 2284e7079..cc6124ed6 100644 --- a/tests/unit/test_segy_spec_validation.py +++ b/tests/unit/test_segy_spec_validation.py @@ -2,6 +2,7 @@ from __future__ import annotations +from typing import TYPE_CHECKING from typing import Any import pytest @@ -9,9 +10,11 @@ from segy.standards import get_segy_standard from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate -from mdio.builder.templates.types import SeismicDataDomain from mdio.converters.segy import _validate_spec_in_template +if TYPE_CHECKING: + from mdio.builder.templates.types import SeismicDataDomain + class MockTemplate(AbstractDatasetTemplate): """Mock template for testing validation.""" @@ -83,4 +86,3 @@ def test_validation_fails_with_missing_fields(self) -> None: assert "custom_dim2" in error_message assert "custom_coord_x" in error_message assert "custom_coord_y" in error_message - From 922b61456b777581f6ebe862e75de9dc33f639a3 Mon Sep 17 00:00:00 2001 From: Altay Sansal <13684161+tasansal@users.noreply.github.com> Date: Wed, 8 Oct 2025 15:43:48 -0500 Subject: [PATCH 5/6] use magic mock --- tests/unit/test_segy_spec_validation.py | 45 ++++--------------------- 1 file changed, 7 insertions(+), 38 deletions(-) diff --git a/tests/unit/test_segy_spec_validation.py b/tests/unit/test_segy_spec_validation.py index cc6124ed6..6ada079cb 100644 --- a/tests/unit/test_segy_spec_validation.py +++ b/tests/unit/test_segy_spec_validation.py @@ -2,52 +2,23 @@ from __future__ import annotations -from typing import TYPE_CHECKING -from typing import Any +from unittest.mock import MagicMock import pytest from segy.schema import HeaderField from segy.standards import get_segy_standard -from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate from mdio.converters.segy import _validate_spec_in_template -if TYPE_CHECKING: - from mdio.builder.templates.types import SeismicDataDomain - - -class MockTemplate(AbstractDatasetTemplate): - """Mock template for testing validation.""" - - def __init__( - self, - data_domain: SeismicDataDomain, - dim_names: tuple[str, ...], - coord_names: tuple[str, ...], - ) -> None: - super().__init__(data_domain=data_domain) - self._dim_names = dim_names - self._coord_names = coord_names - - @property - def _name(self) -> str: - return "MockTemplate" - - def _load_dataset_attributes(self) -> dict[str, Any]: - return {} - class TestValidateSpecInTemplate: """Test cases for _validate_spec_in_template function.""" def test_validation_passes_with_all_required_fields(self) -> None: """Test that validation passes when all required fields are present.""" - # Template requiring standard SEG-Y fields - template = MockTemplate( - data_domain="time", - dim_names=("inline", "crossline", "time"), - coord_names=("cdp_x", "cdp_y"), - ) + template = MagicMock() + template._dim_names = ("inline", "crossline", "time") + template._coord_names = ("cdp_x", "cdp_y") # SegySpec with all required fields spec = get_segy_standard(1.0) @@ -65,11 +36,9 @@ def test_validation_passes_with_all_required_fields(self) -> None: def test_validation_fails_with_missing_fields(self) -> None: """Test that validation fails when required fields are missing.""" # Template requiring custom fields not in standard spec - template = MockTemplate( - data_domain="time", - dim_names=("custom_dim1", "custom_dim2", "time"), - coord_names=("custom_coord_x", "custom_coord_y"), - ) + template = MagicMock() + template._dim_names = ("custom_dim1", "custom_dim2", "time") + template._coord_names = ("custom_coord_x", "custom_coord_y") # SegySpec with only one of the required custom fields spec = get_segy_standard(1.0) From 83d71ac728effcf29a339abcac87a0447dc8593f Mon Sep 17 00:00:00 2001 From: Altay Sansal <13684161+tasansal@users.noreply.github.com> Date: Wed, 8 Oct 2025 15:53:10 -0500 Subject: [PATCH 6/6] also print template name in error --- src/mdio/converters/segy.py | 5 ++++- tests/unit/test_segy_spec_validation.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py index 23f413bf5..3017473ac 100644 --- a/src/mdio/converters/segy.py +++ b/src/mdio/converters/segy.py @@ -490,7 +490,10 @@ def _validate_spec_in_template(segy_spec: SegySpec, mdio_template: AbstractDatas missing_fields = required_fields - header_fields if missing_fields: - err = f"Required fields {sorted(missing_fields)} not found in the provided segy_spec" + err = ( + f"Required fields {sorted(missing_fields)} for template {mdio_template.name} " + f"not found in the provided segy_spec" + ) raise ValueError(err) diff --git a/tests/unit/test_segy_spec_validation.py b/tests/unit/test_segy_spec_validation.py index 6ada079cb..f8d65fcd9 100644 --- a/tests/unit/test_segy_spec_validation.py +++ b/tests/unit/test_segy_spec_validation.py @@ -37,6 +37,7 @@ def test_validation_fails_with_missing_fields(self) -> None: """Test that validation fails when required fields are missing.""" # Template requiring custom fields not in standard spec template = MagicMock() + template.name = "CustomTemplate" template._dim_names = ("custom_dim1", "custom_dim2", "time") template._coord_names = ("custom_coord_x", "custom_coord_y") @@ -48,10 +49,11 @@ def test_validation_fails_with_missing_fields(self) -> None: segy_spec = spec.customize(trace_header_fields=header_fields) # Should raise ValueError listing the missing fields - with pytest.raises(ValueError, match=r"Required fields.*not found in the provided segy_spec") as exc_info: + with pytest.raises(ValueError, match=r"Required fields.*not found in.*segy_spec") as exc_info: _validate_spec_in_template(segy_spec, template) error_message = str(exc_info.value) assert "custom_dim2" in error_message assert "custom_coord_x" in error_message assert "custom_coord_y" in error_message + assert "CustomTemplate" in error_message