Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
6bb180a
Update Xarray api access (#688)
BrianMichell Sep 30, 2025
1271bb7
Reimplement disaster recovery logic
BrianMichell Sep 9, 2025
fb60ef5
Ensure getting true raw bytes for DR array
BrianMichell Sep 10, 2025
a2ee5b8
Linting
BrianMichell Sep 10, 2025
7516ab7
Add v2 issue check
BrianMichell Sep 10, 2025
ed6116e
Fix pre-commit
BrianMichell Sep 10, 2025
7dad280
Profiled disaster recovery array (#8)
BrianMichell Sep 15, 2025
cc1895e
Fix unclosed parenthesis
BrianMichell Sep 15, 2025
8fa4928
Linting
BrianMichell Sep 15, 2025
a5154c6
Test DR compatibility with all tested schemas
BrianMichell Sep 15, 2025
280ea62
Fix missing test fixture error
BrianMichell Sep 16, 2025
c589bc0
Suppress unused linting error
BrianMichell Sep 16, 2025
c242eeb
Attempt to use view
BrianMichell Sep 19, 2025
ddcadb6
Add hex-dump and MDIO output reproducer
BrianMichell Sep 19, 2025
3d48891
Fixes
BrianMichell Sep 19, 2025
f1beb92
Cleanup
BrianMichell Sep 19, 2025
d277d5b
Provide clean disaster recovery interface
BrianMichell Sep 22, 2025
c9cf0e9
Begin work on tests
BrianMichell Sep 22, 2025
9c8619a
Fix flattening issue
BrianMichell Sep 22, 2025
da49329
Push for debugging
BrianMichell Sep 22, 2025
7995912
Numpy updates
BrianMichell Sep 22, 2025
78f1db9
Testing
BrianMichell Sep 23, 2025
6a638bc
Working end-to-end examples
BrianMichell Sep 24, 2025
a8f7952
Cleanup
BrianMichell Sep 24, 2025
f7d7c2c
Bandaid fix
BrianMichell Sep 24, 2025
c6a277f
linting pass 1
BrianMichell Sep 24, 2025
5b826fe
Fix logic issue
BrianMichell Sep 24, 2025
1266be9
Use wrapper class
BrianMichell Sep 24, 2025
fd890fc
Precommit
BrianMichell Sep 24, 2025
e8027de
Remove external debugging code
BrianMichell Sep 24, 2025
97d3131
Remove debug code
BrianMichell Sep 24, 2025
aab7e69
Remove errant numpy addition to pyproject toml
BrianMichell Sep 24, 2025
72e411c
Fix uv lock to mainline
BrianMichell Sep 24, 2025
55d9464
Pre-commit
BrianMichell Sep 24, 2025
7b4020e
Removed raw byte inserts
BrianMichell Sep 29, 2025
f915e14
Use new segy API calls
BrianMichell Sep 30, 2025
937317f
Updates to get working
BrianMichell Sep 30, 2025
3862e1f
Use released version
BrianMichell Sep 30, 2025
28f645c
Linting
BrianMichell Sep 30, 2025
5a7ef81
Merge branch 'DR_with_modifications' into DR_raw_bytes_patch
BrianMichell Sep 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ dependencies = [
"psutil>=7.0.0",
"pydantic>=2.11.9",
"rich>=14.1.0",
"segy>=0.5.0",
"segy>=0.5.1.post1",
"tqdm>=4.67.1",
"universal-pathlib>=0.2.6",
"xarray>=2025.9.0",
"xarray>=2025.9.1",
"zarr>=3.1.3",
]

Expand Down
2 changes: 1 addition & 1 deletion src/mdio/api/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from upath import UPath
from xarray import Dataset as xr_Dataset
from xarray import open_zarr as xr_open_zarr
from xarray.backends.api import to_zarr as xr_to_zarr
from xarray.backends.writers import to_zarr as xr_to_zarr

from mdio.constants import ZarrFormat
from mdio.core.zarr_io import zarr_warnings_suppress_unstable_structs_v3
Expand Down
22 changes: 11 additions & 11 deletions src/mdio/segy/_disaster_recovery_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@

from __future__ import annotations

from copy import deepcopy
from typing import TYPE_CHECKING

import numpy as np

if TYPE_CHECKING:
from numpy.typing import NDArray
Expand All @@ -16,19 +14,21 @@ class SegyFileTraceDataWrapper:
def __init__(self, segy_file: SegyFile, indices: int | list[int] | NDArray | slice):
self.segy_file = segy_file
self.indices = indices
self._header_pipeline = deepcopy(segy_file.accessors.header_decode_pipeline)
segy_file.accessors.header_decode_pipeline.transforms = []
self.traces = segy_file.trace[indices]

@property
def header(self) -> NDArray:
# The copy is necessary to avoid applying the pipeline to the original header.
return self._header_pipeline.apply(self.traces.header.copy())
self.idx = self.segy_file.trace.normalize_and_validate_query(self.indices)
self.traces = self.segy_file.trace.fetch(self.idx, raw=True)

self.raw_view = self.traces.view(self.segy_file.spec.trace.dtype)
self.decoded_traces = self.segy_file.accessors.trace_decode_pipeline.apply(self.raw_view.copy())

@property
def raw_header(self) -> NDArray:
return np.ascontiguousarray(self.traces.header.copy()).view("|V240")
return self.raw_view.header.view("|V240")

@property
def header(self) -> NDArray:
return self.decoded_traces.header

@property
def sample(self) -> NDArray:
return self.traces.sample
return self.decoded_traces.sample
40 changes: 38 additions & 2 deletions src/mdio/segy/creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,38 @@
logger = logging.getLogger(__name__)


def _filter_raw_unspecified_fields(headers: NDArray) -> NDArray:
    """Drop every ``__MDIO_RAW_UNSPECIFIED_*`` field from a header array.

    These fields are added during SEGY import to preserve raw header bytes,
    but they cause dtype mismatches during export, so they are stripped here.

    Args:
        headers: Header array that may contain raw unspecified fields.

    Returns:
        The input array itself when it is unstructured or carries no raw
        unspecified fields; otherwise a newly allocated array of the same
        shape holding only the remaining fields, in their original order.
    """
    all_names = headers.dtype.names
    if all_names is None:
        # Unstructured arrays have no named fields to strip.
        return headers

    raw_prefix = "__MDIO_RAW_UNSPECIFIED_"
    kept = tuple(field for field in all_names if not field.startswith(raw_prefix))
    if len(kept) == len(all_names):
        # Nothing matched the prefix, so hand back the caller's array untouched.
        return headers

    # dtype.fields maps name -> (dtype, offset, ...); only the per-field dtype
    # is carried over, so the new layout does not reuse the source offsets.
    field_table = headers.dtype.fields
    trimmed_dtype = [(field, field_table[field][0]) for field in kept]

    result = np.empty(headers.shape, dtype=trimmed_dtype)
    for field in kept:
        result[field] = headers[field]
    return result


def make_segy_factory(spec: SegySpec, binary_header: dict[str, int]) -> SegyFactory:
"""Generate SEG-Y factory from MDIO metadata."""
sample_interval = binary_header["sample_interval"]
Expand Down Expand Up @@ -167,7 +199,9 @@ def serialize_to_segy_stack( # noqa: PLR0913
samples = samples[live_mask]
headers = headers[live_mask]

buffer = segy_factory.create_traces(headers, samples)
# Filter out raw unspecified fields that cause dtype mismatches
filtered_headers = _filter_raw_unspecified_fields(headers)
buffer = segy_factory.create_traces(filtered_headers, samples)

global_index = block_start[0]
record_id_str = str(global_index)
Expand Down Expand Up @@ -199,7 +233,9 @@ def serialize_to_segy_stack( # noqa: PLR0913
rec_samples = samples[rec_index][rec_live_mask]
rec_headers = headers[rec_index][rec_live_mask]

buffer = segy_factory.create_traces(rec_headers, rec_samples)
# Filter out raw unspecified fields that cause dtype mismatches
filtered_headers = _filter_raw_unspecified_fields(rec_headers)
buffer = segy_factory.create_traces(filtered_headers, rec_samples)

global_index = tuple(block_start[i] + rec_index[i] for i in range(record_ndim))
record_id_str = "/".join(map(str, global_index))
Expand Down
36 changes: 0 additions & 36 deletions tests/unit/test_disaster_recovery_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,39 +287,3 @@ def test_different_index_types(
expected_count = 1

assert wrapper.header.size == expected_count

def test_header_pipeline_preservation(self, temp_dir: Path, basic_segy_spec: SegySpec, segy_config: dict) -> None:
    """Check that building the wrapper moves the header transforms onto the wrapper."""
    config_name, endianness, data_format = (
        segy_config["name"],
        segy_config["endianness"],
        segy_config["data_format"],
    )
    segy_path = temp_dir / f"test_pipeline_{config_name}.segy"

    # Write a five-trace SEG-Y file using the byte order and sample format from segy_config.
    spec = self.create_test_segy_file(
        spec=basic_segy_spec,
        num_traces=5,
        samples_per_trace=SAMPLES_PER_TRACE,
        output_path=segy_path,
        endianness=endianness,
        data_format=data_format,
    )

    segy_file = SegyFile(segy_path, spec=spec)
    file_pipeline = segy_file.accessors.header_decode_pipeline

    # Record the transform count before the wrapper is constructed.
    transforms_before = len(file_pipeline.transforms)

    wrapper = SegyFileTraceDataWrapper(segy_file, 0)

    # The file's own pipeline is expected to have been emptied by the wrapper ...
    assert len(segy_file.accessors.header_decode_pipeline.transforms) == 0

    # ... while the wrapper's private pipeline holds as many transforms as the file had.
    assert len(wrapper._header_pipeline.transforms) == transforms_before
22 changes: 11 additions & 11 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading