Merged
31 commits
e7a1a3d  Add template for efficient storage of pre-stack data. (markspec, Sep 10, 2025)
810ad68  Resolve linting issues in seismic_prestack.py (markspec, Sep 10, 2025)
a114ba4  Add test to cover prestack template. (markspec, Sep 24, 2025)
65f4562  Linting update. (markspec, Sep 24, 2025)
0dbd334  Fix docsting for SeismicPreStackTemplate. (markspec, Sep 24, 2025)
9bb161d  Adjust PreStackGathers3DTime template (dmitriyrepin, Oct 16, 2025)
b9a36c7  touch (dmitriyrepin, Oct 16, 2025)
4373330  Precomit (dmitriyrepin, Oct 16, 2025)
6d88a2b  Fix unit tests (dmitriyrepin, Oct 17, 2025)
85af57e  Fix import (BrianMichell, Oct 21, 2025)
c10f6ad  Update attribute name (BrianMichell, Oct 21, 2025)
f89de1d  Rename source files (BrianMichell, Oct 21, 2025)
c6b9936  Rename template name (BrianMichell, Oct 21, 2025)
a6a67b0  Update todo message (BrianMichell, Oct 21, 2025)
70ee9b2  Alignment with current unit testing standards (BrianMichell, Oct 21, 2025)
a7803f7  Use more correct template name (BrianMichell, Oct 21, 2025)
7b3eb5e  pre-commit (BrianMichell, Oct 21, 2025)
fe02c38  Update doc string (BrianMichell, Oct 21, 2025)
28ee56b  rename streamer field data template (tasansal, Oct 29, 2025)
adcd7c9  fix broken template tests (tasansal, Oct 29, 2025)
893f5d2  fix broken test (tasansal, Oct 29, 2025)
5e5bd5a  update docstring (tasansal, Oct 29, 2025)
690e41e  add whole survey to docstring (tasansal, Oct 29, 2025)
b28aafd  modify for new dim names (tasansal, Oct 31, 2025)
8a4787d  fix geometry calculation by adding the new `shot_index` field (tasansal, Nov 3, 2025)
e4790dd  lint (tasansal, Nov 3, 2025)
233afef  update `shot_index` dtype to uint32 and add `calculated_dims` property (tasansal, Nov 3, 2025)
dd99f20  exclude `calculated_dimension_names` from `horizontal_coordinates` (tasansal, Nov 3, 2025)
8e3a1c9  update test cases and geometry calculation to reflect changes in cabl… (tasansal, Nov 4, 2025)
e9bcf40  header -> grid (tasansal, Nov 4, 2025)
0d77df4  fix shot index calculation (tasansal, Nov 6, 2025)
2 changes: 2 additions & 0 deletions src/mdio/builder/template_registry.py
@@ -26,6 +26,7 @@
from mdio.builder.templates.seismic_3d_cdp import Seismic3DCdpGathersTemplate
from mdio.builder.templates.seismic_3d_coca import Seismic3DCocaGathersTemplate
from mdio.builder.templates.seismic_3d_poststack import Seismic3DPostStackTemplate
from mdio.builder.templates.seismic_3d_streamer_field import Seismic3DStreamerFieldRecordsTemplate
from mdio.builder.templates.seismic_3d_streamer_shot import Seismic3DStreamerShotGathersTemplate

if TYPE_CHECKING:
@@ -135,6 +136,7 @@ def _register_default_templates(self) -> None:
# Field (shot) data
self.register(Seismic2DStreamerShotGathersTemplate())
self.register(Seismic3DStreamerShotGathersTemplate())
self.register(Seismic3DStreamerFieldRecordsTemplate())

def get(self, template_name: str) -> AbstractDatasetTemplate:
"""Get an instance of a template from the registry by its name.
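For reference, a minimal usage sketch (not part of this diff) of fetching the newly registered template. The registered name and the assumption that default templates are registered when the registry is constructed are not confirmed by the diff.

from mdio.builder.template_registry import TemplateRegistry

registry = TemplateRegistry()  # assumption: defaults are registered at construction
# Assumed registered name; check Seismic3DStreamerFieldRecordsTemplate().name if it differs.
template = registry.get("StreamerFieldRecords3DTime")
print(template.calculated_dimension_names)  # expected: ('shot_index',)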
6 changes: 6 additions & 0 deletions src/mdio/builder/templates/base.py
@@ -39,6 +39,7 @@ def __init__(self, data_domain: SeismicDataDomain) -> None:
raise ValueError(msg)

self._dim_names: tuple[str, ...] = ()
self._calculated_dims: tuple[str, ...] = ()
self._physical_coord_names: tuple[str, ...] = ()
self._logical_coord_names: tuple[str, ...] = ()
self._var_chunk_shape: tuple[int, ...] = ()
@@ -130,6 +131,11 @@ def dimension_names(self) -> tuple[str, ...]:
"""Returns the names of the dimensions."""
return copy.deepcopy(self._dim_names)

@property
def calculated_dimension_names(self) -> tuple[str, ...]:
"""Returns the names of the dimensions."""
return copy.deepcopy(self._calculated_dims)

@property
def physical_coordinate_names(self) -> tuple[str, ...]:
"""Returns the names of the physical (world) coordinates."""
104 changes: 104 additions & 0 deletions src/mdio/builder/templates/seismic_3d_streamer_field.py
@@ -0,0 +1,104 @@
"""Seismic3DStreamerFieldRecordsTemplate MDIO v1 dataset templates."""

from typing import Any

from mdio.builder.schemas.dtype import ScalarType
from mdio.builder.schemas.v1.variable import CoordinateMetadata
from mdio.builder.templates.base import AbstractDatasetTemplate
from mdio.builder.templates.types import SeismicDataDomain


class Seismic3DStreamerFieldRecordsTemplate(AbstractDatasetTemplate):
"""Seismic 3D streamer shot field records template.
A generalized template for streamer field records that are optimized for:
- Common-shot access
- Common-channel access
It can also store all the shot-lines of a survey in one MDIO if needed.
Args:
data_domain: The domain of the dataset.
"""

def __init__(self, data_domain: SeismicDataDomain = "time"):
super().__init__(data_domain=data_domain)

self._spatial_dim_names = ("sail_line", "gun", "shot_index", "cable", "channel")
self._calculated_dims = ("shot_index",)
self._dim_names = (*self._spatial_dim_names, self._data_domain)
self._physical_coord_names = ("source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y")
self._logical_coord_names = ("shot_point", "orig_field_record_num") # ffid
self._var_chunk_shape = (1, 1, 16, 1, 32, 1024)

@property
def _name(self) -> str:
return "StreamerFieldRecords3D"

def _load_dataset_attributes(self) -> dict[str, Any]:
return {"surveyDimensionality": "3D", "ensembleType": "common_source_by_gun"}

def _add_coordinates(self) -> None:
# Add dimension coordinates
# EXCLUDE: `shot_index`, since it is just a 0..N-1 index
self._builder.add_coordinate(
"sail_line",
dimensions=("sail_line",),
data_type=ScalarType.UINT32,
)
self._builder.add_coordinate(
"gun",
dimensions=("gun",),
data_type=ScalarType.UINT8,
)
self._builder.add_coordinate(
"cable",
dimensions=("cable",),
data_type=ScalarType.UINT8,
)
self._builder.add_coordinate(
"channel",
dimensions=("channel",),
data_type=ScalarType.UINT16,
)
self._builder.add_coordinate(
self._data_domain,
dimensions=(self._data_domain,),
data_type=ScalarType.INT32,
)

# Add non-dimension coordinates
self._builder.add_coordinate(
"orig_field_record_num",
dimensions=("sail_line", "gun", "shot_index"),
data_type=ScalarType.UINT32,
)
self._builder.add_coordinate(
"shot_point",
dimensions=("sail_line", "gun", "shot_index"),
data_type=ScalarType.UINT32,
)
self._builder.add_coordinate(
"source_coord_x",
dimensions=("sail_line", "gun", "shot_index"),
data_type=ScalarType.FLOAT64,
metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_x")),
)
self._builder.add_coordinate(
"source_coord_y",
dimensions=("sail_line", "gun", "shot_index"),
data_type=ScalarType.FLOAT64,
metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_y")),
)
self._builder.add_coordinate(
"group_coord_x",
dimensions=("sail_line", "gun", "shot_index", "cable", "channel"),
data_type=ScalarType.FLOAT64,
metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("group_coord_x")),
)
self._builder.add_coordinate(
"group_coord_y",
dimensions=("sail_line", "gun", "shot_index", "cable", "channel"),
data_type=ScalarType.FLOAT64,
metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("group_coord_y")),
)
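A small sanity-check sketch of the layout declared above. The float32 sample dtype is an assumption here; the chunk shape (1, 1, 16, 1, 32, 1024) comes straight from the template.

import numpy as np

from mdio.builder.templates.seismic_3d_streamer_field import Seismic3DStreamerFieldRecordsTemplate

template = Seismic3DStreamerFieldRecordsTemplate(data_domain="time")
print(template.spatial_dimension_names)  # ('sail_line', 'gun', 'shot_index', 'cable', 'channel')

chunk_shape = (1, 1, 16, 1, 32, 1024)
chunk_bytes = np.prod(chunk_shape) * np.dtype("float32").itemsize  # assumed float32 samples
print(chunk_bytes / 2**20)  # 2.0 MiB uncompressed per chunk: 16 shots x 32 channels x 1024 samples

This puts one chunk at a comfortable size for both common-shot reads (whole cable/channel planes per shot) and common-channel reads across shots.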
8 changes: 6 additions & 2 deletions src/mdio/converters/segy.py
@@ -345,10 +345,10 @@ def _populate_coordinates(
"""
drop_vars_delayed = []
# Populate the dimension coordinate variables (1-D arrays)
dataset, vars_to_drop_later = populate_dim_coordinates(dataset, grid, drop_vars_delayed=drop_vars_delayed)
dataset, drop_vars_delayed = populate_dim_coordinates(dataset, grid, drop_vars_delayed=drop_vars_delayed)

# Populate the non-dimension coordinate variables (N-dim arrays)
dataset, vars_to_drop_later = populate_non_dim_coordinates(
dataset, drop_vars_delayed = populate_non_dim_coordinates(
dataset,
grid,
coordinates=coords,
@@ -488,6 +488,7 @@ def _validate_spec_in_template(segy_spec: SegySpec, mdio_template: AbstractDatas
header_fields = {field.name for field in segy_spec.trace.header.fields}

required_fields = set(mdio_template.spatial_dimension_names) | set(mdio_template.coordinate_names)
required_fields = required_fields - set(mdio_template.calculated_dimension_names) # remove fields that will be calculated
required_fields = required_fields | {"coordinate_scalar"} # ensure coordinate scalar is always present
missing_fields = required_fields - header_fields
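The effect of the new exclusion can be sketched with plain set arithmetic; the field names below are illustrative, not taken from a real SEG-Y spec.

spatial_dims = {"sail_line", "gun", "shot_index", "cable", "channel"}
coord_names = {"shot_point", "orig_field_record_num", "source_coord_x", "source_coord_y"}
calculated = {"shot_index"}

required = (spatial_dims | coord_names) - calculated | {"coordinate_scalar"}
# 'shot_index' is derived by the grid override, so the SEG-Y spec does not need to
# map a header byte for it; everything else must exist in the trace headers.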

@@ -592,6 +593,9 @@ def segy_to_mdio( # noqa PLR0913
to_mdio(xr_dataset, output_path=output_path, mode="w", compute=False)

# This will write the non-dimension coordinates and trace mask
# We also remove dimensions that don't have associated coordinates
unindexed_dims = [d for d in xr_dataset.dims if d not in xr_dataset.coords]
[drop_vars_delayed.remove(d) for d in unindexed_dims]
meta_ds = xr_dataset[drop_vars_delayed + ["trace_mask"]]
to_mdio(meta_ds, output_path=output_path, mode="r+", compute=True)
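A toy xarray illustration of the `unindexed_dims` filter above (the dataset contents are made up): a dimension such as `shot_index` has no coordinate variable, so nothing needs to be written for it in the metadata pass.

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"amplitude": (("shot_index", "time"), np.zeros((4, 8), dtype="float32"))},
    coords={"time": np.arange(8)},
)
unindexed = [d for d in ds.dims if d not in ds.coords]
print(unindexed)  # ['shot_index']: no coordinate exists, so it is skipped when writing coords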

50 changes: 27 additions & 23 deletions src/mdio/segy/geometry.py
@@ -149,7 +149,7 @@ def analyze_streamer_headers(
return unique_cables, cable_chan_min, cable_chan_max, geom_type


def analyze_shotlines_for_guns(
def analyze_saillines_for_guns(
index_headers: HeaderArray,
) -> tuple[NDArray, dict[str, list], ShotGunGeometryType]:
"""Check input headers for SEG-Y input to help determine geometry of shots and guns.
@@ -161,27 +161,27 @@ def analyze_shotlines_for_guns(
index_headers: numpy array with index headers

Returns:
tuple of unique_shot_lines, unique_guns_in_shot_line, geom_type
tuple of unique_sail_lines, unique_guns_in_sail_line, geom_type
"""
# Find unique cable ids
unique_shot_lines = np.sort(np.unique(index_headers["shot_line"]))
unique_sail_lines = np.sort(np.unique(index_headers["sail_line"]))
unique_guns = np.sort(np.unique(index_headers["gun"]))
logger.info("unique_shot_lines: %s", unique_shot_lines)
logger.info("unique_sail_lines: %s", unique_sail_lines)
logger.info("unique_guns: %s", unique_guns)

# Find channel min and max values for each cable
unique_guns_in_shot_line = {}
unique_guns_in_sail_line = {}

geom_type = ShotGunGeometryType.B
# Check shot numbers are still unique if div/num_guns
for shot_line in unique_shot_lines:
shot_line_mask = index_headers["shot_line"] == shot_line
shot_current_sl = index_headers["shot_point"][shot_line_mask]
gun_current_sl = index_headers["gun"][shot_line_mask]
for sail_line in unique_sail_lines:
sail_line_mask = index_headers["sail_line"] == sail_line
shot_current_sl = index_headers["shot_point"][sail_line_mask]
gun_current_sl = index_headers["gun"][sail_line_mask]

unique_guns_sl = np.sort(np.unique(gun_current_sl))
num_guns_sl = unique_guns_sl.shape[0]
unique_guns_in_shot_line[str(shot_line)] = list(unique_guns_sl)
unique_guns_in_sail_line[str(sail_line)] = list(unique_guns_sl)

for gun in unique_guns_sl:
gun_mask = gun_current_sl == gun
@@ -190,10 +190,10 @@
mod_shots = np.floor(shots_current_sl_gun / num_guns_sl)
if len(np.unique(mod_shots)) != num_shots_sl:
msg = "Shot line %s has %s when using div by %s %s has %s unique mod shots."
logger.info(msg, shot_line, num_shots_sl, num_guns_sl, np.unique(mod_shots))
logger.info(msg, sail_line, num_shots_sl, num_guns_sl, np.unique(mod_shots))
geom_type = ShotGunGeometryType.A
return unique_shot_lines, unique_guns_in_shot_line, geom_type
return unique_shot_lines, unique_guns_in_shot_line, geom_type
return unique_sail_lines, unique_guns_in_sail_line, geom_type
return unique_sail_lines, unique_guns_in_sail_line, geom_type


def create_counter(
@@ -459,7 +459,7 @@ def transform(
class AutoShotWrap(GridOverrideCommand):
"""Automatically determine ShotGun acquisition type."""

required_keys = {"shot_line", "gun", "shot_point", "cable", "channel"}
required_keys = {"sail_line", "gun", "shot_point", "cable", "channel"}
required_parameters = None

def validate(self, index_headers: HeaderArray, grid_overrides: dict[str, bool | int]) -> None:
@@ -475,24 +475,28 @@ def transform(
"""Perform the grid transform."""
self.validate(index_headers, grid_overrides)

result = analyze_shotlines_for_guns(index_headers)
unique_shot_lines, unique_guns_in_shot_line, geom_type = result
result = analyze_saillines_for_guns(index_headers)
unique_sail_lines, unique_guns_in_sail_line, geom_type = result
logger.info("Ingesting dataset as shot type: %s", geom_type.name)

max_num_guns = 1
for shot_line in unique_shot_lines:
logger.info("shot_line: %s has guns: %s", shot_line, unique_guns_in_shot_line[str(shot_line)])
num_guns = len(unique_guns_in_shot_line[str(shot_line)])
for sail_line in unique_sail_lines:
logger.info("sail_line: %s has guns: %s", sail_line, unique_guns_in_sail_line[str(sail_line)])
num_guns = len(unique_guns_in_sail_line[str(sail_line)])
max_num_guns = max(num_guns, max_num_guns)

# This might be slow and potentially could be improved with a rewrite
# to prevent so many lookups
if geom_type == ShotGunGeometryType.B:
for shot_line in unique_shot_lines:
shot_line_idxs = np.where(index_headers["shot_line"][:] == shot_line)
index_headers["shot_point"][shot_line_idxs] = np.floor(
index_headers["shot_point"][shot_line_idxs] / max_num_guns
shot_index = np.empty(len(index_headers), dtype="uint32")
index_headers = rfn.append_fields(index_headers.base, "shot_index", shot_index)
for sail_line in unique_sail_lines:
sail_line_idxs = np.where(index_headers["sail_line"][:] == sail_line)
index_headers["shot_index"][sail_line_idxs] = np.floor(
index_headers["shot_point"][sail_line_idxs] / max_num_guns
)
# Make shot index zero-based PER sail line
index_headers["shot_index"][sail_line_idxs] -= index_headers["shot_index"][sail_line_idxs].min()
return index_headers


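The new `shot_index` derivation can be checked on a toy sail line (values are fabricated): with two guns firing alternately, consecutive shot points collapse to the same index after the floor division, and subtracting the per-line minimum makes the index zero-based.

import numpy as np

shot_point = np.array([100, 101, 102, 103])  # one sail line, guns 1 and 2 alternating
max_num_guns = 2

shot_index = np.floor(shot_point / max_num_guns).astype("uint32")  # [50, 50, 51, 51]
shot_index -= shot_index.min()  # zero-based per sail line -> [0, 0, 1, 1]
print(shot_index)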
15 changes: 15 additions & 0 deletions src/mdio/segy/utilities.py
@@ -51,13 +51,21 @@ def get_grid_plan( # noqa: C901, PLR0913

Returns:
All index dimensions and chunksize or dimensions and chunksize together with header values.

Raises:
ValueError: If computed fields are not found after grid overrides.
"""
if grid_overrides is None:
grid_overrides = {}

# Keep only dimension and non-dimension coordinates excluding the vertical axis
horizontal_dimensions = template.spatial_dimension_names
horizontal_coordinates = horizontal_dimensions + template.coordinate_names

# Remove any to be computed fields
computed_fields = set(template.calculated_dimension_names)
horizontal_coordinates = tuple(set(horizontal_coordinates) - computed_fields)

headers_subset = parse_headers(
segy_file_kwargs=segy_file_kwargs,
num_traces=segy_file_info.num_traces,
@@ -73,6 +81,13 @@
grid_overrides=grid_overrides,
)

if len(computed_fields) > 0 and not computed_fields.issubset(headers_subset.dtype.names):
err = (
f"Required computed fields {sorted(computed_fields)} for template {template.name} "
f"not found after grid overrides. Please ensure correct overrides are applied."
)
raise ValueError(err)

dimensions = []
for dim_name in horizontal_dimensions:
dim_unique = np.unique(headers_subset[dim_name])
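A matching toy check for the new ValueError guard above: after the AutoShotWrap override, the parsed header array is expected to carry the computed `shot_index` field. The dtype below is illustrative only.

import numpy as np

headers_subset = np.zeros(3, dtype=[("sail_line", "i4"), ("gun", "i2"), ("shot_index", "u4")])
computed_fields = {"shot_index"}

# Mirrors the issubset check in get_grid_plan; a missing field would raise ValueError there.
assert computed_fields.issubset(headers_subset.dtype.names)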
14 changes: 7 additions & 7 deletions tests/integration/conftest.py
@@ -22,13 +22,13 @@
def get_segy_mock_4d_spec() -> SegySpec:
"""Create a mock 4D SEG-Y specification."""
trace_header_fields = [
HeaderField(name="field_rec_no", byte=9, format="int32"),
HeaderField(name="orig_field_record_num", byte=9, format="int32"),
HeaderField(name="channel", byte=13, format="int32"),
HeaderField(name="shot_point", byte=17, format="int32"),
HeaderField(name="offset", byte=37, format="int32"),
HeaderField(name="samples_per_trace", byte=115, format="int16"),
HeaderField(name="sample_interval", byte=117, format="int16"),
HeaderField(name="shot_line", byte=133, format="int16"),
HeaderField(name="sail_line", byte=133, format="int16"),
HeaderField(name="cable", byte=137, format="int16"),
HeaderField(name="gun", byte=171, format="int16"),
HeaderField(name="coordinate_scalar", byte=71, format="int16"),
@@ -111,15 +111,15 @@ def create_segy_mock_4d( # noqa: PLR0913
gun = gun_headers[trc_idx]
cable = cable_headers[trc_idx]
channel = channel_headers[trc_idx]
shot_line = 1
sail_line = 1
offset = 0

if index_receivers is False:
channel, gun, shot_line = 0, 0, 0
channel, gun, sail_line = 0, 0, 0

# Assign dimension coordinate fields with calculated mock data
header_fields = ["field_rec_no", "channel", "shot_point", "offset", "shot_line", "cable", "gun"]
headers[header_fields][trc_idx] = (shot, channel, shot, offset, shot_line, cable, gun)
header_fields = ["orig_field_record_num", "channel", "shot_point", "offset", "sail_line", "cable", "gun"]
headers[header_fields][trc_idx] = (shot, channel, shot, offset, sail_line, cable, gun)

# Assign coordinate fields with mock data
x = start_x + step_x * trc_shot_idx
@@ -144,7 +144,7 @@ def segy_mock_4d_shots(fake_segy_tmp: Path) -> dict[StreamerShotGeometryType, Pa
num_samples = 25
shots = [2, 3, 5, 6, 7, 8, 9]
guns = [1, 2]
cables = [0, 101, 201, 301]
cables = [0, 3, 5, 7]
receivers_per_cable = [1, 5, 7, 5]

segy_paths = {}