Skip to content

Commit

Permalink
Allow configuring CPU usage during SEG-Y import/export (#392)
Browse files Browse the repository at this point in the history
* Update CPU count configuration for parallel operations

This change improves control over parallel computation in the application. The number of CPUs used by the mdio_to_segy and to_zarr functions can now be set through the environment variables MDIO__EXPORT__CPU_COUNT and MDIO__IMPORT__CPU_COUNT, respectively; when a variable is not set, the number of logical CPUs is used. This allows users to tune the program's performance for their specific hardware setup.

* update lockfile

* update black to fix security vulnerability

* remove double newline from beginning of modules
  • Loading branch information
tasansal committed May 21, 2024
1 parent 1571a00 commit 0d9f993
Show file tree
Hide file tree
Showing 45 changed files with 417 additions and 444 deletions.
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Sphinx configuration."""

project = "MDIO"
author = "TGS"
copyright = "2023, TGS"
Expand Down
1 change: 0 additions & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Nox sessions."""


import os
import shlex
import shutil
Expand Down
795 changes: 398 additions & 397 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ cloud = ["s3fs", "gcsfs", "adlfs"]
lossy = ["zfpy"]

[tool.poetry.group.dev.dependencies]
black = "^23.12.1"
black = "^24.4.2"
coverage = {version = "^7.4.0", extras = ["toml"]}
darglint = "^1.8.1"
flake8 = "^7.0.0"
Expand Down
1 change: 0 additions & 1 deletion src/mdio/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""MDIO library."""


from importlib import metadata

from mdio.api import MDIOReader
Expand Down
1 change: 0 additions & 1 deletion src/mdio/__main__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Command-line interface."""


from __future__ import annotations

import importlib
Expand Down
1 change: 0 additions & 1 deletion src/mdio/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""API Module."""


from .accessor import MDIOReader
from .accessor import MDIOWriter

Expand Down
1 change: 0 additions & 1 deletion src/mdio/api/io_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Utilities related to API functions and classes."""


from __future__ import annotations

from typing import Any
Expand Down
1 change: 0 additions & 1 deletion src/mdio/commands/copy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""MDIO Dataset copy command."""


from __future__ import annotations

from click import STRING
Expand Down
1 change: 0 additions & 1 deletion src/mdio/commands/segy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""SEG-Y Import/Export CLI Plugin."""


from typing import Any

from click import BOOL
Expand Down
1 change: 0 additions & 1 deletion src/mdio/constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Constant values used across MDIO."""


import numpy as np


Expand Down
1 change: 0 additions & 1 deletion src/mdio/converters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""MDIO Data conversion API."""


from .mdio import mdio_to_segy
from .segy import segy_to_mdio

Expand Down
14 changes: 12 additions & 2 deletions src/mdio/converters/mdio.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Conversion from MDIO to various other formats."""


from __future__ import annotations

import os
from os import path
from tempfile import TemporaryDirectory

import numpy as np
from psutil import cpu_count
from tqdm.dask import TqdmCallback

from mdio import MDIOReader
Expand All @@ -24,6 +25,10 @@
distributed = None


default_cpus = cpu_count(logical=True)
NUM_CPUS = int(os.getenv("MDIO__EXPORT__CPU_COUNT", default_cpus))


def mdio_to_segy( # noqa: C901
mdio_path_or_buffer: str,
output_segy_path: str,
Expand Down Expand Up @@ -176,7 +181,12 @@ def mdio_to_segy( # noqa: C901
out_byteorder=out_byteorder,
file_root=tmp_dir.name,
axis=tuple(range(1, samples.ndim)),
).compute()
)

if client is not None:
flat_files = flat_files.compute()
else:
flat_files = flat_files.compute(num_workers=NUM_CPUS)

# If whole blocks are missing, remove them from the list.
missing_mask = flat_files == "missing"
Expand Down
1 change: 0 additions & 1 deletion src/mdio/converters/segy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Conversion from SEG-Y to MDIO."""


from __future__ import annotations

import logging
Expand Down
1 change: 0 additions & 1 deletion src/mdio/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""MDIO core functionalities."""


from mdio.core.dimension import Dimension
from mdio.core.grid import Grid

Expand Down
1 change: 0 additions & 1 deletion src/mdio/core/dimension.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Dimension (grid) abstraction and serializers."""


from __future__ import annotations

import inspect
Expand Down
1 change: 0 additions & 1 deletion src/mdio/core/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Core exceptions for MDIO."""


from mdio.exceptions import MDIOError


Expand Down
1 change: 0 additions & 1 deletion src/mdio/core/grid.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Grid abstraction with serializers."""


from __future__ import annotations

import inspect
Expand Down
1 change: 0 additions & 1 deletion src/mdio/core/indexing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Indexing logic."""


import itertools
from math import ceil

Expand Down
1 change: 0 additions & 1 deletion src/mdio/core/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
Current support for JSON and YAML.
"""


import json
from abc import ABC
from abc import abstractmethod
Expand Down
1 change: 0 additions & 1 deletion src/mdio/core/utils_write.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Convenience utilities for writing to Zarr."""


from typing import Any

import zarr
Expand Down
1 change: 0 additions & 1 deletion src/mdio/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Custom exceptions related to MDIO functionality."""


from __future__ import annotations


Expand Down
1 change: 0 additions & 1 deletion src/mdio/segy/_standards_common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Common elements for SEG-Y standards datasets."""


from enum import IntEnum


Expand Down
1 change: 0 additions & 1 deletion src/mdio/segy/_standards_rev0.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""SEG-Y Rev 0 standard and its definitions."""


from mdio.segy.byte_utils import ByteOrder
from mdio.segy.byte_utils import Dtype
from mdio.segy.byte_utils import OrderedType
Expand Down
1 change: 0 additions & 1 deletion src/mdio/segy/_workers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Low level workers for parsing and writing SEG-Y to Zarr."""


from __future__ import annotations

from typing import Any
Expand Down
8 changes: 4 additions & 4 deletions src/mdio/segy/blocked_io.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Functions for doing blocked I/O from SEG-Y."""


from __future__ import annotations

import multiprocessing as mp
import os
from concurrent.futures import ProcessPoolExecutor
from itertools import repeat

Expand Down Expand Up @@ -35,8 +35,8 @@
ZFPY = None
zfpy = None

# Globals
NUM_CORES = cpu_count(logical=False)
default_cpus = cpu_count(logical=True)
NUM_CPUS = int(os.getenv("MDIO__IMPORT__CPU_COUNT", default_cpus))


def to_zarr(
Expand Down Expand Up @@ -136,7 +136,7 @@ def to_zarr(
# For Unix async writes with s3fs/fsspec & multiprocessing,
# use 'spawn' instead of the default 'fork' to avoid deadlocks
# on cloud stores. Slower but necessary. 'spawn' is already the default on Windows.
num_workers = min(num_chunks, NUM_CORES)
num_workers = min(num_chunks, NUM_CPUS)
context = mp.get_context("spawn")
executor = ProcessPoolExecutor(max_workers=num_workers, mp_context=context)

Expand Down
1 change: 0 additions & 1 deletion src/mdio/segy/byte_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Module for custom struct abstraction utilities."""


import sys
from dataclasses import dataclass
from enum import Enum
Expand Down
1 change: 0 additions & 1 deletion src/mdio/segy/creation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""SEG-Y creation utilities."""


from __future__ import annotations

import os
Expand Down
1 change: 0 additions & 1 deletion src/mdio/segy/ebcdic.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
* Then reverse it and get `ASCII_TO_EBCDIC[33]` that maps back to `0x4F` which is "|".
"""


import numpy as np


Expand Down
1 change: 0 additions & 1 deletion src/mdio/segy/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Custom exceptions for SEG-Y."""


from mdio.exceptions import MDIOError


Expand Down
2 changes: 1 addition & 1 deletion src/mdio/segy/geometry.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""SEG-Y geometry handling functions."""


from __future__ import annotations

import logging
Expand Down Expand Up @@ -78,6 +77,7 @@ class ShotGunGeometryType(Enum):
Gun 2 -> 2------------------40
"""

A = auto()
B = auto()

Expand Down
1 change: 0 additions & 1 deletion src/mdio/segy/headers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""SEG-Y header abstractions."""


from collections import abc
from dataclasses import dataclass
from dataclasses import field
Expand Down
1 change: 0 additions & 1 deletion src/mdio/segy/headers_text.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Text header manipulation utilities."""


from __future__ import annotations

from typing import Sequence
Expand Down
1 change: 0 additions & 1 deletion src/mdio/segy/helpers_segy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Helper functions for tinkering with SEG-Y related Zarr."""


from zarr import Group
from zarr import open_group
from zarr.errors import ContainsGroupError
Expand Down
1 change: 0 additions & 1 deletion src/mdio/segy/ibm_float.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Low-level floating point conversion operations."""


import os

import numba as nb
Expand Down
1 change: 0 additions & 1 deletion src/mdio/segy/parsers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Parsers for sections of SEG-Y files."""


from __future__ import annotations

from concurrent.futures import ProcessPoolExecutor
Expand Down
1 change: 0 additions & 1 deletion src/mdio/segy/utilities.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""More utilities for reading SEG-Ys."""


from __future__ import annotations

from typing import Sequence
Expand Down
1 change: 0 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Test configuration before everything runs."""


from os import path
from urllib.request import urlretrieve

Expand Down
1 change: 0 additions & 1 deletion tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Test configuration before everything runs."""


from __future__ import annotations

import os
Expand Down
1 change: 0 additions & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Test cases for the __main__ module."""


from pathlib import Path

import pytest
Expand Down
1 change: 0 additions & 1 deletion tests/unit/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Extra configurations for unit tests."""


from __future__ import annotations

from datetime import datetime
Expand Down
1 change: 0 additions & 1 deletion tests/unit/test_dimension.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Dimension tests."""


import pytest

from mdio.core import Dimension
Expand Down
1 change: 0 additions & 1 deletion tests/unit/test_segy_grid_overrides.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Check grid overrides."""


from __future__ import annotations

from typing import Any
Expand Down
1 change: 0 additions & 1 deletion tests/unit/test_serialization.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Check lower-level serialization functions."""


from inspect import Parameter
from inspect import Signature

Expand Down
1 change: 0 additions & 1 deletion tests/unit/test_text_header.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Tests for lower level text header modules."""


import random
import string
from binascii import hexlify
Expand Down

0 comments on commit 0d9f993

Please sign in to comment.