Skip to content

Commit

Permalink
Merge pull request #2858 from h-mayorquin/binary_iec_prefixes
Browse files Browse the repository at this point in the history
Add binary sufixes to job tools memory specification (e.g. 1GiB vs 1GB)
  • Loading branch information
samuelgarcia committed May 21, 2024
2 parents 35d0edd + 4ff55ec commit 6bb7567
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 31 deletions.
46 changes: 36 additions & 10 deletions src/spikeinterface/core/core_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,6 @@
from math import prod

import numpy as np
from tqdm import tqdm

from .job_tools import (
ensure_chunk_size,
ensure_n_jobs,
divide_segment_into_chunks,
fix_job_kwargs,
ChunkRecordingExecutor,
_shared_job_kwargs_doc,
)


def define_function_from_class(source_class, name):
Expand Down Expand Up @@ -447,6 +437,42 @@ def convert_bytes_to_str(byte_value: int) -> str:
return f"{byte_value:.2f} {suffixes[i]}"


_exponents = {
"k": 1e3,
"M": 1e6,
"G": 1e9,
"T": 1e12,
"P": 1e15, # Decimal (SI) prefixes
"Ki": 1024**1,
"Mi": 1024**2,
"Gi": 1024**3,
"Ti": 1024**4,
"Pi": 1024**5, # Binary (IEC) prefixes
}


def convert_string_to_bytes(memory_string: str) -> int:
"""
Convert a memory size string to the corresponding number of bytes.
Parameters:
mem (str): Memory size string (e.g., "1G", "512Mi", "2T").
Returns:
int: Number of bytes.
"""
if memory_string[-2:] in _exponents:
suffix = memory_string[-2:]
mem_value = memory_string[:-2]
else:
suffix = memory_string[-1]
mem_value = memory_string[:-1]

assert suffix in _exponents, f"Unknown suffix: {suffix}"
bytes_value = int(float(mem_value) * _exponents[suffix])
return bytes_value


def is_editable_mode() -> bool:
"""
Check if spikeinterface is installed in editable mode
Expand Down
24 changes: 8 additions & 16 deletions src/spikeinterface/core/job_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
import platform
import os
import warnings
from spikeinterface.core.core_tools import convert_string_to_bytes

import sys
import contextlib
from tqdm.auto import tqdm

from concurrent.futures import ProcessPoolExecutor
Expand All @@ -23,7 +23,7 @@
- chunk_size: int
Number of samples per chunk
- chunk_memory: str
Memory usage for each job (e.g. "100M", "1G")
Memory usage for each job (e.g. "100M", "1G", "500MiB", "2GiB")
- total_memory: str
Total memory usage (e.g. "500M", "2G")
- chunk_duration : str or float or None
Expand Down Expand Up @@ -149,16 +149,6 @@ def divide_recording_into_chunks(recording, chunk_size):
return all_chunks


_exponents = {"k": 1e3, "M": 1e6, "G": 1e9}


def _mem_to_int(mem):
suffix = mem[-1]
assert suffix in _exponents
mem = int(float(mem[:-1]) * _exponents[suffix])
return mem


def ensure_n_jobs(recording, n_jobs=1):
if n_jobs == -1:
n_jobs = os.cpu_count()
Expand Down Expand Up @@ -206,9 +196,11 @@ def ensure_chunk_size(
chunk_size: int or None
size for one chunk per job
chunk_memory: str or None
must end with "k", "M" or "G"
must end with "k", "M", "G", etc for decimal units and "ki", "Mi", "Gi", etc for
binary units. (e.g. "1k", "500M", "2G", "1ki", "500Mi", "2Gi")
total_memory: str or None
must end with "k", "M" or "G"
must end with "k", "M", "G", etc for decimal units and "ki", "Mi", "Gi", etc for
binary units. (e.g. "1k", "500M", "2G", "1ki", "500Mi", "2Gi")
chunk_duration: None or float or str
Units are second if float.
If str then the str must contain units(e.g. "1s", "500ms")
Expand All @@ -219,14 +211,14 @@ def ensure_chunk_size(
elif chunk_memory is not None:
assert total_memory is None
# set by memory per worker size
chunk_memory = _mem_to_int(chunk_memory)
chunk_memory = convert_string_to_bytes(chunk_memory)
n_bytes = np.dtype(recording.get_dtype()).itemsize
num_channels = recording.get_num_channels()
chunk_size = int(chunk_memory / (num_channels * n_bytes))
elif total_memory is not None:
# clip by total memory size
n_jobs = ensure_n_jobs(recording, n_jobs=n_jobs)
total_memory = _mem_to_int(total_memory)
total_memory = convert_string_to_bytes(total_memory)
n_bytes = np.dtype(recording.get_dtype()).itemsize
num_channels = recording.get_num_channels()
chunk_size = int(total_memory / (num_channels * n_bytes * n_jobs))
Expand Down
37 changes: 32 additions & 5 deletions src/spikeinterface/core/tests/test_core_tools.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import platform
import math
from multiprocessing.shared_memory import SharedMemory
from pathlib import Path
import importlib
import pytest
import numpy as np

Expand All @@ -12,10 +10,8 @@
make_paths_absolute,
check_paths_relative,
normal_pdf,
convert_string_to_bytes,
)
from spikeinterface.core.binaryrecordingextractor import BinaryRecordingExtractor
from spikeinterface.core.generate import NoiseGeneratorRecording
from spikeinterface.core.numpyextractors import NumpySorting


if hasattr(pytest, "global_test_folder"):
Expand Down Expand Up @@ -88,6 +84,37 @@ def test_path_utils_functions():
# UNC can be relative to the same UNC
assert check_paths_relative(d, r"\\host\share")

def test_convert_string_to_bytes():
# Test SI prefixes
assert convert_string_to_bytes("1k") == 1000
assert convert_string_to_bytes("1M") == 1000000
assert convert_string_to_bytes("1G") == 1000000000
assert convert_string_to_bytes("1T") == 1000000000000
assert convert_string_to_bytes("1P") == 1000000000000000
# Test IEC prefixes
assert convert_string_to_bytes("1Ki") == 1024
assert convert_string_to_bytes("1Mi") == 1048576
assert convert_string_to_bytes("1Gi") == 1073741824
assert convert_string_to_bytes("1Ti") == 1099511627776
assert convert_string_to_bytes("1Pi") == 1125899906842624
# Test mixed values
assert convert_string_to_bytes("1.5k") == 1500
assert convert_string_to_bytes("2.5M") == 2500000
assert convert_string_to_bytes("0.5G") == 500000000
assert convert_string_to_bytes("1.2T") == 1200000000000
assert convert_string_to_bytes("1.5Pi") == 1688849860263936
# Test zero values
assert convert_string_to_bytes("0k") == 0
assert convert_string_to_bytes("0Ki") == 0
# Test invalid inputs (should raise assertion error)
with pytest.raises(AssertionError) as e:
convert_string_to_bytes("1Z")
assert str(e.value) == "Unknown suffix: Z"

with pytest.raises(AssertionError) as e:
convert_string_to_bytes("1Xi")
assert str(e.value) == "Unknown suffix: Xi"


def test_normal_pdf() -> None:
mu = 4.160771
Expand Down

0 comments on commit 6bb7567

Please sign in to comment.