diff --git a/petab/v2/C.py b/petab/v2/C.py index fc1f6fa2..99abf343 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -145,10 +145,14 @@ OBSERVABLE_NAME = "observableName" #: Observable formula column in the observable table OBSERVABLE_FORMULA = "observableFormula" +#: Observable placeholders column in the observable table +OBSERVABLE_PLACEHOLDERS = "observablePlaceholders" #: Noise formula column in the observable table NOISE_FORMULA = "noiseFormula" #: Noise distribution column in the observable table NOISE_DISTRIBUTION = "noiseDistribution" +#: Noise placeholders column in the observable table +NOISE_PLACEHOLDERS = "noisePlaceholders" #: Mandatory columns of observable table OBSERVABLE_DF_REQUIRED_COLS = [ diff --git a/petab/v2/core.py b/petab/v2/core.py index 1ee74ace..a847b196 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -2,12 +2,11 @@ from __future__ import annotations -import re from collections.abc import Sequence from enum import Enum from itertools import chain from pathlib import Path -from typing import Annotated, Literal +from typing import Annotated import numpy as np import pandas as pd @@ -192,6 +191,14 @@ class Observable(BaseModel): noise_distribution: NoiseDistribution = Field( alias=C.NOISE_DISTRIBUTION, default=NoiseDistribution.NORMAL ) + #: Placeholder symbols for the observable formula. + observable_placeholders: list[sp.Symbol] = Field( + alias=C.OBSERVABLE_PLACEHOLDERS, default=[] + ) + #: Placeholder symbols for the noise formula. 
@field_validator(
    "observable_placeholders", "noise_placeholders", mode="before"
)
@classmethod
def _sympify_id_list(cls, v):
    """Convert raw placeholder input into a list of sympified IDs.

    Registered as a ``mode="before"`` validator for the
    ``observable_placeholders`` and ``noise_placeholders`` fields.

    :param v: One of

        * ``None`` or a float NaN: yields an empty list,
        * a string of IDs joined by ``C.PARAMETER_SEPARATOR``,
        * a sequence of IDs,
        * a single non-sequence ID.

        An individual ID may be a string or any object whose ``str()`` is
        the ID (e.g. an ``sp.Symbol``).
    :return: A list with one ``sympify_petab`` result per non-empty ID, in
        input order. Every ID is passed through ``_valid_petab_id`` first.
    """
    if v is None:
        return []

    if isinstance(v, float) and np.isnan(v):
        return []

    if isinstance(v, str):
        v = v.split(C.PARAMETER_SEPARATOR)
    elif not isinstance(v, Sequence):
        v = [v]

    # Items are not guaranteed to be strings: the field is declared as a
    # list of symbols, so symbols may be passed in directly. Going through
    # ``str()`` avoids an AttributeError from calling ``.strip()`` on them.
    ids = (str(pid).strip() for pid in v)
    # Empty entries (e.g. from a trailing separator) are dropped.
    return [sympify_petab(_valid_petab_id(pid)) for pid in ids if pid]
def extract_placeholders(row: pd.Series, type_: str) -> str:
    """Extract the placeholder parameter IDs used in a formula of ``row``.

    Placeholders are the free symbols of the formula that are named
    ``<type_>Parameter<N>_<observableId>``.

    :param row: A row of the observable table; must provide the observable
        ID and the formula column selected by ``type_``.
    :param type_: ``"observable"`` to inspect the observable formula,
        ``"noise"`` to inspect the noise formula.
    :return: The placeholder IDs joined by ``v2.C.PARAMETER_SEPARATOR`` in
        ascending order of ``<N>``; an empty string if the formula is
        missing or contains no placeholders.
    :raises ValueError: If ``type_`` is neither ``"observable"`` nor
        ``"noise"``.
    """
    if type_ == "observable":
        formula = row[v1.C.OBSERVABLE_FORMULA]
    elif type_ == "noise":
        formula = row[v1.C.NOISE_FORMULA]
    else:
        raise ValueError(f"Unknown placeholder type: {type_}")

    if pd.isna(formula):
        return ""

    prefix = f"{re.escape(type_)}Parameter"
    suffix = re.escape(row[v1.C.OBSERVABLE_ID])
    # The whole symbol name has to match; the group captures the index.
    pattern = re.compile(rf"{prefix}(\d+)_{suffix}")

    indexed_ids = []
    for sym in sympify_petab(formula).free_symbols:
        if not sym.is_Symbol:
            continue
        name = str(sym)
        match = pattern.fullmatch(name)
        if match is not None:
            indexed_ids.append((int(match.group(1)), name))

    # Sort by the numeric index rather than lexicographically. Otherwise
    # "...Parameter10_..." would come before "...Parameter2_..." and the
    # order would no longer line up with the overrides in the measurement
    # table.
    indexed_ids.sort()
    return v2.C.PARAMETER_SEPARATOR.join(name for _, name in indexed_ids)