diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 7dd6dd3c..5b524c7d 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -52,7 +52,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Bug Fixes** -* ``Model.add_variables``: 0.7.0 made ``coords`` (dims, order, and values) the source of truth for ``DataArray`` bounds; this release closes the two remaining gaps. Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 `__), and the variable's dimension order always follows ``coords`` regardless of bound type (`#706 `__). +* ``Model.add_variables``: 0.7.0 made ``coords`` (dims, order, and values) the source of truth for ``DataArray`` bounds; this release closes the two remaining gaps. Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 `__), and the variable's dimension order always follows ``coords`` regardless of bound type (`#706 `__). Bare-tuple coord entries (``coords=[(0, 1, 2)]``) now behave like lists. * ``add_variables`` / ``add_constraints``: the same rule now applies to ``mask`` — pandas ``Series`` / ``DataFrame`` masks missing a dimension are broadcast to the variable/constraint shape. As previously announced via ``FutureWarning``, masks whose coordinates are a sparse subset of the data's coordinates now raise ``ValueError`` rather than silently filling missing entries with ``False``; masks with dimensions not in the data raise ``ValueError`` instead of ``AssertionError``. * ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes. * SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 `__; pass ``reformulate_sos=True`` as a workaround. diff --git a/linopy/common.py b/linopy/common.py index fff0f9ce..b0e7a75d 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -318,6 +318,11 @@ def as_dataarray( if coords is None: return _as_dataarray_lax(arr, coords, dims, **kwargs) + if isinstance(coords, list | tuple) and any(isinstance(c, tuple) for c in coords): + # xarray reads bare `(a, b)` as `(dim_name, values)`; normalize so a + # coords entry passed as a tuple behaves identically to a list. + coords = [list(c) if isinstance(c, tuple) else c for c in coords] + expected = _coords_to_dict(coords, dims=dims) if not expected: return _as_dataarray_lax(arr, coords, dims, **kwargs) @@ -478,8 +483,11 @@ def align_to_coords( Used by :meth:`~linopy.model.Model.add_variables` for ``lower``, ``upper``, and ``mask``, and by :meth:`~linopy.model.Model.add_constraints` for ``mask``. Raises :class:`ValueError` with a message that names ``label`` - when conversion or validation fails. + when ``value`` cannot be aligned to ``coords``. Coords-parsing errors + propagate unchanged. """ + if coords is not None: + _coords_to_dict(coords, dims=kwargs.get("dims")) try: da = as_dataarray(value, coords, **kwargs) except TypeError as err: @@ -497,21 +505,41 @@ def _coords_to_dict( """ Normalize coords to a dict mapping dim names to coordinate values. - For ``xarray.Coordinates`` (and ``DataArray.coords``), only entries - that are actual dimensions are kept; derived MultiIndex level coords - are dropped here and re-attached by xarray downstream. Plain mappings - are returned as-is. For sequence inputs, entries must be ``pd.Index`` - (named or not) or unnamed sequences (``list`` / ``tuple`` / ``range`` - / ``np.ndarray``). A ``pd.MultiIndex`` must have ``.name`` set — - xarray requires a single dimension name for the flattened index. - Other types — notably ``xarray.DataArray`` — raise ``TypeError`` - rather than being silently dropped: callers should convert via - ``variable.indexes[]`` (or ``pd.Index(...)``) first. - - Unnamed sequence entries (or unnamed ``pd.Index``) gain a dim name - from ``dims`` by position when ``dims`` is provided, so callers that - pass ``coords=[[1, 2, 3]], dims=["x"]`` get the same strict - enforcement as ``coords={"x": [1, 2, 3]}``. + Container forms: + + - ``xarray.Coordinates`` → kept dim entries only (MultiIndex level + coords dropped). + - ``Mapping`` → returned as a shallow ``dict`` copy. + - sequence-of-entries → each entry handled per the rules below. + + Sequence-entry rules (``i`` is the position in ``coords``, ``dims[i]`` + is the matching entry in ``dims`` when one exists). An entry is + *unlabeled* if it's an unnamed ``pd.Index`` or a bare ``list`` / + ``tuple`` / ``range`` / ``ndarray``. + + +---------------------------------+-----------------------+-----------+ + | Entry | Naming source | Outcome | + +=================================+=======================+===========+ + | ``pd.Index`` with ``.name`` | ``.name`` | accepted | + +---------------------------------+-----------------------+-----------+ + | unlabeled entry | ``dims[i]`` | accepted | + +---------------------------------+-----------------------+-----------+ + | unlabeled entry | — (no ``dims[i]``) | skipped | + | | | — xarray | + | | | assigns | + | | | ``dim_0`` | + | | | etc. | + +---------------------------------+-----------------------+-----------+ + | ``pd.MultiIndex`` with ``.name``| ``.name`` | accepted | + +---------------------------------+-----------------------+-----------+ + | ``pd.MultiIndex`` w/o ``.name`` | ``dims[i]`` | accepted | + | | | (named on | + | | | a copy) | + +---------------------------------+-----------------------+-----------+ + | ``pd.MultiIndex`` w/o ``.name`` | — (no ``dims[i]``) | TypeError | + +---------------------------------+-----------------------+-----------+ + | anything else (e.g. DataArray) | — | TypeError | + +---------------------------------+-----------------------+-----------+ """ if isinstance(coords, Coordinates): # Coordinates iterates over every coord variable, including @@ -525,13 +553,20 @@ def _coords_to_dict( result: dict[Hashable, Any] = {} for i, c in enumerate(coords): if isinstance(c, pd.MultiIndex): - if not c.name: + name = c.name or ( + dim_names[i] if dim_names and i < len(dim_names) else None + ) + if name is None: raise TypeError( "MultiIndex coords entries must have .name set so " "xarray can use it as the dimension name. Set it via " - "`idx.name = 'my_dim'` before passing to coords." + "`idx.name = 'my_dim'`, or pass `dims=[...]` to name " + "entries by position." ) - result[c.name] = c + if c.name is None: + c = c.copy() + c.name = name + result[name] = c elif isinstance(c, pd.Index): name = ( c.name @@ -577,32 +612,6 @@ def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | Non return arr.to_xarray() -def broadcast_mask(mask: DataArray, labels: DataArray) -> DataArray: - """ - Broadcast a boolean mask to match the shape of labels. - - Ensures that mask dimensions are a subset of labels dimensions, broadcasts - the mask accordingly, and fills any NaN values (from missing coordinates) - with False while emitting a FutureWarning. - """ - assert set(mask.dims).issubset(labels.dims), ( - "Dimensions of mask not a subset of resulting labels dimensions." - ) - mask = mask.broadcast_like(labels) - if mask.isnull().any(): - warn( - "Mask contains coordinates not covered by the data dimensions. " - "Missing values will be filled with False (masked out). " - "In a future version, this will raise an error. " - "Use mask.reindex() or `linopy.align()` to explicitly handle missing " - "coordinates.", - FutureWarning, - stacklevel=3, - ) - mask = mask.fillna(False).astype(bool) - return mask - - # TODO: rename to to_pandas_dataframe def to_dataframe( ds: Dataset, diff --git a/test/test_common.py b/test/test_common.py index a9e84bf3..4cfb6e46 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -6,6 +6,7 @@ """ from collections.abc import Callable +from typing import Any import numpy as np import pandas as pd @@ -560,6 +561,124 @@ def test_align_to_coords_preserves_type_errors() -> None: align_to_coords(lambda x: x, {"x": [0, 1, 2]}, label="lower bound") +def test_align_to_coords_does_not_relabel_coords_errors() -> None: + """Coords-side TypeError carries its own message, not the value label.""" + mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + with pytest.raises(TypeError, match=r"MultiIndex.*must have \.name set"): + align_to_coords(np.array([1, 2, 3, 4]), [mi], label="lower bound") + + +class TestCoordsToDictRules: + """ + One test per row of the ``_coords_to_dict`` rules table. + + Each test name states the rule it pins; the assertions show the + expected outcome. Together they form the executable spec of how + sequence-form ``coords`` entries are named. + """ + + @staticmethod + def _parse(coords: Any, dims: Any = None) -> dict: + from linopy.common import _coords_to_dict + + return _coords_to_dict(coords, dims=dims) + + # -- container forms --------------------------------------------------- + + def test_mapping_is_returned_as_shallow_dict_copy(self) -> None: + src = {"x": [0, 1, 2], "y": [10, 20]} + result = self._parse(src) + assert result == src + assert result is not src + + def test_xarray_coordinates_keeps_only_dim_entries(self) -> None: + midx = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + coords = xr.Coordinates.from_pandas_multiindex(midx, "stacked") + result = self._parse(coords) + assert set(result) == {"stacked"} + + # -- pd.Index entries -------------------------------------------------- + + def test_named_pd_index_uses_its_name(self) -> None: + result = self._parse([pd.Index([0, 1, 2], name="x")]) + assert set(result) == {"x"} + + def test_unnamed_pd_index_with_dims_uses_dims(self) -> None: + result = self._parse([pd.Index([0, 1, 2])], dims=["x"]) + assert set(result) == {"x"} + + def test_unnamed_pd_index_without_dims_is_size_only(self) -> None: + # Same as a bare sequence: contributes no dim name; xarray assigns + # ``dim_0`` downstream. + assert self._parse([pd.Index([0, 1, 2])]) == {} + m = Model() + v = m.add_variables(coords=[pd.Index([0, 1, 2])]) + assert v.dims == ("dim_0",) + + # -- pd.MultiIndex entries -------------------------------------------- + + def test_named_multiindex_uses_its_name(self) -> None: + mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + mi.name = "multi" + result = self._parse([mi]) + assert set(result) == {"multi"} + + def test_unnamed_multiindex_with_dims_uses_dims(self) -> None: + mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + result = self._parse([mi], dims=["multi"]) + assert set(result) == {"multi"} + assert result["multi"].name == "multi" + assert mi.name is None # caller's MultiIndex not mutated + + def test_unnamed_multiindex_without_dims_raises(self) -> None: + mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + with pytest.raises(TypeError, match=r"MultiIndex.*must have \.name set"): + self._parse([mi]) + + # -- bare sequence entries -------------------------------------------- + + @pytest.mark.parametrize( + "entry", + [[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])], + ids=["list", "tuple", "range", "ndarray"], + ) + def test_bare_sequence_with_dims_uses_dims(self, entry: Any) -> None: + result = self._parse([entry], dims=["x"]) + assert set(result) == {"x"} + + @pytest.mark.parametrize( + "entry", + [[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])], + ids=["list", "tuple", "range", "ndarray"], + ) + def test_bare_sequence_without_dims_is_silently_skipped(self, entry: Any) -> None: + assert self._parse([entry]) == {} + + @pytest.mark.parametrize( + "entry", + [[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])], + ids=["list", "tuple", "range", "ndarray"], + ) + def test_bare_sequence_without_dims_falls_through_to_xarray_dim_0( + self, entry: Any + ) -> None: + m = Model() + v = m.add_variables(coords=[entry]) + assert v.dims == ("dim_0",) + + # -- unsupported entries ---------------------------------------------- + + def test_dataarray_entry_raises(self) -> None: + with pytest.raises(TypeError, match=r"coords entries must be pd\.Index"): + self._parse([DataArray([0, 1, 2], dims=["x"])]) + + def test_unknown_type_entry_raises(self) -> None: + class Foo: ... + + with pytest.raises(TypeError, match=r"coords entries must be pd\.Index"): + self._parse([Foo()]) + + def test_best_int() -> None: # Test for int8 assert best_int(127) == np.int8 diff --git a/test/test_variable.py b/test/test_variable.py index 47c25986..4b89d318 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -499,11 +499,10 @@ def test_positional_bound_wrong_size_raises_clear_error( with pytest.raises(ValueError, match=r"upper bound could not be aligned"): model.add_variables(upper=pd.Series([1, 2]), coords=coords, name="s_bad") - def test_unnamed_coords_short_circuit(self, model: "Model") -> None: - """Coords as a list of unnamed indexes leaves the bound unchanged.""" + def test_unnamed_pd_index_is_size_only(self, model: "Model") -> None: bound = DataArray([1, 2, 3], dims=["dim_0"]) var = model.add_variables(upper=bound, coords=[pd.Index([0, 1, 2])], name="x") - assert (var.data.upper == [1, 2, 3]).all() + assert (var.upper == [1, 2, 3]).all() # -- Broadcasting missing dims -----------------------------------------