Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y

**Bug Fixes**

* ``Model.add_variables``: 0.7.0 made ``coords`` (dims, order, and values) the source of truth for ``DataArray`` bounds; this release closes the two remaining gaps. Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 <https://github.com/PyPSA/linopy/issues/709>`__), and the variable's dimension order always follows ``coords`` regardless of bound type (`#706 <https://github.com/PyPSA/linopy/issues/706>`__).
* ``Model.add_variables``: 0.7.0 made ``coords`` (dims, order, and values) the source of truth for ``DataArray`` bounds; this release closes the two remaining gaps. Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 <https://github.com/PyPSA/linopy/issues/709>`__), and the variable's dimension order always follows ``coords`` regardless of bound type (`#706 <https://github.com/PyPSA/linopy/issues/706>`__). Bare-tuple coord entries (``coords=[(0, 1, 2)]``) now behave like lists.
* ``add_variables`` / ``add_constraints``: the same rule now applies to ``mask`` — pandas ``Series`` / ``DataFrame`` masks missing a dimension are broadcast to the variable/constraint shape. As previously announced via ``FutureWarning``, masks whose coordinates are a sparse subset of the data's coordinates now raise ``ValueError`` rather than silently filling missing entries with ``False``; masks with dimensions not in the data raise ``ValueError`` instead of ``AssertionError``.
* ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes.
* SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 <https://github.com/PyPSA/linopy/issues/688>`__; pass ``reformulate_sos=True`` as a workaround.
Expand Down
99 changes: 54 additions & 45 deletions linopy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,11 @@ def as_dataarray(
if coords is None:
return _as_dataarray_lax(arr, coords, dims, **kwargs)

if isinstance(coords, list | tuple) and any(isinstance(c, tuple) for c in coords):
# xarray reads bare `(a, b)` as `(dim_name, values)`; normalize so a
# coords entry passed as a tuple behaves identically to a list.
coords = [list(c) if isinstance(c, tuple) else c for c in coords]

expected = _coords_to_dict(coords, dims=dims)
if not expected:
return _as_dataarray_lax(arr, coords, dims, **kwargs)
Expand Down Expand Up @@ -478,8 +483,11 @@ def align_to_coords(
Used by :meth:`~linopy.model.Model.add_variables` for ``lower``, ``upper``,
and ``mask``, and by :meth:`~linopy.model.Model.add_constraints` for
``mask``. Raises :class:`ValueError` with a message that names ``label``
when conversion or validation fails.
when ``value`` cannot be aligned to ``coords``. Coords-parsing errors
propagate unchanged.
"""
if coords is not None:
_coords_to_dict(coords, dims=kwargs.get("dims"))
try:
da = as_dataarray(value, coords, **kwargs)
except TypeError as err:
Expand All @@ -497,21 +505,41 @@ def _coords_to_dict(
"""
Normalize coords to a dict mapping dim names to coordinate values.

For ``xarray.Coordinates`` (and ``DataArray.coords``), only entries
that are actual dimensions are kept; derived MultiIndex level coords
are dropped here and re-attached by xarray downstream. Plain mappings
are returned as-is. For sequence inputs, entries must be ``pd.Index``
(named or not) or unnamed sequences (``list`` / ``tuple`` / ``range``
/ ``np.ndarray``). A ``pd.MultiIndex`` must have ``.name`` set —
xarray requires a single dimension name for the flattened index.
Other types — notably ``xarray.DataArray`` — raise ``TypeError``
rather than being silently dropped: callers should convert via
``variable.indexes[<dim>]`` (or ``pd.Index(...)``) first.

Unnamed sequence entries (or unnamed ``pd.Index``) gain a dim name
from ``dims`` by position when ``dims`` is provided, so callers that
pass ``coords=[[1, 2, 3]], dims=["x"]`` get the same strict
enforcement as ``coords={"x": [1, 2, 3]}``.
Container forms:

- ``xarray.Coordinates`` → kept dim entries only (MultiIndex level
coords dropped).
- ``Mapping`` → returned as a shallow ``dict`` copy.
- sequence-of-entries → each entry handled per the rules below.

Sequence-entry rules (``i`` is the position in ``coords``, ``dims[i]``
is the matching entry in ``dims`` when one exists). An entry is
*unlabeled* if it's an unnamed ``pd.Index`` or a bare ``list`` /
``tuple`` / ``range`` / ``ndarray``.

+---------------------------------+-----------------------+-----------+
| Entry | Naming source | Outcome |
+=================================+=======================+===========+
| ``pd.Index`` with ``.name`` | ``.name`` | accepted |
+---------------------------------+-----------------------+-----------+
| unlabeled entry | ``dims[i]`` | accepted |
+---------------------------------+-----------------------+-----------+
| unlabeled entry | — (no ``dims[i]``) | skipped |
| | | — xarray |
| | | assigns |
| | | ``dim_0`` |
| | | etc. |
+---------------------------------+-----------------------+-----------+
| ``pd.MultiIndex`` with ``.name``| ``.name`` | accepted |
+---------------------------------+-----------------------+-----------+
| ``pd.MultiIndex`` w/o ``.name`` | ``dims[i]`` | accepted |
| | | (named on |
| | | a copy) |
+---------------------------------+-----------------------+-----------+
| ``pd.MultiIndex`` w/o ``.name`` | — (no ``dims[i]``) | TypeError |
+---------------------------------+-----------------------+-----------+
| anything else (e.g. DataArray) | — | TypeError |
+---------------------------------+-----------------------+-----------+
"""
if isinstance(coords, Coordinates):
# Coordinates iterates over every coord variable, including
Expand All @@ -525,13 +553,20 @@ def _coords_to_dict(
result: dict[Hashable, Any] = {}
for i, c in enumerate(coords):
if isinstance(c, pd.MultiIndex):
if not c.name:
name = c.name or (
dim_names[i] if dim_names and i < len(dim_names) else None
)
if name is None:
raise TypeError(
"MultiIndex coords entries must have .name set so "
"xarray can use it as the dimension name. Set it via "
"`idx.name = 'my_dim'` before passing to coords."
"`idx.name = 'my_dim'`, or pass `dims=[...]` to name "
"entries by position."
)
result[c.name] = c
if c.name is None:
c = c.copy()
c.name = name
result[name] = c
elif isinstance(c, pd.Index):
name = (
c.name
Expand Down Expand Up @@ -577,32 +612,6 @@ def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | Non
return arr.to_xarray()


def broadcast_mask(mask: DataArray, labels: DataArray) -> DataArray:
"""
Broadcast a boolean mask to match the shape of labels.

Ensures that mask dimensions are a subset of labels dimensions, broadcasts
the mask accordingly, and fills any NaN values (from missing coordinates)
with False while emitting a FutureWarning.
"""
assert set(mask.dims).issubset(labels.dims), (
"Dimensions of mask not a subset of resulting labels dimensions."
)
mask = mask.broadcast_like(labels)
if mask.isnull().any():
warn(
"Mask contains coordinates not covered by the data dimensions. "
"Missing values will be filled with False (masked out). "
"In a future version, this will raise an error. "
"Use mask.reindex() or `linopy.align()` to explicitly handle missing "
"coordinates.",
FutureWarning,
stacklevel=3,
)
mask = mask.fillna(False).astype(bool)
return mask


# TODO: rename to to_pandas_dataframe
def to_dataframe(
ds: Dataset,
Expand Down
119 changes: 119 additions & 0 deletions test/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

from collections.abc import Callable
from typing import Any

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -560,6 +561,124 @@ def test_align_to_coords_preserves_type_errors() -> None:
align_to_coords(lambda x: x, {"x": [0, 1, 2]}, label="lower bound")


def test_align_to_coords_does_not_relabel_coords_errors() -> None:
"""Coords-side TypeError carries its own message, not the value label."""
mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"])
with pytest.raises(TypeError, match=r"MultiIndex.*must have \.name set"):
align_to_coords(np.array([1, 2, 3, 4]), [mi], label="lower bound")


class TestCoordsToDictRules:
"""
One test per row of the ``_coords_to_dict`` rules table.

Each test name states the rule it pins; the assertions show the
expected outcome. Together they form the executable spec of how
sequence-form ``coords`` entries are named.
"""

@staticmethod
def _parse(coords: Any, dims: Any = None) -> dict:
from linopy.common import _coords_to_dict

return _coords_to_dict(coords, dims=dims)

# -- container forms ---------------------------------------------------

def test_mapping_is_returned_as_shallow_dict_copy(self) -> None:
src = {"x": [0, 1, 2], "y": [10, 20]}
result = self._parse(src)
assert result == src
assert result is not src

def test_xarray_coordinates_keeps_only_dim_entries(self) -> None:
midx = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"])
coords = xr.Coordinates.from_pandas_multiindex(midx, "stacked")
result = self._parse(coords)
assert set(result) == {"stacked"}

# -- pd.Index entries --------------------------------------------------

def test_named_pd_index_uses_its_name(self) -> None:
result = self._parse([pd.Index([0, 1, 2], name="x")])
assert set(result) == {"x"}

def test_unnamed_pd_index_with_dims_uses_dims(self) -> None:
result = self._parse([pd.Index([0, 1, 2])], dims=["x"])
assert set(result) == {"x"}

def test_unnamed_pd_index_without_dims_is_size_only(self) -> None:
# Same as a bare sequence: contributes no dim name; xarray assigns
# ``dim_0`` downstream.
assert self._parse([pd.Index([0, 1, 2])]) == {}
m = Model()
v = m.add_variables(coords=[pd.Index([0, 1, 2])])
assert v.dims == ("dim_0",)

# -- pd.MultiIndex entries --------------------------------------------

def test_named_multiindex_uses_its_name(self) -> None:
mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"])
mi.name = "multi"
result = self._parse([mi])
assert set(result) == {"multi"}

def test_unnamed_multiindex_with_dims_uses_dims(self) -> None:
mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"])
result = self._parse([mi], dims=["multi"])
assert set(result) == {"multi"}
assert result["multi"].name == "multi"
assert mi.name is None # caller's MultiIndex not mutated

def test_unnamed_multiindex_without_dims_raises(self) -> None:
mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"])
with pytest.raises(TypeError, match=r"MultiIndex.*must have \.name set"):
self._parse([mi])

# -- bare sequence entries --------------------------------------------

@pytest.mark.parametrize(
"entry",
[[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])],
ids=["list", "tuple", "range", "ndarray"],
)
def test_bare_sequence_with_dims_uses_dims(self, entry: Any) -> None:
result = self._parse([entry], dims=["x"])
assert set(result) == {"x"}

@pytest.mark.parametrize(
"entry",
[[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])],
ids=["list", "tuple", "range", "ndarray"],
)
def test_bare_sequence_without_dims_is_silently_skipped(self, entry: Any) -> None:
assert self._parse([entry]) == {}

@pytest.mark.parametrize(
"entry",
[[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])],
ids=["list", "tuple", "range", "ndarray"],
)
def test_bare_sequence_without_dims_falls_through_to_xarray_dim_0(
self, entry: Any
) -> None:
m = Model()
v = m.add_variables(coords=[entry])
assert v.dims == ("dim_0",)

# -- unsupported entries ----------------------------------------------

def test_dataarray_entry_raises(self) -> None:
with pytest.raises(TypeError, match=r"coords entries must be pd\.Index"):
self._parse([DataArray([0, 1, 2], dims=["x"])])

def test_unknown_type_entry_raises(self) -> None:
class Foo: ...

with pytest.raises(TypeError, match=r"coords entries must be pd\.Index"):
self._parse([Foo()])


def test_best_int() -> None:
# Test for int8
assert best_int(127) == np.int8
Expand Down
5 changes: 2 additions & 3 deletions test/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,11 +499,10 @@ def test_positional_bound_wrong_size_raises_clear_error(
with pytest.raises(ValueError, match=r"upper bound could not be aligned"):
model.add_variables(upper=pd.Series([1, 2]), coords=coords, name="s_bad")

def test_unnamed_coords_short_circuit(self, model: "Model") -> None:
"""Coords as a list of unnamed indexes leaves the bound unchanged."""
def test_unnamed_pd_index_is_size_only(self, model: "Model") -> None:
bound = DataArray([1, 2, 3], dims=["dim_0"])
var = model.add_variables(upper=bound, coords=[pd.Index([0, 1, 2])], name="x")
assert (var.data.upper == [1, 2, 3]).all()
assert (var.upper == [1, 2, 3]).all()

# -- Broadcasting missing dims -----------------------------------------

Expand Down