Skip to content
Open
1 change: 1 addition & 0 deletions doc/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y

* ``add_variables`` / ``add_constraints``: extends 0.7.0's coords-as-truth rule to ``lower``, ``upper`` and ``mask`` for every bound type and dim order. Pandas ``Series`` / ``DataFrame`` bounds or masks missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 <https://github.com/PyPSA/linopy/issues/709>`__); the variable's dimension order always follows ``coords`` (`#706 <https://github.com/PyPSA/linopy/issues/706>`__); bare-tuple coord entries (``coords=[(0, 1, 2)]``) now behave like lists. Mismatched values or extra dims raise ``ValueError`` with a labelled message; sparse-coord masks (formerly a v0.6.3 ``FutureWarning``, #580) raise ``ValueError``, and masks with dims not in the data raise ``ValueError`` instead of ``AssertionError``.
* Pandas inputs whose index names *levels* of a stacked-``MultiIndex`` ``coords`` dimension are now projected onto that dimension: a level subset broadcasts across the others, the full set aligns element-wise. This fixes PyPSA multi-investment arithmetic (e.g. an expression over a ``(period, timestep)`` ``snapshot`` MultiIndex times a ``period``-indexed weighting). In ``add_variables`` / ``add_constraints`` the input must provide a value for every level combination of the MultiIndex or a ``ValueError`` is raised (the error lists the missing combinations). **Implicit level projections are deprecated**: they emit an ``EvolvingAPIWarning`` everywhere — in arithmetic *and* in ``add_variables`` / ``add_constraints`` — and will raise under the upcoming v1 convention. Project the input onto the dimension explicitly (select with the dimension's level values) to keep current behavior. Aligning the full level set with full coverage stays silent. Strict validation also rejects a ``MultiIndex`` input with *unnamed* levels whose combinations don't match ``coords`` (previously a silent bypass, as such inputs can't be projected by level name).
* ``LinearExpression.groupby`` now accepts a **non-dimension** coordinate as the key -- by name (``expr.groupby("period").sum()``, where ``period`` labels another dimension) or as the coordinate ``DataArray`` -- which previously raised ``ValueError: ... already exists``. Grouping by a dimension or a ``MultiIndex`` level already worked (`#750 <https://github.com/PyPSA/linopy/issues/750>`__).
* ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes.
* SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 <https://github.com/PyPSA/linopy/issues/688>`__; pass ``reformulate_sos=True`` as a workaround.
* ``Model.solve(..., reformulate_sos=True)`` now actually reformulates SOS constraints even when the solver supports them natively. Previously the option was ignored in that case and only a warning was emitted.
Expand Down
86 changes: 73 additions & 13 deletions linopy/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,22 @@ def _expr_unwrap(
logger = logging.getLogger(__name__)


def _resolve_group(group: Any, data: Dataset) -> Any:
"""
Normalize a groupby key.

Unwrap a single-element key list to the scalar key, and resolve a string
naming a coordinate to that coordinate -- so ``groupby("name")`` behaves
like ``groupby(data["name"])``, mirroring xarray. Other inputs (Series,
DataFrame, DataArray, multi-key lists) are returned unchanged.
"""
if isinstance(group, (list, tuple)) and len(group) == 1:
group = group[0]
if isinstance(group, str) and group in data.coords:
group = data[group]
return group


@dataclass
@forward_as_properties(groupby=["dims", "groups"])
class LinearExpressionGroupby:
Expand All @@ -158,17 +174,25 @@ def groupby(self) -> xarray.core.groupby.DatasetGroupBy:
xarray.core.groupby.DataArrayGroupBy
The groupby object.
"""
if isinstance(self.group, pd.DataFrame):
data = self.data
group = _resolve_group(self.group, data)

if isinstance(group, pd.DataFrame):
raise ValueError(
"Grouping by a DataFrame only supported for `sum` operation with `use_fallback=False`."
)
if isinstance(self.group, pd.Series):
group_name = self.group.name or "group"
group = DataArray(self.group, name=group_name)
else:
group = self.group # type: ignore
if isinstance(group, pd.Series):
group = DataArray(group, name=group.name or "group")

# detach an attached free coordinate (never an indexed/level coord)
if (
isinstance(group, DataArray)
and group.name in set(data.coords) - set(data.dims)
and group.name not in data.xindexes
):
data = data.drop_vars([group.name])

return self.data.groupby(group=group, **self.kwargs)
return data.groupby(group=group, **self.kwargs)

def map(
self,
Expand Down Expand Up @@ -226,9 +250,24 @@ def sum(self, use_fallback: bool = False, **kwargs: Any) -> LinearExpression:
LinearExpression
The sum of the groupby object.
"""
group = _resolve_group(self.group, self.data)

# a list of coord names rides the fast path, then unstacks to one dim per key
unstack_multikey = False
if (
not use_fallback
and isinstance(group, (list, tuple))
and len(group) > 1
and all(isinstance(g, str) and g in self.data.coords for g in group)
):
coord_dims = {self.data[g].dims for g in group}
if len(coord_dims) == 1 and len(next(iter(coord_dims))) == 1:
names = list(group)
group = self.data[names].to_dataframe()[names]
unstack_multikey = True

non_fallback_types = (pd.Series, pd.DataFrame, xr.DataArray)
if isinstance(self.group, non_fallback_types) and not use_fallback:
group: pd.Series | pd.DataFrame | xr.DataArray = self.group
if isinstance(group, non_fallback_types) and not use_fallback:
if isinstance(group, pd.DataFrame):
# dataframes do not have a name, so we need to set it
final_group_name = "group"
Expand All @@ -254,10 +293,12 @@ def sum(self, use_fallback: bool = False, **kwargs: Any) -> LinearExpression:
arrays = [group, group.groupby(group).cumcount()]
idx = pd.MultiIndex.from_arrays(arrays, names=[GROUP_DIM, GROUPED_TERM_DIM])
new_coords = Coordinates.from_pandas_multiindex(idx, group_dim)
coords = self.data.indexes[group_dim]
names_to_drop = [coords.name]
if isinstance(coords, pd.MultiIndex):
names_to_drop += list(coords.names)
# collapsing group_dim invalidates every coordinate aligned to it
names_to_drop = [
name
for name, coord in self.data.coords.items()
if group_dim in coord.dims
]
ds = self.data.drop_vars(names_to_drop).assign_coords(new_coords)
ds = ds.unstack(group_dim, fill_value=LinearExpression._fill_value)
ds = LinearExpression._sum(ds, dim=GROUPED_TERM_DIM)
Expand All @@ -270,6 +311,25 @@ def sum(self, use_fallback: bool = False, **kwargs: Any) -> LinearExpression:
ds = ds.assign_coords(new_coords)

ds = ds.rename({GROUP_DIM: final_group_name})
if unstack_multikey:
# warn before allocating the grid when most cells would be fill
mi = ds.indexes[final_group_name].remove_unused_levels()
observed = len(mi)
grid = int(np.prod([len(level) for level in mi.levels]))
if grid > 2 * observed and grid - observed > 10_000:
warn(
f"Grouping a LinearExpression by {names} produces a dense "
f"{grid:,}-cell grid, but only {observed:,} of those "
f"combinations occur -- the {grid - observed:,} absent ones "
f"are materialised as fill values. Group by a `pd.DataFrame` "
f"of these keys instead to keep the result compact over only "
f"the observed combinations.",
UserWarning,
stacklevel=2,
)
ds = ds.unstack(
final_group_name, fill_value=LinearExpression._fill_value
)
return LinearExpression(ds, self.model)

def func(ds: Dataset) -> Dataset:
Expand Down
Loading
Loading