Skip to content

Commit

Permalink
fix Rolling for multi-index and reversed index. (pandas-dev#28297)
Browse files Browse the repository at this point in the history
  • Loading branch information
leftys authored and Blake Hawkins committed Oct 29, 2019
1 parent c4f0b5c commit 4feb8ac
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 15 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v1.0.0.rst
Expand Up @@ -372,8 +372,9 @@ Plotting
Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^

-
- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue: `28192`)
- Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue: `15584`).
- Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue: `19248`).
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
- Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`)
- Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`)
Expand Down
25 changes: 14 additions & 11 deletions pandas/core/window/rolling.py
Expand Up @@ -70,7 +70,7 @@ def __init__(
center: Optional[bool] = False,
win_type: Optional[str] = None,
axis: Axis = 0,
on: Optional[str] = None,
on: Optional[Union[str, Index]] = None,
closed: Optional[str] = None,
**kwargs
):
Expand Down Expand Up @@ -126,7 +126,7 @@ def _create_blocks(self):
obj = self._selected_obj

# filter out the on from the object
if self.on is not None:
if self.on is not None and not isinstance(self.on, Index):
if obj.ndim == 2:
obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False)
blocks = obj._to_dict_of_blocks(copy=False).values()
Expand Down Expand Up @@ -637,10 +637,10 @@ class Window(_Window):
Provide a window type. If ``None``, all points are evenly weighted.
See the notes below for further information.
on : str, optional
For a DataFrame, a datetime-like column on which to calculate the rolling
window, rather than the DataFrame's index. Provided integer column is
ignored and excluded from result since an integer index is not used to
calculate the rolling window.
For a DataFrame, a datetime-like column or MultiIndex level on which
to calculate the rolling window, rather than the DataFrame's index.
Provided integer column is ignored and excluded from result since
an integer index is not used to calculate the rolling window.
axis : int or str, default 0
closed : str, default None
Make the interval closed on the 'right', 'left', 'both' or
Expand Down Expand Up @@ -1651,18 +1651,19 @@ def is_datetimelike(self):

@cache_readonly
def _on(self):

if self.on is None:
if self.axis == 0:
return self.obj.index
elif self.axis == 1:
return self.obj.columns
elif isinstance(self.on, Index):
return self.on
elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns:
return Index(self.obj[self.on])
else:
raise ValueError(
"invalid on specified as {0}, "
"must be a column (if DataFrame) "
"must be a column (of DataFrame), an Index "
"or None".format(self.on)
)

Expand Down Expand Up @@ -1706,10 +1707,12 @@ def validate(self):

def _validate_monotonic(self):
"""
Validate on is_monotonic.
Validate monotonic (increasing or decreasing).
"""
if not self._on.is_monotonic:
formatted = self.on or "index"
if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing):
formatted = self.on
if self.on is None:
formatted = "index"
raise ValueError("{0} must be monotonic".format(formatted))

def _validate_freq(self):
Expand Down
53 changes: 50 additions & 3 deletions pandas/tests/window/test_timeseries_window.py
@@ -1,7 +1,15 @@
import numpy as np
import pytest

from pandas import DataFrame, Index, Series, Timestamp, date_range, to_datetime
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
Timestamp,
date_range,
to_datetime,
)
import pandas.util.testing as tm

import pandas.tseries.offsets as offsets
Expand Down Expand Up @@ -105,8 +113,16 @@ def test_monotonic_on(self):
assert df.index.is_monotonic
df.rolling("2s").sum()

# non-monotonic
df.index = reversed(df.index.tolist())
def test_non_monotonic_on(self):
# GH 19248
df = DataFrame(
{"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
)
df = df.set_index("A")
non_monotonic_index = df.index.to_list()
non_monotonic_index[0] = non_monotonic_index[3]
df.index = non_monotonic_index

assert not df.index.is_monotonic

with pytest.raises(ValueError):
Expand Down Expand Up @@ -690,3 +706,34 @@ def test_rolling_cov_offset(self):

expected2 = ss.rolling(3, min_periods=1).cov()
tm.assert_series_equal(result, expected2)

def test_rolling_on_decreasing_index(self):
# GH-19248
index = [
Timestamp("20190101 09:00:00"),
Timestamp("20190101 09:00:02"),
Timestamp("20190101 09:00:03"),
Timestamp("20190101 09:00:05"),
Timestamp("20190101 09:00:06"),
]

df = DataFrame({"column": [3, 4, 4, 2, 1]}, index=reversed(index))
result = df.rolling("2s").min()
expected = DataFrame(
{"column": [3.0, 3.0, 3.0, 2.0, 1.0]}, index=reversed(index)
)
tm.assert_frame_equal(result, expected)

def test_rolling_on_multi_index_level(self):
# GH-15584
df = DataFrame(
{"column": range(6)},
index=MultiIndex.from_product(
[date_range("20190101", periods=3), range(2)], names=["date", "seq"]
),
)
result = df.rolling("10d", on=df.index.get_level_values("date")).sum()
expected = DataFrame(
{"column": [0.0, 1.0, 3.0, 6.0, 10.0, 15.0]}, index=df.index
)
tm.assert_frame_equal(result, expected)

0 comments on commit 4feb8ac

Please sign in to comment.