# Rolling
https://pandas.pydata.org/docs/user_guide/window.html

In [None]:
## Libraries

In [1]:
import numpy as np
import pandas as pd
from pandas.api.indexers import FixedForwardWindowIndexer

In [2]:
# Baseline series for the first examples: the integers 0-4 on the default index.
values = range(5)
s = pd.Series(values)
print(s)

0    0
1    1
2    2
3    3
4    4
dtype: int64


In [4]:
# Rolling sum over a window of 2 observations.
# The first label has only one observation available, so its result is NaN.
s.rolling(2).sum()

0    NaN
1    1.0
2    3.0
3    5.0
4    7.0
dtype: float64

In [6]:
# Show the source series, then every window that rolling(window=2) yields.
# Iterating the rolling object produces one sub-Series per label; the first
# one holds a single row because only one observation exists at that point.
print(s)
pair_windows = s.rolling(window=2)
for chunk in pair_windows:
    print(chunk)

0    0
1    1
2    2
3    3
4    4
dtype: int64
0    0
dtype: int64
0    0
1    1
dtype: int64
1    1
2    2
dtype: int64
2    2
3    3
dtype: int64
3    3
4    4
dtype: int64


#### pandas supports 4 types of windowing operations:
- **Rolling window**: generic fixed or variable sliding window over the values.
- **Weighted window**: weighted, non-rectangular window supplied by the `scipy.signal` library.
- **Expanding window**: accumulating window over the values.
- **Exponentially weighted window**: accumulating and exponentially weighted window over the values.

In [8]:
# Same values as before, now labelled with five consecutive days from 2022-10-01.
daily_index = pd.date_range('2022-10-01', periods=5, freq='1D')
s = pd.Series(range(5), index=daily_index)
print(s)
# An offset window ('2D') is sized by elapsed time rather than by row count.
s.rolling(window='2D').sum()

2022-10-01    0
2022-10-02    1
2022-10-03    2
2022-10-04    3
2022-10-05    4
Freq: D, dtype: int64


2022-10-01    0.0
2022-10-02    1.0
2022-10-03    3.0
2022-10-04    5.0
2022-10-05    7.0
Freq: D, dtype: float64

In [9]:
# Group-wise windowing: rows are first split by the key column 'A', then an
# expanding (cumulative) sum is computed independently inside each group.
# Per the pandas docs, windowing operations currently support only numeric data
# (integer and float) and always return float64 values.
df = pd.DataFrame({'A': ['a', 'b', 'a', 'b', 'a'], 'B': range(5)})
print(df)
per_group = df.groupby('A')
per_group.expanding().sum()

   A  B
0  a  0
1  b  1
2  a  2
3  b  3
4  a  4


Unnamed: 0_level_0,Unnamed: 1_level_0,B
A,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0,0.0
a,2,2.0
a,4,6.0
b,1,1.0
b,3,4.0


In [11]:
# Time rolling: a series on irregularly spaced dates, with a skipped day after
# Jan 1 and a long gap before Jan 29.
times = ['2022-01-01', '2022-01-03', '2022-01-04', '2022-01-05', '2022-01-29']
irregular_index = pd.DatetimeIndex(times)
s = pd.Series(range(5), index=irregular_index)
print(s)

2022-01-01    0
2022-01-03    1
2022-01-04    2
2022-01-05    3
2022-01-29    4
dtype: int64


In [12]:
# Integer window: always the two most recent rows, however far apart their
# dates are.
s.rolling(2).sum()

2022-01-01    NaN
2022-01-03    1.0
2022-01-04    3.0
2022-01-05    5.0
2022-01-29    7.0
dtype: float64

In [13]:
# Offset window: only rows whose timestamps fall within the two days ending at
# each label are summed, so widely separated rows are not combined.
# API reference: https://pandas.pydata.org/docs/reference/window.html#api-functions-rolling
s.rolling('2D').sum()

2022-01-01    0.0
2022-01-03    1.0
2022-01-04    3.0
2022-01-05    5.0
2022-01-29    4.0
dtype: float64

## Centering windows
By default, each result is labelled at the right edge of its window. Passing the `center` keyword (`center=True`) sets the labels at the center of the window instead.

In [20]:
# Default (right-edge) labelling: the 5-observation mean is shown next to the
# raw values, and the first four labels are NaN because their windows are
# incomplete.
# Only the last expression of a cell is rendered, so the rolling mean is
# computed once into a name and displayed once, rather than leaving bare
# expressions in the middle of the cell that produce no output.
s = pd.Series(range(10))
trailing_mean = s.rolling(window=5).mean()
pd.concat([s, trailing_mean], axis=1, ignore_index=True)

Unnamed: 0,0,1
0,0,
1,1,
2,2,
3,3,
4,4,2.0
5,5,3.0
6,6,4.0
7,7,5.0
8,8,6.0
9,9,7.0


In [21]:
# center=True places each 5-observation mean on the window's middle (3rd) row,
# so the NaNs are split between the first two and last two labels.
centered_mean = s.rolling(window=5, center=True).mean()
pd.concat([s, centered_mean], axis=1, ignore_index=True)

Unnamed: 0,0,1
0,0,
1,1,
2,2,2.0
3,3,3.0
4,4,4.0
5,5,5.0
6,6,6.0
7,7,7.0
8,8,
9,9,


In [23]:
# Centering also applies to datetime indices: build a frame with one value per
# day for the first five days of 2022.
first_days = pd.date_range("2022", periods=5, freq="1D")
df = pd.DataFrame({"A": [0, 1, 2, 3, 4]}, index=first_days)
df

Unnamed: 0,A
2022-01-01,0
2022-01-02,1
2022-01-03,2
2022-01-04,3
2022-01-05,4


In [24]:
# Two-day window labelled at its right edge (center=False is the default).
df.rolling(window="2D", center=False).mean()

Unnamed: 0,A
2022-01-01,0.0
2022-01-02,0.5
2022-01-03,1.5
2022-01-04,2.5
2022-01-05,3.5


In [25]:
# The same two-day window, now labelled at its center instead of its right edge.
df.rolling(window="2D", center=True).mean()

Unnamed: 0,A
2022-01-01,0.5
2022-01-02,1.5
2022-01-03,2.5
2022-01-04,3.5
2022-01-05,4.0


## Rolling window endpoints
The inclusion of the interval endpoints in rolling window calculations can be specified with the `closed` parameter:

| Value | Behavior |
|---|---|
| `'right'` | close right endpoint |
| `'left'` | close left endpoint |
| `'both'` | close both endpoints |
| `'neither'` | open endpoints |

For example, having the right endpoint open (`closed='left'` or `closed='neither'`) is useful in many problems that require that there is no contamination from present information back to past information. This allows the rolling window to compute statistics “up to that point in time”, but not including that point in time.

In [27]:
# Constant column x = 1 on second-resolution timestamps; 09:00:05 is skipped
# on purpose so the effect of each `closed` option shows up in the sums.
stamps = [
    "20220101 09:00:01",
    "20220101 09:00:02",
    "20220101 09:00:03",
    "20220101 09:00:04",
    "20220101 09:00:06",
]
df = pd.DataFrame({"x": 1}, index=[pd.Timestamp(stamp) for stamp in stamps])
df

Unnamed: 0,x
2022-01-01 09:00:01,1
2022-01-01 09:00:02,1
2022-01-01 09:00:03,1
2022-01-01 09:00:04,1
2022-01-01 09:00:06,1


In [28]:
# closed="right" is the default: the right endpoint of each 2-second window is
# included and the left endpoint is excluded.
df.rolling("2s", closed="right")["x"].sum()

2022-01-01 09:00:01    1.0
2022-01-01 09:00:02    2.0
2022-01-01 09:00:03    2.0
2022-01-01 09:00:04    2.0
2022-01-01 09:00:06    1.0
Name: x, dtype: float64

In [29]:
# Default endpoint handling again (closed="right"), here selecting column x
# before rolling; the resulting sums are the same.
df["x"].rolling("2s", closed="right").sum()

2022-01-01 09:00:01    1.0
2022-01-01 09:00:02    2.0
2022-01-01 09:00:03    2.0
2022-01-01 09:00:04    2.0
2022-01-01 09:00:06    1.0
Name: x, dtype: float64

In [30]:
# Add one column per `closed` option so the four behaviours can be compared
# side by side ("right" is the default). Every sum is taken over column x only,
# so the columns added here do not feed into one another.
for side in ("right", "both", "left", "neither"):
    df[side] = df.rolling("2s", closed=side).x.sum()
df

Unnamed: 0,x,right,both,left,neither
2022-01-01 09:00:01,1,1.0,1.0,,
2022-01-01 09:00:02,1,2.0,2.0,1.0,1.0
2022-01-01 09:00:03,1,2.0,3.0,2.0,1.0
2022-01-01 09:00:04,1,2.0,3.0,2.0,1.0
2022-01-01 09:00:06,1,1.0,2.0,1.0,


In [31]:
# Forward-looking window: FixedForwardWindowIndexer(window_size=2) makes each
# window cover the current row and the row after it, instead of looking back.
# min_periods=1 still yields a result where fewer than two rows remain ahead
# (the last row). The sum is applied to every column currently in df.
# FixedForwardWindowIndexer is imported in the notebook's import cell at the
# top, so all dependencies are declared in one place.
indexer = FixedForwardWindowIndexer(window_size=2)
df.rolling(indexer, min_periods=1).sum()

Unnamed: 0,x,right,both,left,neither
2022-01-01 09:00:01,2.0,3.0,3.0,1.0,1.0
2022-01-01 09:00:02,2.0,4.0,5.0,3.0,2.0
2022-01-01 09:00:03,2.0,4.0,6.0,4.0,2.0
2022-01-01 09:00:04,2.0,3.0,5.0,3.0,1.0
2022-01-01 09:00:06,1.0,1.0,2.0,1.0,


### Links
https://pandas.pydata.org/docs/user_guide/window.html