# Rolling/Shifting Time Windows

### rolling(): used for rolling window calculations, such as a moving average
### shift(): used to shift the values in a time series backward or forward in time.

## Data and Imports

In [2]:
import pandas as pd

In [3]:
# Load the S&P 500 price data from a pickle file in the working directory.
# NOTE(review): unpickling can execute arbitrary code, so only load
# 'sp500.pkl' if it comes from a trusted source.
sp500 = pd.read_pickle('sp500.pkl')

## rolling()

In [5]:
## It's easiest to look at a single column for rolling and shifting
## We will look at the 'Close' column

## With rolling, we can perform rolling window calculations,
## which means as we're sliding across the dataset,
## we can take subsets of data based on a specific window size and perform calculations on them.

sp500['Close']

Date
2016-12-27    2268.88
2016-12-28    2249.92
2016-12-29    2249.26
2016-12-30    2238.83
2017-01-03    2257.83
               ...   
2021-12-16    4668.67
2021-12-17    4620.64
2021-12-20    4568.02
2021-12-21    4649.23
2021-12-22    4696.56
Name: Close, Length: 1257, dtype: float64

In [6]:
## Let's try an example

## We will set the window parameter to 3 so that each calculation always uses three observations.
## We can then apply a calculation method such as mean()

## Go here to find rolling functions:
## https://pandas.pydata.org/pandas-docs/stable/reference/window.html#api-functions-rolling

## This example returns a new series of data, which is the three-observation moving average of the closing price.

sp500['Close'].rolling(window=3).mean()

Date
2016-12-27            NaN
2016-12-28            NaN
2016-12-29    2256.020000
2016-12-30    2246.003333
2017-01-03    2248.640000
                 ...     
2021-12-16    4670.870000
2021-12-17    4666.386667
2021-12-20    4619.110000
2021-12-21    4612.630000
2021-12-22    4637.936667
Name: Close, Length: 1257, dtype: float64

In [7]:
## Lets assign this back to a new column

sp500['Close_3_mean'] = sp500['Close'].rolling(window=3).mean()

In [8]:
## Looking at the first 5 rows notice how the first 2 rows for the mean are NaN
## This is because the rolling window didn't have 3 observations to complete the calculation

sp500[['Close','Close_3_mean']].head()

Unnamed: 0_level_0,Close,Close_3_mean
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-12-27,2268.88,
2016-12-28,2249.92,
2016-12-29,2249.26,2256.02
2016-12-30,2238.83,2246.003333
2017-01-03,2257.83,2248.64


In [9]:
## We can prove the above by calculating the mean of the first 3 dates ['2016-12-27':'2016-12-29']
## Then compare to 2016-12-29 Close_3_mean of 2256.020000

## Notice it is the same!

sp500.loc['2016-12-27':'2016-12-29','Close'].mean()

2256.02

In [10]:
## Lets verify the Close_3_mean for 2017-01-03: 2248.640000

sp500.loc['2016-12-29':'2017-01-03','Close'].mean()

2248.64

In [11]:
## What if we don't want the data for the first 2 rows to be missing?
## Let's fix that!

sp500[['Close','Close_3_mean']].head()

Unnamed: 0_level_0,Close,Close_3_mean
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-12-27,2268.88,
2016-12-28,2249.92,
2016-12-29,2249.26,2256.02
2016-12-30,2238.83,2246.003333
2017-01-03,2257.83,2248.64


In [61]:
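# Use the min_periods= parameter to set the minimum number of observations a window needs to produce a value

## With min_periods=1 we can see that the first 2 rows now have data!
## The first row is the same as its 'Close' because it is the mean of a single value
## The second row is the mean of the first 2 'Close' values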

sp500['Close'].rolling(window=3,min_periods=1).mean()

Date
2016-12-27    2268.880000
2016-12-28    2259.400000
2016-12-29    2256.020000
2016-12-30    2246.003333
2017-01-03    2248.640000
                 ...     
2021-12-16    4670.870000
2021-12-17    4666.386667
2021-12-20    4619.110000
2021-12-21    4612.630000
2021-12-22    4637.936667
Name: Close, Length: 1257, dtype: float64

In [67]:
## We can also pass rolling() a time offset such as '3D' instead of an integer

## With an offset window, min_periods defaults to 1, and we don't need to write the 'window=' keyword
## BUT this works a little differently: the window covers 3 calendar days ending at each row's date,
## not a fixed count of 3 rows. We can see that below:

sp500['Close_3d_mean'] = sp500['Close'].rolling('3D').mean()
sp500['Close_3d_mean']

Date
2016-12-27    2268.880000
2016-12-28    2259.400000
2016-12-29    2256.020000
2016-12-30    2246.003333
2017-01-03    2257.830000
                 ...     
2021-12-16    4670.870000
2021-12-17    4666.386667
2021-12-20    4568.020000
2021-12-21    4608.625000
2021-12-22    4637.936667
Name: Close_3d_mean, Length: 1257, dtype: float64

In [73]:
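## Now let's print out our 3 Close columns

## Notice how Close_3d_mean has the first record = the Close; this is the mean of itself, so it makes sense that it matches
## For the 2nd row notice how it's the mean of the Close of rows 1 and 2
## Then the same for the 3rd row: it's the mean of the first 3 rows

## The 4th row (2016-12-30) still matches Close_3_mean, because the last 3 calendar days contain 3 trading days
## The 5th row (2017-01-03), however, equals its own 'Close' instead of the mean of the last 3 rows
## See what's happening??
## '3D' is a window of 3 calendar days, not 3 rows: there are no observations between 2016-12-30 and 2017-01-03,
## so the window ending on 2017-01-03 holds only that one observation
## The 6th row (2017-01-04) is the mean of just 2017-01-03 and 2017-01-04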

sp500[['Close','Close_3_mean','Close_3d_mean']].head(6)

Unnamed: 0_level_0,Close,Close_3_mean,Close_3d_mean
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-12-27,2268.88,,2268.88
2016-12-28,2249.92,,2259.4
2016-12-29,2249.26,2256.02,2256.02
2016-12-30,2238.83,2246.003333,2246.003333
2017-01-03,2257.83,2248.64,2257.83
2017-01-04,2270.75,2255.803333,2264.29


In [75]:
## We get the same result by passing a pandas Timedelta as the window instead of the '3D' string

sp500['Close_3d_mean'] = sp500['Close'].rolling(pd.Timedelta(days=3)).mean()
sp500[['Close','Close_3_mean','Close_3d_mean']].head(6)

Unnamed: 0_level_0,Close,Close_3_mean,Close_3d_mean
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-12-27,2268.88,,2268.88
2016-12-28,2249.92,,2259.4
2016-12-29,2249.26,2256.02,2256.02
2016-12-30,2238.83,2246.003333,2246.003333
2017-01-03,2257.83,2248.64,2257.83
2017-01-04,2270.75,2255.803333,2264.29


## Shifting

In [78]:
sp500['Close']

Date
2016-12-27    2268.88
2016-12-28    2249.92
2016-12-29    2249.26
2016-12-30    2238.83
2017-01-03    2257.83
               ...   
2021-12-16    4668.67
2021-12-17    4620.64
2021-12-20    4568.02
2021-12-21    4649.23
2021-12-22    4696.56
Name: Close, Length: 1257, dtype: float64

In [82]:
## By default, the shift method shifts the values by one period while the index stays the same.
## So if we compare this to the original series, you can see that each value has moved down one row and the first row is NaN.

sp500['Close'].shift()

Date
2016-12-27        NaN
2016-12-28    2268.88
2016-12-29    2249.92
2016-12-30    2249.26
2017-01-03    2238.83
               ...   
2021-12-16    4709.85
2021-12-17    4668.67
2021-12-20    4620.64
2021-12-21    4568.02
2021-12-22    4649.23
Name: Close, Length: 1257, dtype: float64

In [86]:
## Lets assign this to a new column and compare

sp500['Close_previous'] = sp500['Close'].shift()
sp500['Close_previous']

Date
2016-12-27        NaN
2016-12-28    2268.88
2016-12-29    2249.92
2016-12-30    2249.26
2017-01-03    2238.83
               ...   
2021-12-16    4709.85
2021-12-17    4668.67
2021-12-20    4620.64
2021-12-21    4568.02
2021-12-22    4649.23
Name: Close_previous, Length: 1257, dtype: float64

In [90]:
## Lets also shift by 3 periods
sp500['Close_previous_3'] = sp500['Close'].shift(periods = 3)
sp500['Close_previous_3']

Date
2016-12-27        NaN
2016-12-28        NaN
2016-12-29        NaN
2016-12-30    2268.88
2017-01-03    2249.92
               ...   
2021-12-16    4668.97
2021-12-17    4634.09
2021-12-20    4709.85
2021-12-21    4668.67
2021-12-22    4620.64
Name: Close_previous_3, Length: 1257, dtype: float64

In [94]:
## This is useful for different time series analysis when you want to compare the current data to data from a previous period.

sp500[['Close','Close_previous','Close_previous_3']].head(6)

Unnamed: 0_level_0,Close,Close_previous,Close_previous_3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-12-27,2268.88,,
2016-12-28,2249.92,2268.88,
2016-12-29,2249.26,2249.92,
2016-12-30,2238.83,2249.26,2268.88
2017-01-03,2257.83,2238.83,2249.92
2017-01-04,2270.75,2257.83,2249.26


## Rolling and Shifting Together

In [99]:
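## Let's get the mean Close and Volume over the previous 10-day period:
## rolling('10D').mean() averages the 10 calendar days ending at each row's date,
## and shift() moves that result down one row, so each date shows the mean as of the prior row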

sp500[['Close','Volume']].rolling('10D').mean().shift().head(10)

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-12-27,,
2016-12-28,2268.88,1987080000.0
2016-12-29,2259.4,2189720000.0
2016-12-30,2256.02,2238603000.0
2017-01-03,2251.7225,2346678000.0
2017-01-04,2252.944,2631448000.0
2017-01-05,2255.911667,2820355000.0
2017-01-06,2257.781429,2954850000.0
2017-01-09,2258.938571,3148109000.0
2017-01-10,2268.692,3570948000.0
