<a href="https://colab.research.google.com/github/Chaliantosh/datascience_cheatsheets/blob/main/Timeseries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Downsample Scenario**

Customers visiting a supermarket at different time steps.

In [None]:
import numpy as np
import pandas as pd
# Simulated customer arrivals: one observation per minute for 100 minutes,
# starting at 08:00 on 18 Sep 2017.
# The ISO date string avoids day-first/month-first ambiguity, and 'min' is used
# instead of the deprecated 'T' minute alias.
customerArrival = pd.date_range('2017-09-18 08:00', periods=100, freq='min')
# Seeded generator so that "Restart & Run All" reproduces the same counts.
arrivalGen = np.random.default_rng(42)
# Integer counts drawn uniformly from [0, 100), indexed by arrival minute.
custArrivalTs = pd.Series(arrivalGen.integers(0, 100, len(customerArrival)), index=customerArrival)
custArrivalTs.head(10)

2017-09-18 08:00:00    79
2017-09-18 08:01:00    54
2017-09-18 08:02:00    43
2017-09-18 08:03:00    44
2017-09-18 08:04:00     6
2017-09-18 08:05:00    77
2017-09-18 08:06:00    90
2017-09-18 08:07:00    31
2017-09-18 08:08:00    25
2017-09-18 08:09:00     4
Freq: T, dtype: int64

In [None]:
# Mean customer count in each 10-minute bin (downsampling from 1-minute data).
# `head` has to be called: a bare `.head` only displays the bound method
# instead of the first rows.
custArrivalTs.resample('10min').mean().head()

<bound method NDFrame.head of 2017-09-18 08:00:00    45.3
2017-09-18 08:10:00    48.0
2017-09-18 08:20:00    60.1
2017-09-18 08:30:00    44.8
2017-09-18 08:40:00    57.3
2017-09-18 08:50:00    56.9
2017-09-18 09:00:00    40.8
2017-09-18 09:10:00    54.6
2017-09-18 09:20:00    54.4
2017-09-18 09:30:00    43.7
Freq: 10T, dtype: float64>

In [None]:
# Total customer count in each 10-minute bin, using the built-in `sum`
# aggregation of the resampler.
tenMinuteTotals = custArrivalTs.resample('10min').sum()
tenMinuteTotals.head()

2017-09-18 08:00:00    453
2017-09-18 08:10:00    480
2017-09-18 08:20:00    601
2017-09-18 08:30:00    448
2017-09-18 08:40:00    573
Freq: 10T, dtype: int64

In [None]:
# Largest per-minute count observed within each one-hour bin.
(
    custArrivalTs
    .resample('1h')
    .max()
    .head()
)

2017-09-18 08:00:00    99
2017-09-18 09:00:00    97
Freq: H, dtype: int64

In [None]:
# Custom aggregation: pick one random observation from each one-hour bin.
import random


def pickRandomArrival(binValues):
    """Return one randomly chosen observation from a resample bin.

    Parameters
    ----------
    binValues : pandas.Series
        The observations that fall into a single resample bin.

    Returns
    -------
    scalar
        A randomly selected element of ``binValues``, or NaN when the bin
        is empty.
    """
    if len(binValues) == 0:
        # An empty bin has nothing to choose from; report it as missing
        # instead of raising.
        return float('nan')
    # Select by position with `.iloc`. `random.choice(series)` would index the
    # datetime-indexed Series with a plain integer, which relies on pandas'
    # deprecated positional fallback for `Series[int]`.
    return binValues.iloc[random.randrange(len(binValues))]


custArrivalTs.resample('1h').apply(pickRandomArrival).head()

2017-09-18 08:00:00    54
2017-09-18 09:00:00    38
Freq: H, dtype: int64

In [None]:
# Open / high / low / close summary of the counts within each one-hour bin.
custArrivalTs.resample('1h').ohlc().head()

Unnamed: 0,open,high,low,close
2017-09-18 08:00:00,79,99,4,24
2017-09-18 09:00:00,60,97,1,22


**Upsampling**

In [None]:
# Hourly sample series: 10 observations starting at 08:00 on 18 Sep 2017.
# The ISO date string avoids day-first/month-first ambiguity, and 'h' is used
# instead of the deprecated 'H' hour alias.
sampleRng = pd.date_range('2017-09-18 08:00', periods=10, freq='h')
# Seeded generator so that "Restart & Run All" reproduces the same values.
sampleGen = np.random.default_rng(7)
# Integer values drawn uniformly from [0, 100), indexed by the hourly timestamps.
sampleTs = pd.Series(sampleGen.integers(0, 100, len(sampleRng)), index=sampleRng)
sampleTs

2017-09-18 08:00:00    98
2017-09-18 09:00:00     5
2017-09-18 10:00:00    45
2017-09-18 11:00:00     0
2017-09-18 12:00:00     1
2017-09-18 13:00:00    43
2017-09-18 14:00:00    75
2017-09-18 15:00:00    23
2017-09-18 16:00:00    12
2017-09-18 17:00:00    17
Freq: H, dtype: int64

In [None]:
# Upsample the hourly series to 15-minute bins. Bins that contain no
# observation have no mean and show up as NaN.
quarterHourlyMean = sampleTs.resample('15min').mean()
quarterHourlyMean.head(10)

2017-09-18 08:00:00    98.0
2017-09-18 08:15:00     NaN
2017-09-18 08:30:00     NaN
2017-09-18 08:45:00     NaN
2017-09-18 09:00:00     5.0
2017-09-18 09:15:00     NaN
2017-09-18 09:30:00     NaN
2017-09-18 09:45:00     NaN
2017-09-18 10:00:00    45.0
2017-09-18 10:15:00     NaN
Freq: 15T, dtype: float64

In [None]:
# Keep the 15-minute resampler so the gaps created by upsampling can be
# filled forward or backward from it.
# Note: `sampleTS` (the resampler) differs from `sampleTs` (the hourly series)
# only by the case of its last letter.
sampleTS = sampleTs.resample(rule='15min')

In [None]:
# Forward fill: each new 15-minute slot takes the last earlier observation.
forwardFilled = sampleTS.ffill()
forwardFilled.head()

2017-09-18 08:00:00    98
2017-09-18 08:15:00    98
2017-09-18 08:30:00    98
2017-09-18 08:45:00    98
2017-09-18 09:00:00     5
Freq: 15T, dtype: int64

In [None]:
# Forward fill with a cap: each hourly value is carried forward into at most
# 2 consecutive empty 15-minute slots; any further slots stay NaN.
# `resample()` does not accept `fill_method` / `limit` keywords (they raise
# TypeError); the fill is requested on the resampler itself.
sampleTs.resample('15min').ffill(limit=2).head(10)

TypeError: ignored

In [None]:
# Backward fill: each new 15-minute slot takes the next later observation.
backwardFilled = sampleTS.bfill()
backwardFilled.head()

2017-09-18 08:00:00    98
2017-09-18 08:15:00     5
2017-09-18 08:30:00     5
2017-09-18 08:45:00     5
2017-09-18 09:00:00     5
Freq: 15T, dtype: int64