#### Resampling
##### When we upsample (increase the number of rows), we need to fill or interpolate the missing values.
##### When we downsample (reduce the number of rows), we need to specify how to aggregate the data.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# dates= pd.date_range(start='2015', periods=4,freq='QE') #default start from QE-DEC
dates= pd.date_range(start='2015-07', periods=4,freq='QE-JUL')
data = range(1,5)
quarterly = pd.Series(data=data,index=dates)
quarterly


2015-07-31    1
2015-10-31    2
2016-01-31    3
2016-04-30    4
Freq: QE-JUL, dtype: int64

In [8]:
# Upsample to month-end frequency; months without a quarterly observation become NaN.
monthly = quarterly.asfreq(freq="ME")
monthly

2015-07-31    1.0
2015-08-31    NaN
2015-09-30    NaN
2015-10-31    2.0
2015-11-30    NaN
2015-12-31    NaN
2016-01-31    3.0
2016-02-29    NaN
2016-03-31    NaN
2016-04-30    4.0
Freq: ME, dtype: float64

In [9]:
# Rebuild the frame from `quarterly` instead of converting `monthly` in place:
# a DataFrame has no to_frame(), so the in-place form fails when this cell is re-run.
monthly = quarterly.asfreq('ME').to_frame('baseline')  # Series -> one-column DataFrame

In [10]:
#many ways to fill missing values while upsampling
monthly['bfill'] = quarterly.asfreq('ME').bfill() #backfill
monthly['ffill'] = quarterly.asfreq('ME', method='ffill') #forwardfill
monthly['value'] = quarterly.asfreq('ME', fill_value=0) #fill with a value
# monthly['ffill_new'] = monthly.baseline.ffill() #forwardfill in different way
monthly

Unnamed: 0,baseline,bfill,ffill,value
2015-07-31,1.0,1.0,1,1
2015-08-31,,2.0,1,0
2015-09-30,,2.0,1,0
2015-10-31,2.0,2.0,2,2
2015-11-30,,3.0,2,0
2015-12-31,,3.0,2,0
2016-01-31,3.0,3.0,3,3
2016-02-29,,4.0,3,0
2016-03-31,,4.0,3,0
2016-04-30,4.0,4.0,4,4


In [11]:
# Same upsampling through an explicit month-end index: reindex keeps the values
# whose timestamps match and leaves every other date as NaN.
dates = pd.date_range(start="2015-07", freq="ME", periods=12)
q2 = quarterly.reindex(index=dates)

In [12]:
# Build the frame straight from `quarterly` so the cell can be re-run: calling
# to_frame() on `q2` itself fails once `q2` is already a DataFrame.
q2 = quarterly.reindex(dates).to_frame('baseline')
q2

Unnamed: 0,baseline
2015-07-31,1.0
2015-08-31,
2015-09-30,
2015-10-31,2.0
2015-11-30,
2015-12-31,
2016-01-31,3.0
2016-02-29,
2016-03-31,
2016-04-30,4.0


In [13]:
# Fill the 'baseline' column by label. Filling the whole frame and taking
# column 0 only works while 'baseline' happens to be the first column, and it
# fills every other column needlessly.
q2['ffill'] = q2['baseline'].ffill()  # carry the last observation forward
q2['bfill'] = q2['baseline'].bfill()  # pull the next observation backward
q2

Unnamed: 0,baseline,ffill,bfill
2015-07-31,1.0,1.0,1.0
2015-08-31,,1.0,2.0
2015-09-30,,1.0,2.0
2015-10-31,2.0,2.0,2.0
2015-11-30,,2.0,3.0
2015-12-31,,2.0,3.0
2016-01-31,3.0,3.0,3.0
2016-02-29,,3.0,4.0
2016-03-31,,3.0,4.0
2016-04-30,4.0,4.0,4.0


## Resampling (changing the frequency of time series data)
#### A result is returned only once an aggregation function is applied

In [None]:
# Downsample to quarter-end bins and add up the monthly values that fall in
# each quarter (missing values are skipped by the sum).
q2.resample(rule="QE").sum()

Unnamed: 0,baseline,ffill,bfill
2015-09-30,1.0,3.0,5.0
2015-12-31,2.0,6.0,8.0
2016-03-31,3.0,9.0,11.0
2016-06-30,4.0,12.0,4.0


In [33]:
# Linearly interpolate the gaps in 'baseline'. The index is already month-end,
# so resampling to "ME" keeps the same rows.
q2.loc[:, ["baseline"]].resample(rule="ME").interpolate(method="linear")

Unnamed: 0,baseline
2015-07-31,1.0
2015-08-31,1.333333
2015-09-30,1.666667
2015-10-31,2.0
2015-11-30,2.333333
2015-12-31,2.666667
2016-01-31,3.0
2016-02-29,3.333333
2016-03-31,3.666667
2016-04-30,4.0
