In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build a 72-point hourly DatetimeIndex starting at 2011-01-01 00:00.
# The lowercase 'h' alias is used because the uppercase 'H' spelling is
# deprecated in recent pandas releases, while 'h' is accepted by old and new.
rng = pd.date_range('1/1/2011', periods=72, freq='h')

# The values are simply 0..71, so each value equals the number of hours
# elapsed since the start; this makes the later fills/aggregations easy to read.
ts = pd.Series(list(range(len(rng))), index=rng)
ts.head()

2011-01-01 00:00:00    0
2011-01-01 01:00:00    1
2011-01-01 02:00:00    2
2011-01-01 03:00:00    3
2011-01-01 04:00:00    4
Freq: H, dtype: int64

In [3]:
# Re-index the hourly series onto a 45-minute grid. With method='pad' every
# new timestamp takes the value of the most recent earlier observation.
converted = ts.asfreq(freq='45Min', method='pad')
converted.head()

2011-01-01 00:00:00    0
2011-01-01 00:45:00    0
2011-01-01 01:30:00    1
2011-01-01 02:15:00    2
2011-01-01 03:00:00    3
Freq: 45T, dtype: int64

In [4]:
# Same 45-minute conversion, this time spelled with method='ffill'; the
# displayed head is identical to the method='pad' result.
converted = ts.asfreq(freq='45Min', method='ffill')
converted.head()

2011-01-01 00:00:00    0
2011-01-01 00:45:00    0
2011-01-01 01:30:00    1
2011-01-01 02:15:00    2
2011-01-01 03:00:00    3
Freq: 45T, dtype: int64

Does asfreq change the # of rows?
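* Answer: Yes. Going from hourly to 45-minute frequency increases the series from 72 to 95 rows. The extra rows do not contain new information; their values are filled in from the existing observations.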

What do the different methods do?
method : {‘backfill’, ‘bfill’, ‘pad’, ‘ffill’, None}
* Answer: 
  *  backfill / bfill: fill the current cell with the next valid observation.
  *  pad / ffill: fill the current cell with the previous valid observation.
  *  None (the default): do not fill; new rows that have no exact match in the original index are left as NaN.

Might any of these methods have pitfalls from a logical point of view?
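* Answer: Yes. backfill fills a row with a value that was only observed at a later time, so it leaks future information into the past (look-ahead); in effect it is an interpolation that could not have been known at that timestamp. ffill can likewise carry a stale value forward over a long gap.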

What's the difference between going to a higher frequency and a lower frequency?
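* Answer: Going to a higher frequency (upsampling) adds records, which then have to be filled somehow. Going to a lower frequency (downsampling) reduces the number of records, because each record now covers a longer time period.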

In [5]:
# Convert to a 90-minute grid; timestamps that do not exist in the hourly
# series are filled from the next available observation (backward fill).
converted = ts.asfreq(freq='90Min', method='bfill')

In [6]:
# Downsample by selection: keep only the observations that fall on a 3-hour
# grid. No fill method is needed because every grid point already exists in
# the hourly series. The lowercase 'h' alias replaces the deprecated 'H'.
converted = ts.asfreq('3h')

# Show rows 1..9 by position; .iloc makes the positional intent explicit.
converted.iloc[1:10]

2011-01-01 03:00:00     3
2011-01-01 06:00:00     6
2011-01-01 09:00:00     9
2011-01-01 12:00:00    12
2011-01-01 15:00:00    15
2011-01-01 18:00:00    18
2011-01-01 21:00:00    21
2011-01-02 00:00:00    24
2011-01-02 03:00:00    27
Freq: 3H, dtype: int64

What's different logically about going to a higher frequency vs a lower frequency? 
* Answer: A higher frequency adds more records, while a lower frequency shrinks the number of time periods. If there is not enough data for certain periods, it makes more sense to move to a lower frequency.

What do you want to do when switching to a lower frequency that is not logical when switching to a higher frequency?
* Answer: We do not want to drop data when we lower the frequency, so resampling (aggregating the values in each period) is the way to go.


In [7]:
# Downsample by aggregation: group the hourly values into 2-hour bins and
# average each bin, so no observation is discarded. The lowercase 'h' alias
# replaces the deprecated uppercase 'H'.
ts.resample('2h').mean()

2011-01-01 00:00:00     0.5
2011-01-01 02:00:00     2.5
2011-01-01 04:00:00     4.5
2011-01-01 06:00:00     6.5
2011-01-01 08:00:00     8.5
2011-01-01 10:00:00    10.5
2011-01-01 12:00:00    12.5
2011-01-01 14:00:00    14.5
2011-01-01 16:00:00    16.5
2011-01-01 18:00:00    18.5
2011-01-01 20:00:00    20.5
2011-01-01 22:00:00    22.5
2011-01-02 00:00:00    24.5
2011-01-02 02:00:00    26.5
2011-01-02 04:00:00    28.5
2011-01-02 06:00:00    30.5
2011-01-02 08:00:00    32.5
2011-01-02 10:00:00    34.5
2011-01-02 12:00:00    36.5
2011-01-02 14:00:00    38.5
2011-01-02 16:00:00    40.5
2011-01-02 18:00:00    42.5
2011-01-02 20:00:00    44.5
2011-01-02 22:00:00    46.5
2011-01-03 00:00:00    48.5
2011-01-03 02:00:00    50.5
2011-01-03 04:00:00    52.5
2011-01-03 06:00:00    54.5
2011-01-03 08:00:00    56.5
2011-01-03 10:00:00    58.5
2011-01-03 12:00:00    60.5
2011-01-03 14:00:00    62.5
2011-01-03 16:00:00    64.5
2011-01-03 18:00:00    66.5
2011-01-03 20:00:00    68.5
2011-01-03 22:00:00    70.5

 What if you want to downsample and you don't want to ffill or bfill?
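*  Answer: We can use `method=None` (the Python value `None`, which is the default, not the string "None"); rows without an exact match are then left as NaN.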

What is the difference between .resample() and .asfreq()?
* Answer: .resample() offers aggregation options: it creates a resampler object on which we can apply many more methods, such as mean, var, sum, etc.,
 whereas .asfreq() is limited to selecting/filling values and does not provide such options.


What are some special things you can do with .resample() you can't do with .asfreq()?
* Answer: Aggregations over each period, e.g. `.resample(rule).mean()`, `.resample(rule).var()`, `.resample(rule).sum()`.



In [8]:
# Simulate an irregularly sampled series by drawing 10 distinct observations
# from the hourly data and restoring chronological order.
# - Series.sample picks rows by position without replacement, which avoids
#   indexing a datetime-indexed Series with a list of integers (a positional
#   fallback that recent pandas releases deprecate).
# - random_state pins the draw so Restart & Run All reproduces the same rows.
irreg_ts = ts.sample(n=10, random_state=42).sort_index()
irreg_ts

2011-01-01 01:00:00     1
2011-01-01 03:00:00     3
2011-01-01 06:00:00     6
2011-01-01 11:00:00    11
2011-01-02 17:00:00    41
2011-01-02 22:00:00    46
2011-01-03 05:00:00    53
2011-01-03 08:00:00    56
2011-01-03 16:00:00    64
2011-01-03 20:00:00    68
dtype: int64

In [9]:
# asfreq builds a daily grid anchored at the series' first timestamp and keeps
# only the values that fall exactly on that grid. The series is already sorted;
# the NaNs appear because no observation matches the other grid points and no
# fill method was requested.
irreg_ts.asfreq(freq='D')

2011-01-01 01:00:00    1.0
2011-01-02 01:00:00    NaN
2011-01-03 01:00:00    NaN
Freq: D, dtype: float64

In [10]:
# count() ignores NaN, so this reports how many observations survive the
# conversion to the daily grid.
irreg_ts.asfreq(freq='D').count()

1

In [11]:
# resample assigns every observation to its calendar-day bin, so count()
# returns the number of observations per day and nothing is dropped.
irreg_ts.resample(rule='D').count()

2011-01-01    4
2011-01-02    2
2011-01-03    4
Freq: D, dtype: int64

In [12]:
# Forward-fill at most 5 consecutive missing values. Series.ffill replaces
# fillna(method='ffill'), whose `method` argument is deprecated in recent
# pandas. irreg_ts contains no NaN, so the result equals the input; filling
# only has an effect after the series is put on a regular grid.
irreg_ts.ffill(limit=5)

2011-01-01 01:00:00     1
2011-01-01 03:00:00     3
2011-01-01 06:00:00     6
2011-01-01 11:00:00    11
2011-01-02 17:00:00    41
2011-01-02 22:00:00    46
2011-01-03 05:00:00    53
2011-01-03 08:00:00    56
2011-01-03 16:00:00    64
2011-01-03 20:00:00    68
dtype: int64

In [13]:
# Put the irregular series on a regular hourly grid and count observations per
# hour: 1 where a sample exists, 0 elsewhere. The lowercase 'h' alias replaces
# the deprecated uppercase 'H'.
irreg_ts.resample('h').count()

2011-01-01 01:00:00    1
2011-01-01 02:00:00    0
2011-01-01 03:00:00    1
2011-01-01 04:00:00    0
2011-01-01 05:00:00    0
2011-01-01 06:00:00    1
2011-01-01 07:00:00    0
2011-01-01 08:00:00    0
2011-01-01 09:00:00    0
2011-01-01 10:00:00    0
2011-01-01 11:00:00    1
2011-01-01 12:00:00    0
2011-01-01 13:00:00    0
2011-01-01 14:00:00    0
2011-01-01 15:00:00    0
2011-01-01 16:00:00    0
2011-01-01 17:00:00    0
2011-01-01 18:00:00    0
2011-01-01 19:00:00    0
2011-01-01 20:00:00    0
2011-01-01 21:00:00    0
2011-01-01 22:00:00    0
2011-01-01 23:00:00    0
2011-01-02 00:00:00    0
2011-01-02 01:00:00    0
2011-01-02 02:00:00    0
2011-01-02 03:00:00    0
2011-01-02 04:00:00    0
2011-01-02 05:00:00    0
2011-01-02 06:00:00    0
                      ..
2011-01-02 15:00:00    0
2011-01-02 16:00:00    0
2011-01-02 17:00:00    1
2011-01-02 18:00:00    0
2011-01-02 19:00:00    0
2011-01-02 20:00:00    0
2011-01-02 21:00:00    0
2011-01-02 22:00:00    1
2011-01-02 23:00:00    0
