In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build 72 hourly timestamps (three full days) starting 2011-01-01 and attach
# standard-normal noise to them.
# The frequency is given as an offset object rather than the 'H' string alias:
# pandas 2.2 deprecated 'H' in favour of 'h', while the object form is accepted
# by both older and newer pandas versions.
rng = pd.date_range('1/1/2011', periods=72, freq=pd.offsets.Hour())
# Draw from a dedicated, seeded generator so every Restart & Run All yields the
# same series (and the global NumPy random state is left untouched).
SEED = 42
ts = pd.Series(np.random.RandomState(SEED).randn(len(rng)), index=rng)
ts.head()

2011-01-01 00:00:00    0.948183
2011-01-01 01:00:00    0.936580
2011-01-01 02:00:00    0.307958
2011-01-01 03:00:00   -0.570673
2011-01-01 04:00:00    0.729669
Freq: H, dtype: float64

In [3]:
# Re-index the hourly series onto a 45-minute grid; each new timestamp takes
# the most recent earlier observation ('pad' means forward fill).
converted = ts.asfreq(freq='45Min', method='pad')
converted.head(n=5)

2011-01-01 00:00:00    0.948183
2011-01-01 00:45:00    0.948183
2011-01-01 01:30:00    0.936580
2011-01-01 02:15:00    0.307958
2011-01-01 03:00:00   -0.570673
Freq: 45T, dtype: float64

In [4]:
# Does asfreq change the number of rows?  Yes!
# Show the row count before and after the conversion, one value per line.
print(len(ts), len(converted), sep='\n')

72
95


In [5]:
# What do the different methods do?
# method : {‘backfill’, ‘bfill’, ‘pad’, ‘ffill’, None}
# ffill/pad: take data from previous cell
# backfill/bfill: take data from next cell
# None: adds NaNs

In [6]:
# Might any of these methods have pitfalls from a logical point of view?
# yes - backfilling pulls values from the future, so it can't be used when later observations aren't known yet

In [7]:
# What's the difference between going to a higher frequency and a lower frequency?
# more frequent: can interpolate missing values
# less frequent: drop data 

In [8]:
# Move to a 90-minute grid; timestamps that fall between hourly observations
# are filled from the next observation (back-fill).
converted = ts.asfreq(freq='90Min', method='bfill')
converted.head(n=5)

2011-01-01 00:00:00    0.948183
2011-01-01 01:30:00    0.307958
2011-01-01 03:00:00   -0.570673
2011-01-01 04:30:00   -1.019529
2011-01-01 06:00:00   -1.535137
Freq: 90T, dtype: float64

In [9]:
# What's different logically about going to a higher frequency vs a lower frequency? 
# What do you want to do when switching to a lower frequency that is not logical when switching to a higher frequency?
# when switching to higher frequency, have to decide how to interpolate missing values
# when switching to lower frequency, have to decide how to combine times 

In [10]:
# Downsample to 2-hour bins and average the observations in each bin.
# pd.offsets.Hour(2) stands in for the '2H' string alias, which pandas 2.2
# deprecated in favour of '2h'; the offset object works on old and new versions.
# .iloc makes it explicit that the slice is positional: rows 1 through 9.
ts.resample(pd.offsets.Hour(2)).mean().iloc[1:10]

2011-01-01 02:00:00   -0.131357
2011-01-01 04:00:00   -0.144930
2011-01-01 06:00:00   -1.064015
2011-01-01 08:00:00   -0.553143
2011-01-01 10:00:00   -0.876828
2011-01-01 12:00:00   -0.742028
2011-01-01 14:00:00    1.306403
2011-01-01 16:00:00    0.876473
2011-01-01 18:00:00    0.373204
Freq: 2H, dtype: float64

In [11]:
# Number of observations that fall in each calendar day.
ts.resample(rule='D').count()

2011-01-01    24
2011-01-02    24
2011-01-03    24
Freq: D, dtype: int64

In [12]:
# Daily total of the hourly values.
ts.resample(rule='D').sum()

2011-01-01    2.225034
2011-01-02    1.834848
2011-01-03   -1.744110
Freq: D, dtype: float64

In [13]:
# What if you want to downsample and you don't want to ffill or bfill?
# leave method=None (the default -- the Python value None, not the string 'None'); new timestamps with no matching observation are filled with NaN

In [14]:
# What is the difference between .resample() and .asfreq()?
# .asfreq() is more limited; .resample() produces an object that you can 
# do more with (e.g., count, mean)

In [15]:
# What are some special things you can do with .resample() you can't do with .asfreq()?
# .resample lets you do aggregating functions