In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# Build 100 days of synthetic daily data starting on 2000-01-01.
# The values come from a dedicated, seeded RandomState so that the series is
# identical on every Restart & Run All, without altering NumPy's global
# random state for other cells.
rng = pd.date_range('2000-01-01', periods=100, freq='D')
ts = pd.Series(np.random.RandomState(0).randn(len(rng)), index=rng)
ts

2000-01-01   -1.722351
2000-01-02   -0.663832
2000-01-03   -1.004214
2000-01-04   -0.127645
2000-01-05   -1.480082
2000-01-06    0.523313
2000-01-07   -0.669998
2000-01-08    0.517393
2000-01-09   -0.678856
2000-01-10    0.974326
2000-01-11   -0.769425
2000-01-12    0.283122
2000-01-13   -0.076485
2000-01-14    0.059234
2000-01-15   -0.766007
2000-01-16   -0.251947
2000-01-17    0.558856
2000-01-18   -2.756441
2000-01-19    0.442881
2000-01-20    0.299441
2000-01-21    0.312559
2000-01-22    1.220532
2000-01-23    0.295891
2000-01-24   -0.550371
2000-01-25   -0.243708
2000-01-26   -0.016657
2000-01-27    0.563048
2000-01-28    0.980443
2000-01-29    0.373743
2000-01-30   -0.996769
                ...   
2000-03-11   -1.250970
2000-03-12   -0.415215
2000-03-13    0.607593
2000-03-14   -0.432829
2000-03-15    0.504741
2000-03-16   -0.303304
2000-03-17   -0.703999
2000-03-18   -1.447294
2000-03-19   -0.141326
2000-03-20    0.606671
2000-03-21    1.098835
2000-03-22   -0.519193
2000-03-23 

In [3]:
# Downsample the daily series into calendar-month bins and average each bin.
# pd.offsets.MonthEnd() is the offset that the 'M' string alias resolves to;
# pandas 2.2 deprecated that string for offsets (renamed 'ME'), whereas the
# offset object is accepted by old and new pandas releases alike.
ts.resample(pd.offsets.MonthEnd()).mean()

2000-01-31   -0.160114
2000-02-29    0.227501
2000-03-31   -0.137752
2000-04-30    0.357042
Freq: M, dtype: float64

In [4]:
# Monthly means again, but labelled with monthly periods (2000-01, 2000-02, ...)
# instead of month-end timestamps.
# The `kind='period'` keyword of resample() was deprecated in pandas 2.2; the
# documented replacement is to convert the resulting index explicitly, which
# also works on older releases. MonthEnd() stands in for the deprecated 'M'
# offset alias ('M' remains the valid *period* frequency passed to to_period).
ts.resample(pd.offsets.MonthEnd()).mean().to_period('M')

2000-01   -0.160114
2000-02    0.227501
2000-03   -0.137752
2000-04    0.357042
Freq: M, dtype: float64

# Downsampling

In [5]:
# Twelve one-minute observations (values 0..11) used by the downsampling
# examples in the following cells.
# 'min' is used instead of the single-letter 'T' alias: 'T' was deprecated in
# pandas 2.2, while 'min' is valid in every release and matches the '5min'
# rule used by the resample calls.
rng = pd.date_range('2000-01-01', periods=12, freq='min')
ts = pd.Series(np.arange(12), index=rng)
ts

2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int32

In [6]:
# Sum into 5-minute bins whose *right* edge is inclusive. No `label` is given,
# so each bin keeps its left-edge label -- which is why the 00:00 observation
# shows up under 1999-12-31 23:55.
(
    ts
    .resample(rule='5min', closed='right')
    .sum()
)

1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int32

In [7]:
# Same right-closed 5-minute bins, now labelled by their right edge so the
# timestamp shown is the end of each interval.
(
    ts
    .resample(rule='5min', closed='right', label='right')
    .sum()
)

2000-01-01 00:00:00     0
2000-01-01 00:05:00    15
2000-01-01 00:10:00    40
2000-01-01 00:15:00    11
Freq: 5T, dtype: int32

In [10]:
# Shift the bin labels by a frequency: move every right-edge label back by one
# second (00:05:00 -> 00:04:59) so it reads as the last instant inside the bin.
# The former `loffset='-1s'` keyword was deprecated in pandas 1.1 and removed
# in 2.0; shifting the result's index with shift(..., freq=...) moves only the
# labels (the summed values are not realigned) and works on all versions.
ts.resample('5min', closed='right', label='right').sum().shift(-1, freq='s')

1999-12-31 23:59:59     0
2000-01-01 00:04:59    15
2000-01-01 00:09:59    40
2000-01-01 00:14:59    11
Freq: 5T, dtype: int32

In [11]:
# Open-High-Low-Close: for every 5-minute bin report the first, maximum,
# minimum and last value as four columns.
(
    ts
    .resample(rule='5min')
    .ohlc()
)

Unnamed: 0,open,high,low,close
2000-01-01 00:00:00,0,4,0,4
2000-01-01 00:05:00,5,9,5,9
2000-01-01 00:10:00,10,11,10,11


# Upsampling and Interpolation

In [13]:
# Two weekly observations (anchored on Wednesdays) for four states; this is the
# low-frequency input for the upsampling examples.
# A local, seeded RandomState keeps the values reproducible across re-runs
# without touching NumPy's global random state.
frame = pd.DataFrame(
    np.random.RandomState(0).randn(2, 4),
    index=pd.date_range('1/1/2000', periods=2, freq='W-WED'),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.315813,-0.040196,0.912539,-0.625073
2000-01-12,-1.187544,0.404494,-0.448598,1.042577


In [14]:
# Upsample the weekly frame to daily frequency. asfreq() only inserts the new
# dates; it does not fill them, so the days between observations are NaN.
daily_resampler = frame.resample(rule='D')
df_daily = daily_resampler.asfreq()
df_daily

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.315813,-0.040196,0.912539,-0.625073
2000-01-06,,,,
2000-01-07,,,,
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,-1.187544,0.404494,-0.448598,1.042577


In [15]:
# Upsample to daily and forward-fill: every inserted day repeats the most
# recent weekly observation.
(
    frame
    .resample(rule='D')
    .ffill()
)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.315813,-0.040196,0.912539,-0.625073
2000-01-06,-0.315813,-0.040196,0.912539,-0.625073
2000-01-07,-0.315813,-0.040196,0.912539,-0.625073
2000-01-08,-0.315813,-0.040196,0.912539,-0.625073
2000-01-09,-0.315813,-0.040196,0.912539,-0.625073
2000-01-10,-0.315813,-0.040196,0.912539,-0.625073
2000-01-11,-0.315813,-0.040196,0.912539,-0.625073
2000-01-12,-1.187544,0.404494,-0.448598,1.042577


In [16]:
# Forward-fill at most two consecutive inserted days after each observation;
# anything further from the last known value stays NaN.
(
    frame
    .resample(rule='D')
    .ffill(limit=2)
)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.315813,-0.040196,0.912539,-0.625073
2000-01-06,-0.315813,-0.040196,0.912539,-0.625073
2000-01-07,-0.315813,-0.040196,0.912539,-0.625073
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,-1.187544,0.404494,-0.448598,1.042577


In [17]:
# The target index does not have to overlap the source one: re-anchor the
# Wednesday observations onto Thursdays, carrying each value forward one day.
(
    frame
    .resample(rule='W-THU')
    .ffill()
)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-06,-0.315813,-0.040196,0.912539,-0.625073
2000-01-13,-1.187544,0.404494,-0.448598,1.042577


# Resampling with Periods

In [20]:
# Month-end observations for 2000 (Jan 31 .. Nov 30, since the range stops at
# 2000-12-01) for four states.
# - The row count is taken from the index instead of a hard-coded 11, so the
#   data always matches the date range.
# - pd.offsets.MonthEnd() replaces the 'M' string alias, which pandas 2.2
#   deprecated for offsets; the object form works on old and new releases.
# - A local, seeded RandomState makes the values reproducible across re-runs.
month_ends = pd.date_range('1-2000', '12-2000', freq=pd.offsets.MonthEnd())
frame = pd.DataFrame(
    np.random.RandomState(0).randn(len(month_ends), 4),
    index=month_ends,
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-31,0.809386,1.463291,0.347741,-1.232052
2000-02-29,-1.194823,1.488224,-0.352916,0.595431
2000-03-31,-0.686563,-0.905814,0.037858,0.508339
2000-04-30,0.310578,0.955404,-0.47475,0.330968
2000-05-31,0.976141,0.892244,1.123201,0.596146
2000-06-30,1.635649,-0.929043,-0.570818,-1.095211
2000-07-31,-0.042448,-2.704627,0.478062,-1.083182
2000-08-31,-1.319971,-0.345206,-0.420774,1.23186
2000-09-30,0.725209,2.354011,0.304448,-0.031289
2000-10-31,-2.247764,-1.429921,0.086105,-0.875256


In [22]:
# Collapse the monthly frame to one row per calendar year (year ending in
# December), averaging each column.
# pd.offsets.YearEnd(month=12) is the offset behind the 'A-DEC' alias; the
# string form was deprecated in pandas 2.2, the object form works everywhere.
annual_frame = frame.resample(pd.offsets.YearEnd(month=12)).mean()
annual_frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-12-31,-0.04452,0.150027,0.047965,-0.068831


In [23]:
# Resample the annual frame to quarter-end frequency (fiscal year ending in
# December) and forward-fill.
# pd.offsets.QuarterEnd(startingMonth=12) is the offset behind the 'Q-DEC'
# alias; the string form was deprecated in pandas 2.2, the object form works
# on old and new releases alike.
annual_frame.resample(pd.offsets.QuarterEnd(startingMonth=12)).ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-12-31,-0.04452,0.150027,0.047965,-0.068831


In [24]:
# Same quarterly resample, requesting the 'end' convention.
# pd.offsets.QuarterEnd(startingMonth=12) replaces the 'Q-DEC' string alias,
# which was deprecated in pandas 2.2.
# NOTE(review): pandas documents `convention` as applying to PeriodIndex only.
# `frame` is built with pd.date_range (a DatetimeIndex), so the argument
# appears to have no effect here -- the stored output is identical to the
# previous cell. Confirm whether this section was meant to use a PeriodIndex.
annual_frame.resample(
    pd.offsets.QuarterEnd(startingMonth=12),
    convention='end',
).ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-12-31,-0.04452,0.150027,0.047965,-0.068831
