In [1]:
import pandas as pd
import numpy as np

In [2]:
# Resampling converts a time series from one frequency to another.
# It behaves like groupby: observations are first bucketed into time bins,
# then an aggregation function is applied to each bin.
# Sample data: 100 consecutive daily observations of standard-normal noise.
rng = pd.date_range(start='2000-01-01', periods=100, freq='D')
ts = pd.Series(data=np.random.randn(rng.size), index=rng)
ts

2000-01-01   -0.899645
2000-01-02   -1.287196
2000-01-03    0.126700
2000-01-04    1.318200
2000-01-05    0.399285
                ...   
2000-04-05   -0.432357
2000-04-06   -0.271998
2000-04-07   -1.966834
2000-04-08    0.733056
2000-04-09    1.555111
Freq: D, Length: 100, dtype: float64

In [3]:
# Bucket the daily series by calendar month and count the observations
# that land in each bucket.
(
    ts
    .resample('M')
    .count()
)

2000-01-31    31
2000-02-29    29
2000-03-31    31
2000-04-30     9
Freq: M, dtype: int64

In [13]:
# Monthly mean; each bin is labelled with its right edge (the month-end date).
(
    ts
    .resample('M', label='right')
    .mean()
)

2000-01-31   -0.090057
2000-02-29    0.020278
2000-03-31    0.041598
2000-04-30   -0.265273
Freq: M, dtype: float64

In [9]:
# Monthly standard deviation, labelled by period (e.g. 2000-01) instead of by
# month-end timestamp. The `kind='period'` keyword of resample() is deprecated
# in recent pandas releases, so aggregate on the timestamp index first and
# convert the resulting index to monthly periods afterwards.
ts.resample('M').std().to_period('M')


2000-01    0.751249
2000-02    0.957532
2000-03    1.074748
2000-04    1.130377
Freq: M, dtype: float64

### Downsampling

In [14]:
# Very important when downsampling: the `closed` argument (which bin edge is
# inclusive) and the `label` argument (which bin edge names the bin).
# Both default to 'left' for the minute-level bins used below.
# Sample data: twelve one-minute observations holding the values 0..11.
# The "min" alias is used instead of the deprecated "T" so the cell matches
# the '5min' strings used in the cells that follow.
rng = pd.date_range("2000-01-01", periods=12, freq="min")
ts = pd.Series(np.arange(12), index=rng)
ts

2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int32

In [15]:
# Downsample to 5-minute bins and sum each bin.
# closed='right' makes the right bin edge inclusive, so the 00:00 observation
# belongs to the bin starting at 23:55; labels still use the left edge.
(
    ts
    .resample('5min', closed='right')
    .sum()
)

1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int32

In [16]:
# Same right-closed 5-minute bins, but each bin is now named after its
# right edge instead of its left edge.
(
    ts
    .resample('5min', closed='right', label='right')
    .sum()
)

2000-01-01 00:00:00     0
2000-01-01 00:05:00    15
2000-01-01 00:10:00    40
2000-01-01 00:15:00    11
Freq: 5T, dtype: int32

In [18]:
# Shift the result index by one second so that each label reads as the last
# instant inside its bin (e.g. 00:04:59 instead of 00:05:00).
# The `loffset` argument of resample() did not work here (it was deprecated in
# pandas 1.1 and removed in 2.0), so the aggregated result is shifted instead.
(
    ts
    .resample('5min', closed='right', label='right')
    .sum()
    .shift(-1, freq='s')
)

1999-12-31 23:59:59     0
2000-01-01 00:04:59    15
2000-01-01 00:09:59    40
2000-01-01 00:14:59    11
Freq: 5T, dtype: int32

In [19]:
# Open-high-low-close aggregation: for every 5-minute bin report the first,
# maximum, minimum and last value in a single pass.
(
    ts
    .resample('5min')
    .ohlc()
)

Unnamed: 0,open,high,low,close
2000-01-01 00:00:00,0,4,0,4
2000-01-01 00:05:00,5,9,5,9
2000-01-01 00:10:00,10,11,10,11


### Upsampling and interpolation (low -> high freq)

In [21]:
# Going from a low to a higher frequency needs no aggregation.
# Sample data: two weekly rows (anchored on Wednesday) of random values.
frame = pd.DataFrame(
    data=np.random.randn(2, 4),
    index=pd.date_range(start='1/1/2000', periods=2, freq='W-WED'),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.345758,1.847213,-0.759326,-0.495528
2000-01-12,-0.561869,0.943195,1.050617,-0.300879


In [23]:
# Upsample weekly -> daily without any aggregation: asfreq() just re-indexes,
# so the newly created days hold NaN.
(
    frame
    .resample('D')
    .asfreq()
)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.345758,1.847213,-0.759326,-0.495528
2000-01-06,,,,
2000-01-07,,,,
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,-0.561869,0.943195,1.050617,-0.300879


In [25]:
# Forward-fill the gaps directly (no asfreq() step needed): every new day
# repeats the most recent weekly observation.
(
    frame
    .resample('D')
    .ffill()
)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.345758,1.847213,-0.759326,-0.495528
2000-01-06,-0.345758,1.847213,-0.759326,-0.495528
2000-01-07,-0.345758,1.847213,-0.759326,-0.495528
2000-01-08,-0.345758,1.847213,-0.759326,-0.495528
2000-01-09,-0.345758,1.847213,-0.759326,-0.495528
2000-01-10,-0.345758,1.847213,-0.759326,-0.495528
2000-01-11,-0.345758,1.847213,-0.759326,-0.495528
2000-01-12,-0.561869,0.943195,1.050617,-0.300879


In [28]:
# Forward-fill with a cap: at most two consecutive new rows are filled after
# each observation; the remaining gap stays NaN.
(
    frame
    .resample('D')
    .ffill(limit=2)
)


Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.345758,1.847213,-0.759326,-0.495528
2000-01-06,-0.345758,1.847213,-0.759326,-0.495528
2000-01-07,-0.345758,1.847213,-0.759326,-0.495528
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,-0.561869,0.943195,1.050617,-0.300879


In [29]:
# The new index (Thursdays) shares no dates with the old one (Wednesdays);
# forward-fill carries each Wednesday value onto the following Thursday.
(
    frame
    .resample('W-THU')
    .ffill()
)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-06,-0.345758,1.847213,-0.759326,-0.495528
2000-01-13,-0.561869,0.943195,1.050617,-0.300879


## Resampling periods


In [33]:
# Sample data indexed by periods rather than timestamps: 24 monthly periods
# (January 2000 through December 2001) of random values.
frame = pd.DataFrame(
    data=np.random.randn(24, 4),
    index=pd.period_range(start='1-2000', end='12-2001', freq='M'),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
# Preview the first five rows.
frame.head(5)

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-09,-0.989338,0.045969,-1.486925,-0.95136
2001-10,-0.457072,0.287336,-0.383934,-0.361201
2001-11,2.289544,-0.893237,0.859655,-0.009197
2001-12,0.187445,0.502182,0.24025,0.15007


In [34]:
# Downsample the monthly periods to annual periods (year ending in December),
# averaging the twelve months of each year.
annual_frame = (
    frame
    .resample('A-DEC')
    .mean()
)
annual_frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000,0.112302,0.542099,-0.048007,-0.253158
2001,0.372111,0.124044,0.270344,-0.251861


In [36]:
# Upsample the annual periods to quarterly periods (fiscal year ending in
# December); forward-fill repeats each annual value across its four quarters.
(
    annual_frame
    .resample('Q-DEC')
    .ffill()
)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q1,0.112302,0.542099,-0.048007,-0.253158
2000Q2,0.112302,0.542099,-0.048007,-0.253158
2000Q3,0.112302,0.542099,-0.048007,-0.253158
2000Q4,0.112302,0.542099,-0.048007,-0.253158
2001Q1,0.372111,0.124044,0.270344,-0.251861
2001Q2,0.372111,0.124044,0.270344,-0.251861
2001Q3,0.372111,0.124044,0.270344,-0.251861
2001Q4,0.372111,0.124044,0.270344,-0.251861
