In [2]:
import pandas as pd
import numpy as np

In [3]:
# Resampling: converting a time series from one frequency to another.
# Conceptually similar to groupby: bucket the data first, then apply a
# function to every bucket.
# Build 100 consecutive daily timestamps and attach random (unseeded) values.
rng = pd.date_range(start='2000-01-01', periods=100, freq='D')
ts = pd.Series(data=np.random.randn(rng.size), index=rng)
ts

2000-01-01    0.012745
2000-01-02    0.399175
2000-01-03   -0.278147
2000-01-04    2.028797
2000-01-05    1.999349
                ...   
2000-04-05   -0.418782
2000-04-06   -1.072578
2000-04-07   -1.313320
2000-04-08   -0.915548
2000-04-09   -2.930616
Freq: D, Length: 100, dtype: float64

In [4]:
# Number of observations that fall into each monthly bucket.
# NOTE(review): pandas >= 2.2 deprecates the 'M' offset alias in favour of
# 'ME'; 'M' is kept here so the cell still runs on older pandas.
ts.resample(rule='M').count()

2000-01-31    31
2000-02-29    29
2000-03-31    31
2000-04-30     9
Freq: M, dtype: int64

In [5]:
# Mean of each monthly bucket; label='right' tags every bucket with its
# right edge (the month-end date).
(
    ts
    .resample('M', label='right')
    .mean()
)

2000-01-31    0.055875
2000-02-29    0.260309
2000-03-31    0.189168
2000-04-30   -0.762969
Freq: M, dtype: float64

In [6]:
# Standard deviation of each monthly bucket; kind='period' indexes the result
# by monthly periods (2000-01, 2000-02, ...) instead of month-end timestamps.
(
    ts
    .resample('M', kind='period')
    .std()
)


2000-01    1.109515
2000-02    1.030544
2000-03    1.162605
2000-04    1.219177
Freq: M, dtype: float64

### Downsampling

In [7]:
# Very important when downsampling: resample's `label` and `closed` arguments
# (both default to 'left' for a minute-based frequency like the one used here).
# Twelve one-minute timestamps holding the integers 0..11.
# 'min' is used instead of the alias 'T': it is the spelling the rest of this
# notebook uses ('5min'), it is accepted by old and new pandas alike, and 'T'
# is deprecated from pandas 2.2 onwards.
rng = pd.date_range("2000-01-01", periods=12, freq="min")
ts = pd.Series(np.arange(12), index=rng)
ts

2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int32

In [8]:
# Downsample to 5-minute buckets and sum each one.
# closed='right' makes every bucket include its right edge, so the 00:00
# observation lands in the bucket that starts at 23:55 on the previous day.
(
    ts
    .resample('5min', closed='right')
    .sum()
)

1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int32

In [9]:
# Same right-closed 5-minute buckets, but label='right' names each bucket
# after its right edge instead of its left edge.
(
    ts
    .resample('5min', closed='right', label='right')
    .sum()
)

2000-01-01 00:00:00     0
2000-01-01 00:05:00    15
2000-01-01 00:10:00    40
2000-01-01 00:15:00    11
Freq: 5T, dtype: int32

In [10]:
# Shift the result index by -1 second.
# Passing the offset straight to resample, i.e.
#   ts.resample('5min', closed='right', label='right', loffset='-1s').sum()
# did not work here.
# NOTE(review): probably because `loffset` was deprecated in pandas 1.1 and
# later removed -- confirm against the installed pandas version.
# Instead, apply the offset to the aggregated result with shift().
(
    ts
    .resample('5min', closed='right', label='right')
    .sum()
    .shift(periods=-1, freq='s')
)

1999-12-31 23:59:59     0
2000-01-01 00:04:59    15
2000-01-01 00:09:59    40
2000-01-01 00:14:59    11
Freq: 5T, dtype: int32

In [11]:
# OHLC aggregation: the open (first), high (max), low (min) and close (last)
# value of every 5-minute bucket, returned as one column each.
ts.resample(rule='5min').ohlc()

Unnamed: 0,open,high,low,close
2000-01-01 00:00:00,0,4,0,4
2000-01-01 00:05:00,5,9,5,9
2000-01-01 00:10:00,10,11,10,11


### Upsampling and interpolation (low -> high freq)

In [12]:
# Going from a low to a higher frequency needs no aggregation.
# Sample data: two weekly rows (anchored on Wednesday) of random values.
frame = pd.DataFrame(
    data=np.random.randn(2, 4),
    index=pd.date_range(start='1/1/2000', periods=2, freq='W-WED'),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.024877,0.057863,0.584946,-0.227976
2000-01-12,-0.595336,0.116886,-1.617002,-2.52487


In [13]:
# Upsample weekly -> daily without any aggregation: asfreq() only reindexes,
# so the newly created days hold NaN.
frame.resample(rule='D').asfreq()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.024877,0.057863,0.584946,-0.227976
2000-01-06,,,,
2000-01-07,,,,
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,-0.595336,0.116886,-1.617002,-2.52487


In [14]:
# Forward-fill the gaps directly (no asfreq step): every new day carries the
# most recent weekly observation.
frame.resample(rule='D').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.024877,0.057863,0.584946,-0.227976
2000-01-06,-0.024877,0.057863,0.584946,-0.227976
2000-01-07,-0.024877,0.057863,0.584946,-0.227976
2000-01-08,-0.024877,0.057863,0.584946,-0.227976
2000-01-09,-0.024877,0.057863,0.584946,-0.227976
2000-01-10,-0.024877,0.057863,0.584946,-0.227976
2000-01-11,-0.024877,0.057863,0.584946,-0.227976
2000-01-12,-0.595336,0.116886,-1.617002,-2.52487


In [15]:
# Forward-fill at most two consecutive new days after each observation;
# anything further away stays NaN.
(
    frame
    .resample('D')
    .ffill(limit=2)
)


Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.024877,0.057863,0.584946,-0.227976
2000-01-06,-0.024877,0.057863,0.584946,-0.227976
2000-01-07,-0.024877,0.057863,0.584946,-0.227976
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,-0.595336,0.116886,-1.617002,-2.52487


In [16]:
# The new index does not have to overlap the old one: here the Wednesday
# observations are carried onto Thursday-anchored weeks.
frame.resample(rule='W-THU').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-06,-0.024877,0.057863,0.584946,-0.227976
2000-01-13,-0.595336,0.116886,-1.617002,-2.52487


## Resampling periods


In [17]:
# Sample data indexed by periods rather than timestamps: 24 monthly periods
# (January 2000 through December 2001) of random values.
frame = pd.DataFrame(
    data=np.random.randn(24, 4),
    index=pd.period_range(start='1-2000', end='12-2001', freq='M'),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
# Show only the first five rows.
frame.head(5)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01,0.764228,-0.899489,-0.413639,1.18776
2000-02,-1.53915,0.584592,-0.487823,0.408128
2000-03,0.561313,-0.16798,-0.257904,-1.225353
2000-04,-0.670749,-1.048134,0.197961,0.448078
2000-05,-0.292165,-0.290212,2.277708,0.987505


In [18]:
# Downsampling periods: collapse the monthly periods into years ending in
# December and take the mean of each year.
annual_frame = (
    frame
    .resample('A-DEC')
    .mean()
)
annual_frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000,-0.118002,-0.295844,-0.306613,0.09321
2001,-0.513423,0.071271,0.150704,0.324596


In [20]:
# Upsampling periods: expand each year into its quarters (fiscal year ending
# in December) and forward-fill the annual value into every quarter.
annual_frame.resample(rule='Q-DEC').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q1,-0.118002,-0.295844,-0.306613,0.09321
2000Q2,-0.118002,-0.295844,-0.306613,0.09321
2000Q3,-0.118002,-0.295844,-0.306613,0.09321
2000Q4,-0.118002,-0.295844,-0.306613,0.09321
2001Q1,-0.513423,0.071271,0.150704,0.324596
2001Q2,-0.513423,0.071271,0.150704,0.324596
2001Q3,-0.513423,0.071271,0.150704,0.324596
2001Q4,-0.513423,0.071271,0.150704,0.324596
