# 重采样及频率转换

In [1]:
import numpy as np
from numpy.random import randn
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# Build a DatetimeIndex of 100 consecutive calendar days starting at 2000-01-01.
rng = pd.date_range('1/1/2000', periods=100, freq='D')

In [3]:
# Series of random draws from randn, one value per date in rng.
ts = Series(randn(len(rng)), index=rng)

In [4]:
# Summary statistics of the series: count, mean, std, min, quartiles and max.
ts.describe()

count    100.000000
mean      -0.132835
std        1.025953
min       -2.377757
25%       -0.953906
50%       -0.031970
75%        0.691375
max        2.305748
dtype: float64

In [5]:
# Downsample the daily data to monthly frequency, averaging each calendar
# month; the result is labelled with month-end timestamps.
ts.resample('M').mean()

2000-01-31   -0.420430
2000-02-29    0.236424
2000-03-31   -0.321629
2000-04-30    0.318231
Freq: M, dtype: float64

In [6]:
# Same monthly means, but labelled by monthly Periods (2000-01, 2000-02, ...)
# instead of month-end timestamps.  The index is converted explicitly with
# to_period() because the `kind` argument of resample() is deprecated in
# recent pandas releases.
ts.resample('M').mean().to_period('M')

2000-01   -0.420430
2000-02    0.236424
2000-03   -0.321629
2000-04    0.318231
Freq: M, dtype: float64

## 降采样

In [7]:
# Twelve one-minute timestamps starting at midnight on 2000-01-01.
# Uses the 'min' alias (the same spelling as the '5min' rule used for the
# resampling calls in this section); the single-letter 'T' alias is
# deprecated in newer pandas releases.
rng = pd.date_range('1/1/2000', periods=12, freq='min')

In [8]:
# Integer values 0 through 11 indexed by the twelve one-minute timestamps.
ts = Series(np.arange(12), index= rng)

In [9]:
# Display the full minute-frequency series.
ts

2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int64

In [10]:
# Downsample into 5-minute bins and sum each bin
# (0+1+2+3+4 = 10, 5+6+7+8+9 = 35, 10+11 = 21).
ts.resample('5min').sum()

2000-01-01 00:00:00    10
2000-01-01 00:05:00    35
2000-01-01 00:10:00    21
Freq: 5T, dtype: int64

In [11]:
# Same aggregation with closed='left' passed explicitly: each bin includes its
# left edge and excludes its right edge.  The result is identical to the call
# without the argument.
ts.resample('5min', closed='left').sum()

2000-01-01 00:00:00    10
2000-01-01 00:05:00    35
2000-01-01 00:10:00    21
Freq: 5T, dtype: int64

In [12]:
# Additionally pass label='left' so each bin is labelled with its left edge;
# the result is again identical to the two previous calls.
ts.resample('5min', closed='left', label='left').sum()

2000-01-01 00:00:00    10
2000-01-01 00:05:00    35
2000-01-01 00:10:00    21
Freq: 5T, dtype: int64

In [13]:
# Sum over 5-minute bins, then move every bin label back by one second.
# The `loffset` argument was deprecated and later removed from resample(),
# so the offset is applied to the index of the aggregated result instead.
shifted = ts.resample('5min').sum()
shifted.index = shifted.index + pd.Timedelta(seconds=-1)
shifted

1999-12-31 23:59:59    10
2000-01-01 00:04:59    35
2000-01-01 00:09:59    21
Freq: 5T, dtype: int64

### OHLC 重采样

In [16]:
# Open/high/low/close for each 5-minute bin: first, maximum, minimum and last
# value observed in the bin.
ts.resample('5min').ohlc()

Unnamed: 0,open,high,low,close
2000-01-01 00:00:00,0,4,0,4
2000-01-01 00:05:00,5,9,5,9
2000-01-01 00:10:00,10,11,10,11


### 通过 groupby 进行重采样

In [17]:
# Build a DatetimeIndex of 100 consecutive calendar days starting at 2000-01-01.
rng = pd.date_range('1/1/2000', periods=100, freq='D')

In [19]:
# Integer values 0 through 99 indexed by the 100 daily timestamps.
ts = Series(np.arange(100), index=rng)

In [21]:
# Show the first five rows of the daily series.
ts.head()

2000-01-01    0
2000-01-02    1
2000-01-03    2
2000-01-04    3
2000-01-05    4
Freq: D, dtype: int64

In [22]:
# Group by the calendar month of each index label and average within each
# month (an alternative to resampling for this kind of aggregation).
ts.groupby(lambda x: x.month).mean()

1    15
2    45
3    75
4    95
dtype: int64

In [23]:
# Group by day of week (0 = Monday ... 6 = Sunday) and average each group.
# `dayofweek` is a property on both Timestamp and DatetimeIndex, so the key
# function yields integers whether pandas applies it to each label or to the
# whole index; `weekday` is a method on Timestamp and would return a bound
# method rather than an integer when applied per label.
ts.groupby(lambda x: x.dayofweek).mean()

0    47.5
1    48.5
2    49.5
3    50.5
4    51.5
5    49.0
6    50.0
dtype: float64

### 升采样和插值

In [24]:
# Two weekly observations anchored on Wednesdays (2000-01-05 and 2000-01-12)
# holding random values for four named columns.
frame = DataFrame(np.random.randn(2, 4),
                  index=pd.date_range('1/1/2000', periods=2, freq='W-WED'),
                  columns=['Colorado', 'Texas', 'New York', 'Ohio'])

In [25]:
# Show up to the first five rows (this frame has only two).
frame[:5]

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,0.180601,-0.784,0.771044,0.36959
2000-01-12,-0.604909,-0.140229,-0.175791,1.449063


In [26]:
# Set up resampling of the weekly frame to daily frequency.  No aggregation or
# fill has been chosen yet: df_daily is the intermediate resampler object, and
# the next cell calls an aggregation method on it.
df_daily = frame.resample('D')

In [30]:
# Take the mean of each daily bin; days without an observation come out as
# missing values (shown as empty cells in the table below).
df_daily.mean()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,0.180601,-0.784,0.771044,0.36959
2000-01-06,,,,
2000-01-07,,,,
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,-0.604909,-0.140229,-0.175791,1.449063


In [32]:
# Upsample to daily frequency and forward-fill: every missing day repeats the
# most recent observed row.
frame.resample('D').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,0.180601,-0.784,0.771044,0.36959
2000-01-06,0.180601,-0.784,0.771044,0.36959
2000-01-07,0.180601,-0.784,0.771044,0.36959
2000-01-08,0.180601,-0.784,0.771044,0.36959
2000-01-09,0.180601,-0.784,0.771044,0.36959
2000-01-10,0.180601,-0.784,0.771044,0.36959
2000-01-11,0.180601,-0.784,0.771044,0.36959
2000-01-12,-0.604909,-0.140229,-0.175791,1.449063


In [33]:
# Forward-fill at most two consecutive missing days after each observation;
# any further gap stays missing.
frame.resample('D').ffill(limit=2)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,0.180601,-0.784,0.771044,0.36959
2000-01-06,0.180601,-0.784,0.771044,0.36959
2000-01-07,0.180601,-0.784,0.771044,0.36959
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,-0.604909,-0.140229,-0.175791,1.449063


In [34]:
# Re-anchor the weekly data from Wednesdays to Thursdays, carrying the most
# recent observation forward onto each Thursday label.
frame.resample('W-THU').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-06,0.180601,-0.784,0.771044,0.36959
2000-01-13,-0.604909,-0.140229,-0.175791,1.449063


### 通过时期进行重采样

In [43]:
# 24 monthly periods (2000-01 through 2001-12) of random values for four named
# columns; the index is a PeriodIndex rather than timestamps.
frame = DataFrame(np.random.randn(24, 4),
                  index=pd.period_range('1-2000', '12-2001', freq='M'),
                  columns=['Colorado', 'Texas', 'New York', 'Ohio'])

In [44]:
# Show the first five monthly rows.
frame[:5]

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01,-1.344489,-1.010792,0.738855,-0.208103
2000-02,0.22774,-1.201537,-0.628121,0.943884
2000-03,0.325579,0.460465,-0.883807,0.591597
2000-04,0.514185,0.762423,1.852345,0.44921
2000-05,-0.329644,1.35854,0.260425,-0.349108


In [45]:
# Downsample the monthly periods to annual periods ending in December,
# averaging the months that fall in each year.
annual_frame = frame.resample('A-DEC').mean()

In [47]:
# Display the two annual rows (2000 and 2001).
annual_frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000,0.185037,0.02196,0.417823,-0.010367
2001,0.152075,-0.084792,-0.121939,-0.200771


In [48]:
# Upsample the annual periods to quarterly periods (quarters of a year ending
# in December) and forward-fill each year's values across its four quarters.
annual_frame.resample('Q-DEC').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q1,0.185037,0.02196,0.417823,-0.010367
2000Q2,0.185037,0.02196,0.417823,-0.010367
2000Q3,0.185037,0.02196,0.417823,-0.010367
2000Q4,0.185037,0.02196,0.417823,-0.010367
2001Q1,0.152075,-0.084792,-0.121939,-0.200771
2001Q2,0.152075,-0.084792,-0.121939,-0.200771
2001Q3,0.152075,-0.084792,-0.121939,-0.200771
2001Q4,0.152075,-0.084792,-0.121939,-0.200771


In [51]:
# Same upsampling with convention='start' passed explicitly; the output is
# identical to the previous call without the argument.
# NOTE(review): per the pandas docs, `convention` selects whether the start or
# the end of each source period is used when converting a PeriodIndex.
annual_frame.resample('Q-DEC', convention='start').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q1,0.185037,0.02196,0.417823,-0.010367
2000Q2,0.185037,0.02196,0.417823,-0.010367
2000Q3,0.185037,0.02196,0.417823,-0.010367
2000Q4,0.185037,0.02196,0.417823,-0.010367
2001Q1,0.152075,-0.084792,-0.121939,-0.200771
2001Q2,0.152075,-0.084792,-0.121939,-0.200771
2001Q3,0.152075,-0.084792,-0.121939,-0.200771
2001Q4,0.152075,-0.084792,-0.121939,-0.200771


In [52]:
# Quarterly periods for a fiscal year ending in March.  With that anchor the
# start of calendar year 2000 falls in fiscal quarter 2000Q4, so the result
# runs from 2000Q4 through 2002Q3.
annual_frame.resample('Q-MAR').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q4,0.185037,0.02196,0.417823,-0.010367
2001Q1,0.185037,0.02196,0.417823,-0.010367
2001Q2,0.185037,0.02196,0.417823,-0.010367
2001Q3,0.185037,0.02196,0.417823,-0.010367
2001Q4,0.152075,-0.084792,-0.121939,-0.200771
2002Q1,0.152075,-0.084792,-0.121939,-0.200771
2002Q2,0.152075,-0.084792,-0.121939,-0.200771
2002Q3,0.152075,-0.084792,-0.121939,-0.200771
