In [1]:
import warnings

# Install a catch-all "ignore" filter: every warning raised after this point
# is suppressed (presumably to keep the cell outputs below uncluttered).
# NOTE(review): a blanket filter also hides deprecation warnings from pandas,
# e.g. about frequency aliases used later in this notebook — consider narrowing
# it to a specific category once the notebook is stable.
warnings.filterwarnings('ignore')

# 2 在pandas中进行时间分组聚合

## 2.1 利用resample()对时序数据进行分组聚合

In [2]:
import pandas as pd

# Apple share prices, one row per trading day from 2013-02-08 to 2018-02-07.
# The 'date' column is parsed into datetimes so it can serve as a time index.
AAPL = pd.read_csv('AAPL.csv', parse_dates=['date'])

# Highest and lowest closing price within each calendar month.
# resample() needs a datetime index, hence set_index('date') first; with the
# 'M' rule every bin is labelled by the last calendar day of its month.
# NOTE(review): pandas 2.2+ deprecates the 'M' alias in favour of 'ME'; it is
# kept here so the cell still runs on older pandas versions.
(
    AAPL.set_index(keys='date')
    .resample(rule='M')
    .agg({'close': ['max', 'min']})
)

Unnamed: 0_level_0,close,close
Unnamed: 0_level_1,max,min
date,Unnamed: 1_level_2,Unnamed: 2_level_2
2013-02-28,68.5614,63.0571
2013-03-31,66.2256,60.0071
2013-04-30,63.2542,55.7899
2013-05-31,66.2628,61.2642
2013-06-30,64.3885,56.2542
...,...,...
2017-10-31,169.0400,153.4800
2017-11-30,176.2400,166.8900
2017-12-31,176.4200,169.0100
2018-01-31,179.2600,166.9700


In [3]:
# Mean closing price over consecutive 6-month windows. The 'MS' (month start)
# anchor labels each window with the first day of the month it begins in.
(
    AAPL.set_index(keys='date')
    .resample(rule='6MS')
    .agg({'close': 'mean'})
)

Unnamed: 0_level_0,close
date,Unnamed: 1_level_1
2013-02-01,62.197397
2013-08-01,73.577949
2014-02-01,83.960738
2014-08-01,106.000508
2015-02-01,126.762063
2015-08-01,111.48276
2016-02-01,99.536667
2016-08-01,112.912126
2017-02-01,144.45532
2017-08-01,165.375197


In [4]:
# Resample to one bin per calendar day. The data only has rows for trading
# days, so days without any row (e.g. weekends) show up as NaN in the result.
(
    AAPL.set_index(keys='date')
    .resample(rule='1D')
    .agg({'close': 'mean'})
)

Unnamed: 0_level_0,close
date,Unnamed: 1_level_1
2013-02-08,67.8542
2013-02-09,
2013-02-10,
2013-02-11,68.5614
2013-02-12,66.8428
...,...
2018-02-03,
2018-02-04,
2018-02-05,156.4900
2018-02-06,163.0300


In [5]:
# Mean closing price in 2-day bins. closed='right' makes each bin include its
# right edge and exclude its left edge; the label is still the bin's left edge,
# which is why the first row (2013-02-08) is reported under 2013-02-06.
(
    AAPL.set_index(keys='date')
    .resample(rule='2D', closed='right')
    .agg({'close': 'mean'})
)

Unnamed: 0_level_0,close
date,Unnamed: 1_level_1
2013-02-06,67.8542
2013-02-08,
2013-02-10,67.7021
2013-02-12,66.6856
2013-02-14,65.7371
...,...
2018-01-29,167.2000
2018-01-31,164.1400
2018-02-02,
2018-02-04,159.7600


## 2.2 利用groupby()+Grouper()实现混合分组

In [6]:
# Monthly mean closing price computed separately for each ticker (Apple and
# Microsoft). pd.Grouper bins the 'date' column by month start ('MS'), which
# lets a time-based key be combined with the ordinary 'Name' column in a
# single groupby without setting a datetime index first.
(
    pd.read_csv('AAPL&MSFT.csv', parse_dates=['date'])
    .groupby(by=['Name', pd.Grouper(key='date', freq='MS')])
    .agg({'close': 'mean'})
)

Unnamed: 0_level_0,Unnamed: 1_level_0,close
Name,date,Unnamed: 2_level_1
AAPL,2013-02-01,65.306264
AAPL,2013-03-01,63.120110
AAPL,2013-04-01,59.966432
AAPL,2013-05-01,63.778927
AAPL,2013-06-01,60.791120
...,...,...
MSFT,2017-10-01,77.939545
MSFT,2017-11-01,83.717619
MSFT,2017-12-01,84.758500
MSFT,2018-01-01,90.075238
