# Pandas的时间序列处理 

## 创建

In [3]:
from datetime import datetime
import pandas as pd
import numpy as np

# Build a Series whose index is an explicit list of datetime objects.
# The (month, day) pairs below are all in 2017.
weekend_days = [(2, 18), (2, 19), (2, 25), (2, 26), (3, 4), (3, 5)]
date_list = [datetime(2017, month, day) for month, day in weekend_days]
time_s = pd.Series(np.random.randn(len(date_list)), index=date_list)
print(time_s)
# Show what index type pandas built from the list of datetimes.
print(type(time_s.index))

2017-02-18   -0.543551
2017-02-19   -0.759103
2017-02-25    0.058956
2017-02-26    0.275448
2017-03-04   -0.957346
2017-03-05   -1.143108
dtype: float64
<class 'pandas.tseries.index.DatetimeIndex'>
<class 'pandas.core.series.Series'>


In [25]:
# pd.date_range(): generate a fixed number of weekly timestamps.
range_spec = {
    'start': '2017-02-18',  # first date of the range
    'periods': 5,           # number of timestamps to generate
    'freq': 'W-SAT',        # weekly frequency, anchored on Saturday
}
dates = pd.date_range(**range_spec)
print(dates)
saturday_series = pd.Series(np.random.randn(len(dates)), index=dates)
print(saturday_series)

DatetimeIndex(['2017-02-18', '2017-02-25', '2017-03-04', '2017-03-11',
               '2017-03-18'],
              dtype='datetime64[ns]', freq='W-SAT')
2017-02-18   -0.921937
2017-02-25    0.722167
2017-03-04   -0.171531
2017-03-11   -1.104664
2017-03-18    1.259994
Freq: W-SAT, dtype: float64


## 索引

In [5]:
# Index by position.
# Use .iloc so the integer is always interpreted as a position. Plain
# time_s[0] on a Series with a non-integer (datetime) index relies on a
# positional fallback that pandas has deprecated.
print(time_s.iloc[0])

-0.543550683904


In [6]:
# Index by label: pass a datetime object equal to one of the index entries.
first_day = datetime(2017, 2, 18)
print(time_s[first_day])

-0.543550683904


In [8]:
# Index by any string that pandas can parse as a date.
first_day_text = '2017/02/18'
print(time_s[first_day_text])

-0.543550683904


In [14]:
# Partial-string indexing: a "year" or "year-month" string selects every
# entry that falls inside that period.
february_rows = time_s['2017-2']
print(february_rows)

2017-02-18   -0.543551
2017-02-19   -0.759103
2017-02-25    0.058956
2017-02-26    0.275448
dtype: float64


In [15]:
# Slicing: everything from 2017-02-26 (inclusive) to the end of the series.
from_feb_26 = time_s['2017-2-26':]
print(from_feb_26)

2017-02-26    0.275448
2017-03-04   -0.957346
2017-03-05   -1.143108
dtype: float64


## 过滤


In [16]:
# Drop every observation dated before the cutoff; the cutoff date itself is kept.
keep_from = '2017-2-25'
time_s.truncate(before=keep_from)

2017-02-25    0.058956
2017-02-26    0.275448
2017-03-04   -0.957346
2017-03-05   -1.143108
dtype: float64

In [17]:
# Drop every observation dated after the cutoff; the cutoff date itself is kept.
keep_until = '2017-2-25'
time_s.truncate(after=keep_until)

2017-02-18   -0.543551
2017-02-19   -0.759103
2017-02-25    0.058956
dtype: float64

## 生成日期范围

In [26]:
# With both a start and an end date, date_range generates one timestamp
# per calendar day by default.
range_start, range_end = '2017/02/18', '2017/03/18'
date_index = pd.date_range(range_start, range_end)
print(date_index)

DatetimeIndex(['2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',
               '2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',
               '2017-02-26', '2017-02-27', '2017-02-28', '2017-03-01',
               '2017-03-02', '2017-03-03', '2017-03-04', '2017-03-05',
               '2017-03-06', '2017-03-07', '2017-03-08', '2017-03-09',
               '2017-03-10', '2017-03-11', '2017-03-12', '2017-03-13',
               '2017-03-14', '2017-03-15', '2017-03-16', '2017-03-17',
               '2017-03-18'],
              dtype='datetime64[ns]', freq='D')


In [30]:
# When only a start (or only an end) date is given, the number of periods
# must be supplied as well.
ten_days_forward = pd.date_range(start='2017/02/18', periods=10)
print(ten_days_forward)

DatetimeIndex(['2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',
               '2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',
               '2017-02-26', '2017-02-27'],
              dtype='datetime64[ns]', freq='D')


In [31]:
# Same idea anchored on the end date: the ten daily timestamps ending on 2017-03-18.
ten_days_back = pd.date_range(end='2017/03/18', periods=10)
print(ten_days_back)

DatetimeIndex(['2017-03-09', '2017-03-10', '2017-03-11', '2017-03-12',
               '2017-03-13', '2017-03-14', '2017-03-15', '2017-03-16',
               '2017-03-17', '2017-03-18'],
              dtype='datetime64[ns]', freq='D')


In [34]:
# Normalizing timestamps: with normalize=True the time-of-day part of the
# start value is reset to midnight before the range is generated.
timed_start = '2017/02/18 12:13:14'
print(pd.date_range(start=timed_start, periods=10))
print(pd.date_range(start=timed_start, periods=10, normalize=True))

DatetimeIndex(['2017-02-18 12:13:14', '2017-02-19 12:13:14',
               '2017-02-20 12:13:14', '2017-02-21 12:13:14',
               '2017-02-22 12:13:14', '2017-02-23 12:13:14',
               '2017-02-24 12:13:14', '2017-02-25 12:13:14',
               '2017-02-26 12:13:14', '2017-02-27 12:13:14'],
              dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',
               '2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',
               '2017-02-26', '2017-02-27'],
              dtype='datetime64[ns]', freq='D')


## 频率与偏移量

In [36]:
# A multiplier in front of the frequency code ('2D') steps two days at a time.
every_other_day = pd.date_range('2017/02/18', '2017/03/18', freq='2D')
print(every_other_day)

DatetimeIndex(['2017-02-18', '2017-02-20', '2017-02-22', '2017-02-24',
               '2017-02-26', '2017-02-28', '2017-03-02', '2017-03-04',
               '2017-03-06', '2017-03-08', '2017-03-10', '2017-03-12',
               '2017-03-14', '2017-03-16', '2017-03-18'],
              dtype='datetime64[ns]', freq='2D')


In [43]:
# Offsets can be combined with "+": here two weeks plus twelve hours.
offsets = pd.tseries.offsets
sum_offset = offsets.Week(2) + offsets.Hour(12)
print(sum_offset)

# Use the combined offset as the step between generated timestamps.
stepped_index = pd.date_range('2017/02/18', '2017/03/18', freq=sum_offset)
print(stepped_index)

14 days 12:00:00
DatetimeIndex(['2017-02-18 00:00:00', '2017-03-04 12:00:00'], dtype='datetime64[ns]', freq='348H')


## 移动数据

In [46]:
# Five random values indexed by weekly (Saturday-anchored) timestamps
# starting at 2017-02-18.
saturdays = pd.date_range('20170218', periods=5, freq='W-SAT')
ts = pd.Series(np.random.randn(5), index=saturdays)
print(ts)

2017-02-18    0.400190
2017-02-25    1.495394
2017-03-04   -1.331107
2017-03-11    2.943859
2017-03-18    0.813070
Freq: W-SAT, dtype: float64


In [48]:
# shift(1) moves each value one row later; shift(-1) moves it one row earlier.
# The index stays in place, so the vacated row is filled with NaN.
for step in (1, -1):
    print(ts.shift(step))

2017-02-18         NaN
2017-02-25    0.400190
2017-03-04    1.495394
2017-03-11   -1.331107
2017-03-18    2.943859
Freq: W-SAT, dtype: float64
2017-02-18    1.495394
2017-02-25   -1.331107
2017-03-04    2.943859
2017-03-11    0.813070
2017-03-18         NaN
Freq: W-SAT, dtype: float64
