## 时间序列
时间序列数据分以下几种：
- 时间戳
- 固定时期
- 时间间隔

Python标准库中用于日期(date)和时间(time)数据的数据类型
- datetime模块
- time模块
- calendar模块

In [2]:
# datetime stores a date and time with microsecond (not millisecond) resolution.
from datetime import datetime
import pandas as pd
import numpy as np

now = datetime.now()
print(now)
print(type(now))
# The sub-second part is exposed as an int through `microsecond`.
print(type(now.microsecond))
# Render the individual fields as a Chinese-formatted date/time string.
print('{}年{}月{}日{}时{}分{}秒'.format(
    now.year, now.month, now.day, now.hour, now.minute, now.second))

2017-05-03 20:49:07.955000
<type 'datetime.datetime'>
<type 'int'>
2017年5月3日20时49分7秒


### datetime模块中的数据类型
![](http://i2.muimg.com/567571/787ec75907501b9d.png)

In [9]:
# A datetime.timedelta represents the difference between two datetime objects.
delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)
print(delta)
print(type(delta))

926 days, 15:45:00
<type 'datetime.timedelta'>


In [15]:
# datetime.strptime parses a string into a datetime using an explicit format.
time1 = ['2017-05-01', '2016-05-01']
print([datetime.strptime(text, '%Y-%m-%d') for text in time1])

[datetime.datetime(2017, 5, 1, 0, 0), datetime.datetime(2016, 5, 1, 0, 0)]


In [19]:
# pandas.to_datetime() parses many different date representations.
dt1 = ['7/12/2015', '5/6/2016']
# The appended None exercises missing-value handling: it is parsed to NaT
# (Not a Time), the NA value pandas uses for timestamp data.
print(pd.to_datetime(dt1 + [None]))

DatetimeIndex(['2015-07-12', '2016-05-06', 'NaT'], dtype='datetime64[ns]', freq=None)


In [4]:
# A Series indexed by timestamps.
# The datetime objects passed as the index end up stored in a DatetimeIndex.
dt2 = [
    datetime(2016, 1, 2),
    datetime(2016, 1, 5),
    datetime(2016, 1, 7),
    datetime(2016, 1, 14),
    datetime(2016, 1, 17),
    datetime(2016, 1, 20)
]
arr1 = pd.Series(np.random.randn(6), index=dt2)
print(arr1)
print('-------------------')
# A time series is an ordinary pandas Series carrying a DatetimeIndex.
print(type(arr1))
print('-------------------')
# pandas stores the timestamps as NumPy datetime64 values at nanosecond (ns) resolution.
print(arr1.index)
print('-------------------')
print(arr1.index[2])
# Each scalar taken out of a DatetimeIndex is a pandas Timestamp object.
print(type(arr1.index[2]))

2016-01-02    0.063656
2016-01-05    1.015813
2016-01-07   -0.733707
2016-01-14   -1.179360
2016-01-17   -0.936616
2016-01-20   -0.211832
dtype: float64
-------------------
<class 'pandas.core.series.Series'>
-------------------
DatetimeIndex(['2016-01-02', '2016-01-05', '2016-01-07', '2016-01-14',
               '2016-01-17', '2016-01-20'],
              dtype='datetime64[ns]', freq=None)
-------------------
2016-01-07 00:00:00
<class 'pandas.tslib.Timestamp'>


In [5]:
# Selecting from a time series with date strings, datetimes and slices.
dt3 = [
    datetime(2016, 1, 2),
    datetime(2016, 1, 5),
    datetime(2016, 1, 7),
    datetime(2016, 1, 14),
    datetime(2016, 1, 17),
    datetime(2016, 1, 20)
]
# Build the index from this cell's own dt3 (previously dt2 from another cell
# was used, leaving dt3 unused and making the cell depend on earlier state).
arr2 = pd.Series(np.random.randn(6), index=dt3)
# Any string that can be interpreted as a date works as an index key.
print(arr2)
print(arr2['20160114'])
print(arr2['1/17/2016'])
print('-------------------')
# freq='W' requests a weekly frequency.
arr3 = pd.Series(np.random.randn(10),
                 index=pd.date_range('1/1/2017', periods=10, freq='W'))
print(arr3)
print('-------------------')
# Passing just a year and month selects the matching slice of the data.
print(arr3['2017-01'])
print('-------------------')
print(arr3[datetime(2017, 2, 12):])
print('-------------------')
# Slice bounds may extend beyond the range covered by the data.
print(arr3['2017-03-01':'2017-07-01'])
print('-------------------')
# truncate(before=...) drops everything before the date and keeps the rest;
# the `after` parameter does the opposite.
print(arr3.truncate(before='2017-02-05'))

2016-01-02   -0.188051
2016-01-05    0.278473
2016-01-07   -0.645694
2016-01-14   -0.623303
2016-01-17    0.486498
2016-01-20    1.029388
dtype: float64
-0.623303032822
0.486498266986
-------------------
2017-01-01    0.422297
2017-01-08    0.985423
2017-01-15   -0.078065
2017-01-22    1.351250
2017-01-29    0.659384
2017-02-05    0.920617
2017-02-12    0.181504
2017-02-19    0.145558
2017-02-26   -2.240103
2017-03-05   -0.029864
Freq: W-SUN, dtype: float64
-------------------
2017-01-01    0.422297
2017-01-08    0.985423
2017-01-15   -0.078065
2017-01-22    1.351250
2017-01-29    0.659384
Freq: W-SUN, dtype: float64
-------------------
2017-02-12    0.181504
2017-02-19    0.145558
2017-02-26   -2.240103
2017-03-05   -0.029864
Freq: W-SUN, dtype: float64
-------------------
2017-03-05   -0.029864
Freq: W-SUN, dtype: float64
-------------------
2017-02-05    0.920617
2017-02-12    0.181504
2017-02-19    0.145558
2017-02-26   -2.240103
2017-03-05   -0.029864
Freq: W-SUN, dtype: float64


In [51]:
# Select DataFrame rows through a date-based index.
dt4 = pd.date_range('1/1/2017', periods=10, freq='W')
data1 = pd.DataFrame(np.random.randn(10, 4), index=dt4,
                     columns=['a', 'b', 'c', 'd'])
print(data1)
# .loc replaces the .ix indexer, which was deprecated and later removed from
# pandas; a year-month string selects every row that falls in that month.
print(data1.loc['2017-01'])

                   a         b         c         d
2017-01-01  0.319147  0.995083 -0.063343  0.361745
2017-01-08 -0.660898  1.063969  1.438154  0.233394
2017-01-15  0.474133  0.922500 -1.059700 -0.807684
2017-01-22  0.549245  1.002593 -0.188812 -0.584192
2017-01-29 -0.023371  1.342699 -0.703796  0.221889
2017-02-05 -1.209670 -0.398946  0.243542  0.343868
2017-02-12  0.557387 -1.418794 -1.765443  0.093713
2017-02-19  1.779852 -1.304426  0.585054  1.066391
2017-02-26 -1.481359 -0.698616 -1.207452  0.090489
2017-03-05  1.956331  1.319873  0.310050 -0.673371
                   a         b         c         d
2017-01-01  0.319147  0.995083 -0.063343  0.361745
2017-01-08 -0.660898  1.063969  1.438154  0.233394
2017-01-15  0.474133  0.922500 -1.059700 -0.807684
2017-01-22  0.549245  1.002593 -0.188812 -0.584192
2017-01-29 -0.023371  1.342699 -0.703796  0.221889


In [11]:
# A time series whose index contains duplicate timestamps.
data2 = pd.Series(
    np.arange(5),
    index=pd.DatetimeIndex(
        ['1/1/2000', '1/2/2000', '1/2/2000', '1/2/2000', '1/3/2000'])
)
print(data2)
print('-------------------')
# is_unique reports whether every index label occurs only once.
print(data2.index.is_unique)
print('-------------------')
# Grouping on index level 0 gathers the rows that share a timestamp.
grouped = data2.groupby(level=0)
print(grouped)
print('-------------------')
print(grouped.mean())
print(grouped.count())

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32
-------------------
False
-------------------
<pandas.core.groupby.SeriesGroupBy object at 0x0000000005722828>
-------------------
2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int32
2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64


In [19]:
# pandas.date_range(): generate a range of dates.
dt5 = pd.date_range('1/1/2017', '6/1/2017')  # daily frequency by default
print(dt5)
print('-------------------')
# A start date plus a number of periods...
dt6 = pd.date_range(start='1/1/2017', periods=10)
print(dt6)
print('-------------------')
# ...or an end date plus a number of periods.
dt7 = pd.date_range(end='1/1/2017', periods=10)
print(dt7)
print('-------------------')
# Index made of the last business day of each month. The BMonthEnd offset
# object is passed instead of the 'BM' alias string because newer pandas
# releases deprecate that alias in favour of 'BME'; the offset object is
# accepted by both.
dt8 = pd.date_range('1/1/2016', '12/1/2016', freq=pd.offsets.BMonthEnd())
print(dt8)

DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
               '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
               '2017-01-09', '2017-01-10',
               ...
               '2017-05-23', '2017-05-24', '2017-05-25', '2017-05-26',
               '2017-05-27', '2017-05-28', '2017-05-29', '2017-05-30',
               '2017-05-31', '2017-06-01'],
              dtype='datetime64[ns]', length=152, freq='D')
-------------------
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
               '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
               '2017-01-09', '2017-01-10'],
              dtype='datetime64[ns]', freq='D')
-------------------
DatetimeIndex(['2016-12-23', '2016-12-24', '2016-12-25', '2016-12-26',
               '2016-12-27', '2016-12-28', '2016-12-29', '2016-12-30',
               '2016-12-31', '2017-01-01'],
              dtype='datetime64[ns]', freq='D')
-------------------
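DatetimeIndex(['2016-01-29', '2016-02-29', '2016-03-31', '2016-04-29',
               '2016-05-31', '2016-06-30', '2016-07-29', '2016-08-31',
               '2016-09-30', '2016-10-31', '2016-11-30'],
              dtype='datetime64[ns]', freq='BM')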

In [25]:
# Frequencies and date offsets.
# A step of 6 hours 30 minutes built from offset objects. This is equivalent
# to the alias string '6H30min'; the upper-case 'H' alias is deprecated in
# newer pandas releases, whereas the offset objects are accepted by all.
dt9 = pd.date_range('1/1/2016',
                    freq=pd.offsets.Hour(6) + pd.offsets.Minute(30),
                    periods=5)
print(dt9)
print('-------------------')
# 'WOM-3FRI' (week of month) selects the third Friday of every month.
dt10 = pd.date_range('1/1/2016', freq='WOM-3FRI', periods=5)
print(dt10)

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 06:30:00',
               '2016-01-01 13:00:00', '2016-01-01 19:30:00',
               '2016-01-02 02:00:00'],
              dtype='datetime64[ns]', freq='390T')
-------------------
DatetimeIndex(['2016-01-15', '2016-02-19', '2016-03-18', '2016-04-15',
               '2016-05-20'],
              dtype='datetime64[ns]', freq='WOM-3FRI')


### 时间序列的基础频率
![](http://i1.piimg.com/567571/82875f23f32def17.png)
![](http://i4.buimg.com/567571/6f42dfb6263093af.png)

In [30]:
# shift(): move data forward (lag) or backward (lead) along the index.
# Both Series and DataFrame provide shift().
# The MonthEnd offset object stands in for the 'M' alias string, which newer
# pandas releases deprecate in favour of 'ME'; the object works with both.
arr4 = pd.Series(np.random.randn(5),
                 index=pd.date_range('1/1/2016', periods=5,
                                     freq=pd.offsets.MonthEnd()))
print(arr4)
print('-------------------')
# Positive periods push values to later rows, leaving NaN at the start.
print(arr4.shift(2))
print('-------------------')
# Negative periods pull values to earlier rows, leaving NaN at the end.
print(arr4.shift(-2))
print('-------------------')
# With freq given, the timestamps are moved instead and the data is unchanged.
print(arr4.shift(2, freq=pd.offsets.MonthEnd()))

2016-01-31   -0.960279
2016-02-29    0.916248
2016-03-31   -0.789328
2016-04-30    2.333411
2016-05-31    0.184923
Freq: M, dtype: float64
-------------------
2016-01-31         NaN
2016-02-29         NaN
2016-03-31   -0.960279
2016-04-30    0.916248
2016-05-31   -0.789328
Freq: M, dtype: float64
-------------------
2016-01-31   -0.789328
2016-02-29    2.333411
2016-03-31    0.184923
2016-04-30         NaN
2016-05-31         NaN
Freq: M, dtype: float64
-------------------
2016-03-31   -0.960279
2016-04-30    0.916248
2016-05-31   -0.789328
2016-06-30    2.333411
2016-07-31    0.184923
Freq: M, dtype: float64


In [40]:
# Date arithmetic with pandas offset objects.
from pandas.tseries.offsets import Day, MonthEnd
# Use the standard-library datetime class directly: the pd.datetime alias
# was deprecated and later removed from pandas.
dt11 = datetime(2016, 1, 1)
print(dt11)
print('-------------------')
print(dt11 + 3 * Day())
print('-------------------')
# Adding MonthEnd(2) lands on the second month end after the date.
print(dt11 + MonthEnd(2))
print('-------------------')
# rollback()/rollforward() on an anchored offset explicitly move a date
# backward/forward to the nearest anchor point (here: a month end).
dt12 = MonthEnd()
print(dt12.rollback(dt11))
print('-------------------')
print(dt12.rollforward(dt11))

2016-01-01 00:00:00
-------------------
2016-01-04 00:00:00
-------------------
2016-02-29 00:00:00
-------------------
2015-12-31 00:00:00
-------------------
2016-01-31 00:00:00
