In [1]:
import pandas as pd
import numpy as np

###  时间序列 ###
- 时间戳（timestamp）
- 固定周期（period）
- 时间间隔（interval）

<img src="f1.png" alt="FAO" width="590" >

### 创建时间序列
### date_range ###
- 可以指定开始时间与周期
- H：小时
- D：天
- M：月

In [3]:
# 时间表示方法 “2016 Jul 1” → 7/1/2016 或 1/7/2016 或 2016-07-01 或 2016/07/01
rng = pd.date_range('2016-07-01', periods = 10, freq = '3D')
rng

DatetimeIndex(['2016-07-01', '2016-07-04', '2016-07-07', '2016-07-10',
               '2016-07-13', '2016-07-16', '2016-07-19', '2016-07-22',
               '2016-07-25', '2016-07-28'],
              dtype='datetime64[ns]', freq='3D')

In [5]:
rng = pd.date_range('2016-07-01', periods = 5, freq = 'M')
rng

DatetimeIndex(['2016-07-31', '2016-08-31', '2016-09-30', '2016-10-31',
               '2016-11-30'],
              dtype='datetime64[ns]', freq='M')

In [4]:
rng = pd.date_range('2016-07-01', periods = 5, freq = 'H')
rng

DatetimeIndex(['2016-07-01 00:00:00', '2016-07-01 01:00:00',
               '2016-07-01 02:00:00', '2016-07-01 03:00:00',
               '2016-07-01 04:00:00'],
              dtype='datetime64[ns]', freq='H')

In [7]:
import datetime as dt
time=pd.Series(np.random.randn(20),
           index=pd.date_range(dt.datetime(2016,1,1),periods=20))
print(time)

2016-01-01    1.087540
2016-01-02    0.871709
2016-01-03    0.074931
2016-01-04   -0.508958
2016-01-05   -0.246238
2016-01-06    0.159537
2016-01-07   -0.029808
2016-01-08    2.121705
2016-01-09    0.016637
2016-01-10    0.755904
2016-01-11    0.112268
2016-01-12    2.149702
2016-01-13    1.996170
2016-01-14    2.462095
2016-01-15   -1.103346
2016-01-16   -1.447488
2016-01-17    0.219138
2016-01-18    0.364637
2016-01-19    2.176869
2016-01-20    1.375342
Freq: D, dtype: float64


###  truncate过滤 ###

In [8]:
time.truncate(before='2016-1-10')

2016-01-10    0.755904
2016-01-11    0.112268
2016-01-12    2.149702
2016-01-13    1.996170
2016-01-14    2.462095
2016-01-15   -1.103346
2016-01-16   -1.447488
2016-01-17    0.219138
2016-01-18    0.364637
2016-01-19    2.176869
2016-01-20    1.375342
Freq: D, dtype: float64

In [9]:
time.truncate(after='2016-1-10')

2016-01-01    1.087540
2016-01-02    0.871709
2016-01-03    0.074931
2016-01-04   -0.508958
2016-01-05   -0.246238
2016-01-06    0.159537
2016-01-07   -0.029808
2016-01-08    2.121705
2016-01-09    0.016637
2016-01-10    0.755904
Freq: D, dtype: float64

In [12]:
print(time['2016/01/15']) # 等于time['2016-01-15']

-1.10334589472


In [11]:
print(time['2016-01-15':'2016-01-20'])

2016-01-15   -1.103346
2016-01-16   -1.447488
2016-01-17    0.219138
2016-01-18    0.364637
2016-01-19    2.176869
2016-01-20    1.375342
Freq: D, dtype: float64


In [13]:
data=pd.date_range('2010-01-01','2011-01-01',freq='M')
print(data)

DatetimeIndex(['2010-01-31', '2010-02-28', '2010-03-31', '2010-04-30',
               '2010-05-31', '2010-06-30', '2010-07-31', '2010-08-31',
               '2010-09-30', '2010-10-31', '2010-11-30', '2010-12-31'],
              dtype='datetime64[ns]', freq='M')


<img src="f2.png" alt="FAO" width="590" >

In [14]:
#时间戳
pd.Timestamp('2016-07-10')

Timestamp('2016-07-10 00:00:00')

In [15]:
# 可以指定更多细节
pd.Timestamp('2016-07-10 10')

Timestamp('2016-07-10 10:00:00')

In [16]:
pd.Timestamp('2016-07-10 10:15')

Timestamp('2016-07-10 10:15:00')

#### How much detail can you add?

In [17]:
t = pd.Timestamp('2016-07-10 10:15')

In [18]:
# 时间区间
pd.Period('2016-01')

Period('2016-01', 'M')

In [19]:
pd.Period('2016-01-01')

Period('2016-01-01', 'D')

In [21]:
# TIME OFFSETS 时间偏倚量
pd.Timedelta('1 day')

Timedelta('1 days 00:00:00')

#### 加时间

In [24]:
pd.Period('2016-01-01 10:10') + pd.Timedelta('1 day')

Period('2016-01-02 10:10', 'T')

In [25]:
pd.Timestamp('2016-01-01 10:10') + pd.Timedelta('1 day')

Timestamp('2016-01-02 10:10:00')

In [26]:
pd.Timestamp('2016-01-01 10:10') + pd.Timedelta('15 ns')

Timestamp('2016-01-01 10:10:00.000000015')

In [31]:
p1 = pd.period_range('2016-01-01 10:10', freq = '25H', periods = 10)
p2 = pd.period_range('2016-01-01 10:10', freq = '1D1H', periods = 10)

In [32]:
p1

PeriodIndex(['2016-01-01 10:00', '2016-01-02 11:00', '2016-01-03 12:00',
             '2016-01-04 13:00', '2016-01-05 14:00', '2016-01-06 15:00',
             '2016-01-07 16:00', '2016-01-08 17:00', '2016-01-09 18:00',
             '2016-01-10 19:00'],
            dtype='period[25H]', freq='25H')

In [33]:
p2

PeriodIndex(['2016-01-01 10:00', '2016-01-02 11:00', '2016-01-03 12:00',
             '2016-01-04 13:00', '2016-01-05 14:00', '2016-01-06 15:00',
             '2016-01-07 16:00', '2016-01-08 17:00', '2016-01-09 18:00',
             '2016-01-10 19:00'],
            dtype='period[25H]', freq='25H')

In [34]:
# 指定索引
rng = pd.date_range('2016 Jul 1', periods = 10, freq = 'D')
rng
pd.Series(range(len(rng)), index = rng)

2016-07-01    0
2016-07-02    1
2016-07-03    2
2016-07-04    3
2016-07-05    4
2016-07-06    5
2016-07-07    6
2016-07-08    7
2016-07-09    8
2016-07-10    9
Freq: D, dtype: int32

In [35]:
periods = [pd.Period('2016-01'), pd.Period('2016-02'), pd.Period('2016-03')]
ts = pd.Series(np.random.randn(len(periods)), index = periods)
ts

2016-01   -0.326312
2016-02    0.391125
2016-03    0.073120
Freq: M, dtype: float64

In [36]:
type(ts.index)

pandas.core.indexes.period.PeriodIndex

#### 时间戳和时间周期的转换

In [37]:
# 时间戳和时间周期可以转换
ts = pd.Series(range(10), pd.date_range('07-10-16 8:00', periods = 10, freq = 'H'))
ts

2016-07-10 08:00:00    0
2016-07-10 09:00:00    1
2016-07-10 10:00:00    2
2016-07-10 11:00:00    3
2016-07-10 12:00:00    4
2016-07-10 13:00:00    5
2016-07-10 14:00:00    6
2016-07-10 15:00:00    7
2016-07-10 16:00:00    8
2016-07-10 17:00:00    9
Freq: H, dtype: int32

In [38]:
ts_period = ts.to_period()
ts_period

2016-07-10 08:00    0
2016-07-10 09:00    1
2016-07-10 10:00    2
2016-07-10 11:00    3
2016-07-10 12:00    4
2016-07-10 13:00    5
2016-07-10 14:00    6
2016-07-10 15:00    7
2016-07-10 16:00    8
2016-07-10 17:00    9
Freq: H, dtype: int32

#### 时间周期结果包括了8点

In [39]:
ts_period['2016-07-10 08:30':'2016-07-10 11:45']  

2016-07-10 08:00    0
2016-07-10 09:00    1
2016-07-10 10:00    2
2016-07-10 11:00    3
Freq: H, dtype: int32

#### 时间戳结果没有包括8点

In [41]:
ts['2016-07-10 08:30':'2016-07-10 11:45'] 

2016-07-10 09:00:00    1
2016-07-10 10:00:00    2
2016-07-10 11:00:00    3
Freq: H, dtype: int32