# 3.12 处理时间序列

## 3.12.1 Python的日期与时间工具

1. 原生Python的日期与时间工具：datetime与dateutil

In [1]:
from datetime import datetime

In [2]:
# Build a datetime from explicit fields; hour and minute are left at 0 here.
datetime(year=2015, month=7, day=4)

datetime.datetime(2015, 7, 4, 0, 0)

In [3]:
from dateutil import parser

In [6]:
# Let dateutil parse a free-form, human-written date string.
date = parser.parse("4th of July, 2015")

In [7]:
date

datetime.datetime(2015, 7, 4, 0, 0)

In [8]:
# '%A' is the strftime code for the full weekday name.
date.strftime('%A')

'Saturday'

2. 时间类型数组：Numpy的datetime64类型

In [9]:
import numpy as np

In [10]:
# Zero-dimensional NumPy array holding a single date. The dtype is given by
# its string name, which is equivalent to passing np.datetime64 itself.
date = np.array('2015-07-04', dtype='datetime64')

In [11]:
date

array('2015-07-04', dtype='datetime64[D]')

In [12]:
# Broadcasting: adding 0..11 to the day-resolution date gives 12 consecutive days.
date + np.arange(12)

array(['2015-07-04', '2015-07-05', '2015-07-06', '2015-07-07',
       '2015-07-08', '2015-07-09', '2015-07-10', '2015-07-11',
       '2015-07-12', '2015-07-13', '2015-07-14', '2015-07-15'],
      dtype='datetime64[D]')

In [13]:
# Date-only string: the resulting scalar carries no time-of-day part.
np.datetime64('2015-07-04')

numpy.datetime64('2015-07-04')

In [14]:
# Adding a time to the string gives a value that keeps hours and minutes.
np.datetime64('2015-07-04 12:00')

numpy.datetime64('2015-07-04T12:00')

In [15]:
# The second argument forces the unit; 'ns' stores the value at nanosecond precision.
np.datetime64('2015-07-04 12:59:59.50', 'ns')

numpy.datetime64('2015-07-04T12:59:59.500000000')

3. Pandas的日期与时间工具：理想与现实的最佳解决方案

In [16]:
import pandas as pd

In [17]:
# pd.to_datetime parses the free-form string into a single pandas Timestamp.
date = pd.to_datetime("4th of July, 2015")

In [18]:
date

Timestamp('2015-07-04 00:00:00')

In [19]:
# Timestamp supports strftime as well; '%A' gives the full weekday name.
date.strftime('%A')

'Saturday'

In [20]:
# Offset the Timestamp by 0..11 whole days; the unit is passed by keyword so
# the meaning of 'D' is explicit at the call site.
date + pd.to_timedelta(np.arange(12), unit='D')

DatetimeIndex(['2015-07-04', '2015-07-05', '2015-07-06', '2015-07-07',
               '2015-07-08', '2015-07-09', '2015-07-10', '2015-07-11',
               '2015-07-12', '2015-07-13', '2015-07-14', '2015-07-15'],
              dtype='datetime64[ns]', freq=None)

In [21]:
# No unit is passed here, so the integers 0..11 are treated as nanoseconds
# (compare with the explicit 'D' unit used in the previous cell).
date + pd.to_timedelta(np.arange(12))

DatetimeIndex([          '2015-07-04 00:00:00',
               '2015-07-04 00:00:00.000000001',
               '2015-07-04 00:00:00.000000002',
               '2015-07-04 00:00:00.000000003',
               '2015-07-04 00:00:00.000000004',
               '2015-07-04 00:00:00.000000005',
               '2015-07-04 00:00:00.000000006',
               '2015-07-04 00:00:00.000000007',
               '2015-07-04 00:00:00.000000008',
               '2015-07-04 00:00:00.000000009',
               '2015-07-04 00:00:00.000000010',
               '2015-07-04 00:00:00.000000011'],
              dtype='datetime64[ns]', freq=None)

## 3.12.2 Pandas时间序列：用时间作索引

In [22]:
# Four dates, two in 2014 and two in 2015, used as a time-based index.
index = pd.DatetimeIndex([
    '2014-07-04',
    '2014-08-04',
    '2015-07-04',
    '2015-08-04',
])

In [23]:
# Attach the dates as the index so values can be looked up by time.
data = pd.Series([0, 1, 2, 3], index=index)

In [24]:
data

2014-07-04    0
2014-08-04    1
2015-07-04    2
2015-08-04    3
dtype: int64

In [25]:
# Slicing by date strings; the end label '2015-07-04' is included in the result.
data['2014-07-04': '2015-07-04']

2014-07-04    0
2014-08-04    1
2015-07-04    2
dtype: int64

In [26]:
# Passing only a year selects every entry whose date falls in that year.
data['2015']

2015-07-04    2
2015-08-04    3
dtype: int64

## 3.12.3 Pandas时间序列数据结构

In [27]:
# pd.to_datetime accepts a list mixing datetime objects and differently
# formatted date strings, and returns a single DatetimeIndex.
dates = pd.to_datetime([
    datetime(2015, 7, 3),
    '4th of July, 2015',
    '2015-Jul-6',
    '07-07-2015',
    '20150708',
])

In [28]:
dates

DatetimeIndex(['2015-07-03', '2015-07-04', '2015-07-06', '2015-07-07',
               '2015-07-08'],
              dtype='datetime64[ns]', freq=None)

In [29]:
# Convert the DatetimeIndex to a PeriodIndex with daily ('D') frequency.
dates.to_period('D')

PeriodIndex(['2015-07-03', '2015-07-04', '2015-07-06', '2015-07-07',
             '2015-07-08'],
            dtype='period[D]', freq='D')

In [31]:
# Subtracting the first timestamp from every entry yields a TimedeltaIndex.
dates - dates[0]

TimedeltaIndex(['0 days', '1 days', '3 days', '4 days', '5 days'], dtype='timedelta64[ns]', freq=None)

使用 pd.date_range() 创建一个有规律的日期序列

In [32]:
# Regular sequence of dates from the start date to the end date, both included.
pd.date_range(start='2015-07-03', end='2015-07-10')

DatetimeIndex(['2015-07-03', '2015-07-04', '2015-07-05', '2015-07-06',
               '2015-07-07', '2015-07-08', '2015-07-09', '2015-07-10'],
              dtype='datetime64[ns]', freq='D')

日期范围也可以通过开始时间与周期数（periods）来指定