## Python + pandas 日期、时间、时间序列的处理

### 1. datetime

In [1]:
# 导包
from datetime import datetime,timedelta

In [2]:
# Capture the current local date and time (no timezone is passed, so the result is naive)
now = datetime.now()
now

datetime.datetime(2018, 3, 16, 17, 20, 44, 607318)

In [4]:
# Subtracting one datetime from another yields a timedelta
reference = datetime(2017, 6, 27, 10, 10, 10, 10)
delta = now - reference

# Show the timedelta together with its days / seconds / microseconds parts
(delta, delta.days, delta.seconds, delta.microseconds)

(datetime.timedelta(262, 25834, 607308), 262, 25834, 607308)

In [7]:
# Convert a datetime to its default string representation
stamp = datetime(year=2017, month=6, day=27)

(stamp, str(stamp))

(datetime.datetime(2017, 6, 27, 0, 0), '2017-06-27 00:00:00')

In [9]:
# Render the datetime with an explicit format string
stamp.strftime('%Y-%m-%d')  # %Y is the 4-digit year, %y is the 2-digit year

'2017-06-27'

In [10]:
# Parse several date strings into datetime objects (strptime: string -> datetime)
date = ['2017-6-26','2017-6-27']
datetime2 = list(map(lambda text: datetime.strptime(text, '%Y-%m-%d'), date))
datetime2

[datetime.datetime(2017, 6, 26, 0, 0), datetime.datetime(2017, 6, 27, 0, 0)]

### 2. dateutil.parser [第三方库]  时间解析函数

In [15]:
from dateutil.parser import parse

In [16]:
# dateutil works out the layout of the date string itself; no format string is given
parse('2017-6-27')

datetime.datetime(2017, 6, 27, 0, 0)

In [20]:
# dayfirst=True makes the leading field the day, so this is 27 June 2017
parse('27/6/2017',dayfirst =True)

datetime.datetime(2017, 6, 27, 0, 0)

### datetime 格式定义

|代码 | 说明 |
|:---|:---|
|%Y | 4位数的年 |
|%y | 2位数的年 |
|%m | 2位数的月[01,12] |
|%d | 2位数的日[01,31] |
|%H | 时（24小时制）[00,23] |
|%I | 时（12小时制）[01,12] |
|%M | 2位数的分[00,59] |
|%S | 秒[00,61]有闰秒的存在 |
|%w | 用整数表示的星期几[0（星期天），6] |
|%F | %Y-%m-%d简写形式例如，2017-06-27 |
|%D | %m/%d/%y简写形式 |

### 3. pandas处理成组日期

In [3]:
import pandas as pd

In [8]:
# Convert a list of date strings into a DatetimeIndex in one step
date = pd.to_datetime(['2017-6-26', '2017-6-27'])

# Each element of the index is a pandas Timestamp
(date[0], date[1])

(Timestamp('2017-06-26 00:00:00'), Timestamp('2017-06-27 00:00:00'))

### 4. pandas 时间序列基础以及时间、日期处理

In [10]:
import numpy as np

In [9]:
# Eight consecutive daily date strings, 2017-06-20 through 2017-06-27
dates = ['2017-06-{:02d}'.format(day) for day in range(20, 28)]

In [11]:
# Series of random floats indexed by the parsed dates.
# A dedicated, seeded generator makes the values identical on every
# Restart & Run All; the global NumPy random state is left untouched.
rng = np.random.RandomState(42)
# One value per date, so the data length always matches the index length
ts = pd.Series(rng.randn(len(dates)),
               index=pd.to_datetime(dates))

In [12]:
# Display the series: one float per date
ts

2017-06-20   -1.721828
2017-06-21   -0.490975
2017-06-22    0.062078
2017-06-23    1.030794
2017-06-24   -1.362922
2017-06-25   -0.500648
2017-06-26   -0.982481
2017-06-27   -1.381705
dtype: float64

In [13]:
# Inspect the index that pd.to_datetime produced for the series
ts.index

DatetimeIndex(['2017-06-20', '2017-06-21', '2017-06-22', '2017-06-23',
               '2017-06-24', '2017-06-25', '2017-06-26', '2017-06-27'],
              dtype='datetime64[ns]', freq=None)

#### 4.1 pandas 不同索引的时间序列之间的算术运算

In [14]:
# Every second observation, starting from the first
ts[::2]

2017-06-20   -1.721828
2017-06-22    0.062078
2017-06-24   -1.362922
2017-06-26   -0.982481
dtype: float64

In [15]:
# Every second observation, walking backwards from the last
ts[::-2]

2017-06-27   -1.381705
2017-06-25   -0.500648
2017-06-23    1.030794
2017-06-21   -0.490975
dtype: float64

In [17]:
# Arithmetic aligns on the index: the result covers the union of both indexes,
# and dates present on only one side come out as NaN
ts + ts[::2]

2017-06-20   -3.443655
2017-06-21         NaN
2017-06-22    0.124156
2017-06-23         NaN
2017-06-24   -2.725844
2017-06-25         NaN
2017-06-26   -1.964961
2017-06-27         NaN
dtype: float64

#### 4.2 索引为日期的Series和DataFrame数据的索引、选取以及子集构造

In [18]:
# Show the full series again as the reference for the selection examples
ts

2017-06-20   -1.721828
2017-06-21   -0.490975
2017-06-22    0.062078
2017-06-23    1.030794
2017-06-24   -1.362922
2017-06-25   -0.500648
2017-06-26   -0.982481
2017-06-27   -1.381705
dtype: float64

In [26]:
# Four equivalent ways to read the third observation (2017-06-22):
# by Timestamp label, by integer position, and by two date-string spellings.
# Positional access goes through .iloc: a bare integer key on a DatetimeIndex
# (ts[2]) depends on a deprecated positional fallback in recent pandas.
ts[ts.index[2]], ts.iloc[2], ts['2017-06-22'], ts['20170622']

(0.062077970591989745,
 0.062077970591989745,
 0.062077970591989745,
 0.062077970591989745)

In [27]:
# Partial-string indexing: a year-month string selects every row in that month.
# Original author's note: the compact spelling ts['201706'] raised an error
# (may depend on the pandas version - verify before relying on it).
ts['2017-06']

2017-06-20   -1.721828
2017-06-21   -0.490975
2017-06-22    0.062078
2017-06-23    1.030794
2017-06-24   -1.362922
2017-06-25   -0.500648
2017-06-26   -0.982481
2017-06-27   -1.381705
dtype: float64

In [32]:
# Slicing by date labels is closed on both ends: the 2017-06-23 row is included
ts['2017-06-20':'2017-06-23']

2017-06-20   -1.721828
2017-06-21   -0.490975
2017-06-22    0.062078
2017-06-23    1.030794
dtype: float64

#### 4.3 带有重复索引的时间序列

In [34]:
# DatetimeIndex in which 2017-06-02 appears three times
dates = pd.DatetimeIndex(
    ['2017/06/01'] + ['2017/06/02'] * 3 + ['2017/06/03']
)
dates

DatetimeIndex(['2017-06-01', '2017-06-02', '2017-06-02', '2017-06-02',
               '2017-06-03'],
              dtype='datetime64[ns]', freq=None)

In [37]:
# Series of 0..4 placed on the index that contains repeated dates
dup_ts = pd.Series(data=np.arange(5), index=dates)
dup_ts

2017-06-01    0
2017-06-02    1
2017-06-02    2
2017-06-02    3
2017-06-03    4
dtype: int32

In [39]:
# Check whether the index labels are unique.
# Series.is_unique inspects the values (0..4 here, which are all distinct),
# so duplicate dates have to be detected on the index itself.
dup_ts.index.is_unique

False

In [40]:
# Selecting a duplicated date returns every row carrying that label
dup_ts['2017-06-02']

2017-06-02    1
2017-06-02    2
2017-06-02    3
dtype: int32

In [46]:
# Collapse duplicate timestamps: group on the index level, then average each group
per_date = dup_ts.groupby(level=0)
grouped = per_date.mean()
grouped

2017-06-01    0
2017-06-02    2
2017-06-03    4
dtype: int32

In [47]:
# DataFrame with 5 rows and 2 columns on the index that contains repeated dates
values = np.arange(10).reshape(5, 2)
dup_df = pd.DataFrame(values, index=dates)
dup_df

Unnamed: 0,0,1
2017-06-01,0,1
2017-06-02,2,3
2017-06-02,4,5
2017-06-02,6,7
2017-06-03,8,9


In [49]:
# Aggregate rows that share a timestamp: group on the index level, average per column
rows_per_date = dup_df.groupby(level=0)
grouped = rows_per_date.mean()
grouped

Unnamed: 0,0,1
2017-06-01,0,1
2017-06-02,4,5
2017-06-03,8,9


In [52]:
# Build a single pandas Timestamp from a date-time string
pd.Timestamp('2017-06-01 00:00:00')

Timestamp('2017-06-01 00:00:00')