In [8]:
import numpy as np
import pandas as pd
import datetime

# 时间戳对象

时间戳对象主要分为：时间戳（时间标量）、时间戳数组（时间序列）

# 时间戳、时间戳数组的构建

## 1.建立 时间戳 的不同方法

In [5]:
# Build a Timestamp from explicit keyword date components.
pd.Timestamp(
    year=2012,
    month=5,
    day=1,
)

Timestamp('2012-05-01 00:00:00')

In [6]:
# Same Timestamp, this time from positional (year, month, day) arguments.
pd.Timestamp(2012, 5, 1)

Timestamp('2012-05-01 00:00:00')

In [10]:
# Wrap a standard-library datetime object in a Timestamp.
pd.Timestamp(
    datetime.datetime(2012, 5, 1)
)

Timestamp('2012-05-01 00:00:00')

## 2.建立 时间周期 的不同方法

用时间周期建立的对象会根据时间数据的最小度量解析出周期的类型。<br>
也可以指定周期，那么时间数据的格式会相应改变

In [15]:
# The string stops at the month, so the Period is resolved as monthly ('M').
pd.Period(value='2012-01')

Period('2012-01', 'M')

In [16]:
# The string goes down to the day, so the Period is resolved as daily ('D').
pd.Period(value='2012-01-01')

Period('2012-01-01', 'D')

In [17]:
# An explicit freq overrides the inferred one: the month string becomes a daily Period.
pd.Period(
    '2012-05',
    freq='D',
)

Period('2012-05-01', 'D')

# 数组时间对象的构建

#### 数组时间对象可以作为某些函数的 index 参数传入，并被函数强制转换成 DatetimeIndex 或 PeriodIndex 类

# 时间戳序列创建

## 1.主动构造

### 1.1 用DatetimeIndex直接构造

In [39]:
# Basic form: build the index straight from a list of date strings.
pd.DatetimeIndex(
    data=['2018-01-01', '2018-01-03', '2018-01-05'],
)

DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq=None)

In [38]:
# Let pandas infer the frequency from the spacing of the values.
pd.DatetimeIndex(
    data=['2018-01-01', '2018-01-03', '2018-01-05'],
    freq='infer',
)

DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq='2D')

In [47]:
# Build a regular index from a start date, an end date and a step.
# pd.date_range is used because the start/end/freq range form of the
# DatetimeIndex constructor was deprecated in pandas 0.24 and removed in 1.0.
pd.date_range(start='2018-01-01', end='2018-01-07', freq='D')

  


DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07'],
              dtype='datetime64[ns]', freq='D')

## 2.外部数据转化

### 2.1 pd.Series对象转化

In [25]:
# A Series whose entries use different date formats (plus a missing value).
# Each element is parsed on its own: pandas >= 2.0 infers one format from the
# first value and raises ValueError when a later value does not match it, so
# the whole Series is not handed to pd.to_datetime in a single call.
# The missing entry (None) is shown as NaT in the result.
timeSeries = pd.Series(['Jul 31, 2009', '2010-01-10', None])
timeSeries.apply(pd.to_datetime)

0   2009-07-31
1   2010-01-10
2          NaT
dtype: datetime64[ns]

注意：将标量字符串传递给 pd.to_datetime 时，返回的是 pd.Timestamp 标量时间对象

### 2.2 list对象转化

In [27]:
# A plain list whose strings use different separators ('/' and '.').
# Each string is parsed on its own and the results are collected into a
# DatetimeIndex: passing the whole list to pd.to_datetime raises ValueError on
# pandas >= 2.0, which infers a single format from the first element.
timeList = ['2005/11/23', '2010.12.31']
pd.DatetimeIndex([pd.to_datetime(item) for item in timeList])

DatetimeIndex(['2005-11-23', '2010-12-31'], dtype='datetime64[ns]', freq=None)

### 2.3 将list、pd.Series传给函数的index参数

In [20]:
# Three consecutive days as a plain Python list of Timestamp objects.
dates = [
    pd.Timestamp(day)
    for day in ('2012-05-01', '2012-05-02', '2012-05-03')
]

In [48]:
# Pass the list of Timestamps as the index and inspect the resulting index type.
# One random integer in [0, 10) is drawn per date, so the values are real
# per-row data rather than a single scalar broadcast across every row.
pd.Series(data=np.random.randint(0, 10, size=len(dates)), index=dates).index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

### 2.4 将dataframe列中分散的时间组件组装

In [73]:
# Notes:
#   1. The order of the columns in the DataFrame does not affect parsing.
#   2. Column names must be the English words year, month, day, hour,
#      minute, second, ... (singular and plural forms are both accepted).

df = pd.DataFrame(
    [[2015, 4, 2, 2, 12],
     [2016, 5, 3, 3, 13]],
    columns=['years', 'days', 'month', 'hour', 'minutes'],
)

df

Unnamed: 0,years,days,month,hour,minutes
0,2015,4,2,2,12
1,2016,5,3,3,13


In [74]:
# Assemble one datetime per row from the separate component columns of df.
pd.to_datetime(arg=df)

0   2015-02-04 02:12:00
1   2016-03-05 03:13:00
dtype: datetime64[ns]

### 2.5 用 date_range / bdate_range 建立大范围的时间戳数组

In [76]:
# start / end must be datetime-like objects or strings.
# Calendar days: with no freq given, the printed index has freq='D'.
start, end = datetime.datetime(2011, 1, 1), datetime.datetime(2012, 1, 1)
pd.date_range(start=start, end=end)

DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
               '2011-01-05', '2011-01-06', '2011-01-07', '2011-01-08',
               '2011-01-09', '2011-01-10',
               ...
               '2011-12-23', '2011-12-24', '2011-12-25', '2011-12-26',
               '2011-12-27', '2011-12-28', '2011-12-29', '2011-12-30',
               '2011-12-31', '2012-01-01'],
              dtype='datetime64[ns]', length=366, freq='D')

In [81]:
# Business days (no public holidays are excluded by default). Passing your own
# holiday list together with freq='C' turns this into a custom business-day frequency.
holiday = [
    datetime.datetime(2011, 1, 5),
    datetime.datetime(2011, 3, 14),
]
pd.bdate_range(start=start, end=end, freq='C', holidays=holiday)

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-06', '2011-01-07',
               '2011-01-10', '2011-01-11', '2011-01-12', '2011-01-13',
               '2011-01-14', '2011-01-17',
               ...
               '2011-12-19', '2011-12-20', '2011-12-21', '2011-12-22',
               '2011-12-23', '2011-12-26', '2011-12-27', '2011-12-28',
               '2011-12-29', '2011-12-30'],
              dtype='datetime64[ns]', length=258, freq='C')