In [2]:
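# Periods and period arithmetic
# timedelta represents the difference between two times, e.g. timedelta(days=1) is a difference of one day
# and timedelta(seconds=2) is a difference of two seconds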
import numpy as np
import pandas as pd
from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse
from pandas import DataFrame,Series
from pandas.tseries.offsets import Day,Hour,Minute,MonthEnd

In [2]:
# A Period represents a whole time span; here it is the year 2007 with an
# annual frequency whose year ends in December.
# NOTE(review): recent pandas releases reportedly deprecate the 'A-*' annual
# aliases in favour of 'Y-*' — confirm before upgrading.
p = pd.Period(2007, freq='A-DEC')
p

Period('2007', 'A-DEC')

In [3]:
# First instant covered by the period.
p.start_time

Timestamp('2007-01-01 00:00:00')

In [4]:
# Last instant covered by the period (shown at nanosecond resolution).
p.end_time

Timestamp('2007-12-31 23:59:59.999999999')

In [5]:
# Adding an integer shifts the period forward by that many units of its frequency (years here).
p + 5

Period('2012', 'A-DEC')

In [6]:
# Subtracting an integer shifts the period backward by that many units of its frequency.
p - 2

Period('2005', 'A-DEC')

In [7]:
# Subtracting two periods of the same frequency gives the number of units between them
# (7 years in the recorded output).
pd.Period('2014',freq='A-DEC') - p

7

In [8]:
# Regular sequence of monthly periods covering January through June 2000.
rng = pd.period_range(start='1/1/2000', end='6/30/2000', freq='M')
rng

PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]', freq='M')

In [9]:
# For comparison: date_range with the same arguments yields month-end timestamps, not periods.
pd.date_range('1/1/2000','6/30/2000',freq='M')

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-30',
               '2000-05-31', '2000-06-30'],
              dtype='datetime64[ns]', freq='M')

In [10]:
# Integer arithmetic on a PeriodIndex shifts every element (one month later here).
rng+1

PeriodIndex(['2000-02', '2000-03', '2000-04', '2000-05', '2000-06', '2000-07'], dtype='period[M]', freq='M')

In [11]:
# Shift every element two months earlier.
rng-2

PeriodIndex(['1999-11', '1999-12', '2000-01', '2000-02', '2000-03', '2000-04'], dtype='period[M]', freq='M')

In [12]:
# A PeriodIndex can be used as the index of a Series.
# The values are random draws with no seed set in this cell, so they differ between runs.
Series(np.random.randn(6),index=rng)

2000-01   -0.171356
2000-02    0.146284
2000-03    0.786703
2000-04    1.108849
2000-05   -0.678325
2000-06    1.427651
Freq: M, dtype: float64

Q-DEC 与 Q-SEP 的区别如下图所示：二者都是季度频率，区别在于财年的结束月份不同（Q-DEC 的财年在 12 月结束，Q-SEP 的财年在 9 月结束），因此同一个季度标签对应的起止日期也不同。    
![Q-DEC](./Q-DEC.jpg)

In [13]:
# Quarter labels given as strings are parsed into a quarterly PeriodIndex
# whose year ends in December.
values = [
    '2001Q3',
    '2002Q2',
    '2003Q1',
]
index = pd.PeriodIndex(data=values, freq='Q-DEC')
index

PeriodIndex(['2001Q3', '2002Q2', '2003Q1'], dtype='period[Q-DEC]', freq='Q-DEC')

In [3]:
# Frequency conversion of periods.
# Both Period and PeriodIndex objects can be converted to another frequency with their asfreq method.
# Define an annual period to start from.
p = pd.Period('2007',freq='A-DEC')

In [5]:
# Convert it to the monthly period at the start of the year.
p.asfreq('M',how='start')

Period('2007-01', 'M')

In [6]:
# Convert it to the monthly period at the end of the year.
p.asfreq('M',how='end')

Period('2007-12', 'M')

In [7]:
# With 'A-JUN' the year ends in June, so period 2007 is the twelve months ending 2007-06-30.
p = pd.Period('2007',freq='A-JUN')
p.start_time

Timestamp('2006-07-01 00:00:00')

In [8]:
# The June-ending year closes at the last instant of June 2007.
p.end_time

Timestamp('2007-06-30 23:59:59.999999999')

In [9]:
# First month of the June-ending year (July of the previous calendar year).
p.asfreq('M',how='start')

Period('2006-07', 'M')

In [10]:
# Last month of the June-ending year.
p.asfreq('M',how='end')

Period('2007-06', 'M')

In [11]:
# Without an explicit how=, the conversion lands on the last day of the span.
p.asfreq('D')

Period('2007-06-30', 'D')

In [12]:
# how='start' selects the first day of the span instead.
p.asfreq('D',how='start')

Period('2006-07-01', 'D')

In [13]:
# A bare year string combined with monthly frequency resolves to the first month, January 2007.
p = pd.Period('2007','M')

In [14]:
# The monthly period starts on the first day of January 2007.
p.start_time

Timestamp('2007-01-01 00:00:00')

In [15]:
# ...and ends at the last instant of January 2007.
p.end_time

Timestamp('2007-01-31 23:59:59.999999999')

In [16]:
# When converting from a higher to a lower frequency, the enclosing period (the superperiod)
# is determined by where the subperiod falls.
# For example, under the 'A-JUN' frequency the month 2007-08 actually belongs to period 2008.
p = pd.Period('2007-08','M')

In [17]:
# Show the monthly period that is about to be converted.
p

Period('2007-08', 'M')

In [18]:
# August 2007 falls inside the June-ending year labelled 2008.
p.asfreq('A-JUN')

Period('2008', 'A-JUN')

In [23]:
# Frequency conversion of a PeriodIndex or a time series works the same way as for a single Period.
# Build an annual (December-ending) index for 2006-2009 and attach random values to it.
rng = pd.period_range('2006','2009',freq='A-DEC')
ts = Series(np.random.randn(len(rng)),index=rng)
ts

2006   -0.622746
2007    0.869713
2008   -0.334810
2009    0.198360
Freq: A-DEC, dtype: float64

In [24]:
# Annual -> monthly: with no how= given, each year maps to its last month.
ts.asfreq('M')

2006-12   -0.622746
2007-12    0.869713
2008-12   -0.334810
2009-12    0.198360
Freq: M, dtype: float64

In [25]:
# Annual -> daily: with no how= given, each year maps to its last day.
ts.asfreq('D')

2006-12-31   -0.622746
2007-12-31    0.869713
2008-12-31   -0.334810
2009-12-31    0.198360
Freq: D, dtype: float64

In [26]:
# Last business day of each year.
ts.asfreq('B',how='end')

2006-12-29   -0.622746
2007-12-31    0.869713
2008-12-31   -0.334810
2009-12-31    0.198360
Freq: B, dtype: float64

In [27]:
# First business day of each year.
ts.asfreq('B',how='start')

2006-01-02   -0.622746
2007-01-01    0.869713
2008-01-01   -0.334810
2009-01-01    0.198360
Freq: B, dtype: float64

In [28]:
# Same exercise with annual periods that end in November ('A-NOV').
rng = pd.period_range('2006','2009',freq='A-NOV')
ts = Series(np.random.randn(len(rng)),index=rng)
ts

2006    0.292354
2007   -0.888247
2008    0.914223
2009    0.429392
Freq: A-NOV, dtype: float64

In [29]:
# Each November-ending year maps to its final month, November.
ts.asfreq('M')

2006-11    0.292354
2007-11   -0.888247
2008-11    0.914223
2009-11    0.429392
Freq: M, dtype: float64

In [30]:
# First business day of each annual period; the periods start in December of the previous calendar year.
ts.asfreq('B',how='start')

2005-12-01    0.292354
2006-12-01   -0.888247
2007-12-03    0.914223
2008-12-01    0.429392
Freq: B, dtype: float64

In [32]:
# Inspect the November-ending annual index that is still bound to rng.
rng

PeriodIndex(['2006', '2007', '2008', '2009'], dtype='period[A-NOV]', freq='A-NOV')

In [33]:
# Format each period as a date string; in the recorded output the date is the last day of the period.
rng.strftime('%Y-%m-%d')

array(['2006-11-30', '2007-11-30', '2008-11-30', '2009-11-30'],
      dtype='<U10')

In [34]:
# Same formatting with a month/day/year pattern.
rng.strftime('%m/%d/%Y')

array(['11/30/2006', '11/30/2007', '11/30/2008', '11/30/2009'],
      dtype='<U10')

In [35]:
# Quarterly period frequencies.
# 'Q-JAN' denotes quarterly periods within a fiscal year that ends in January.
# '2012Q4' is therefore the fourth (final) quarter of fiscal year 2012: the quarter
# number is significant and, together with the JAN anchor, makes this period run
# from 2011-11-01 through 2012-01-31.
p = pd.Period('2012Q4',freq='Q-JAN')
p

Period('2012Q4', 'Q-JAN')

In [36]:
# The quarter begins on the first day of November 2011.
p.start_time

Timestamp('2011-11-01 00:00:00')

In [37]:
# ...and ends at the last instant of January 2012.
p.end_time

Timestamp('2012-01-31 23:59:59.999999999')

In [38]:
# Formatting the quarter as a date gives its last day in the recorded output.
p.strftime('%Y-%m-%d')

'2012-01-31'

In [39]:
# 's' is the short form of how='start': first month of the quarter.
p.asfreq('M','s')

Period('2011-11', 'M')

In [40]:
# 'e' is the short form of how='end': last month of the quarter.
p.asfreq('M','e')

Period('2012-01', 'M')

In [41]:
# First business day of the quarter.
p.asfreq('B','s')

Period('2011-11-01', 'B')

In [42]:
# Last business day of the quarter.
p.asfreq('B','e')

Period('2012-01-31', 'B')

In [44]:
# Period for 4 PM on the second-to-last business day of the quarter:
#   1. take the last business day of the quarter and step back one business day (2012-01-30),
#   2. convert to minute frequency, anchored at the start of that day,
#   3. add 16 * 60 minutes, i.e. 16 hours.
# 'min' is the minute-frequency alias; the single-letter spelling 'T' denotes the same
# frequency but is deprecated in recent pandas releases.
p4pm = (p.asfreq('B','e') - 1).asfreq('min','s') + 16*60
p4pm

Period('2012-01-30 16:00', 'T')

In [45]:
# Six consecutive quarters (2011Q3 through 2012Q4) of a fiscal year that ends in
# February, each paired with a random value.
rng = pd.period_range(start='2011Q3', end='2012Q4', freq='Q-FEB')
ts = Series(np.random.randn(rng.size), index=rng)
ts

2011Q3    1.008257
2011Q4   -1.430866
2012Q1    0.095793
2012Q2   -1.332407
2012Q3    0.232470
2012Q4    0.312922
Freq: Q-FEB, dtype: float64

In [48]:
# Start timestamp of every quarter in the index.
rng.start_time

DatetimeIndex(['2010-09-01', '2010-12-01', '2011-03-01', '2011-06-01',
               '2011-09-01', '2011-12-01'],
              dtype='datetime64[ns]', freq='QS-DEC')

In [50]:
# End timestamp of every quarter in the index.
rng.end_time

DatetimeIndex(['2010-11-30', '2011-02-28', '2011-05-31', '2011-08-31',
               '2011-11-30', '2012-02-29'],
              dtype='datetime64[ns]', freq='Q-NOV')

In [51]:
# For every quarter: 4 PM on the second-to-last business day.
# Last business day -> one business day earlier -> minute frequency at the start
# of that day -> plus 16 * 60 minutes (16 hours).
# 'min' is the minute-frequency alias; the single-letter spelling 'T' denotes the same
# frequency but is deprecated in recent pandas releases.
new_rng = (rng.asfreq('B','e') - 1).asfreq('min','s') + 16 * 60
new_rng

PeriodIndex(['2010-11-29 16:00', '2011-02-25 16:00', '2011-05-30 16:00',
             '2011-08-30 16:00', '2011-11-29 16:00', '2012-02-28 16:00'],
            dtype='period[T]', freq='T')

In [52]:
# Replace the quarterly index of ts with the minute-frequency 4 PM periods (assigns in place).
ts.index = new_rng
ts

2010-11-29 16:00    1.008257
2011-02-25 16:00   -1.430866
2011-05-30 16:00    0.095793
2011-08-30 16:00   -1.332407
2011-11-29 16:00    0.232470
2012-02-28 16:00    0.312922
Freq: T, dtype: float64

In [53]:
# Convert the period index into timestamps and assign it back to ts.
ts.index = new_rng.to_timestamp()
ts

2010-11-29 16:00:00    1.008257
2011-02-25 16:00:00   -1.430866
2011-05-30 16:00:00    0.095793
2011-08-30 16:00:00   -1.332407
2011-11-29 16:00:00    0.232470
2012-02-28 16:00:00    0.312922
dtype: float64

In [None]:
# Converting Timestamps to Periods (and back)

In [56]:
# With a start and a count, period_range yields three consecutive monthly periods.
pd.period_range('1/1/2000',freq='M',periods=3)

PeriodIndex(['2000-01', '2000-02', '2000-03'], dtype='period[M]', freq='M')

In [57]:
# The same arguments passed to date_range yield month-end timestamps instead.
pd.date_range('1/1/2000',freq='M',periods=3)

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31'], dtype='datetime64[ns]', freq='M')

In [58]:
# A series indexed by three month-end timestamps, filled with random values.
rng = pd.date_range('1/1/2000',periods=3,freq='M')
ts = Series(np.random.randn(3),index=rng)
ts

2000-01-31    0.988606
2000-02-29    0.125540
2000-03-31   -0.364573
Freq: M, dtype: float64

In [59]:
# to_period() is called without a frequency here; because rng is monthly, the
# resulting periods are monthly and the day component is dropped.
pts = ts.to_period()
pts

2000-01    0.988606
2000-02    0.125540
2000-03   -0.364573
Freq: M, dtype: float64

In [60]:
# Convert back to timestamps, anchored at the end of each period.
pts.to_timestamp(how='end')

2000-01-31    0.988606
2000-02-29    0.125540
2000-03-31   -0.364573
Freq: M, dtype: float64

In [61]:
# Six consecutive days straddling the January/February boundary, each paired
# with a random value.
rng = pd.date_range(start='1/29/2000', periods=6, freq='D')
ts2 = Series(np.random.randn(rng.size), index=rng)
ts2

2000-01-29    1.094845
2000-01-30    0.605318
2000-01-31   -0.916735
2000-02-01    0.161802
2000-02-02    0.690065
2000-02-03   -0.167699
Freq: D, dtype: float64

In [62]:
# With a daily index and no frequency argument, to_period() produces daily periods.
ts2.to_period()

2000-01-29    1.094845
2000-01-30    0.605318
2000-01-31   -0.916735
2000-02-01    0.161802
2000-02-02    0.690065
2000-02-03   -0.167699
Freq: D, dtype: float64

In [63]:
# An explicit target frequency may be given; several days then map to the same
# monthly period, so the resulting index contains duplicate labels.
ts2.to_period('M')

2000-01    1.094845
2000-01    0.605318
2000-01   -0.916735
2000-02    0.161802
2000-02    0.690065
2000-02   -0.167699
Freq: M, dtype: float64

In [None]:
# Building a PeriodIndex from arrays

In [64]:
# Load the macro dataset (the path is relative to the notebook) and preview the first rows;
# the year and quarter columns are used further down to build a PeriodIndex.
df = pd.read_csv('../dataset/macrodata.csv')
df.head()

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [65]:
# Build the index from the year and quarter columns; 'Q-DEC' means the year ends on 12/31.
# NOTE(review): newer pandas versions reportedly deprecate the year=/quarter= constructor
# keywords in favour of PeriodIndex.from_fields — confirm before upgrading.
index = pd.PeriodIndex(year=df.year,quarter=df.quarter,freq='Q-DEC')
index

PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', length=203, freq='Q-DEC')

In [66]:
# Use the quarterly PeriodIndex as the row index (this assigns to df in place).
df.index = index
df.head()

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
1959Q1,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1959Q2,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
1959Q3,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
1959Q4,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
1960Q1,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [67]:
# Check the other end of the frame: the recorded data runs through 2009Q3.
df.tail()

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
2008Q3,2008.0,3.0,13324.6,9267.7,1990.693,991.551,9838.3,216.889,1474.7,1.17,6.0,305.27,-3.16,4.33
2008Q4,2008.0,4.0,13141.92,9195.3,1857.661,1007.273,9920.4,212.174,1576.5,0.12,6.9,305.952,-8.79,8.91
2009Q1,2009.0,1.0,12925.41,9209.2,1558.494,996.287,9926.4,212.671,1592.8,0.22,8.1,306.547,0.94,-0.71
2009Q2,2009.0,2.0,12901.504,9189.0,1456.678,1023.528,10077.5,214.469,1653.6,0.18,9.2,307.226,3.37,-3.19
2009Q3,2009.0,3.0,12990.341,9256.0,1486.398,1044.088,10040.6,216.385,1673.9,0.12,9.6,308.013,3.56,-3.44
