# 时区处理

In [5]:
import os

import numpy as np
import pandas as pd
import pytz
from pandas import Series, DataFrame

In [2]:
pytz.common_timezones[-5:]

['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']

In [3]:
tz = pytz.timezone('US/Pacific')

In [4]:
tz

<DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>

## 本地化和转换

In [6]:
# Six consecutive daily timestamps starting 2012-03-09 09:30.
# No tz is passed, so the resulting index carries no time zone.
rng = pd.date_range(start='3/9/2012 9:30', periods=6, freq='D')

In [7]:
ts = Series(np.random.randn(len(rng)), index=rng)

In [9]:
print(ts.index.tz)

None


In [16]:
pd.date_range('3/9/2012 9:30', periods=10, freq='10D', tz='UTC')

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-19 09:30:00+00:00',
               '2012-03-29 09:30:00+00:00', '2012-04-08 09:30:00+00:00',
               '2012-04-18 09:30:00+00:00', '2012-04-28 09:30:00+00:00',
               '2012-05-08 09:30:00+00:00', '2012-05-18 09:30:00+00:00',
               '2012-05-28 09:30:00+00:00', '2012-06-07 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='10D')

In [17]:
# Attach the UTC zone to the naive index; the wall-clock values (09:30) are
# kept as-is and simply labelled +00:00.
ts_utc = ts.tz_localize(tz='UTC')

In [18]:
ts_utc

2012-03-09 09:30:00+00:00   -0.138025
2012-03-10 09:30:00+00:00   -0.272517
2012-03-11 09:30:00+00:00   -0.416808
2012-03-12 09:30:00+00:00    1.032040
2012-03-13 09:30:00+00:00    0.523901
2012-03-14 09:30:00+00:00   -1.430716
Freq: D, dtype: float64

In [19]:
ts_utc.index

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [20]:
# Re-express the same instants in US/Eastern. In the displayed result the
# offset moves from -05:00 to -04:00 on 2012-03-11.
ts_utc.tz_convert(tz='US/Eastern')

2012-03-09 04:30:00-05:00   -0.138025
2012-03-10 04:30:00-05:00   -0.272517
2012-03-11 05:30:00-04:00   -0.416808
2012-03-12 05:30:00-04:00    1.032040
2012-03-13 05:30:00-04:00    0.523901
2012-03-14 05:30:00-04:00   -1.430716
Freq: D, dtype: float64

In [21]:
ts_eastern = ts.tz_localize('US/Eastern')

In [22]:
ts_eastern

2012-03-09 09:30:00-05:00   -0.138025
2012-03-10 09:30:00-05:00   -0.272517
2012-03-11 09:30:00-04:00   -0.416808
2012-03-12 09:30:00-04:00    1.032040
2012-03-13 09:30:00-04:00    0.523901
2012-03-14 09:30:00-04:00   -1.430716
Freq: D, dtype: float64

In [23]:
ts_eastern.tz_convert('UTC')

2012-03-09 14:30:00+00:00   -0.138025
2012-03-10 14:30:00+00:00   -0.272517
2012-03-11 13:30:00+00:00   -0.416808
2012-03-12 13:30:00+00:00    1.032040
2012-03-13 13:30:00+00:00    0.523901
2012-03-14 13:30:00+00:00   -1.430716
Freq: D, dtype: float64

In [24]:
ts_eastern

2012-03-09 09:30:00-05:00   -0.138025
2012-03-10 09:30:00-05:00   -0.272517
2012-03-11 09:30:00-04:00   -0.416808
2012-03-12 09:30:00-04:00    1.032040
2012-03-13 09:30:00-04:00    0.523901
2012-03-14 09:30:00-04:00   -1.430716
Freq: D, dtype: float64

In [25]:
ts_eastern.tz_convert('Europe/Berlin')

2012-03-09 15:30:00+01:00   -0.138025
2012-03-10 15:30:00+01:00   -0.272517
2012-03-11 14:30:00+01:00   -0.416808
2012-03-12 14:30:00+01:00    1.032040
2012-03-13 14:30:00+01:00    0.523901
2012-03-14 14:30:00+01:00   -1.430716
Freq: D, dtype: float64

In [26]:
ts.index.tz_localize('Asia/Shanghai')

DatetimeIndex(['2012-03-09 09:30:00+08:00', '2012-03-10 09:30:00+08:00',
               '2012-03-11 09:30:00+08:00', '2012-03-12 09:30:00+08:00',
               '2012-03-13 09:30:00+08:00', '2012-03-14 09:30:00+08:00'],
              dtype='datetime64[ns, Asia/Shanghai]', freq='D')

## 操作时区意识型 Timestamp 对象

In [30]:
stamp = pd.Timestamp('2011-03-12 04:00')

In [31]:
stamp_utc = stamp.tz_localize('utc')

In [33]:
stamp_utc.tz_convert('US/Eastern')

Timestamp('2011-03-11 23:00:00-0500', tz='US/Eastern')

In [34]:
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')

In [35]:
stamp_moscow

Timestamp('2011-03-12 04:00:00+0300', tz='Europe/Moscow')

In [36]:
stamp_utc.value

1299902400000000000

In [38]:
stamp_utc.tz_convert('US/Eastern').value

1299902400000000000

In [40]:
from pandas.tseries.offsets import Hour

In [42]:
# Tz-aware timestamp in US/Eastern. 2012-03-12 is the day after the offset
# switched to -04:00 (2012-03-11), so adding an hour to this value does not
# cross a DST boundary.
stamp = pd.Timestamp(ts_input='2012-03-12 01:30', tz='US/Eastern')

In [43]:
stamp

Timestamp('2012-03-12 01:30:00-0400', tz='US/Eastern')

In [44]:
stamp + Hour()

Timestamp('2012-03-12 02:30:00-0400', tz='US/Eastern')

In [46]:
# Tz-aware timestamp shortly before the autumn offset change: it is still
# at -04:00 here, while two hours later the wall clock reads 01:30 at -05:00.
stamp = pd.Timestamp(ts_input='2012-11-04 00:30', tz='US/Eastern')

In [47]:
stamp

Timestamp('2012-11-04 00:30:00-0400', tz='US/Eastern')

In [48]:
stamp + 2 * Hour()

Timestamp('2012-11-04 01:30:00-0500', tz='US/Eastern')

## 不同时区之间的运算

In [49]:
# Ten business-day timestamps (weekends skipped) starting 2012-03-07 09:30.
rng = pd.date_range(start='3/7/2012 9:30', periods=10, freq='B')

In [50]:
ts = Series(np.random.randn(len(rng)), index=rng)

In [51]:
ts

2012-03-07 09:30:00    0.108390
2012-03-08 09:30:00   -1.041413
2012-03-09 09:30:00    0.361084
2012-03-12 09:30:00    0.241353
2012-03-13 09:30:00   -1.109962
2012-03-14 09:30:00    0.548051
2012-03-15 09:30:00    1.151170
2012-03-16 09:30:00   -1.206106
2012-03-19 09:30:00   -0.290913
2012-03-20 09:30:00   -0.569070
Freq: B, dtype: float64

In [52]:
ts1 = ts[:7].tz_localize('Europe/London')

In [54]:
ts1

2012-03-07 09:30:00+00:00    0.108390
2012-03-08 09:30:00+00:00   -1.041413
2012-03-09 09:30:00+00:00    0.361084
2012-03-12 09:30:00+00:00    0.241353
2012-03-13 09:30:00+00:00   -1.109962
2012-03-14 09:30:00+00:00    0.548051
2012-03-15 09:30:00+00:00    1.151170
Freq: B, dtype: float64

In [53]:
ts2 = ts1[2:].tz_convert('Europe/Moscow')

In [55]:
ts2

2012-03-09 13:30:00+04:00    0.361084
2012-03-12 13:30:00+04:00    0.241353
2012-03-13 13:30:00+04:00   -1.109962
2012-03-14 13:30:00+04:00    0.548051
2012-03-15 13:30:00+04:00    1.151170
Freq: B, dtype: float64

In [56]:
# Element-wise sum of two series localized to different zones. Rows are
# matched on the underlying instant, labels present in only one operand
# come out as NaN, and the resulting index is expressed in UTC.
result = ts1.add(ts2)

In [58]:
result

2012-03-07 09:30:00+00:00         NaN
2012-03-08 09:30:00+00:00         NaN
2012-03-09 09:30:00+00:00    0.722168
2012-03-12 09:30:00+00:00    0.482705
2012-03-13 09:30:00+00:00   -2.219924
2012-03-14 09:30:00+00:00    1.096101
2012-03-15 09:30:00+00:00    2.302340
Freq: B, dtype: float64

In [59]:
result.index

DatetimeIndex(['2012-03-07 09:30:00+00:00', '2012-03-08 09:30:00+00:00',
               '2012-03-09 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='B')

# 时期及其算术运算

In [64]:
p = pd.Period(2007, freq='A-DEC')

In [65]:
p

Period('2007', 'A-DEC')

In [66]:
p + 5

Period('2012', 'A-DEC')

In [67]:
p -2

Period('2005', 'A-DEC')

In [69]:
pd.Period('2014', freq='A-DEC') - p

7

In [71]:
rng = pd.period_range('1/1/2000', '6/30/2000', freq='M')

In [72]:
rng

PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]', freq='M')

In [74]:
Series(np.random.randn(6), index=rng)

2000-01   -0.571914
2000-02   -1.321160
2000-03   -0.995661
2000-04   -0.301000
2000-05   -0.405346
2000-06    2.032887
Freq: M, dtype: float64

In [75]:
values = ['2001Q3', '2002Q2', '2003Q1']

In [78]:
index = pd.PeriodIndex(values, freq='Q-DEC')

In [80]:
index

PeriodIndex(['2001Q3', '2002Q2', '2003Q1'], dtype='period[Q-DEC]', freq='Q-DEC')

## 时期的频率转换

In [81]:
p = pd.Period('2007', freq='A-DEC')

In [82]:
p

Period('2007', 'A-DEC')

In [84]:
p.asfreq('M', how='start')

Period('2007-01', 'M')

In [85]:
p.asfreq('M', how='end')

Period('2007-12', 'M')

In [87]:
p = pd.Period('2007', freq='A-JUN')

In [88]:
p

Period('2007', 'A-JUN')

In [89]:
p.asfreq('M', 'start')

Period('2006-07', 'M')

In [90]:
p.asfreq('M', 'end')

Period('2007-06', 'M')

In [91]:
p = pd.Period('2007-08', 'M')

In [92]:
p.asfreq('A-JUN')

Period('2008', 'A-JUN')

In [96]:
rng = pd.period_range('2006', '2009', freq='A-DEC')

In [98]:
ts = Series(np.random.randn(len(rng)), index=rng)

In [99]:
ts

2006   -1.219027
2007   -0.288526
2008    0.985427
2009   -0.561623
Freq: A-DEC, dtype: float64

In [100]:
ts.asfreq('M', how='start')

2006-01   -1.219027
2007-01   -0.288526
2008-01    0.985427
2009-01   -0.561623
Freq: M, dtype: float64

In [101]:
ts.asfreq('B', how='end')

2006-12-29   -1.219027
2007-12-31   -0.288526
2008-12-31    0.985427
2009-12-31   -0.561623
Freq: B, dtype: float64

## 按季度计算的时期频率

In [105]:
# Fourth quarter of a fiscal year that ends in January ('Q-JAN'):
# it spans 2011-11-01 through 2012-01-31.
p = pd.Period(value='2012Q4', freq='Q-JAN')

In [106]:
p

Period('2012Q4', 'Q-JAN')

In [107]:
p.asfreq('D', 'start')

Period('2011-11-01', 'D')

In [108]:
p.asfreq('D', 'end')

Period('2012-01-31', 'D')

In [110]:
# 16:00 on the second-to-last business day of the quarter.
p4pm = (
    (p.asfreq('B', 'e') - 1)  # last business day of the quarter, stepped back one business day
    .asfreq('T', 's')         # first minute of that business day
    + 16 * 60                 # move forward 960 minutes, i.e. to 16:00
)

In [111]:
p4pm

Period('2012-01-30 16:00', 'T')

In [112]:
p4pm.to_timestamp()

Timestamp('2012-01-30 16:00:00')

In [113]:
# Six consecutive quarters, 2011Q3 through 2012Q4, on a fiscal calendar
# whose year ends in January.
rng = pd.period_range(start='2011Q3', end='2012Q4', freq='Q-JAN')

In [115]:
ts = Series(np.arange(len(rng)), index=rng)

In [116]:
ts

2011Q3    0
2011Q4    1
2012Q1    2
2012Q2    3
2012Q3    4
2012Q4    5
Freq: Q-JAN, dtype: int64

In [117]:
# For every quarter in rng: 16:00 on its second-to-last business day.
new_rng = (
    (rng.asfreq('B', 'e') - 1)  # last business day of each quarter, stepped back one business day
    .asfreq('T', 's')           # first minute of that business day
    + 16 * 60                   # move forward 960 minutes, i.e. to 16:00
)

In [118]:
new_rng

PeriodIndex(['2010-10-28 16:00', '2011-01-28 16:00', '2011-04-28 16:00',
             '2011-07-28 16:00', '2011-10-28 16:00', '2012-01-30 16:00'],
            dtype='period[T]', freq='T')

In [119]:
ts.index = new_rng.to_timestamp()

In [120]:
ts

2010-10-28 16:00:00    0
2011-01-28 16:00:00    1
2011-04-28 16:00:00    2
2011-07-28 16:00:00    3
2011-10-28 16:00:00    4
2012-01-30 16:00:00    5
dtype: int64

## 将 Timestamp 转换为 Period （及其反向过程）

In [123]:
rng = pd.date_range('1/1/2000', periods=3, freq='M')

In [125]:
ts = Series(np.random.randn(3), index=rng)

In [126]:
ts

2000-01-31    1.528898
2000-02-29   -0.266126
2000-03-31   -0.682194
Freq: M, dtype: float64

In [127]:
pts = ts.to_period()

In [128]:
pts

2000-01    1.528898
2000-02   -0.266126
2000-03   -0.682194
Freq: M, dtype: float64

In [129]:
# Six daily timestamps from 2000-01-29, deliberately straddling the
# January/February month boundary.
rng = pd.date_range(start='1/29/2000', periods=6, freq='D')

In [130]:
ts2 = Series(np.random.randn(6), index=rng)

In [131]:
ts2

2000-01-29    0.360011
2000-01-30   -1.186176
2000-01-31    0.046586
2000-02-01   -1.504945
2000-02-02   -0.970356
2000-02-03    1.268902
Freq: D, dtype: float64

In [132]:
ts2.to_period('M')

2000-01    0.360011
2000-01   -1.186176
2000-01    0.046586
2000-02   -1.504945
2000-02   -0.970356
2000-02    1.268902
Freq: M, dtype: float64

In [134]:
pts = ts.to_period()

In [136]:
ts

2000-01-31    1.528898
2000-02-29   -0.266126
2000-03-31   -0.682194
Freq: M, dtype: float64

In [135]:
pts

2000-01    1.528898
2000-02   -0.266126
2000-03   -0.682194
Freq: M, dtype: float64

In [137]:
pts.to_timestamp(how='end')

2000-01-31    1.528898
2000-02-29   -0.266126
2000-03-31   -0.682194
Freq: M, dtype: float64

## 通过数组创建 PeriodIndex

In [138]:
# Load the macro data set (the cells below use its year, quarter and infl columns).
# The original absolute path stays as the default so existing runs are
# unaffected; set the MACRODATA_CSV environment variable to read the file
# from another location without editing the notebook.
macrodata_path = os.environ.get('MACRODATA_CSV', '/Users/wonderful/Desktop/macrodata.csv')
data = pd.read_csv(macrodata_path)

In [139]:
data.year

0      1959.0
1      1959.0
2      1959.0
3      1959.0
4      1960.0
5      1960.0
6      1960.0
7      1960.0
8      1961.0
9      1961.0
10     1961.0
11     1961.0
12     1962.0
13     1962.0
14     1962.0
15     1962.0
16     1963.0
17     1963.0
18     1963.0
19     1963.0
20     1964.0
21     1964.0
22     1964.0
23     1964.0
24     1965.0
25     1965.0
26     1965.0
27     1965.0
28     1966.0
29     1966.0
        ...  
173    2002.0
174    2002.0
175    2002.0
176    2003.0
177    2003.0
178    2003.0
179    2003.0
180    2004.0
181    2004.0
182    2004.0
183    2004.0
184    2005.0
185    2005.0
186    2005.0
187    2005.0
188    2006.0
189    2006.0
190    2006.0
191    2006.0
192    2007.0
193    2007.0
194    2007.0
195    2007.0
196    2008.0
197    2008.0
198    2008.0
199    2008.0
200    2009.0
201    2009.0
202    2009.0
Name: year, dtype: float64

In [141]:
data.quarter

0      1.0
1      2.0
2      3.0
3      4.0
4      1.0
5      2.0
6      3.0
7      4.0
8      1.0
9      2.0
10     3.0
11     4.0
12     1.0
13     2.0
14     3.0
15     4.0
16     1.0
17     2.0
18     3.0
19     4.0
20     1.0
21     2.0
22     3.0
23     4.0
24     1.0
25     2.0
26     3.0
27     4.0
28     1.0
29     2.0
      ... 
173    2.0
174    3.0
175    4.0
176    1.0
177    2.0
178    3.0
179    4.0
180    1.0
181    2.0
182    3.0
183    4.0
184    1.0
185    2.0
186    3.0
187    4.0
188    1.0
189    2.0
190    3.0
191    4.0
192    1.0
193    2.0
194    3.0
195    4.0
196    1.0
197    2.0
198    3.0
199    4.0
200    1.0
201    2.0
202    3.0
Name: quarter, dtype: float64

In [142]:
# Build a quarterly PeriodIndex by combining the separate year and quarter
# columns (both are shown above as float64) into one period per row.
# NOTE(review): pandas 2.2+ deprecates the year=/quarter= constructor keywords
# in favour of pd.PeriodIndex.from_fields(...) -- confirm the target pandas
# version before upgrading this cell.
index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC')

In [143]:
index

PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', length=203, freq='Q-DEC')

In [144]:
data.index = index

In [145]:
data.infl

1959Q1    0.00
1959Q2    2.34
1959Q3    2.74
1959Q4    0.27
1960Q1    2.31
1960Q2    0.14
1960Q3    2.70
1960Q4    1.21
1961Q1   -0.40
1961Q2    1.47
1961Q3    0.80
1961Q4    0.80
1962Q1    2.26
1962Q2    0.13
1962Q3    2.11
1962Q4    0.79
1963Q1    0.53
1963Q2    2.75
1963Q3    0.78
1963Q4    2.46
1964Q1    0.13
1964Q2    0.90
1964Q3    1.29
1964Q4    2.05
1965Q1    1.28
1965Q2    2.54
1965Q3    0.89
1965Q4    2.90
1966Q1    4.99
1966Q2    2.10
          ... 
2002Q2    1.56
2002Q3    2.66
2002Q4    3.08
2003Q1    1.31
2003Q2    1.09
2003Q3    2.60
2003Q4    3.02
2004Q1    2.35
2004Q2    3.61
2004Q3    3.58
2004Q4    2.09
2005Q1    4.15
2005Q2    1.85
2005Q3    9.14
2005Q4    0.40
2006Q1    2.60
2006Q2    3.97
2006Q3   -1.58
2006Q4    3.30
2007Q1    4.58
2007Q2    2.75
2007Q3    3.45
2007Q4    6.38
2008Q1    2.82
2008Q2    8.53
2008Q3   -3.16
2008Q4   -8.79
2009Q1    0.94
2009Q2    3.37
2009Q3    3.56
Freq: Q-DEC, Name: infl, dtype: float64