In [3]:
import pandas as pd
import numpy as np

In [2]:
import pytz

In [3]:
# Peek at the last five entries of pytz's list of commonly used time zone names.
pytz.common_timezones[-5:]

['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']

In [5]:
# Look up a pytz time zone object by its name.
tz = pytz.timezone('America/New_York')

In [6]:
tz

<DstTzInfo 'America/New_York' LMT-1 day, 19:04:00 STD>

In [7]:
# 11.4.1 Time zone localization and conversion

In [8]:
# Six consecutive daily timestamps at 09:30; no time zone is attached (naive).
rng = pd.date_range(start='3/9/2012 9:30', periods=6, freq='D')

# One random draw per timestamp, indexed by the naive timestamps above.
ts = pd.Series(data=np.random.randn(rng.size), index=rng)

In [9]:
ts

2012-03-09 09:30:00    2.325487
2012-03-10 09:30:00   -1.116720
2012-03-11 09:30:00   -1.211968
2012-03-12 09:30:00    1.049266
2012-03-13 09:30:00    0.335826
2012-03-14 09:30:00    0.712281
Freq: D, dtype: float64

In [10]:
# The index carries no time zone yet, so .tz is None. print() is used because a
# bare None as the last expression produces no cell output.
print(ts.index.tz)

None


In [11]:
# Passing tz= to date_range produces a time-zone-aware index directly.
pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00', '2012-03-16 09:30:00+00:00',
               '2012-03-17 09:30:00+00:00', '2012-03-18 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [12]:
# A naive (time-zone-unaware) series is converted to a localized one with the tz_localize method

In [13]:
ts

2012-03-09 09:30:00    2.325487
2012-03-10 09:30:00   -1.116720
2012-03-11 09:30:00   -1.211968
2012-03-12 09:30:00    1.049266
2012-03-13 09:30:00    0.335826
2012-03-14 09:30:00    0.712281
Freq: D, dtype: float64

In [14]:
# Interpret the naive timestamps as UTC; the wall-clock values stay the same
# and gain a +00:00 offset.
ts_utc = ts.tz_localize('UTC')

In [15]:
ts_utc

2012-03-09 09:30:00+00:00    2.325487
2012-03-10 09:30:00+00:00   -1.116720
2012-03-11 09:30:00+00:00   -1.211968
2012-03-12 09:30:00+00:00    1.049266
2012-03-13 09:30:00+00:00    0.335826
2012-03-14 09:30:00+00:00    0.712281
Freq: D, dtype: float64

In [16]:
ts_utc.index

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [18]:
# Once a series is localized, tz_convert re-expresses it in another time zone.
ts_utc.tz_convert('America/New_York')

2012-03-09 04:30:00-05:00    2.325487
2012-03-10 04:30:00-05:00   -1.116720
2012-03-11 05:30:00-04:00   -1.211968
2012-03-12 05:30:00-04:00    1.049266
2012-03-13 05:30:00-04:00    0.335826
2012-03-14 05:30:00-04:00    0.712281
Freq: D, dtype: float64

In [19]:
# Interpret the same naive timestamps as New York wall-clock time instead of UTC.
ts_eastern = ts.tz_localize('America/New_York')

In [20]:
# 09:30 in New York is 14:30 UTC for the first two rows and 13:30 UTC from
# 2012-03-11 on, because the New York UTC offset changes within this range.
ts_eastern.tz_convert('UTC')

2012-03-09 14:30:00+00:00    2.325487
2012-03-10 14:30:00+00:00   -1.116720
2012-03-11 13:30:00+00:00   -1.211968
2012-03-12 13:30:00+00:00    1.049266
2012-03-13 13:30:00+00:00    0.335826
2012-03-14 13:30:00+00:00    0.712281
dtype: float64

In [21]:
ts_eastern.tz_convert('Europe/Berlin')

2012-03-09 15:30:00+01:00    2.325487
2012-03-10 15:30:00+01:00   -1.116720
2012-03-11 14:30:00+01:00   -1.211968
2012-03-12 14:30:00+01:00    1.049266
2012-03-13 14:30:00+01:00    0.335826
2012-03-14 14:30:00+01:00    0.712281
dtype: float64

In [22]:
# tz_localize is also available directly on the DatetimeIndex.
ts.index.tz_localize('Asia/Shanghai')

DatetimeIndex(['2012-03-09 09:30:00+08:00', '2012-03-10 09:30:00+08:00',
               '2012-03-11 09:30:00+08:00', '2012-03-12 09:30:00+08:00',
               '2012-03-13 09:30:00+08:00', '2012-03-14 09:30:00+08:00'],
              dtype='datetime64[ns, Asia/Shanghai]', freq=None)

In [23]:
# 11.4.2 Operations with time zone-aware Timestamp objects

In [24]:
stamp = pd.Timestamp('2011-03-12 04:00')

In [25]:
# A single Timestamp can be localized just like a series.
stamp_utc = stamp.tz_localize('utc')

In [26]:
stamp_utc.tz_convert('America/New_York')

Timestamp('2011-03-11 23:00:00-0500', tz='America/New_York')

In [27]:
# The time zone can also be supplied when the Timestamp is constructed.
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')

In [28]:
stamp_moscow

Timestamp('2011-03-12 04:00:00+0300', tz='Europe/Moscow')

In [29]:
stamp_utc.value

1299902400000000000

In [31]:
# Converting to another zone does not change .value: the integer printed here
# is identical to stamp_utc.value above.
stamp_utc.tz_convert('America/New_York').value

1299902400000000000

In [32]:
from pandas.tseries.offsets import Hour

In [33]:
stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')
stamp

Timestamp('2012-03-12 01:30:00-0400', tz='US/Eastern')

In [34]:
# Adding an Hour offset moves the timestamp forward one hour (01:30 -> 02:30);
# the UTC offset stays at -04:00 here.
stamp + Hour()

Timestamp('2012-03-12 02:30:00-0400', tz='US/Eastern')

In [35]:
# 00:30 local time on 2012-11-04; US/Eastern is still at UTC-04:00 at this moment.
stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')
stamp

Timestamp('2012-11-04 00:30:00-0400', tz='US/Eastern')

In [36]:
# Two elapsed hours later the wall clock reads only 01:30, because the UTC
# offset has moved from -04:00 to -05:00 in between.
stamp + 2 * Hour()

Timestamp('2012-11-04 01:30:00-0500', tz='US/Eastern')

In [37]:
# 11.4.3 Operations between different time zones

In [21]:
# Ten business-day timestamps at 09:30 starting 2012-03-07 (weekends are skipped).
rng = pd.date_range(start='3/7/2012 9:30', periods=10, freq='B')

In [22]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)

In [27]:
# Localize the naive business-day series to London time. The variable name and
# the stored outputs of the following cells (09:30+00:00 in Europe/London,
# 13:30+04:00 in Europe/Moscow) correspond to a London localization; localizing
# to New York would give 14:30/13:30 London times instead.
ts_lon = ts.tz_localize('Europe/London')

In [35]:
# First seven rows, expressed in London time.
ts1 = ts_lon[:7].tz_convert('Europe/London')
ts1

2012-03-07 09:30:00+00:00   -1.907034
2012-03-08 09:30:00+00:00   -0.140724
2012-03-09 09:30:00+00:00   -0.263336
2012-03-12 09:30:00+00:00   -0.233213
2012-03-13 09:30:00+00:00    1.273402
2012-03-14 09:30:00+00:00    0.801555
2012-03-15 09:30:00+00:00   -0.645299
dtype: float64

In [36]:
# Rows from position 2 onward, expressed in Moscow time.
ts2 = ts_lon[2:].tz_convert('Europe/Moscow')
ts2

2012-03-09 13:30:00+04:00   -0.263336
2012-03-12 13:30:00+04:00   -0.233213
2012-03-13 13:30:00+04:00    1.273402
2012-03-14 13:30:00+04:00    0.801555
2012-03-15 13:30:00+04:00   -0.645299
2012-03-16 13:30:00+04:00    0.338384
2012-03-19 13:30:00+04:00    0.703581
2012-03-20 13:30:00+04:00    0.214767
dtype: float64

In [37]:
# Add two series that carry different time zones; the resulting index is
# reported in UTC (see result.index below).
result = ts1 + ts2

In [38]:
result.index

DatetimeIndex(['2012-03-07 09:30:00+00:00', '2012-03-08 09:30:00+00:00',
               '2012-03-09 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00', '2012-03-16 09:30:00+00:00',
               '2012-03-19 09:30:00+00:00', '2012-03-20 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)

In [57]:
# 11.5 Periods and period arithmetic

In [67]:
# A Period representing the whole of 2007, with annual frequency ending in December.
p = pd.Period(2007, freq='A-DEC')

In [68]:
p

Period('2007', 'A-DEC')

In [69]:
p + 5

Period('2012', 'A-DEC')

In [70]:
p - 2

Period('2005', 'A-DEC')

In [71]:
# 2014 - 2007 is a difference of 7 periods (not 6). This pandas version returns
# an offset object (7 * YearEnd); the original note records that this differs
# from the output shown in the book.
pd.Period('2014', freq='A-DEC') - p

<7 * YearEnds: month=12>

In [72]:
# Monthly periods covering January through June 2000.
rng2 = pd.period_range(start='2000-01-01', end='2000-06-30', freq='M')

In [73]:
rng2

PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]', freq='M')

In [74]:
pd.Series(np.random.randn(6), index=rng2)

2000-01   -0.090118
2000-02    1.095542
2000-03   -0.240521
2000-04    1.023326
2000-05   -0.087211
2000-06    0.631958
Freq: M, dtype: float64

In [75]:
values = ['2001Q3', '2002Q2', '2003Q1']

In [77]:
index = pd.PeriodIndex(values, freq='Q-DEC')

In [78]:
index

PeriodIndex(['2001Q3', '2002Q2', '2003Q1'], dtype='period[Q-DEC]', freq='Q-DEC')

In [79]:
# 11.5.1 Period frequency conversion

In [80]:
p

Period('2007', 'A-DEC')

In [83]:
# Convert the annual period to monthly frequency, taking the first month of the year.
p.asfreq('M', how='start')

Period('2007-01', 'M')

In [84]:
p.asfreq('M', how='end')

Period('2007-12', 'M')

In [85]:
# A-JUN: annual frequency whose year ends in June, so period 2007 runs from
# 2006-07 through 2007-06 (see the asfreq results below).
p1 = pd.Period('2007', freq='A-JUN')

In [86]:
p1

Period('2007', 'A-JUN')

In [87]:
p1.asfreq('M', 'start')

Period('2006-07', 'M')

In [88]:
p1.asfreq('M', 'end')

Period('2007-06', 'M')

In [89]:
p2 = pd.Period('Aug-2007', 'M')

In [90]:
# With years ending in June, August 2007 falls in the period labelled 2008.
p2.asfreq('A-JUN')

Period('2008', 'A-JUN')

In [91]:
rng3= pd.period_range('2006', '2009', freq='A-DEC')

In [93]:
ts4 = pd.Series(np.random.randn(len(rng3)), index=rng3)

In [94]:
ts4

2006   -1.030375
2007   -0.607964
2008   -0.276414
2009    0.146860
Freq: A-DEC, dtype: float64

In [95]:
# Relabel each annual period with the monthly period at its start (January).
ts4.asfreq('M', how='start')

2006-01   -1.030375
2007-01   -0.607964
2008-01   -0.276414
2009-01    0.146860
Freq: M, dtype: float64

In [96]:
# Relabel each annual period with the business-day period at its end
# (2006 maps to 2006-12-29 rather than 12-31 in the output).
ts4.asfreq('B', how='end')

2006-12-29   -1.030375
2007-12-31   -0.607964
2008-12-31   -0.276414
2009-12-31    0.146860
Freq: B, dtype: float64

In [97]:
# 11.5.2 Quarterly period frequencies

In [98]:
# Q-JAN: the fiscal year ends in January, so its quarters start in February,
# May, August and November; 2012Q4 therefore spans 2011-11-01 .. 2012-01-31.
p3 = pd.Period('2012Q4', freq='Q-JAN')

In [99]:
p3

Period('2012Q4', 'Q-JAN')

In [100]:
p3.asfreq('D', 'start')

Period('2011-11-01', 'D')

In [101]:
p3.asfreq('D', 'end')

Period('2012-01-31', 'D')

In [102]:
# 4 PM on the second-to-last business day of the quarter:
#   quarter -> its last business day -> one business day earlier
#   -> first minute of that day -> plus 16 * 60 minutes.
# 'min' replaces the 'T' alias, which newer pandas releases deprecate; both
# denote the same minute frequency, so the result is unchanged.
p4pm = (p3.asfreq('B', 'end') - 1).asfreq('min', 'start') + 16 * 60

In [103]:
# 4 PM on the second-to-last business day of the quarter (the `- 1` in the
# expression that builds p4pm steps back one business day from the quarter end).
p4pm

Period('2012-01-30 16:00', 'T')

In [104]:
p4pm.to_timestamp()

Timestamp('2012-01-30 16:00:00')

In [105]:
# Six quarterly periods, 2011Q3 through 2012Q4, on a fiscal year ending in January.
rng4 = pd.period_range(start='2011Q3', end='2012Q4', freq='Q-JAN')

In [106]:
ts5 = pd.Series(np.random.randn(len(rng4)), index=rng4)

In [107]:
ts5

2011Q3   -0.024578
2011Q4    0.252440
2012Q1   -0.472483
2012Q2    1.063581
2012Q3   -0.390465
2012Q4   -0.484375
Freq: Q-JAN, dtype: float64

In [108]:
# Same computation as for p4pm, applied to every quarter in rng4: 4 PM on the
# second-to-last business day of each quarter.
# 'min' replaces the 'T' alias, which newer pandas releases deprecate; both
# denote the same minute frequency, so the result is unchanged.
new_rng = (rng4.asfreq('B', 'end') - 1).asfreq('min', 'start') + 16 * 60

In [109]:
# Replace the quarterly PeriodIndex with the corresponding timestamps.
# Note: this mutates ts5 in place.
ts5.index = new_rng.to_timestamp()

In [110]:
ts5

2010-10-28 16:00:00   -0.024578
2011-01-28 16:00:00    0.252440
2011-04-28 16:00:00   -0.472483
2011-07-28 16:00:00    1.063581
2011-10-28 16:00:00   -0.390465
2012-01-30 16:00:00   -0.484375
dtype: float64

In [111]:
# 11.5.3 Converting between timestamps and periods

In [113]:
rng5 = pd.date_range('2000-01-01', periods=3, freq='M')

In [114]:
ts6 = pd.Series(np.random.randn(3), index=rng5)

In [115]:
ts6

2000-01-31   -0.096263
2000-02-29   -0.439319
2000-03-31   -0.994577
Freq: M, dtype: float64

In [116]:
# Convert the timestamp-indexed series to periods; with no freq argument the
# result here is monthly, matching the index's own frequency.
pts = ts6.to_period()

In [117]:
pts

2000-01   -0.096263
2000-02   -0.439319
2000-03   -0.994577
Freq: M, dtype: float64

In [118]:
# Six consecutive days starting 2000-01-29, so the range crosses a month boundary.
rng6 = pd.date_range(start='1/29/2000', periods=6, freq='D')

In [119]:
ts7 = pd.Series(np.random.randn(6), index=rng6)

In [120]:
ts7

2000-01-29   -0.625124
2000-01-30    0.212752
2000-01-31    2.017949
2000-02-01   -1.531488
2000-02-02    1.195995
2000-02-03    1.268876
Freq: D, dtype: float64

In [122]:
# Several daily timestamps fall in the same month, so the resulting
# PeriodIndex contains repeated periods.
ts7.to_period('M')

2000-01   -0.625124
2000-01    0.212752
2000-01    2.017949
2000-02   -1.531488
2000-02    1.195995
2000-02    1.268876
Freq: M, dtype: float64

In [123]:
# Rebinds `pts` (previously the monthly series built from ts6) to a daily
# period series built from ts7.
pts = ts7.to_period()

In [124]:
pts

2000-01-29   -0.625124
2000-01-30    0.212752
2000-01-31    2.017949
2000-02-01   -1.531488
2000-02-02    1.195995
2000-02-03    1.268876
Freq: D, dtype: float64

In [125]:
# how='end' maps each daily period to its last representable instant
# (23:59:59.999999999) instead of its start.
pts.to_timestamp(how='end')

2000-01-29 23:59:59.999999999   -0.625124
2000-01-30 23:59:59.999999999    0.212752
2000-01-31 23:59:59.999999999    2.017949
2000-02-01 23:59:59.999999999   -1.531488
2000-02-02 23:59:59.999999999    1.195995
2000-02-03 23:59:59.999999999    1.268876
Freq: D, dtype: float64

In [126]:
# 11.5.4 Creating a PeriodIndex from arrays

In [127]:
# Load the macroeconomic data set. The path is relative, so it resolves against
# the notebook's current working directory.
data = pd.read_csv('pydata-book/examples/macrodata.csv')

In [128]:
data

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.980,139.7,2.82,5.8,177.146,0.00,0.00
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.150,141.7,3.08,5.1,177.830,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.260,1916.4,29.350,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.370,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.540,139.6,3.50,5.2,180.007,2.31,1.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,2008.0,3.0,13324.600,9267.7,1990.693,991.551,9838.3,216.889,1474.7,1.17,6.0,305.270,-3.16,4.33
199,2008.0,4.0,13141.920,9195.3,1857.661,1007.273,9920.4,212.174,1576.5,0.12,6.9,305.952,-8.79,8.91
200,2009.0,1.0,12925.410,9209.2,1558.494,996.287,9926.4,212.671,1592.8,0.22,8.1,306.547,0.94,-0.71
201,2009.0,2.0,12901.504,9189.0,1456.678,1023.528,10077.5,214.469,1653.6,0.18,9.2,307.226,3.37,-3.19


In [131]:
data.year

0      1959.0
1      1959.0
2      1959.0
3      1959.0
4      1960.0
        ...  
198    2008.0
199    2008.0
200    2009.0
201    2009.0
202    2009.0
Name: year, Length: 203, dtype: float64

In [132]:
data.quarter

0      1.0
1      2.0
2      3.0
3      4.0
4      1.0
      ... 
198    3.0
199    4.0
200    1.0
201    2.0
202    3.0
Name: quarter, Length: 203, dtype: float64

In [133]:
# Combine the separate year and quarter columns into one quarterly PeriodIndex.
# NOTE(review): pandas 2.2 deprecates passing year=/quarter= fields to the
# PeriodIndex constructor in favour of PeriodIndex.from_fields — confirm the
# target pandas version before upgrading.
index = pd.PeriodIndex(year=data.year, quarter=data.quarter,
                      freq='Q-DEC')

In [134]:
index

PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', length=203, freq='Q-DEC')

In [135]:
# Replace the integer row labels (0..202) with the quarterly PeriodIndex.
# Note: this mutates `data` in place.
data.index = index

In [136]:
data.infl

1959Q1    0.00
1959Q2    2.34
1959Q3    2.74
1959Q4    0.27
1960Q1    2.31
          ... 
2008Q3   -3.16
2008Q4   -8.79
2009Q1    0.94
2009Q2    3.37
2009Q3    3.56
Freq: Q-DEC, Name: infl, Length: 203, dtype: float64