## 11.4 시간대 다루기

In [1]:
import numpy as np
import pandas as pd

In [2]:
from datetime import datetime

In [3]:
import pytz

In [4]:
# Peek at the last five entries of pytz's list of common time zone names.
pytz.common_timezones[-5:]

['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']

In [6]:
# Look up the pytz time zone object for New York by its zone name.
tz = pytz.timezone('America/New_York')
tz

<DstTzInfo 'America/New_York' LMT-1 day, 19:04:00 STD>

### 시간대 지역화와 변환

In [7]:
# Six consecutive daily timestamps at 09:30 starting 2012-03-09; no time zone is attached.
rng = pd.date_range(start='3/9/2012 9:30', periods=6, freq='D')

In [9]:
# Attach standard-normal random values to the naive daily index.
ts = pd.Series(
    np.random.randn(len(rng)),
    index=rng,
)
ts

2012-03-09 09:30:00    0.046993
2012-03-10 09:30:00   -1.233451
2012-03-11 09:30:00   -0.689276
2012-03-12 09:30:00    2.482981
2012-03-13 09:30:00   -2.966138
2012-03-14 09:30:00    0.842986
Freq: D, dtype: float64

In [10]:
# The index is naive, so its tz is None; print() is used because a bare None result is not echoed by the notebook.
print(ts.index.tz)

None


In [11]:
# A range can be created time zone-aware from the start by passing tz.
pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00', '2012-03-16 09:30:00+00:00',
               '2012-03-17 09:30:00+00:00', '2012-03-18 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [12]:
# Redisplay the naive series before localizing it.
ts

2012-03-09 09:30:00    0.046993
2012-03-10 09:30:00   -1.233451
2012-03-11 09:30:00   -0.689276
2012-03-12 09:30:00    2.482981
2012-03-13 09:30:00   -2.966138
2012-03-14 09:30:00    0.842986
Freq: D, dtype: float64

In [13]:
# Localize the naive timestamps to UTC: the clock readings are kept and gain a +00:00 offset.
ts_utc = ts.tz_localize('UTC')
ts_utc

2012-03-09 09:30:00+00:00    0.046993
2012-03-10 09:30:00+00:00   -1.233451
2012-03-11 09:30:00+00:00   -0.689276
2012-03-12 09:30:00+00:00    2.482981
2012-03-13 09:30:00+00:00   -2.966138
2012-03-14 09:30:00+00:00    0.842986
Freq: D, dtype: float64

In [15]:
# The index dtype now records the zone (datetime64[ns, UTC]).
ts_utc.index

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [17]:
# Convert the UTC-localized series to New York time; the offset moves from -05:00 to -04:00
# starting with the 2012-03-11 entry (daylight saving time begins).
ts_utc.tz_convert('America/New_York')

2012-03-09 04:30:00-05:00    0.046993
2012-03-10 04:30:00-05:00   -1.233451
2012-03-11 05:30:00-04:00   -0.689276
2012-03-12 05:30:00-04:00    2.482981
2012-03-13 05:30:00-04:00   -2.966138
2012-03-14 05:30:00-04:00    0.842986
Freq: D, dtype: float64

In [18]:
# Interpret the same naive timestamps as New York local time instead of UTC.
ts_eastern = ts.tz_localize('America/New_York')

In [19]:
# Convert the New York-localized series to UTC.
ts_eastern.tz_convert('UTC')

2012-03-09 14:30:00+00:00    0.046993
2012-03-10 14:30:00+00:00   -1.233451
2012-03-11 13:30:00+00:00   -0.689276
2012-03-12 13:30:00+00:00    2.482981
2012-03-13 13:30:00+00:00   -2.966138
2012-03-14 13:30:00+00:00    0.842986
Freq: D, dtype: float64

In [20]:
# Convert the same New York-localized series to Berlin time.
ts_eastern.tz_convert('Europe/Berlin')

2012-03-09 15:30:00+01:00    0.046993
2012-03-10 15:30:00+01:00   -1.233451
2012-03-11 14:30:00+01:00   -0.689276
2012-03-12 14:30:00+01:00    2.482981
2012-03-13 14:30:00+01:00   -2.966138
2012-03-14 14:30:00+01:00    0.842986
Freq: D, dtype: float64

In [21]:
# tz_localize is also available directly on the DatetimeIndex, not only on the Series.
ts.index.tz_localize('Asia/Shanghai')

DatetimeIndex(['2012-03-09 09:30:00+08:00', '2012-03-10 09:30:00+08:00',
               '2012-03-11 09:30:00+08:00', '2012-03-12 09:30:00+08:00',
               '2012-03-13 09:30:00+08:00', '2012-03-14 09:30:00+08:00'],
              dtype='datetime64[ns, Asia/Shanghai]', freq='D')

### 시간대를 고려해서 Timestamp 객체 다루기

In [22]:
# A single naive Timestamp (no time zone attached).
stamp = pd.Timestamp('2011-03-12 04:00')

In [23]:
# Individual Timestamp objects can be localized just like a Series or index.
stamp_utc = stamp.tz_localize('utc')

In [24]:
# Convert the UTC timestamp to New York time (the previous evening locally).
stamp_utc.tz_convert('America/New_York')

Timestamp('2011-03-11 23:00:00-0500', tz='America/New_York')

In [25]:
# A time zone can also be supplied directly when constructing the Timestamp.
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')
stamp_moscow

Timestamp('2011-03-12 04:00:00+0300', tz='Europe/Moscow')

In [26]:
# Underlying integer representation: nanoseconds since the Unix epoch, in UTC.
stamp_utc.value

1299902400000000000

In [27]:
# Converting to another zone leaves the underlying value unchanged (same integer as stamp_utc.value).
stamp_utc.tz_convert('America/New_York').value

1299902400000000000

In [28]:
from pandas.tseries.offsets import Hour

In [30]:
# A US/Eastern timestamp on 2012-03-12, which is on daylight time (-04:00).
stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')
stamp

Timestamp('2012-03-12 01:30:00-0400', tz='US/Eastern')

In [31]:
# Shift the zone-aware timestamp by one hour using a pandas Hour offset.
stamp + Hour()

Timestamp('2012-03-12 02:30:00-0400', tz='US/Eastern')

In [32]:
# 00:30 on 2012-11-04 in US/Eastern: still on daylight time (-04:00), shortly before the fall-back transition.
stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')
stamp

Timestamp('2012-11-04 00:30:00-0400', tz='US/Eastern')

In [33]:
# Two elapsed hours later the wall clock reads 01:30 at -05:00: offset arithmetic respects the DST transition.
stamp + 2 * Hour()

Timestamp('2012-11-04 01:30:00-0500', tz='US/Eastern')

### 다른 시간대 간의 연산

In [34]:
# Ten business-day timestamps at 09:30 starting 2012-03-07 (weekends are skipped).
rng = pd.date_range(start='3/7/2012 9:30', periods=10, freq='B')

In [35]:
# Random values on the business-day index; ts is rebound here and used for the cross-zone example below.
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

2012-03-07 09:30:00    1.104853
2012-03-08 09:30:00    0.600447
2012-03-09 09:30:00   -0.095210
2012-03-12 09:30:00    0.146973
2012-03-13 09:30:00    0.585403
2012-03-14 09:30:00    1.050800
2012-03-15 09:30:00    1.516842
2012-03-16 09:30:00   -0.093683
2012-03-19 09:30:00    0.092417
2012-03-20 09:30:00    1.854816
Freq: B, dtype: float64

In [39]:
# First seven observations, interpreted as London local time.
ts1 = ts[:7].tz_localize('Europe/London')

In [40]:
# Observations from the third onward, interpreted as Moscow local time.
ts2 = ts[2:].tz_localize('Europe/Moscow')

In [42]:
# Combining series localized to different zones yields a result indexed in UTC.
result = ts1 + ts2
result.index

DatetimeIndex(['2012-03-07 09:30:00+00:00', '2012-03-08 09:30:00+00:00',
               '2012-03-09 05:30:00+00:00', '2012-03-09 09:30:00+00:00',
               '2012-03-12 05:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 05:30:00+00:00', '2012-03-13 09:30:00+00:00',
               '2012-03-14 05:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 05:30:00+00:00', '2012-03-15 09:30:00+00:00',
               '2012-03-16 05:30:00+00:00', '2012-03-19 05:30:00+00:00',
               '2012-03-20 05:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)

## 11.5 기간과 기간 연산

In [43]:
# A Period covering the whole of 2007, for an annual frequency whose year ends in December.
# NOTE(review): recent pandas releases deprecate the 'A-*' annual aliases in favor of 'Y-*' —
# confirm against the pandas version this notebook targets before changing it.
p = pd.Period(2007, freq='A-DEC')
p

Period('2007', 'A-DEC')

In [44]:
# Adding an integer shifts the period by that many units of its frequency (years here).
p + 5

Period('2012', 'A-DEC')

In [45]:
# Subtracting an integer shifts the period backwards by that many units.
p - 2

Period('2005', 'A-DEC')

In [46]:
# The difference of two periods with the same frequency is the number of units between them (7 years here).
pd.Period('2014', freq='A-DEC') - p

<7 * YearEnds: month=12>

In [49]:
# Quarter labels given as plain strings, to be parsed into periods.
values = ['2001Q3', '2002Q2', '2003Q1']

In [51]:
# Parse the quarter strings into a PeriodIndex with quarters of a year ending in December.
index = pd.PeriodIndex(values, freq='Q-DEC')
index

PeriodIndex(['2001Q3', '2002Q2', '2003Q1'], dtype='period[Q-DEC]', freq='Q-DEC')

### Period의 빈도 변환

In [52]:
# Annual period for 2007 (year ending in December), used below to show conversion to a finer frequency.
p = pd.Period('2007', freq='A-DEC')
p

Period('2007', 'A-DEC')

In [53]:
# Monthly period at the start of the year.
p.asfreq('M', how='start')

Period('2007-01', 'M')

In [54]:
# Monthly period at the end of the year.
p.asfreq('M', how='end')

Period('2007-12', 'M')

In [55]:
# Same year label, but for a fiscal year that ends in June.
p = pd.Period('2007', freq='A-JUN')
p

Period('2007', 'A-JUN')

In [56]:
# First month of the June-ending fiscal year 2007 (July of the prior calendar year).
p.asfreq('M', how='start')

Period('2006-07', 'M')

In [58]:
# Last month of the June-ending fiscal year 2007.
p.asfreq('M', how='end')

Period('2007-06', 'M')

In [59]:
# A monthly period, used below to show conversion from a finer to a coarser frequency.
p = pd.Period('Aug-2007', 'M')

In [60]:
# With years ending in June, August 2007 belongs to the fiscal year labelled 2008.
p.asfreq('A-JUN')

Period('2008', 'A-JUN')

In [61]:
# Annual periods 2006 through 2009 (inclusive), years ending in December.
rng = pd.period_range(start='2006', end='2009', freq='A-DEC')

In [63]:
# Random values indexed by the annual periods.
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

2006   -1.703667
2007   -0.645060
2008   -0.473575
2009   -0.543547
Freq: A-DEC, dtype: float64

In [64]:
# Relabel each annual period with the first month it contains; the values are unchanged.
ts.asfreq('M', how='start')

2006-01   -1.703667
2007-01   -0.645060
2008-01   -0.473575
2009-01   -0.543547
Freq: M, dtype: float64

In [65]:
# Relabel each annual period with the last business day of the year.
ts.asfreq('B', how='end')

2006-12-29   -1.703667
2007-12-31   -0.645060
2008-12-31   -0.473575
2009-12-31   -0.543547
Freq: B, dtype: float64

### 분기 빈도

In [66]:
# Fourth quarter of fiscal 2012 for a fiscal year ending in January.
p = pd.Period('2012Q4', freq='Q-JAN')
p

Period('2012Q4', 'Q-JAN')

In [67]:
# First calendar day of the quarter.
p.asfreq('D', how='start')

Period('2011-11-01', 'D')

In [68]:
# Last calendar day of the quarter.
p.asfreq('D', how='end')

Period('2012-01-31', 'D')

In [69]:
# 4 PM on the second-to-last business day of the quarter: take the last business day,
# step back one, switch to minute periods at the start of that day, then add 16 * 60 minutes.
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
p4pm

Period('2012-01-30 16:00', 'T')

In [70]:
# Convert the minute period to a Timestamp.
p4pm.to_timestamp()

Timestamp('2012-01-30 16:00:00')

In [71]:
# Six consecutive fiscal quarters (fiscal year ending in January), 2011Q3 through 2012Q4.
rng = pd.period_range(start='2011Q3', end='2012Q4', freq='Q-JAN')

In [73]:
# Integer values 0..n-1 indexed by the quarterly periods.
ts = pd.Series(np.arange(len(rng)), index=rng)
ts

2011Q3    0
2011Q4    1
2012Q1    2
2012Q2    3
2012Q3    4
2012Q4    5
Freq: Q-JAN, dtype: int32

In [75]:
# Same "4 PM on the second-to-last business day" computation, applied to every quarter in rng.
new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60

In [76]:
# Replace the quarterly PeriodIndex with the computed timestamps (this mutates ts in place).
ts.index = new_rng.to_timestamp()
ts

2010-10-28 16:00:00    0
2011-01-28 16:00:00    1
2011-04-28 16:00:00    2
2011-07-28 16:00:00    3
2011-10-28 16:00:00    4
2012-01-30 16:00:00    5
dtype: int32

### 타임스탬프와 기간 서로 변환하기

In [77]:
# Three month-end timestamps starting from January 2001.
# NOTE(review): recent pandas releases deprecate 'M' as a date_range frequency in favor of 'ME' —
# confirm against the pandas version this notebook targets before changing it.
rng = pd.date_range('2001-01-01', periods=3, freq='M')

In [80]:
# Random values indexed by the month-end timestamps.
ts = pd.Series(np.random.randn(3), index=rng)
ts

2001-01-31    1.156740
2001-02-28    1.607081
2001-03-31    0.050500
Freq: M, dtype: float64

In [82]:
# Convert the timestamp-indexed series to periods; with no argument the result here is monthly,
# matching the frequency of the index.
pts = ts.to_period()
pts

2001-01    1.156740
2001-02    1.607081
2001-03    0.050500
Freq: M, dtype: float64

In [83]:
# Six consecutive days that straddle the January/February boundary.
rng = pd.date_range(start='1/29/2000', periods=6, freq='D')

In [85]:
# Random values indexed by the six daily timestamps.
ts2 = pd.Series(np.random.randn(6), index=rng)
ts2

2000-01-29    0.365466
2000-01-30   -0.437753
2000-01-31   -0.338602
2000-02-01   -0.243884
2000-02-02    0.492856
2000-02-03    0.002249
Freq: D, dtype: float64

In [86]:
# Several daily timestamps fall in the same month, so the resulting PeriodIndex contains duplicate periods.
ts2.to_period('M')

2000-01    0.365466
2000-01   -0.437753
2000-01   -0.338602
2000-02   -0.243884
2000-02    0.492856
2000-02    0.002249
Freq: M, dtype: float64

In [88]:
# Rebind pts (previously the monthly series) to daily periods derived from ts2.
pts = ts2.to_period()
pts

2000-01-29    0.365466
2000-01-30   -0.437753
2000-01-31   -0.338602
2000-02-01   -0.243884
2000-02-02    0.492856
2000-02-03    0.002249
Freq: D, dtype: float64

In [89]:
# Convert back to timestamps anchored at the end of each daily period (23:59:59.999999999).
pts.to_timestamp(how='end')

2000-01-29 23:59:59.999999999    0.365466
2000-01-30 23:59:59.999999999   -0.437753
2000-01-31 23:59:59.999999999   -0.338602
2000-02-01 23:59:59.999999999   -0.243884
2000-02-02 23:59:59.999999999    0.492856
2000-02-03 23:59:59.999999999    0.002249
Freq: D, dtype: float64