In [1]:
import numpy as np
import pandas as pd
from datetime import datetime

# chap 11.1 날짜, 시간 자료형, 도구

In [19]:
# Subtracting one datetime from another yields a timedelta (the elapsed span).
delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)
delta

datetime.timedelta(days=926, seconds=56700)

In [20]:
# Whole-day component of the span.
delta.days

926

In [21]:
# Seconds left over after the whole days are removed (not the total seconds of the span).
delta.seconds

56700

In [22]:
# timedelta lets a span be built directly and added to / subtracted from a datetime.
from datetime import timedelta
start = datetime(2011, 1, 7)

In [23]:
# The first positional argument of timedelta is a number of days: shift forward 12 days.
start + timedelta(12)

datetime.datetime(2011, 1, 19, 0, 0)

In [24]:
# A timedelta can be scaled by an integer: step back 2 * 12 = 24 days.
start - 2 * timedelta(12)

datetime.datetime(2010, 12, 14, 0, 0)

In [25]:
# str() renders a datetime as 'YYYY-MM-DD HH:MM:SS'.
stamp = datetime(2011, 1, 3)
str(stamp)

'2011-01-03 00:00:00'

In [26]:
stamp.strftime('%Y-%m-%d') # datetime -> formatted string

'2011-01-03'

In [27]:
value = '2011-01-03'
datetime.strptime(value, '%Y-%m-%d') # string -> datetime, using the matching format codes

datetime.datetime(2011, 1, 3, 0, 0)

In [28]:
# Parse each month/day/year string in the list into a datetime.
datestrs = ['7/6/2011', '8/6/2011']
list(map(lambda text: datetime.strptime(text, '%m/%d/%Y'), datestrs))

[datetime.datetime(2011, 7, 6, 0, 0), datetime.datetime(2011, 8, 6, 0, 0)]

In [29]:
# dateutil's parser infers the layout of the string, so no format string is passed.
from dateutil.parser import parse
parse('2011-01-03')

datetime.datetime(2011, 1, 3, 0, 0)

In [30]:
# A month name and a 12-hour clock with AM/PM are understood as well.
parse('Jan 31, 1997 10:45 PM')

datetime.datetime(1997, 1, 31, 22, 45)

In [31]:
parse('6/12/2011', dayfirst=True) # dayfirst=True: the leading number is the day (6 Dec, not 12 Jun)

datetime.datetime(2011, 12, 6, 0, 0)

In [32]:
# pd.to_datetime turns a list of date strings into a DatetimeIndex.
datestrs = ['2011-07-06 12:00:00', '2011-08-06 00:00:00']
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

In [33]:
# A None entry comes back as NaT, the missing-value marker for timestamps.
idx = pd.to_datetime(datestrs + [None])
idx

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00', 'NaT'], dtype='datetime64[ns]', freq=None)

In [36]:
pd.isnull(idx) # isnull flags missing values; the NaT entry is reported as True

array([False, False,  True])

# chap 11.7 리샘플링과 빈도 변환

In [38]:
# 100 consecutive days starting 2000-01-01, each paired with a standard-normal draw.
rng = pd.date_range(start='2000-01-01', periods=100, freq='D')
ts = pd.Series(data=np.random.randn(rng.size), index=rng)
ts

2000-01-01   -0.009891
2000-01-02   -0.967103
2000-01-03    0.293108
2000-01-04   -1.609566
2000-01-05    0.389995
                ...   
2000-04-05    0.445372
2000-04-06    1.810621
2000-04-07    1.212439
2000-04-08   -1.403786
2000-04-09    0.752452
Freq: D, Length: 100, dtype: float64

In [39]:
# Downsample the daily series to monthly means; each bin is labelled with its month-end date.
# NOTE(review): newer pandas releases spell the month-end alias 'ME' — confirm against the pandas version in use.
ts.resample('M').mean()

2000-01-31   -0.331857
2000-02-29   -0.123476
2000-03-31   -0.020403
2000-04-30    0.630657
Freq: M, dtype: float64

In [40]:
# Same monthly means, but labelled with monthly periods (2000-01, ...) instead of
# month-end timestamps. The index is converted explicitly with to_period() because
# the `kind=` argument of resample() is deprecated in recent pandas releases.
ts.resample('M').mean().to_period('M')

2000-01   -0.331857
2000-02   -0.123476
2000-03   -0.020403
2000-04    0.630657
Freq: M, dtype: float64

## 다운샘플링

In [42]:
# Twelve one-minute timestamps starting at 2000-01-01 00:00, paired with the
# integers 0..11; this series feeds the downsampling examples that follow.
# The frequency is spelled 'min' rather than the legacy 'T' alias: both mean
# "minute", but 'T' is deprecated in recent pandas and the resample calls
# below already use the '5min' spelling.
rng = pd.date_range('2000-01-01', periods=12, freq='min')
ts = pd.Series(np.arange(12), index=rng)
ts

2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int32

In [43]:
# Sum over 5-minute bins closed on their right edge: the 00:00 sample lands in the
# bin labelled 23:55, because labels still use the left edge by default.
ts.resample('5min', closed='right').sum()

1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int32

In [44]:
# NOTE(review): this call is identical to the preceding cell and prints the same
# result — possibly a leftover duplicate; confirm whether one of the two was meant
# to show the default (left-closed) binning instead.
ts.resample('5min', closed='right').sum()

1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int32

In [45]:
# label='right' names each bin by its right edge instead of its left edge.
ts.resample('5min', closed='right', label='right').sum()

2000-01-01 00:00:00     0
2000-01-01 00:05:00    15
2000-01-01 00:10:00    40
2000-01-01 00:15:00    11
Freq: 5T, dtype: int32

In [47]:
# ohlc() gives four columns per bin: first (open), max (high), min (low), last (close).
ts.resample('5min').ohlc()

Unnamed: 0,open,high,low,close
2000-01-01 00:00:00,0,4,0,4
2000-01-01 00:05:00,5,9,5,9
2000-01-01 00:10:00,10,11,10,11


## 업샘플링과 보간

In [48]:
# Two Wednesday-anchored weekly rows of random values, one column per state.
frame = pd.DataFrame(
    np.random.randn(2, 4),
    index=pd.date_range(start='1/1/2000', periods=2, freq='W-WED'),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.16471,0.700593,0.003919,-1.386607
2000-01-12,0.446868,0.017587,-0.712199,-1.090815


In [49]:
# Upsample weekly -> daily with asfreq(): nothing is aggregated or filled, so the
# days between the two original rows come out as NaN.
df_daily = frame.resample('D').asfreq()
df_daily

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.16471,0.700593,0.003919,-1.386607
2000-01-06,,,,
2000-01-07,,,,
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,0.446868,0.017587,-0.712199,-1.090815


In [50]:
# Forward-fill: each gap day repeats the most recent earlier row.
frame.resample('D').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.16471,0.700593,0.003919,-1.386607
2000-01-06,-0.16471,0.700593,0.003919,-1.386607
2000-01-07,-0.16471,0.700593,0.003919,-1.386607
2000-01-08,-0.16471,0.700593,0.003919,-1.386607
2000-01-09,-0.16471,0.700593,0.003919,-1.386607
2000-01-10,-0.16471,0.700593,0.003919,-1.386607
2000-01-11,-0.16471,0.700593,0.003919,-1.386607
2000-01-12,0.446868,0.017587,-0.712199,-1.090815


In [52]:
# limit=2 forward-fills at most two consecutive missing rows; later gap days stay NaN.
frame.resample('D').ffill(limit=2)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.16471,0.700593,0.003919,-1.386607
2000-01-06,-0.16471,0.700593,0.003919,-1.386607
2000-01-07,-0.16471,0.700593,0.003919,-1.386607
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,0.446868,0.017587,-0.712199,-1.090815


In [53]:
# Re-anchor the weekly index on Thursdays; ffill gives each Thursday the values of
# the Wednesday row just before it.
frame.resample('W-THU').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-06,-0.16471,0.700593,0.003919,-1.386607
2000-01-13,0.446868,0.017587,-0.712199,-1.090815


## 기간 리샘플링

In [54]:
# 24 monthly periods (Jan 2000 through Dec 2001) of random values, one column per
# state; display the first five rows.
frame = pd.DataFrame(
    np.random.randn(24, 4),
    index=pd.period_range(start='1-2000', end='12-2001', freq='M'),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
frame.head(5)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01,1.114202,-1.606902,-0.15194,-1.607886
2000-02,-0.746161,-0.555348,-0.682547,-0.313379
2000-03,-1.238509,-1.338406,-0.19568,1.776939
2000-04,0.386784,-1.355881,-1.466301,-0.221949
2000-05,-0.456894,-0.084465,0.598144,-0.106706


In [55]:
# Collapse the monthly periods into annual periods (year ending in December) by mean.
# NOTE(review): 'A-DEC' is the legacy annual alias; newer pandas spells it 'Y-DEC' — confirm against the pandas version in use.
annual_frame = frame.resample('A-DEC').mean()
annual_frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000,-0.370474,-0.60303,0.126859,-0.037422
2001,0.323153,0.180347,-0.007744,0.299343


In [59]:
# Upsample annual -> quarterly, repeating each year's values across its four quarters.
annual_frame.resample('Q-DEC').ffill()
# Q-DEC: quarterly periods whose year ends in December

Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q1,-0.370474,-0.60303,0.126859,-0.037422
2000Q2,-0.370474,-0.60303,0.126859,-0.037422
2000Q3,-0.370474,-0.60303,0.126859,-0.037422
2000Q4,-0.370474,-0.60303,0.126859,-0.037422
2001Q1,0.323153,0.180347,-0.007744,0.299343
2001Q2,0.323153,0.180347,-0.007744,0.299343
2001Q3,0.323153,0.180347,-0.007744,0.299343
2001Q4,0.323153,0.180347,-0.007744,0.299343
