# DatetimeIndex
- pd.to_datetime() : 문자열을 날짜/시간의 자료형인 datetime 자료형으로 바꿔주는 함수
- pd.date_range() : 시작일과 종료일 또는 시작일과 기간을 입력하면 범위 내의 인덱스를 자동으로 생성
- 시계열 자료 : 인덱스에 날짜나 시간이 오는 데이터
- 판다스에서 시계열 자료를 생성하려면 인덱스를 DatetimeIndex 자료형으로 만들어야 한다.

In [3]:
import pandas as pd
import numpy as np

In [5]:
# Raw date strings in a 'YYYY, M, D' layout, to be parsed into datetimes.
date_str = [
    '2018, 1, 1',
    '2018, 1, 4',
    '2018, 1, 5',
    '2018, 1, 6',
]

In [6]:
# Parse the 'YYYY, M, D' strings into a DatetimeIndex.
# The layout is not ISO 8601, so the format is spelled out explicitly instead
# of relying on pandas guessing it element by element.
idx = pd.to_datetime(date_str, format='%Y, %m, %d')
idx

DatetimeIndex(['2018-01-01', '2018-01-04', '2018-01-05', '2018-01-06'], dtype='datetime64[ns]', freq=None)

In [7]:
# Series
# Series of four random normal values, labelled by the dates held in idx.
s = pd.Series(data=np.random.randn(4), index=idx)
s

2018-01-01    2.194711
2018-01-04   -0.345031
2018-01-05    0.500958
2018-01-06   -0.882424
dtype: float64

In [8]:
# Daily DatetimeIndex from 2022-04-02 through 2022-04-30, both ends included.
pd.date_range(start='2022-4-2', end='2022-4-30')

DatetimeIndex(['2022-04-02', '2022-04-03', '2022-04-04', '2022-04-05',
               '2022-04-06', '2022-04-07', '2022-04-08', '2022-04-09',
               '2022-04-10', '2022-04-11', '2022-04-12', '2022-04-13',
               '2022-04-14', '2022-04-15', '2022-04-16', '2022-04-17',
               '2022-04-18', '2022-04-19', '2022-04-20', '2022-04-21',
               '2022-04-22', '2022-04-23', '2022-04-24', '2022-04-25',
               '2022-04-26', '2022-04-27', '2022-04-28', '2022-04-29',
               '2022-04-30'],
              dtype='datetime64[ns]', freq='D')

In [9]:
# 30 consecutive days starting at 2022-10-01; the end date follows from periods.
pd.date_range('2022-10-1', periods=30)

DatetimeIndex(['2022-10-01', '2022-10-02', '2022-10-03', '2022-10-04',
               '2022-10-05', '2022-10-06', '2022-10-07', '2022-10-08',
               '2022-10-09', '2022-10-10', '2022-10-11', '2022-10-12',
               '2022-10-13', '2022-10-14', '2022-10-15', '2022-10-16',
               '2022-10-17', '2022-10-18', '2022-10-19', '2022-10-20',
               '2022-10-21', '2022-10-22', '2022-10-23', '2022-10-24',
               '2022-10-25', '2022-10-26', '2022-10-27', '2022-10-28',
               '2022-10-29', '2022-10-30'],
              dtype='datetime64[ns]', freq='D')

#### freq 매개변수로 특정한 날짜만 생성되도록 설정할 수 있다.
- s : second, 초
- T : minute, 분 (pandas 2.2부터 T는 deprecated, min 사용 권장)
- H : hour, 시간 (pandas 2.2부터 H는 deprecated, h 사용 권장)
- D : day, 일
- B : 주말이 아닌 평일
- W : 주(일요일)
- W-MON : 주(월요일)
- M : 각 달의 마지막 날 (pandas 2.2부터 M은 deprecated, ME 사용 권장)
- MS : 각 달의 첫날

## shift

In [10]:
# Four random values indexed by month-end dates (2018-01-31 .. 2018-04-30).
# The month-end frequency is passed as an offset object: the 'M' string alias
# is deprecated since pandas 2.2 (renamed to 'ME'), while MonthEnd() is
# accepted by older and newer pandas versions alike.
ts = pd.Series(
    np.random.randn(4),
    index=pd.date_range('2018-1-1', periods=4, freq=pd.offsets.MonthEnd()),
)

In [11]:
# Display the month-end series.
ts

2018-01-31    0.754719
2018-02-28    2.388339
2018-03-31    0.124829
2018-04-30   -0.368038
Freq: M, dtype: float64

In [12]:
# Without freq, shift moves the values and keeps the index: each value moves
# one row earlier and the last row is left as NaN.
ts.shift(periods=-1)

2018-01-31    2.388339
2018-02-28    0.124829
2018-03-31   -0.368038
2018-04-30         NaN
Freq: M, dtype: float64

In [13]:
# With freq, shift moves the index labels instead of the values: every date
# advances by one month-end and no NaN is introduced.
# An offset object is used because the 'M' string alias is deprecated since
# pandas 2.2 (renamed to 'ME'); MonthEnd() works on old and new versions.
ts.shift(1, freq=pd.offsets.MonthEnd())

2018-02-28    0.754719
2018-03-31    2.388339
2018-04-30    0.124829
2018-05-31   -0.368038
Freq: M, dtype: float64

In [14]:
# Move every index label forward to the next weekly anchor (a Sunday);
# the values themselves are unchanged.
ts.shift(periods=1, freq='W')

2018-02-04    0.754719
2018-03-04    2.388339
2018-04-01    0.124829
2018-05-06   -0.368038
dtype: float64

### resample : 날짜나 시간 간격을 재조정하는 기능
- up-sample(업샘플링) : 데이터 양이 증가
- down-sample(다운 샘플링) : 데이터 양이 감소

In [15]:
# 100 random normal values on a daily index starting at 2018-01-01,
# followed by a preview of the first 20 rows.
ts = pd.Series(
    data=np.random.randn(100),
    index=pd.date_range(start='2018-1-1', periods=100, freq='D'),
)
ts.head(n=20)

2018-01-01    1.001317
2018-01-02   -1.285987
2018-01-03    0.331437
2018-01-04    0.277442
2018-01-05    0.013249
2018-01-06   -0.989785
2018-01-07   -1.070277
2018-01-08   -0.179702
2018-01-09   -0.151596
2018-01-10   -0.548379
2018-01-11   -0.829374
2018-01-12    0.744920
2018-01-13   -0.381845
2018-01-14   -0.164979
2018-01-15   -0.758922
2018-01-16   -0.733977
2018-01-17   -0.857703
2018-01-18    1.480951
2018-01-19    1.774957
2018-01-20   -1.685165
Freq: D, dtype: float64

In [16]:
# Down-sample the daily series to one row per month, keeping the first value
# observed in each month; rows are labelled with the month-end date.
# An offset object is used because the 'M' string alias is deprecated since
# pandas 2.2 (renamed to 'ME'); MonthEnd() works on old and new versions.
ts.resample(pd.offsets.MonthEnd()).first()

2018-01-31    1.001317
2018-02-28    0.442081
2018-03-31    0.901889
2018-04-30   -0.022551
Freq: M, dtype: float64

In [None]:
##