# DatetimeIndex  (pandas에서 제공)
- pd.to_datetime() : 문자열을 날짜/시간 자료형인 datetime 자료형으로 바꿔주는 함수
- pd.date_range()  : 시작일과 종료일 또는 시작일과 기간을 입력하면 범위 내의 인덱스를 자동으로 생성

- 시계열 자료 : 인덱스에 날짜나 시간이 오는 데이터
- 판다스에서 시계열 자료를 생성하려면 인덱스를 DatetimeIndex 자료형으로 만들어야 한다

In [4]:
import numpy as np
import pandas as pd

In [10]:
# Dates written as "year, month, day" strings, held in a plain Python list;
# the print confirms the container type before any conversion takes place.
date_str = [
    '2018, 1, 1',
    '2018, 1, 4',
    '2018, 1, 5',
    '2018, 1, 6',
]
print(type(date_str))

<class 'list'>


In [11]:
# Parse the "year, month, day" strings into a DatetimeIndex.
# The layout is spelled out with `format` so the month/day order is fixed
# explicitly instead of being left to the parser's format inference.
idx = pd.to_datetime(date_str, format='%Y, %m, %d')
idx

DatetimeIndex(['2018-01-01', '2018-01-04', '2018-01-05', '2018-01-06'], dtype='datetime64[ns]', freq=None)

In [14]:
# Build a Series of four standard-normal random draws labelled by idx;
# with a DatetimeIndex as its index, the Series is a time series.
s = pd.Series(data=np.random.randn(4), index=idx)
s

2018-01-01   -0.158310
2018-01-04   -1.268831
2018-01-05    0.777561
2018-01-06   -1.534907
dtype: float64

In [16]:
# Generate a daily DatetimeIndex from an explicit start date to an explicit
# end date; both endpoints are part of the result.
pd.date_range(start='2022-4-2', end='2022-4-30')

DatetimeIndex(['2022-04-02', '2022-04-03', '2022-04-04', '2022-04-05',
               '2022-04-06', '2022-04-07', '2022-04-08', '2022-04-09',
               '2022-04-10', '2022-04-11', '2022-04-12', '2022-04-13',
               '2022-04-14', '2022-04-15', '2022-04-16', '2022-04-17',
               '2022-04-18', '2022-04-19', '2022-04-20', '2022-04-21',
               '2022-04-22', '2022-04-23', '2022-04-24', '2022-04-25',
               '2022-04-26', '2022-04-27', '2022-04-28', '2022-04-29',
               '2022-04-30'],
              dtype='datetime64[ns]', freq='D')

In [17]:
# Generate a daily DatetimeIndex from a start date and a number of periods
# (30 consecutive days) rather than from an explicit end date.
pd.date_range('2022-10-1', periods=30)

DatetimeIndex(['2022-10-01', '2022-10-02', '2022-10-03', '2022-10-04',
               '2022-10-05', '2022-10-06', '2022-10-07', '2022-10-08',
               '2022-10-09', '2022-10-10', '2022-10-11', '2022-10-12',
               '2022-10-13', '2022-10-14', '2022-10-15', '2022-10-16',
               '2022-10-17', '2022-10-18', '2022-10-19', '2022-10-20',
               '2022-10-21', '2022-10-22', '2022-10-23', '2022-10-24',
               '2022-10-25', '2022-10-26', '2022-10-27', '2022-10-28',
               '2022-10-29', '2022-10-30'],
              dtype='datetime64[ns]', freq='D')

#### freq 매개변수로 특정한 날짜만 생성되도록 설정할 수 있다
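- s : second, 초
- T : minute, 분 (pandas 2.2 이상에서는 'min' 사용 권장)
- H : hour  , 시간 (pandas 2.2 이상에서는 'h' 사용 권장)
- D : day   , 일
- B : 평일(영업일)
- W : 주 단위, 기본값은 일요일 기준(W-SUN)
- W-MON : 주 단위, 월요일 기준
- M : 각 달의 마지막날 (pandas 2.2 이상에서는 'ME' 사용 권장)
- MS : 각 달의 첫날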

In [18]:
# Every calendar day of October 2022; freq='D' requests daily spacing.
pd.date_range(start='2022-10-1', end='2022-10-31', freq='D')

DatetimeIndex(['2022-10-01', '2022-10-02', '2022-10-03', '2022-10-04',
               '2022-10-05', '2022-10-06', '2022-10-07', '2022-10-08',
               '2022-10-09', '2022-10-10', '2022-10-11', '2022-10-12',
               '2022-10-13', '2022-10-14', '2022-10-15', '2022-10-16',
               '2022-10-17', '2022-10-18', '2022-10-19', '2022-10-20',
               '2022-10-21', '2022-10-22', '2022-10-23', '2022-10-24',
               '2022-10-25', '2022-10-26', '2022-10-27', '2022-10-28',
               '2022-10-29', '2022-10-30', '2022-10-31'],
              dtype='datetime64[ns]', freq='D')

In [23]:
# One date per week within October 2022; plain 'W' is anchored on Sunday,
# so only the Sundays of the month are produced.
pd.date_range(start='2022-10-1', end='2022-10-31', freq='W')

DatetimeIndex(['2022-10-02', '2022-10-09', '2022-10-16', '2022-10-23',
               '2022-10-30'],
              dtype='datetime64[ns]', freq='W-SUN')

In [24]:
# One date per week within October 2022, anchored on Monday ('W-MON'),
# so only the Mondays of the month are produced.
pd.date_range(start='2022-10-1', end='2022-10-31', freq='W-MON')

DatetimeIndex(['2022-10-03', '2022-10-10', '2022-10-17', '2022-10-24',
               '2022-10-31'],
              dtype='datetime64[ns]', freq='W-MON')

### shift 
- 시계열 데이터는 shift 메서드로 데이터 값 또는 인덱스(freq 지정 시)를 이동시킬 수 있다

In [26]:
# Four month-end timestamps starting from January 2018, each paired with a
# standard-normal random value.
# The month-end frequency is passed as an offset object instead of the string
# alias 'M': that alias is deprecated since pandas 2.2 (renamed 'ME'), whereas
# pd.offsets.MonthEnd() has the same meaning on old and new versions alike.
ts = pd.Series(
    np.random.randn(4),
    index=pd.date_range('2018-1-1', periods=4, freq=pd.offsets.MonthEnd()),
)
ts

2018-01-31    0.579314
2018-02-28    0.895975
2018-03-31    2.730436
2018-04-30   -0.817324
Freq: M, dtype: float64

In [28]:
# Move the values one position later while the index stays in place; the
# first row has no predecessor, so it becomes NaN.
ts.shift(periods=1)

2018-01-31         NaN
2018-02-28    0.579314
2018-03-31    0.895975
2018-04-30    2.730436
Freq: M, dtype: float64

In [30]:
# Move the values one position earlier while the index stays in place; the
# last row has no successor, so it becomes NaN.
ts.shift(periods=-1)

2018-01-31    0.895975
2018-02-28    2.730436
2018-03-31   -0.817324
2018-04-30         NaN
Freq: M, dtype: float64

In [41]:
# With `freq` given, the index is shifted instead of the values: every
# timestamp moves forward by one month-end and no NaN is introduced.
# The offset object replaces the string alias 'M', which is deprecated since
# pandas 2.2 (renamed 'ME'); MonthEnd() means the same on every version.
ts.shift(1, freq=pd.offsets.MonthEnd())

2018-02-28    0.579314
2018-03-31    0.895975
2018-04-30    2.730436
2018-05-31   -0.817324
Freq: M, dtype: float64

In [35]:
# Shift the index (not the values) by one weekly step: each timestamp moves
# forward to the next Sunday, so the result no longer has a regular frequency.
ts.shift(periods=1, freq='W')

2018-02-04    0.579314
2018-03-04    0.895975
2018-04-01    2.730436
2018-05-06   -0.817324
dtype: float64

### resample : 날짜나 시간 간격을 재조정하는 기능
- up-sample(업샘플링)  :  데이터 양이 증가
- down-sample(다운샘플링) : 데이터 양이 감소, 시간 간격이 커지기 때문이다.

In [42]:
# One hundred consecutive daily observations starting on 2018-01-01, filled
# with standard-normal random values; only the first 20 rows are displayed.
ts = pd.Series(
    data=np.random.randn(100),
    index=pd.date_range(start='2018-1-1', periods=100, freq='D'),
)
ts.head(n=20)

2018-01-01   -0.152254
2018-01-02   -0.011183
2018-01-03   -1.421842
2018-01-04   -0.847322
2018-01-05    0.794151
2018-01-06   -1.344438
2018-01-07    1.192402
2018-01-08   -0.558989
2018-01-09    0.083587
2018-01-10    1.816245
2018-01-11   -0.048219
2018-01-12    0.710435
2018-01-13   -0.704013
2018-01-14    0.267260
2018-01-15    0.429285
2018-01-16   -1.427302
2018-01-17   -0.862682
2018-01-18   -0.303900
2018-01-19    0.347044
2018-01-20   -0.470446
Freq: D, dtype: float64

In [43]:
# Down-sampling groups the original observations: the daily values are
# collected into weekly bins (each labelled by the Sunday that closes the
# week) and every bin is reduced to its mean.
ts.resample(rule='W').mean()

2018-01-07   -0.255784
2018-01-14    0.223758
2018-01-21   -0.121039
2018-01-28    0.745609
2018-02-04    0.676583
2018-02-11    0.345922
2018-02-18   -0.636234
2018-02-25    0.189369
2018-03-04   -0.520725
2018-03-11   -0.775972
2018-03-18    0.801139
2018-03-25    0.463726
2018-04-01   -0.055283
2018-04-08   -0.519113
2018-04-15    0.696919
Freq: W-SUN, dtype: float64

In [45]:
# Down-sample to monthly bins labelled by month-end and keep the first
# observation that falls in each month.
# The offset object replaces the string alias 'M', which is deprecated since
# pandas 2.2 (renamed 'ME'); MonthEnd() means the same on every version.
ts.resample(pd.offsets.MonthEnd()).first()

2018-01-31   -0.152254
2018-02-28    1.610807
2018-03-31    0.683443
2018-04-30   -1.396298
Freq: M, dtype: float64