### 시계열 데이터

In [23]:
import numpy as np
import pandas as pd

df=pd.read_csv('./data/stock-data.csv')
print(df.info())  #자료형 확인

#1. Date 열의 값을 시계열로 변경해서 추가
df['newDate'] = pd.to_datetime(df['Date']) #생긴건 같지만 info()를 찍어보면 type이 다르다.

#2. 위와 같은 데이터프레임에서는 날짜를 index로 설정하는 경우가 많음
df.set_index('newDate', inplace=True)
df.drop('Date', axis=1, inplace=True)
df

#3. 일정한 간격을 갖는 날짜 문자열 만들기
dates = ['2017-03-01', '2017-06-01', '2017-08-05']

#3-1. 날짜로 변경
pddates = pd.to_datetime(dates)
pddates

#3-2. Period로 변환하기
pdperiod = pddates.to_period(freq='D')
print(pdperiod)
pdperiod = pddates.to_period(freq='M')
print(pdperiod)
pdperiod = pddates.to_period(freq='Q')
print(pdperiod)
pdperiod = pddates.to_period(freq='A')
print(pdperiod)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 6 columns):
Date      20 non-null object
Close     20 non-null int64
Start     20 non-null int64
High      20 non-null int64
Low       20 non-null int64
Volume    20 non-null int64
dtypes: int64(5), object(1)
memory usage: 1.1+ KB
None
PeriodIndex(['2017-03-01', '2017-06-01', '2017-08-05'], dtype='period[D]', freq='D')
PeriodIndex(['2017-03', '2017-06', '2017-08'], dtype='period[M]', freq='M')
PeriodIndex(['2017Q1', '2017Q2', '2017Q3'], dtype='period[Q-DEC]', freq='Q-DEC')
PeriodIndex(['2017', '2017', '2017'], dtype='period[A-DEC]', freq='A-DEC')


### date_Range()

In [35]:
ts_ms = pd.date_range(start='2018-10-01',end=None, periods=12,freq='D')
ts_ms

DatetimeIndex(['2018-10-01', '2018-10-02', '2018-10-03', '2018-10-04',
               '2018-10-05', '2018-10-06', '2018-10-07', '2018-10-08',
               '2018-10-09', '2018-10-10', '2018-10-11', '2018-10-12'],
              dtype='datetime64[ns]', freq='D')

### 필요한 날짜 부분 추출하기

In [39]:
df=pd.read_csv('./data/stock-data.csv')
df['newDate'] = pd.to_datetime(df['Date'])

df['year'] = df['newDate'].dt.year #필요한 부분을 dt.month와 같은 명령어로 자를 수 있다.
df

Unnamed: 0,Date,Close,Start,High,Low,Volume,newDate,year
0,2018-07-02,10100,10850,10900,10000,137977,2018-07-02,2018
1,2018-06-29,10700,10550,10900,9990,170253,2018-06-29,2018
2,2018-06-28,10400,10900,10950,10150,155769,2018-06-28,2018
3,2018-06-27,10900,10800,11050,10500,133548,2018-06-27,2018
4,2018-06-26,10800,10900,11000,10700,63039,2018-06-26,2018
5,2018-06-25,11150,11400,11450,11000,55519,2018-06-25,2018
6,2018-06-22,11300,11250,11450,10750,134805,2018-06-22,2018
7,2018-06-21,11200,11350,11750,11200,133002,2018-06-21,2018
8,2018-06-20,11550,11200,11600,10900,308596,2018-06-20,2018
9,2018-06-19,11300,11850,11950,11300,180656,2018-06-19,2018


### 파이썬으로 시계열 자료 가공

In [54]:
from datetime import datetime

dates = [datetime(2017, 1, 1), datetime(2017, 2, 4), datetime(2017, 3, 5)]
ts = pd.Series(np.random.randn(3),index=dates)
ts.shift(1)

2017-01-01         NaN
2017-02-04    0.583432
2017-03-05   -1.223673
dtype: float64

### 비정상 시계열의 차분

In [66]:
ran = pd.date_range('11/3/2020', periods=20, freq='T')
ran

#정상 시계열 자료
ts = pd.Series(np.arange(20), index=ran)
ts

# 7분 단위로 기술통계량 계산하기
ts.resample('7T').sum() 
ts.resample('7T').mean()
ts.resample('7T').std()



2020-11-03 00:00:00    2.160247
2020-11-03 00:07:00    2.160247
2020-11-03 00:14:00    1.870829
Freq: 7T, dtype: float64