# 예제 5-15 문자열을 Timestamp로 변환 

In [1]:
import pandas as pd

In [2]:
# Read the stock price CSV (path relative to the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='stock-data.csv')

### 데이터 내용 및 자료형 확인 

In [3]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,Date,Close,Start,High,Low,Volume
0,2018-07-02,10100,10850,10900,10000,137977
1,2018-06-29,10700,10550,10900,9990,170253
2,2018-06-28,10400,10900,10950,10150,155769
3,2018-06-27,10900,10800,11050,10500,133548
4,2018-06-26,10800,10900,11000,10700,63039


In [4]:
# Print the per-column dtype / non-null summary of df.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    20 non-null     object
 1   Close   20 non-null     int64 
 2   Start   20 non-null     int64 
 3   High    20 non-null     int64 
 4   Low     20 non-null     int64 
 5   Volume  20 non-null     int64 
dtypes: int64(5), object(1)
memory usage: 944.0+ bytes


Date열의 자료형이 object임을 알 수 있다.

이를 Timestamp로 변환한다.

### 문자열 데이터 (시리즈 객체)를 판다스 Timestamp로 변환

In [6]:
# Parse the string 'Date' column with pd.to_datetime and store the result
# in a new 'new_Date' column; the original 'Date' column is left untouched.
df['new_Date'] = df['Date'].pipe(pd.to_datetime)

### 데이터 내용 및 자료형 확인 

In [7]:
# Preview the first five rows, now including the new_Date column.
df.head(n=5)

Unnamed: 0,Date,Close,Start,High,Low,Volume,new_Date
0,2018-07-02,10100,10850,10900,10000,137977,2018-07-02
1,2018-06-29,10700,10550,10900,9990,170253,2018-06-29
2,2018-06-28,10400,10900,10950,10150,155769,2018-06-28
3,2018-06-27,10900,10800,11050,10500,133548,2018-06-27
4,2018-06-26,10800,10900,11000,10700,63039,2018-06-26


In [8]:
# Print the frame summary again to inspect the dtype of the new_Date column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Date      20 non-null     object        
 1   Close     20 non-null     int64         
 2   Start     20 non-null     int64         
 3   High      20 non-null     int64         
 4   Low       20 non-null     int64         
 5   Volume    20 non-null     int64         
 6   new_Date  20 non-null     datetime64[ns]
dtypes: datetime64[ns](1), int64(5), object(1)
memory usage: 1.1+ KB


### 시계열 값으로 변환된 열을 새로운 행 인덱스로 지정. 
기존 날짜 열은 삭제

In [9]:
# Promote the parsed 'new_Date' column to the row index and discard the
# original string 'Date' column. A chained expression is used instead of
# inplace=True: the steps read top-down and nothing is mutated half-way if
# one of them fails.
df = df.set_index('new_Date').drop(columns='Date')

### 데이터 내용 및 자료형 확인 

In [10]:
# Preview the first five rows of the re-indexed frame.
df.head(n=5)

Unnamed: 0_level_0,Close,Start,High,Low,Volume
new_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-07-02,10100,10850,10900,10000,137977
2018-06-29,10700,10550,10900,9990,170253
2018-06-28,10400,10900,10950,10150,155769
2018-06-27,10900,10800,11050,10500,133548
2018-06-26,10800,10900,11000,10700,63039


In [11]:
# Print the frame summary to inspect the index type and remaining columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 20 entries, 2018-07-02 to 2018-06-01
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Close   20 non-null     int64
 1   Start   20 non-null     int64
 2   High    20 non-null     int64
 3   Low     20 non-null     int64
 4   Volume  20 non-null     int64
dtypes: int64(5)
memory usage: 960.0 bytes


# 예제 5-16 Timestamp를 Period로 변환 

### 날짜 형식의 문자열로 구성되는 리스트 정의 

In [12]:
# Date strings (ISO year-month-day) used as input for the conversions below.
dates = [
    '2019-01-01',
    '2020-03-01',
    '2021-06-21',
]

### 문자열의 배열(리스트)을 판다스 Timestamp로 변환 

In [13]:
# Convert the list of date strings with pd.to_datetime, then display the result.
ts_dates = pd.to_datetime(arg=dates)
ts_dates

DatetimeIndex(['2019-01-01', '2020-03-01', '2021-06-21'], dtype='datetime64[ns]', freq=None)

### Timestamp를 Period로 변환 

In [14]:
# Re-express each timestamp as a Period with daily frequency ('D').
pr_day = ts_dates.to_period('D')
pr_day

PeriodIndex(['2019-01-01', '2020-03-01', '2021-06-21'], dtype='period[D]', freq='D')

In [16]:
# Re-express each timestamp as a Period with monthly frequency ('M').
pr_month = ts_dates.to_period('M')
pr_month

PeriodIndex(['2019-01', '2020-03', '2021-06'], dtype='period[M]', freq='M')

In [15]:
# Re-express each timestamp as a Period with annual frequency.
# 'Y' is used instead of the legacy 'A' alias: 'A' is deprecated since
# pandas 2.2, while 'Y' is accepted by both older and newer releases.
pr_year = ts_dates.to_period(freq='Y')
pr_year

PeriodIndex(['2019', '2020', '2021'], dtype='period[A-DEC]', freq='A-DEC')

# 예제 5-17 Timestamp 배열 만들기 

In [2]:
import pandas as pd

### Timestamp의 배열 만들기 - 월 간격, 월의 시작일 기준 

In [17]:
# Build six tz-aware timestamps, one per month, each on the first day of the month.
ts_ms = pd.date_range(
    '2019-01-01',     # start of the date range (end is left unset)
    periods=6,        # number of timestamps to generate
    freq='MS',        # step between timestamps (MS: month start)
    tz='Asia/Seoul',  # time zone
)
ts_ms

DatetimeIndex(['2019-01-01 00:00:00+09:00', '2019-02-01 00:00:00+09:00',
               '2019-03-01 00:00:00+09:00', '2019-04-01 00:00:00+09:00',
               '2019-05-01 00:00:00+09:00', '2019-06-01 00:00:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', freq='MS')

### 월 간격, 월의 마지막 날 기준 

In [18]:
# Build six tz-aware timestamps, one per month, each on the last day of the month.
# The step is given as an offset object rather than the 'M' string alias:
# pandas 2.2 renamed that alias to 'ME', so the string form is not portable
# across versions, whereas MonthEnd() yields the same dates everywhere.
ts_me = pd.date_range('2019-01-01', periods=6,
                      freq=pd.offsets.MonthEnd(),  # step: one month end
                      tz='Asia/Seoul')
ts_me

DatetimeIndex(['2019-01-31 00:00:00+09:00', '2019-02-28 00:00:00+09:00',
               '2019-03-31 00:00:00+09:00', '2019-04-30 00:00:00+09:00',
               '2019-05-31 00:00:00+09:00', '2019-06-30 00:00:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', freq='M')

### 분기(3개월) 간격, 월의 마지막 날 기준 

In [3]:
# Build six tz-aware timestamps spaced three months apart, each on a month end.
# MonthEnd(3) replaces the '3M' string alias, which pandas 2.2 renamed to
# '3ME'; the offset object produces identical dates on every version.
ts_3m = pd.date_range('2019-01-01', periods=6,
                      freq=pd.offsets.MonthEnd(3),  # step: every third month end
                      tz='Asia/Seoul')
ts_3m

DatetimeIndex(['2019-01-31 00:00:00+09:00', '2019-04-30 00:00:00+09:00',
               '2019-07-31 00:00:00+09:00', '2019-10-31 00:00:00+09:00',
               '2020-01-31 00:00:00+09:00', '2020-04-30 00:00:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', freq='3M')

# 예제 5-18 Period 배열 만들기 - 1개월 길이 

In [4]:
# Build three consecutive one-month periods starting at January 2019.
pr_m = pd.period_range(
    '2019-01-01',  # start of the range (end is left unset)
    periods=3,     # number of periods to generate
    freq='M',      # length of each period (M: month)
)
pr_m

PeriodIndex(['2019-01', '2019-02', '2019-03'], dtype='period[M]', freq='M')

### Period 배열 만들기 - 1시간 길이 

In [5]:
# Build three consecutive one-hour periods starting at 2019-01-01 00:00.
# Hour() replaces the 'H' string alias, which pandas 2.2 renamed to 'h';
# the offset object is accepted unchanged by older and newer versions.
pr_h = pd.period_range('2019-01-01',
                       end=None,
                       periods=3,
                       freq=pd.offsets.Hour())  # length of each period: 1 hour
pr_h

PeriodIndex(['2019-01-01 00:00', '2019-01-01 01:00', '2019-01-01 02:00'], dtype='period[H]', freq='H')

### Period 배열 만들기 - 2시간 길이 

In [6]:
# Build three periods spaced two hours apart, starting at 2019-01-01 00:00.
# Hour(2) replaces the '2H' string alias, which pandas 2.2 renamed to '2h';
# the offset object is accepted unchanged by older and newer versions.
pr_2h = pd.period_range(start='2019-01-01',
                        end=None,
                        periods=3,
                        freq=pd.offsets.Hour(2))  # step between periods: 2 hours
pr_2h

PeriodIndex(['2019-01-01 00:00', '2019-01-01 02:00', '2019-01-01 04:00'], dtype='period[2H]', freq='2H')

# 예제 5-19 날짜 데이터 분리 

In [7]:
import pandas as pd

In [8]:
 df = pd.read_csv('stock-data.csv')

### 문자열인 날짜 데이터를 판다스 Timestamp로 변환 

In [9]:
# Parse the string 'Date' column with pd.to_datetime into a new 'new_Date'
# column, then preview the first rows.
df['new_Date'] = df['Date'].pipe(pd.to_datetime)
df.head(n=5)

Unnamed: 0,Date,Close,Start,High,Low,Volume,new_Date
0,2018-07-02,10100,10850,10900,10000,137977,2018-07-02
1,2018-06-29,10700,10550,10900,9990,170253,2018-06-29
2,2018-06-28,10400,10900,10950,10150,155769,2018-06-28
3,2018-06-27,10900,10800,11050,10500,133548,2018-06-27
4,2018-06-26,10800,10900,11000,10700,63039,2018-06-26


### dt 속성을 이용하여 new_Date 열의 연-월-일 정보를<br> 년, 월, 일로 구분 

In [10]:
# Split new_Date into year / month / day columns via the .dt accessor.
# Targets are assigned left to right, so the columns are appended in the
# order Year, Month, Day.
df['Year'], df['Month'], df['Day'] = (
    df['new_Date'].dt.year,
    df['new_Date'].dt.month,
    df['new_Date'].dt.day,
)
df.head(n=5)

Unnamed: 0,Date,Close,Start,High,Low,Volume,new_Date,Year,Month,Day
0,2018-07-02,10100,10850,10900,10000,137977,2018-07-02,2018,7,2
1,2018-06-29,10700,10550,10900,9990,170253,2018-06-29,2018,6,29
2,2018-06-28,10400,10900,10950,10150,155769,2018-06-28,2018,6,28
3,2018-06-27,10900,10800,11050,10500,133548,2018-06-27,2018,6,27
4,2018-06-26,10800,10900,11000,10700,63039,2018-06-26,2018,6,26


### Timestamp를 Period로 변환하여 연-월-일 표기 변경 

In [11]:
# Convert new_Date to Period values so the date is expressed at year and at
# month granularity. 'Y' is used for the annual frequency because the legacy
# 'A' alias is deprecated since pandas 2.2; 'Y' works on old and new releases.
df['Date_yr'] = df['new_Date'].dt.to_period(freq='Y')
df['Date_m'] = df['new_Date'].dt.to_period(freq='M')
df.head()

Unnamed: 0,Date,Close,Start,High,Low,Volume,new_Date,Year,Month,Day,Date_yr,Date_m
0,2018-07-02,10100,10850,10900,10000,137977,2018-07-02,2018,7,2,2018,2018-07
1,2018-06-29,10700,10550,10900,9990,170253,2018-06-29,2018,6,29,2018,2018-06
2,2018-06-28,10400,10900,10950,10150,155769,2018-06-28,2018,6,28,2018,2018-06
3,2018-06-27,10900,10800,11050,10500,133548,2018-06-27,2018,6,27,2018,2018-06
4,2018-06-26,10800,10900,11000,10700,63039,2018-06-26,2018,6,26,2018,2018-06


### 원하는 열을 새로운 행 인덱스로 지정 

In [12]:
# Use the monthly Period column as the row index. The result is rebound
# instead of using inplace=True, so the step reads as an explicit
# transformation of df.
df = df.set_index('Date_m')
df.head()

Unnamed: 0_level_0,Date,Close,Start,High,Low,Volume,new_Date,Year,Month,Day,Date_yr
Date_m,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-07,2018-07-02,10100,10850,10900,10000,137977,2018-07-02,2018,7,2,2018
2018-06,2018-06-29,10700,10550,10900,9990,170253,2018-06-29,2018,6,29,2018
2018-06,2018-06-28,10400,10900,10950,10150,155769,2018-06-28,2018,6,28,2018
2018-06,2018-06-27,10900,10800,11050,10500,133548,2018-06-27,2018,6,27,2018
2018-06,2018-06-26,10800,10900,11000,10700,63039,2018-06-26,2018,6,26,2018


# 예제 5-20 날짜 인덱스 활용 

In [13]:
# Reload the raw stock data so this example starts from an unmodified frame.
df = pd.read_csv(filepath_or_buffer='stock-data.csv')

### 문자열인 날짜 데이터를 Timestamp로 변환

In [16]:
# Parse the string 'Date' column into datetimes and make the parsed values
# the row index, in one chain that avoids inplace=True.
df = (
    df
    .assign(new_Date=pd.to_datetime(df['Date']))
    .set_index('new_Date')
)
df.head()

Unnamed: 0_level_0,Date,Close,Start,High,Low,Volume,new_date
new_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-07-02,2018-07-02,10100,10850,10900,10000,137977,2018-07-02
2018-06-29,2018-06-29,10700,10550,10900,9990,170253,2018-06-29
2018-06-28,2018-06-28,10400,10900,10950,10150,155769,2018-06-28
2018-06-27,2018-06-27,10900,10800,11050,10500,133548,2018-06-27
2018-06-26,2018-06-26,10800,10900,11000,10700,63039,2018-06-26


### 날짜 인덱스를 이용하여 데이터 선택하기 

In [19]:
# Partial-string selection on the datetime index.
# df_y: every row dated in 2018. The original bound the list literal ['2018']
# here instead of selecting from df.
df_y = df.loc['2018']
# df_ym: every row dated in July 2018.
df_ym = df.loc['2018-07']
df_ym

Unnamed: 0_level_0,Date,Close,Start,High,Low,Volume,new_date
new_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-07-02,2018-07-02,10100,10850,10900,10000,137977,2018-07-02


In [22]:
# Rows dated in July 2018, restricted to the label range of columns from
# 'Start' through 'High' (both ends included).
df_ym_cols = df.loc['2018-07', slice('Start', 'High')]
df_ym_cols

Unnamed: 0_level_0,Start,High
new_Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-07-02,10850,10900


In [24]:
# Rows dated exactly 2018-07-02, returned as a DataFrame.
# df['2018-07-02'] is avoided: on pandas >= 2.0 plain [] with a single string
# is a column lookup and raises KeyError. A boolean mask on the index works
# on every version and always yields a DataFrame.
df_ymd = df.loc[df.index == '2018-07-02']
df_ymd

Unnamed: 0_level_0,Date,Close,Start,High,Low,Volume,new_date
new_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-07-02,2018-07-02,10100,10850,10900,10000,137977,2018-07-02


In [25]:
# Rows dated from 2018-06-20 through 2018-06-25 (both ends included), kept in
# the frame's existing row order.
# The index is in descending date order, and a reversed string slice
# ('2018-06-25':'2018-06-20') is interpreted differently across pandas
# versions; an explicit value-based mask does not depend on the row order.
df_ymd_range = df.loc[(df.index >= '2018-06-20') & (df.index <= '2018-06-25')]
df_ymd_range

Unnamed: 0_level_0,Date,Close,Start,High,Low,Volume,new_date
new_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-06-25,2018-06-25,11150,11400,11450,11000,55519,2018-06-25
2018-06-22,2018-06-22,11300,11250,11450,10750,134805,2018-06-22
2018-06-21,2018-06-21,11200,11350,11750,11200,133002,2018-06-21
2018-06-20,2018-06-20,11550,11200,11600,10900,308596,2018-06-20


### 시간 간격 계산. 최근 180일~189일 사이의 값만 

In [26]:
# Select rows whose distance from a reference day lies between 180 and 189 days.
today = pd.to_datetime('2018-12-25')  # reference date
df['time_delta'] = today - df.index   # difference between reference date and each index date
df = df.set_index('time_delta')       # use the differences as the row index (no inplace mutation)
# Label-based row slice on the timedelta index; both bounds are inclusive.
df_180 = df.loc['180 days':'189 days']
df_180

Unnamed: 0_level_0,Date,Close,Start,High,Low,Volume,new_date
time_delta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
180 days,2018-06-28,10400,10900,10950,10150,155769,2018-06-28
181 days,2018-06-27,10900,10800,11050,10500,133548,2018-06-27
182 days,2018-06-26,10800,10900,11000,10700,63039,2018-06-26
183 days,2018-06-25,11150,11400,11450,11000,55519,2018-06-25
186 days,2018-06-22,11300,11250,11450,10750,134805,2018-06-22
187 days,2018-06-21,11200,11350,11750,11200,133002,2018-06-21
188 days,2018-06-20,11550,11200,11600,10900,308596,2018-06-20
189 days,2018-06-19,11300,11850,11950,11300,180656,2018-06-19
