## 데이터셋 다운로드

In [1]:
# 라이브러리 설치

!pip install yfinance



In [2]:
import yfinance as yf
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np

1. 티커 리스트 설정

In [3]:
# Yahoo Finance symbols to download; order matches the original list.
ticker_list = [
    '^KS11',   # KOSPI index
    '^GSPC',   # S&P 500 index
    'KRW=X',   # USD/KRW exchange rate
    '^TNX',    # US 10-year bond series
    '^VIX',    # VIX
]

2. 데이터 다운로드(20년치, 일봉)

In [4]:
# Download daily bars for every ticker in a single request. The result has
# two-level (Price, Ticker) columns, e.g. ('Close', '^KS11').
# auto_adjust is passed explicitly so the price columns do not depend on the
# library's default, which has changed between yfinance releases; True matches
# the recorded output (no separate 'Adj Close' column group).
KOSPI_20y_daily = yf.download(
    ticker_list,
    start='2005-01-01',
    end='2025-11-04',
    interval='1d',
    auto_adjust=True,
)

print("원본 데이터 : ")
print(KOSPI_20y_daily.head())


  KOSPI_20y_daily = yf.download(ticker_list,
[*********************100%***********************]  5 of 5 completed

원본 데이터 : 
Price             Close                                                High  \
Ticker            KRW=X        ^GSPC       ^KS11   ^TNX   ^VIX        KRW=X   
Date                                                                          
2005-01-03  1027.300049  1202.079956  893.710022  4.220  14.08  1058.400024   
2005-01-04  1028.699951  1188.050049  886.900024  4.283  13.98  1036.949951   
2005-01-05  1035.800049  1183.739990  885.190002  4.277  14.09  1045.000000   
2005-01-06  1048.400024  1187.890015  871.280029  4.272  13.58  1056.900024   
2005-01-07  1045.099976  1186.189941  870.840027  4.285  13.49  1049.750000   

Price                                              ...         Open  \
Ticker            ^GSPC       ^KS11   ^TNX   ^VIX  ...        KRW=X   
Date                                               ...                
2005-01-03  1217.800049  897.590027  4.270  14.23  ...  1044.199951   
2005-01-04  1205.839966  895.400024  4.307  14.45  ...  1027.099976   
20




3. 데이터 정리

In [5]:
# Price data: map each flat output column name to its (Price, Ticker) key in
# the downloaded frame. Dict order fixes the resulting column order.
price_sources = {
    'KOSPI_Close': ('Close', '^KS11'),
    'KOSPI_Open': ('Open', '^KS11'),
    'KOSPI_High': ('High', '^KS11'),
    'KOSPI_Low': ('Low', '^KS11'),
    'SP500': ('Close', '^GSPC'),
    'USD_KRW': ('Close', 'KRW=X'),
    'US_10Y_Bond': ('Close', '^TNX'),
    'VIX': ('Close', '^VIX'),
}
df_price = pd.DataFrame(
    {flat_name: KOSPI_20y_daily[source_key]
     for flat_name, source_key in price_sources.items()}
)

# Volume data: only the KOSPI volume series is kept.
df_volume = pd.DataFrame({'KOSPI_Volume': KOSPI_20y_daily[('Volume', '^KS11')]})


4. 결측치 처리

In [6]:
# Price data: carry the previous row's value forward into missing entries.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling
# and produces the same result. Note that forward-fill cannot fill a gap in
# the very first row, since there is no earlier value to copy.
df_price_filled = df_price.ffill()

# Volume data: a missing entry is filled with 0.
df_volume_filled = df_volume.fillna(0)

  df_price_filled = df_price.fillna(method='ffill')


5. 최종 데이터셋으로 병합

In [7]:
# Place the filled price columns and the filled volume column side by side,
# aligned on their index.
filled_frames = [df_price_filled, df_volume_filled]
df_final = pd.concat(filled_frames, axis='columns')

# Summarize column dtypes and non-null counts of the merged frame.
df_final.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5435 entries, 2005-01-03 to 2025-11-03
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   KOSPI_Close   5435 non-null   float64
 1   KOSPI_Open    5435 non-null   float64
 2   KOSPI_High    5435 non-null   float64
 3   KOSPI_Low     5435 non-null   float64
 4   SP500         5435 non-null   float64
 5   USD_KRW       5435 non-null   float64
 6   US_10Y_Bond   5435 non-null   float64
 7   VIX           5435 non-null   float64
 8   KOSPI_Volume  5435 non-null   float64
dtypes: float64(9)
memory usage: 424.6 KB


In [8]:
# Print the first five rows of the merged dataset as plain text.
first_rows = df_final.head(n=5)
print(first_rows)

            KOSPI_Close  KOSPI_Open  KOSPI_High   KOSPI_Low        SP500  \
Date                                                                       
2005-01-03   893.710022  896.000000  897.590027  890.929993  1202.079956   
2005-01-04   886.900024  890.559998  895.400024  884.940002  1188.050049   
2005-01-05   885.190002  874.919983  885.200012  873.179993  1183.739990   
2005-01-06   871.280029  878.229980  886.309998  871.280029  1187.890015   
2005-01-07   870.840027  876.359985  878.890015  866.719971  1186.189941   

                USD_KRW  US_10Y_Bond    VIX  KOSPI_Volume  
Date                                                       
2005-01-03  1027.300049        4.220  14.08      252600.0  
2005-01-04  1028.699951        4.283  13.98      331600.0  
2005-01-05  1035.800049        4.277  14.09      343100.0  
2005-01-06  1048.400024        4.272  13.58      394700.0  
2005-01-07  1045.099976        4.285  13.49      294800.0  


6. 최종 CSV 파일 저장

In [9]:
# Write the merged dataset (index included) to a CSV file in the working
# directory, then print a completion message.
df_final.to_csv(path_or_buf="kospi_data_final.csv")
print("파일 저장 완료")

파일 저장 완료
