In [1]:
# finance datareader가 설치 안되어 있을시 실행
# !pip install finance-datareader

In [2]:
from pathlib import Path
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import FinanceDataReader as fdr

from concurrent import futures
import concurrent

## KOSPI 종목 데이터 가져오기

In [103]:
kospi = fdr.StockListing('Kospi')

In [6]:
kospi

Unnamed: 0,Symbol,Market,Name,Sector,Industry,ListingDate,SettleMonth,Representative,HomePage,Region
1,095570,KOSPI,AJ네트웍스,산업용 기계 및 장비 임대업,"렌탈(파렛트, OA장비, 건설장비)",2015-08-21,12월,박대현,http://www.ajnet.co.kr,서울특별시
2,006840,KOSPI,AK홀딩스,기타 금융업,지주사업,1999-08-11,12월,"채형석, 이석주(각자 대표이사)",http://www.aekyunggroup.co.kr,서울특별시
6,152100,KOSPI,ARIRANG 200,,,NaT,,,,
7,295820,KOSPI,ARIRANG 200동일가중,,,NaT,,,,
8,253150,KOSPI,ARIRANG 200선물레버리지,,,NaT,,,,
...,...,...,...,...,...,...,...,...,...,...
6309,215620,KOSPI,흥국 S&P코리아로우볼,,,NaT,,,,
6311,000540,KOSPI,흥국화재,보험업,손해보험,1974-12-05,12월,권중원,http://www.insurance.co.kr,서울특별시
6312,000547,KOSPI,흥국화재2우B,,,NaT,,,,
6313,000545,KOSPI,흥국화재우,,,NaT,,,,


In [105]:
kospi.dropna(subset=['Sector'], inplace=True)

## 가격 데이터 가져오기

In [26]:
def get_data(code: str):
    data = fdr.DataReader(code)
    data['Ticker'] = code
    return data

In [41]:
data_list = []

def run(code):
    data = get_data(code)
    data_list.append(data)
    
with futures.ThreadPoolExecutor(50) as excutor:
    excutor.map(run, kospi.Symbol.to_list())

In [42]:
kospi_data = pd.concat(data_list)

In [51]:
kospi_data.columns = list(map(lambda x: x.lower(), kospi_data.columns))

In [57]:
kospi_data.index.names = ['date']

In [59]:
kospi_data = kospi_data.groupby(['date','ticker']).last()

In [62]:
kospi_data

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,change
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1993-02-02,001780,15243,15243,14778,15140,1660,
1993-02-03,001780,14727,15657,14727,15657,4540,0.034148
1993-02-04,001780,15760,16329,15760,16328,7290,0.042856
1993-02-05,001780,16587,16794,16329,16328,7770,0.000000
1993-02-06,001780,16225,16329,15812,16018,1720,-0.018986
...,...,...,...,...,...,...,...
2021-05-18,365550,6380,6550,6380,6500,280833,0.020408
2021-05-18,375500,133500,138000,133500,136500,171460,0.026316
2021-05-18,378850,6530,6800,6380,6440,793275,0.023847
2021-05-18,900140,3620,3690,3565,3585,350371,-0.009669


## Data 저장 위치 설정

In [45]:
DATA_STORE = Path('assets.h5')

## 가격 데이터 저장

In [64]:
with pd.HDFStore(DATA_STORE) as store:
    store.put('finance_datareader/prices', kospi_data)

## 회사 코드랑 이름 저장

In [68]:
kospi_stock = kospi[['Symbol','Name']]

In [70]:
kospi_stock.columns = ['code', 'name']

In [72]:
kospi_stock.set_index('code', inplace=True)

In [73]:
kospi_stock

Unnamed: 0_level_0,name
code,Unnamed: 1_level_1
095570,AJ네트웍스
006840,AK홀딩스
027410,BGF
282330,BGF리테일
138930,BNK금융지주
...,...
079980,휴비스
005010,휴스틸
069260,휴켐스
000540,흥국화재


In [74]:
with pd.HDFStore(DATA_STORE) as store:
    store.put('finance_datareader/stocks', kospi_stock)

## BenchMark로 설정할 KOPSI 100 데이터

In [80]:
kospi_100_price = fdr.DataReader('KS100')

In [82]:
kospi_100_price.rename(columns=str.lower, inplace=True)

In [84]:
kospi_100_price.index.names = ['date']

In [86]:
with pd.HDFStore(DATA_STORE) as store:
    store.put('finance_datareader/kospi100', kospi_100_price)

## 시가 총액 데이터 저장
- http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201
- KOSPI만 선택 
- 다운로드

In [94]:
market_cap = pd.read_csv('data_3334_20210520.csv', encoding='CP949')

In [96]:
market_cap = market_cap[['종목코드', '시가총액']]

In [100]:
market_cap.set_index('종목코드', inplace=True)

## 두 데이터 합치기

In [109]:
kospi.rename(columns=str.lower, inplace=True)
kospi.set_index('symbol', inplace=True)

In [110]:
kospi_marketcap = pd.merge(kospi, market_cap, left_index=True, right_index=True, how='left')

In [113]:
kospi_marketcap.rename(columns={'시가총액':'market_cap'}, inplace=True)

In [116]:
kospi_marketcap.index.name = 'ticker'

In [118]:
kospi_marketcap.head(5)

Unnamed: 0_level_0,market,name,sector,industry,listingdate,settlemonth,representative,homepage,region,market_cap
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
95570,KOSPI,AJ네트웍스,산업용 기계 및 장비 임대업,"렌탈(파렛트, OA장비, 건설장비)",2015-08-21,12월,박대현,http://www.ajnet.co.kr,서울특별시,289830006050
6840,KOSPI,AK홀딩스,기타 금융업,지주사업,1999-08-11,12월,"채형석, 이석주(각자 대표이사)",http://www.aekyunggroup.co.kr,서울특별시,458365610600
27410,KOSPI,BGF,기타 금융업,지주회사,2014-05-19,12월,홍정국,http://www.bgf.co.kr,서울특별시,658531522080
282330,KOSPI,BGF리테일,종합 소매업,체인화 편의점,2017-12-08,12월,이건준,http://www.bgfretail.com,서울특별시,3145670892000
138930,KOSPI,BNK금융지주,기타 금융업,금융지주회사,2011-03-30,12월,김지완,http://www.bnkfg.com,부산광역시,2633556787680


In [119]:
with pd.HDFStore(DATA_STORE) as store:
    store.put('kospi/stocks', kospi_marketcap)

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block2_values] [items->Index(['market', 'name', 'sector', 'industry', 'settlemonth', 'representative',
       'homepage', 'region'],
      dtype='object')]

  if (await self.run_code(code, result,  async_=asy)):
