In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import rc
rc('font', family='Malgun Gothic')
rc('axes', unicode_minus=False)

In [2]:
import datetime
import os
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen

In [None]:
MARKET_CODE_DICT = {
    'kospi': 'stockMkt',
    'kosdaq': 'kosdaqMkt',
    'konex': 'konexMkt'
}

In [None]:
DOWNLOAD_URL = 'kind.krx.co.kr/corpgeneral/corpList.do'

def download_stock_codes(market=None, delisted=False):
    params = {'method': 'download'}

    if market.lower() in MARKET_CODE_DICT:
        params['marketType'] = MARKET_CODE_DICT[market]

    if not delisted:
        params['searchType'] = 13

    params_string = urllib.parse.urlencode(params)
    request_url = urllib.parse.urlunsplit(['http', DOWNLOAD_URL, '', params_string, ''])

    df = pd.read_html(request_url, header=0)[0]
    df.종목코드 = df.종목코드.map('{:06d}'.format)

    return df

In [3]:
sto_code = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13')[0]
sto_code[:5]

Unnamed: 0,회사명,종목코드,업종,주요제품,상장일,결산월,대표자명,홈페이지,지역
0,GS글로벌,1250,상품 종합 도매업,"수출입업(시멘트,철강금속,전기전자,섬유,기계화학),상품중개,광업,채석업/하수처리 서...",1976-06-26,12월,김태형,http://www.gsgcorp.com,서울특별시
1,HSD엔진,82740,일반 목적용 기계 제조업,"대형선박용엔진,내연발전엔진",2011-01-04,12월,고영열,http://www.doosanengine.com,경상남도
2,KG케미칼,1390,기초 화학물질 제조업,"콘크리트혼화제, 비료, 친환경농자재, 수처리제",1989-08-25,12월,김경묵,http://www.kgchem.co.kr,울산광역시
3,LG이노텍,11070,전자부품 제조업,기타 전자부품 제조업,2008-07-24,12월,정철동,http://www.lginnotek.co.kr,서울특별시
4,OCI,10060,기초 화학물질 제조업,"타르제품,카본블랙,무수프탈산,농약원제,석탄화학제품,정밀화학제품,플라스틱창호재 제조,판매",1985-07-09,12월,"백우석, 이우현, 김택중(3인, 각자 대표이사)",http://www.oci.co.kr,서울특별시


In [4]:
date_today = datetime.date.today()
date_key = date_today.strftime('%y%m%d')

In [5]:
code_list_save = ('data/code_list/' + 'code_list_' + date_key + '.csv').strip()
code_list_save

'data/code_list/code_list_190729.csv'

In [6]:
sto_code.to_csv(code_list_save, encoding='euc-kr', index=False) #cp949
print('save >> ' + code_list_save)

save >> data/code_list/code_list_190729.csv


In [7]:
code_list_load = ('data/code_list/' + os.listdir('data/code_list')[-1])
code_list_load

'data/code_list/code_list_190729.csv'

In [8]:
# os.path.getmtime('data/')

### 저장 로드 방식 다시해~

In [9]:
sto_code = pd.read_csv(code_list_load, encoding='euc-kr')
sto_code[:3]

Unnamed: 0,회사명,종목코드,업종,주요제품,상장일,결산월,대표자명,홈페이지,지역
0,GS글로벌,1250,상품 종합 도매업,"수출입업(시멘트,철강금속,전기전자,섬유,기계화학),상품중개,광업,채석업/하수처리 서...",1976-06-26,12월,김태형,http://www.gsgcorp.com,서울특별시
1,HSD엔진,82740,일반 목적용 기계 제조업,"대형선박용엔진,내연발전엔진",2011-01-04,12월,고영열,http://www.doosanengine.com,경상남도
2,KG케미칼,1390,기초 화학물질 제조업,"콘크리트혼화제, 비료, 친환경농자재, 수처리제",1989-08-25,12월,김경묵,http://www.kgchem.co.kr,울산광역시


In [10]:
sto_code['종목코드'] = sto_code['종목코드'].map('{:06d}'.format)

In [11]:
sto_code_sort = sto_code[['회사명', '종목코드']]
sto_code_sort[:5]

Unnamed: 0,회사명,종목코드
0,GS글로벌,1250
1,HSD엔진,82740
2,KG케미칼,1390
3,LG이노텍,11070
4,OCI,10060


In [27]:
def last_page_finder(html):
    page_pkg = html.find_all("table",align="center")
    last_page_pkg = page_pkg[0].find_all("td",class_="pgRR")[0].a.get('href')
    last_page_num = int(last_page_pkg.split("=")[-1].strip())
    return last_page_num

In [13]:
item_code = 'KOSPI' #page=1

In [14]:
sise_url = 'https://finance.naver.com/sise/sise_index_day.nhn?code=' + item_code

In [15]:
source_html = BeautifulSoup(urlopen(sise_url).read(), 'lxml') #html.parser

In [16]:
last_page = last_page_finder(source_html)

In [17]:
sise_df = pd.DataFrame()

In [18]:
for i in range(1, last_page+1):
    url = (f'{sise_url}&page={i}')
    sise_df = sise_df.append(pd.read_html(url, header=0)[0], ignore_index=True)

sise_df

Unnamed: 0,날짜,체결가,전일비,등락률,거래량(천주),거래대금(백만)
0,,,,,,
1,2019.07.29,2029.48,36.78,-1.78%,608670.0,4681216.0
2,2019.07.26,2066.26,8.22,-0.40%,583332.0,5056450.0
3,2019.07.25,2074.48,7.82,-0.38%,590890.0,5126430.0
4,,,,,,
5,,,,,,
6,,,,,,
7,,,,,,
8,2019.07.24,2082.30,19.15,-0.91%,841580.0,4115461.0
9,2019.07.23,2101.45,8.11,+0.39%,640162.0,4523644.0


In [19]:
sise_df = sise_df.dropna(axis=0)
sise_df[-5:]

Unnamed: 0,날짜,체결가,전일비,등락률,거래량(천주),거래대금(백만)
16715,1990.01.09,920.21,0.75,+0.08%,18646.0,382992.0
16719,1990.01.08,919.46,6.6,+0.72%,17577.0,369946.0
16720,1990.01.06,912.86,2.25,-0.25%,12519.0,269397.0
16721,1990.01.05,915.11,13.71,-1.48%,22179.0,476295.0
16726,1990.01.04,928.82,20.23,+2.23%,18094.0,405061.0


In [20]:
type(sise_df['체결가'].values[0])

numpy.float64

In [21]:
sise_df['날짜'] = pd.to_datetime(sise_df['날짜'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [22]:
sise_df['등락률'] = sise_df['등락률'].map(lambda x: float(x.split("%")[0].strip()))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [23]:
sise_df.dtypes

날짜          datetime64[ns]
체결가                float64
전일비                float64
등락률                float64
거래량(천주)            float64
거래대금(백만)           float64
dtype: object

In [24]:
sise_df[-5:]

Unnamed: 0,날짜,체결가,전일비,등락률,거래량(천주),거래대금(백만)
16715,1990-01-09,920.21,0.75,0.08,18646.0,382992.0
16719,1990-01-08,919.46,6.6,0.72,17577.0,369946.0
16720,1990-01-06,912.86,2.25,-0.25,12519.0,269397.0
16721,1990-01-05,915.11,13.71,-1.48,22179.0,476295.0
16726,1990-01-04,928.82,20.23,2.23,18094.0,405061.0


In [25]:
sise_save = 'data/sise_list/' + item_code + '_' + date_key + '.csv'
sise_save

'data/sise_list/KOSPI_190729.csv'

In [26]:
sise_df.to_csv(sise_save, encoding='euc-kr', index=False) #cp949
print('save >> ' + sise_save)

save >> data/sise_list/KOSPI_190729.csv
