In [1]:
import pandas as pd
import requests

In [2]:
# 종목 코드 가져오는 함수
def get_stock_code():
    stock_code = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download', header=0)[0]
    
    stock_code = stock_code[['회사명', '종목코드']]
    
    stock_code = stock_code.rename(columns={'회사명': 'company', '종목코드': 'code'})
    
    stock_code.code = stock_code.code.map('{:06d}'.format)
    
    return stock_code

stock_code = get_stock_code()
print(stock_code)

       company    code
0           DL  000210
1        DRB동일  004840
2          DSR  155660
3           GS  078930
4        GS글로벌  001250
...        ...     ...
2494    토마토시스템  393210
2495        틸론  217880
2496  플럼라인생명과학  222670
2497  한국미라클피플사  331660
2498       휴벡셀  212310

[2499 rows x 2 columns]


In [3]:
# 일별 시세 가져오는 함수

def get_stock(code):
    df = pd.DataFrame()
    for page in range(1,21):
        url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code)
        url = '{url}&page={page}'.format(url=url, page=page)
        print(url)
        
        header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko)'}
        res = requests.get(url, headers=header)
        current_df = pd.read_html(res.text, header=0)[0]
        df = df.append(current_df, ignore_index=True)
        
    return df

code = '005930' # 삼성전자 종목코드
df = get_stock(code)
print(df)

http://finance.naver.com/item/sise_day.nhn?code=005930&page=1
http://finance.naver.com/item/sise_day.nhn?code=005930&page=2
http://finance.naver.com/item/sise_day.nhn?code=005930&page=3
http://finance.naver.com/item/sise_day.nhn?code=005930&page=4
http://finance.naver.com/item/sise_day.nhn?code=005930&page=5
http://finance.naver.com/item/sise_day.nhn?code=005930&page=6
http://finance.naver.com/item/sise_day.nhn?code=005930&page=7
http://finance.naver.com/item/sise_day.nhn?code=005930&page=8
http://finance.naver.com/item/sise_day.nhn?code=005930&page=9
http://finance.naver.com/item/sise_day.nhn?code=005930&page=10
http://finance.naver.com/item/sise_day.nhn?code=005930&page=11
http://finance.naver.com/item/sise_day.nhn?code=005930&page=12
http://finance.naver.com/item/sise_day.nhn?code=005930&page=13
http://finance.naver.com/item/sise_day.nhn?code=005930&page=14
http://finance.naver.com/item/sise_day.nhn?code=005930&page=15
http://finance.naver.com/item/sise_day.nhn?code=005930&page=16
h

In [4]:
# 데이터 정제
def clean_data(df):
    df = df.dropna() # 결측값 있는 행 제거
    
    df = df.rename(columns = {
        '날짜': 'date', '종가': 'close',
        '전일비': 'diff', '시가': 'open',
        '고가': 'high', '저가': 'low',
        '거래량': 'volume'
    })
    
    df[['close', 'diff', 'open', 'high', 'low', 'vloume']] = df[['close', 'diff', 'open', 'high', 'low', 'volume']].astype(int)
    
    df['date'] = pd.to_datetime(df['date'])
    
    df = dfd.sort_values(by=['date'], ascending=True)
    
    return df

In [5]:
# Example. 삼성전자
company = '삼성전자'
stock_code = get_stock_code()

code = stock_code[stock_code.company==company].code.values[0].strip()

df = get_stock(code)

df = get_stock(df)

df = clean_data(df)
print(df)

http://finance.naver.com/item/sise_day.nhn?code=005930&page=1
http://finance.naver.com/item/sise_day.nhn?code=005930&page=2
http://finance.naver.com/item/sise_day.nhn?code=005930&page=3
http://finance.naver.com/item/sise_day.nhn?code=005930&page=4
http://finance.naver.com/item/sise_day.nhn?code=005930&page=5
http://finance.naver.com/item/sise_day.nhn?code=005930&page=6
http://finance.naver.com/item/sise_day.nhn?code=005930&page=7
http://finance.naver.com/item/sise_day.nhn?code=005930&page=8
http://finance.naver.com/item/sise_day.nhn?code=005930&page=9
http://finance.naver.com/item/sise_day.nhn?code=005930&page=10
http://finance.naver.com/item/sise_day.nhn?code=005930&page=11
http://finance.naver.com/item/sise_day.nhn?code=005930&page=12
http://finance.naver.com/item/sise_day.nhn?code=005930&page=13
http://finance.naver.com/item/sise_day.nhn?code=005930&page=14
http://finance.naver.com/item/sise_day.nhn?code=005930&page=15
http://finance.naver.com/item/sise_day.nhn?code=005930&page=16
h

http://finance.naver.com/item/sise_day.nhn?code=             날짜       종가     전일비       시가       고가       저가         거래량
0           NaN      NaN     NaN      NaN      NaN      NaN         NaN
1    2022.03.11  70000.0  1200.0  70500.0  70700.0  69700.0  15669868.0
2    2022.03.10  71200.0  1700.0  70800.0  71200.0  70500.0  21159248.0
3    2022.03.08  69500.0   600.0  68800.0  70000.0  68700.0  15828269.0
4    2022.03.07  70100.0  1400.0  70000.0  70600.0  69900.0  18617138.0
..          ...      ...     ...      ...      ...      ...         ...
295  2021.05.25  79900.0   200.0  80000.0  80400.0  79800.0  13628942.0
296  2021.05.24  79700.0   400.0  80100.0  80400.0  79500.0  13398666.0
297  2021.05.21  80100.0   600.0  80100.0  81500.0  79800.0  20961714.0
298  2021.05.20  79500.0   100.0  79400.0  79700.0  79100.0  16541828.0
299         NaN      NaN     NaN      NaN      NaN      NaN         NaN

[300 rows x 7 columns]&page=9
http://finance.naver.com/item/sise_day.nhn?code=         

http://finance.naver.com/item/sise_day.nhn?code=             날짜       종가     전일비       시가       고가       저가         거래량
0           NaN      NaN     NaN      NaN      NaN      NaN         NaN
1    2022.03.11  70000.0  1200.0  70500.0  70700.0  69700.0  15669868.0
2    2022.03.10  71200.0  1700.0  70800.0  71200.0  70500.0  21159248.0
3    2022.03.08  69500.0   600.0  68800.0  70000.0  68700.0  15828269.0
4    2022.03.07  70100.0  1400.0  70000.0  70600.0  69900.0  18617138.0
..          ...      ...     ...      ...      ...      ...         ...
295  2021.05.25  79900.0   200.0  80000.0  80400.0  79800.0  13628942.0
296  2021.05.24  79700.0   400.0  80100.0  80400.0  79500.0  13398666.0
297  2021.05.21  80100.0   600.0  80100.0  81500.0  79800.0  20961714.0
298  2021.05.20  79500.0   100.0  79400.0  79700.0  79100.0  16541828.0
299         NaN      NaN     NaN      NaN      NaN      NaN         NaN

[300 rows x 7 columns]&page=18
http://finance.naver.com/item/sise_day.nhn?code=        

NameError: name 'dfd' is not defined

In [12]:
# 보고서 만들기
import matplotlib.pyplot as plt
from pandas.plotting import table
import os

# jupyter notebook 내에 그려지게 함
%matplotlib inline

plt.figure(figsize=(10,4))
plt.plot(df['date', df['close']])
plt.xlabel('date')
plt.ylabele('close')

chart_fname = os.path.join("res/stock_report", '{company}_chart.png'.format(company=company))
plt.savefig(chart_fname)
plt.show()

plt.figure(figsize(15,4))
ax = plt.subplot(111, frame_on=False)
ax.xaxis.set_visible(False)
ax.yaxis.seet_visible(False)
df = df.sort(valuse['date'], ascending=False)
table(ax, df.heead(10), loc='center', cellLoc='center', rowLoc='center')

table_fname = os.path.join("res/stock_report", '{company}_table.png'.format(company=company))
plt.savefig(table_fname)

KeyError: 'close'

<Figure size 720x288 with 0 Axes>