## Import libraries

    

In [0]:
import pandas as pd
import plotly.offline as offline
import plotly.graph_objs as go
from bs4 import BeautifulSoup as bs
import urllib.request
import requests
import math
import re

In [3]:
# Download the KRX corporate listing; the first table on the page holds the data.
code_df = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]

# Render the numeric stock code as a zero-padded six-digit string.
code_df['종목코드'] = code_df['종목코드'].map('{:06d}'.format)

# Keep only company name and stock code, under English column names.
code_df = code_df.loc[:, ['회사명', '종목코드']].rename(columns={'회사명': 'name', '종목코드': 'code'})

# Preview the first rows (notebook cell output).
code_df.head()

Unnamed: 0,name,code
0,DSR,155660
1,GS글로벌,1250
2,HSD엔진,82740
3,LG이노텍,11070
4,LS산전,10120


## Getting historical stock price

In [0]:
# Configure plotly
def configure_plotly_browser_state():
    """Emit the RequireJS configuration that lets the notebook front end load plotly.

    Displays an HTML snippet that loads ``require.js`` from the notebook's
    static path and maps the ``plotly`` module name to the plotly CDN bundle.
    Returns nothing; the only effect is the displayed HTML output.
    """
    # Import the helpers explicitly: the bare name ``display`` is only
    # pre-defined inside IPython/Jupyter sessions, so relying on it raises
    # NameError when this file is executed as a plain script.
    from IPython.display import HTML, display

    display(HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>requirejs.config({paths: {base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-latest.min.js?noext',},});</script>'''))
 
# Find Stock Code
def stock():
    """Download the KRX corporate listing and return it as name/code pairs.

    Returns:
        DataFrame with two columns: ``name`` (company name) and ``code``
        (stock code rendered with the ``'{:06d}'`` format, i.e. a
        zero-padded six-digit string).
    """
    listing = pd.read_html(
        'http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13',
        header=0,
    )[0]

    # Zero-pad the numeric code so it has a fixed six-digit width.
    listing['종목코드'] = listing['종목코드'].map('{:06d}'.format)

    # Keep the two columns of interest and give them English names.
    return listing[['회사명', '종목코드']].rename(
        columns={'회사명': 'name', '종목코드': 'code'}
    )

# Find proper URL in Naver
def naver(code_df, stockCode):
    """Build the Naver Finance daily-price URL for a stock.

    Args:
        code_df: DataFrame with ``name`` and ``code`` columns.
        stockCode: Either the stock code as an ``int`` or the company name
            as it appears in ``code_df['name']``.

    Returns:
        The ``sise_day.nhn`` URL for the resolved six-digit stock code.

    Raises:
        ValueError: If ``stockCode`` is a name that is not in ``code_df``.
    """
    if isinstance(stockCode, int):
        # Converting a code such as '005930' to int drops its leading zeros;
        # restore the fixed six-digit form the site expects.
        code = '{:06d}'.format(stockCode)
    else:
        # Use a boolean mask instead of a string-built query so names
        # containing quotes cannot break the lookup, and read the value
        # directly instead of slicing its printed representation.
        matches = code_df.loc[code_df['name'] == stockCode, 'code']
        if matches.empty:
            raise ValueError("Unknown company name: {!r}".format(stockCode))
        code = str(matches.iloc[0]).strip()

    url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code)
    print("Requested URL = {}".format(url))
    return url

# Getting data from Naver Stock
def crawling(a, b):
    """Download daily price rows from a paginated Naver price URL.

    Args:
        a: Base URL; ``&page=N`` is appended for each page requested.
        b: Number of trading days (rows) wanted.

    Returns:
        DataFrame with English column names (``Date``, ``Closing Price``,
        ``Difference``, ``Open Price``, ``Highest``, ``Lowest``,
        ``Trading Vol``), ``Date`` parsed to datetime and rows sorted by
        ascending date. At most ``b`` rows are returned; an empty frame with
        those columns is returned when ``b`` is not positive.
    """
    column_names = {'날짜': 'Date', '종가': 'Closing Price', '전일비': 'Difference',
                    '시가': 'Open Price', '고가': 'Highest', '저가': 'Lowest',
                    '거래량': 'Trading Vol'}

    # The original page arithmetic assumes 10 rows per page. Round UP so a
    # request such as 15 days fetches two pages instead of silently
    # returning only 10 rows (and fewer than 10 days no longer fetches none).
    rows_per_page = 10
    wanted = int(b)
    page_count = math.ceil(wanted / rows_per_page) if wanted > 0 else 0

    # DataFrame.append was removed in pandas 2.0; collect pages and concat once.
    frames = [
        pd.read_html('{url}&page={page}'.format(url=a, page=page), header=0)[0]
        for page in range(1, page_count + 1)
    ]
    if not frames:
        return pd.DataFrame(columns=list(column_names.values()))

    df = pd.concat(frames, ignore_index=True)

    # Drop rows containing missing values, then renumber the index.
    df = df.dropna().reset_index(drop=True)

    # Trim to the requested number of rows. This is done before sorting, on
    # the assumption that the pages are served newest-first, so the most
    # recent rows are kept -- TODO confirm the page ordering.
    df = df.head(wanted)

    # Change column names into English for further analysis
    df = df.rename(columns=column_names)

    # Change date format to datetime
    df['Date'] = pd.to_datetime(df['Date'])

    # Set ascending order
    return df.sort_values(by=['Date'], ascending=True)

# Draw graph
def graph(df):
    """Show an interactive candlestick chart of a price history.

    Args:
        df: DataFrame providing the ``Date``, ``Open Price``, ``Highest``,
            ``Lowest`` and ``Closing Price`` columns.

    Returns:
        Whatever ``offline.iplot`` returns for the rendered figure.
    """
    offline.init_notebook_mode(connected=True)

    candles = go.Candlestick(
        x=df['Date'],
        open=df['Open Price'],
        high=df['Highest'],
        low=df['Lowest'],
        close=df['Closing Price'],
    )

    # Range-selector buttons: look back 1, 3 or 6 months, or show everything.
    range_buttons = [
        dict(count=months, label=label, step='month', stepmode='backward')
        for months, label in ((1, '1m'), (3, '3m'), (6, '6m'))
    ]
    range_buttons.append(dict(step='all'))

    date_axis = dict(
        rangeselector=dict(buttons=range_buttons),
        rangeslider=dict(),
        type='date',
    )
    figure = go.Figure(
        data=[candles],
        layout=dict(title='Historical Stock Price', xaxis=date_axis),
    )

    configure_plotly_browser_state()
    return offline.iplot(figure)

# Potential Golden Cross
def golden(dic):
    """Pick the stocks whose latest moving-average gap is negative but at its peak.

    A key is selected when the last value of its ``Difference`` column is
    below zero and no earlier value in the column is larger -- presumably
    the short average closing in on the long average from below.

    Args:
        dic: Mapping of stock code to a DataFrame with a ``Difference`` column.

    Returns:
        List of the selected keys, in the mapping's iteration order.
    """
    candidates = []
    for key, frame in dic.items():
        gaps = frame['Difference'].tolist()
        # An empty series has no "latest" value; skip it instead of raising
        # IndexError on gaps[-1].
        if not gaps:
            continue
        latest = gaps[-1]
        if latest < 0 and max(gaps) <= latest:
            candidates.append(key)

    print("Those stocks would be golden cross stocks: ")
    return candidates

# Potential Death Cross
def death(dic):
    """Pick the stocks whose latest moving-average gap is positive but at its low.

    A key is selected when the last value of its ``Difference`` column is
    above zero and no earlier value in the column is smaller -- presumably
    the short average closing in on the long average from above.

    Args:
        dic: Mapping of stock code to a DataFrame with a ``Difference`` column.

    Returns:
        List of the selected keys, in the mapping's iteration order.
    """
    candidates = []
    for key, frame in dic.items():
        gaps = frame['Difference'].tolist()
        # An empty series has no "latest" value; skip it instead of raising
        # IndexError on gaps[-1].
        if not gaps:
            continue
        latest = gaps[-1]
        if latest > 0 and min(gaps) >= latest:
            candidates.append(key)

    print("Those stocks would be death cross stocks: ")
    return candidates

def dict_df_code(code_df, limit=100, pages=5):
    """Collect moving-average gaps for listed companies whose gap is always negative.

    For each of the first ``limit`` codes, the daily price pages are
    downloaded, 20- and 50-row rolling means of the closing price are
    computed, and ``Difference = 20 Days - 50 Days`` is derived. A company is
    kept only when it has at least one complete row and every ``Difference``
    value is below zero.

    Args:
        code_df: DataFrame with a ``code`` column of stock codes.
        limit: How many leading rows of ``code_df`` to scan (default 100, the
            value that used to be hard-coded). ``None`` scans every row.
        pages: How many price pages to download per company (default 5, the
            value that used to be hard-coded).

    Returns:
        dict mapping stock code to a DataFrame with ``Date``,
        ``Closing Price``, ``20 Days``, ``50 Days`` and ``Difference``.
    """
    column_names = {'날짜': 'Date', '종가': 'Closing Price', '전일비': 'Difference',
                    '시가': 'Open Price', '고가': 'Highest', '저가': 'Lowest',
                    '거래량': 'Trading Vol'}
    dict_code_df = dict()

    # Slice positionally so a listing shorter than ``limit`` (or one with a
    # non-default index) no longer raises a KeyError.
    for code in code_df['code'].iloc[:limit]:
        # DataFrame.append was removed in pandas 2.0; gather pages and concat once.
        frames = [
            pd.read_html(
                'http://finance.naver.com/item/sise_day.nhn?code={code}&page={page}'
                .format(code=code, page=page),
                header=0,
            )[0]
            for page in range(1, pages + 1)
        ]
        if not frames:
            continue
        company = pd.concat(frames, ignore_index=True)

        # Change column names into English for further analysis
        company = company.rename(columns=column_names)

        # Drop the columns that are not needed for the moving averages
        company = company.drop(
            ['Difference', 'Open Price', 'Highest', 'Lowest', 'Trading Vol'], axis=1)

        # Change date format to datetime
        company['Date'] = pd.to_datetime(company['Date'])

        # Set ascending order
        company = company.sort_values(by=['Date'], ascending=True)

        company['20 Days'] = company['Closing Price'].rolling(20).mean()
        company['50 Days'] = company['Closing Price'].rolling(50).mean()

        # Rows without a complete 50-row window are NaN and are removed here.
        # NOTE(review): if each page carries about 10 rows, the default of 5
        # pages leaves very few complete rows -- consider a larger ``pages``.
        company = company.dropna().reset_index(drop=True)

        company['Difference'] = company['20 Days'] - company['50 Days']

        # ``.all()`` is True for an empty series, so also require data;
        # otherwise companies with too little history would be reported.
        if not company.empty and company['Difference'].lt(0).all():
            dict_code_df[code] = company

    return dict_code_df

# Choose Option
def option():
    """Print the main menu and return the user's raw selection.

    Returns:
        The text the user typed at the ``Choose Menu:`` prompt, unmodified.
    """
    rule = "=" * 54
    menu_body = """
    1. Draw Historical Stock Graph
    2. Golden Cross Check
    3. Death Cross Check
    4. Trading Volume
    5. Draw Historical International Net Sales
    6. The end
    """

    print(rule, "Menu", rule)
    print(menu_body)
    print("=" * 115)

    return input('Choose Menu: ')

def trading(df):
    """Show an interactive line chart of historical trading volume.

    Args:
        df: DataFrame providing the ``Date`` and ``Trading Vol`` columns.

    Returns:
        Whatever ``offline.iplot`` returns for the rendered figure.
    """
    offline.init_notebook_mode(connected=True)

    # ``go.Scatter`` is the trace class; lowercase ``go.scatter`` is a
    # sub-module and calling it raises TypeError.
    trace = go.Scatter(x=df['Date'], y=df['Trading Vol'])
    data = [trace]

    layout = dict(title='Historical Trading Volume',
                  xaxis=dict(rangeselector=dict(buttons=[
                      dict(count=1, label='1m', step='month', stepmode='backward'),
                      dict(count=3, label='3m', step='month', stepmode='backward'),
                      dict(count=6, label='6m', step='month', stepmode='backward'),
                      dict(step='all')]),
                      rangeslider=dict(), type='date'))

    fig = go.Figure(data=data, layout=layout)
    # Same front-end setup the other plotting functions in this file perform
    # right before rendering.
    configure_plotly_browser_state()
    plot = offline.iplot(fig)
    return plot

# Retrieve historical international net sales 
def int_netsale(code_or_name, iscode, days):
    """Scrape net-sale figures for a stock from Naver Finance.

    Args:
        code_or_name: Stock code (when ``iscode == 1``) or company name
            (when ``iscode == 0``).
        iscode: 1 if ``code_or_name`` is a stock code, 0 if it is a name that
            must be looked up in the module-level ``code_df`` table.
        days: Number of most recent rows wanted; pages are fetched 20 rows
            at a time.

    Returns:
        DataFrame with columns ``Date``, ``International Institute`` and
        ``International Investor``, holding at most ``days`` rows. Empty when
        ``days`` is not positive.

    Raises:
        ValueError: If ``iscode`` is neither 0 nor 1, or the name is unknown.
    """
    if iscode == 1:
        code = code_or_name
    elif iscode == 0:
        matches = code_df.loc[code_df['name'] == code_or_name, 'code']
        if matches.empty:
            raise ValueError("Unknown company name: {!r}".format(code_or_name))
        # Positional access: the filtered Series keeps its original row
        # label, so ``[0]`` only worked when the match was the first row.
        code = matches.iloc[0]
    else:
        raise ValueError("iscode must be 1 (stock code) or 0 (company name)")

    pages = math.ceil(days / 20)

    # Start from empty lists so a request for zero pages still yields a
    # well-formed (empty) result instead of a NameError.
    date = []
    sale_ins = []
    sale_inv = []

    for page in range(1, pages + 1):
        url = 'https://finance.naver.com/item/frgn.nhn?code={}&page={}'.format(code, page)
        html = requests.get(url).text
        soup = bs(html, 'html.parser')

        # Retrieve date
        date.extend(cell.text for cell in soup.find_all("td", {"class": "tc"}))
        # Retrieve net sale of international institutes
        sale_ins.extend(
            int(cell.text.replace(',', ''))
            for cell in soup.find_all("td", {"class": "num", "width": "66"})
        )
        # Retrieve net sale of international investors
        sale_inv.extend(
            int(cell.text.replace(',', ''))
            for cell in soup.find_all("td", {"class": "num", "width": "80"})
        )

    # Create dataframe
    netsale_df = pd.DataFrame({'Date': pd.to_datetime(date),
                               'International Institute': sale_ins,
                               'International Investor': sale_inv})

    # Keep exactly the requested number of rows (previously a fixed 103,
    # regardless of ``days``).
    return netsale_df.iloc[:max(days, 0), :].copy()

# Plot line graphs for international net sales
def graph_ns(df):
    """Show an interactive two-line chart of historical net sales.

    Args:
        df: DataFrame providing the ``Date``, ``International Institute`` and
            ``International Investor`` columns.

    Returns:
        Whatever ``offline.iplot`` returns for the rendered figure.
    """
    offline.init_notebook_mode(connected=True)

    # One line trace per series, in legend order.
    series = (('International Institute', 'Institutes'),
              ('International Investor', 'Investors'))
    traces = [
        go.Scatter(x=df['Date'], y=df[column], mode='lines', name=legend)
        for column, legend in series
    ]

    # Range-selector buttons: look back 1, 3 or 6 months, or show everything.
    range_buttons = [
        dict(count=months, label=label, step='month', stepmode='backward')
        for months, label in ((1, '1m'), (3, '3m'), (6, '6m'))
    ]
    range_buttons.append(dict(step='all'))

    date_axis = dict(
        rangeselector=dict(buttons=range_buttons),
        rangeslider=dict(),
        type='date',
    )
    figure = go.Figure(
        layout=dict(title='Historical International Net Sales', xaxis=date_axis)
    )
    for line in traces:
        figure.add_trace(line)

    configure_plotly_browser_state()
    return offline.iplot(figure)

# Main Func
def _ask_stock(code_df):
    """Prompt until the user enters a listed stock code or company name.

    Returns:
        Tuple ``(entry, is_code)``: the text typed, and True when it is an
        all-digit stock code found in ``code_df['code']`` (False when it is a
        company name found in ``code_df['name']``).
    """
    while True:
        entry = input("Please enter proper stock code or accurate company name: ")
        if entry.isdigit():
            if entry in code_df['code'].values:
                return entry, True
            print("Please check the stock code")
        elif entry in code_df['name'].values:
            return entry, False
        else:
            print("Please check the stock name")


def _ask_days():
    """Prompt until the user enters a whole number of days that is at least 10."""
    while True:
        raw = input("Enter the number of working days you want to see (min. 10days): ")
        try:
            days = int(raw)
        except ValueError:
            print("Please enter a whole number of at least 10")
            continue
        if days >= 10:
            return days
        print("Please enter a whole number of at least 10")


def main():
    """Run the interactive menu loop until the user chooses to quit.

    Each round shows the menu, performs the selected action (price chart,
    golden/death cross scan, trading-volume chart, net-sales chart) and
    returns to the menu. Choosing ``6`` prints a goodbye message and ends
    the loop. Returns None.
    """
    configure_plotly_browser_state()

    # The listing is the same for the whole session; download it once
    # instead of on every pass through the menu.
    code_df = stock()

    while True:
        choice = option()

        if choice == '1':
            # Ask for the stock on every round; previously a sticky flag made
            # later rounds silently reuse the first stock entered.
            entry, is_code = _ask_stock(code_df)
            url = naver(code_df, int(entry) if is_code else entry)
            graph(crawling(url, _ask_days()))

        elif choice == '2':
            # Show the result; it used to be computed and then discarded.
            print(golden(dict_df_code(code_df)))

        elif choice == '3':
            # NOTE(review): dict_df_code keeps only companies whose Difference
            # is negative throughout, while death() requires a positive last
            # value, so this list looks like it will always be empty -- confirm
            # the intended filter.
            print(death(dict_df_code(code_df)))

        elif choice == '4':
            entry, is_code = _ask_stock(code_df)
            url = naver(code_df, int(entry) if is_code else entry)
            trading(crawling(url, _ask_days()))

        elif choice == '5':
            entry, is_code = _ask_stock(code_df)
            days = _ask_days()
            graph_ns(int_netsale(entry, 1 if is_code else 0, days))

        elif choice == '6':
            print("It was nice to meet you:)")
            break

# Start the interactive menu only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

# Resources

1. Pandas를 이용한 Naver금융에서 주식데이터 가져오기 <br>
https://excelsior-cjh.tistory.com/109 
2. Calculating Moving Average with Python <br>
https://jakevdp.github.io/PythonDataScienceHandbook/03.11-working-with-time-series.html
3. Seasonality Analysis <br>
https://www.dataquest.io/blog/tutorial-time-series-analysis-with-pandas/
