## Get Industry & Sector List

Credit : https://www.dataquest.io/blog/web-scraping-tutorial-python/

In [37]:
import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime

ณ วันที่พัฒนานี้รองรับ URL "https://www.settrade.com/C13_MarketSummary.jsp?detail=INDUSTRY" ได้

In [72]:
def getIndustryAndSector(url):
    """Scrape the industry/sector summary table at ``url`` into two DataFrames.

    The page is expected to contain a table with CSS class ``table-hover``
    whose data rows each hold an anchor with class ``link-stt``. A row that
    carries (or contains an element carrying) the inline style
    ``background: #cccccc;`` is treated as an industry header; every other
    row is treated as a sector of the closest industry header above it.

    Parameters
    ----------
    url : str
        Address of the market summary page to download.

    Returns
    -------
    list of pandas.DataFrame, or None
        ``[industry, sector]`` where ``industry`` has the columns
        ``Industry`` and ``URL`` and ``sector`` has the columns ``Industry``,
        ``Sector`` and ``URL``. Both frames keep the row position within the
        scraped table as their index. ``None`` is returned (after printing a
        message) when the server does not answer with HTTP 200.

    Raises
    ------
    ValueError
        If the page has no ``table-hover`` table, or a sector row appears
        before any industry row (the page layout is not the expected one).
    requests.exceptions.RequestException
        If the request itself fails, including a timeout.
    """
    industry_row_style = "background: #cccccc;"

    # A timeout keeps the notebook from hanging forever on a stalled server.
    page = requests.get(url, timeout=30)

    if page.status_code != 200:
        print("Can't get content from this URL!!!")
        return None

    soup = BeautifulSoup(page.content, "html5lib")

    # Look the table up by class instead of by child position, which breaks
    # as soon as the surrounding markup or whitespace changes.
    table = soup.find(class_="table-hover")
    if table is None:
        raise ValueError("No element with class 'table-hover' found at %s" % url)
    table_body = table.find("tbody")
    if table_body is None:
        table_body = table

    records = []
    industry_flags = []
    current_industry = None
    for row in table_body.find_all("tr"):
        link = row.find(class_="link-stt")
        if link is None:
            # Header/spacer rows carry no industry or sector link.
            continue

        name = link.text
        # Decide from the row's own styling, not from its name, so a sector
        # that shares its name with an industry is still labelled correctly.
        is_industry = (
            row.get("style") == industry_row_style
            or row.find(style=industry_row_style) is not None
        )
        if is_industry:
            current_industry = name
        elif current_industry is None:
            raise ValueError(
                "Sector row %r appears before any industry row at %s" % (name, url)
            )

        records.append(
            {"Industry": current_industry, "Sector": name, "URL": link.get("href")}
        )
        industry_flags.append(is_industry)

    # Build the frame once instead of growing arrays inside the loop.
    df = pd.DataFrame(records, columns=["Industry", "Sector", "URL"])
    industry_mask = pd.Series(industry_flags, index=df.index, dtype=bool)

    # Select into new frames rather than deleting a column from a filtered
    # slice, which triggers SettingWithCopyWarning.
    industry = df.loc[industry_mask, ["Industry", "URL"]]
    sector = df.loc[~industry_mask].copy()
    return [industry, sector]

In [73]:
# Fetch the industry and sector tables from the SET market summary page.
industry_df, sector_df = getIndustryAndSector(
    "https://www.settrade.com/C13_MarketSummary.jsp?detail=INDUSTRY"
)

In [40]:
# Display the industry table.
industry_df

Unnamed: 0,Industry,URL
0,AGRO,/C13_MarketSummary.jsp?detail=INDUSTRY&industr...
3,CONSUMP,/C13_MarketSummary.jsp?detail=INDUSTRY&industr...
7,FINCIAL,/C13_MarketSummary.jsp?detail=INDUSTRY&industr...
11,INDUS,/C13_MarketSummary.jsp?detail=INDUSTRY&industr...
18,PROPCON,/C13_MarketSummary.jsp?detail=INDUSTRY&industr...
23,RESOURC,/C13_MarketSummary.jsp?detail=INDUSTRY&industr...
26,SERVICE,/C13_MarketSummary.jsp?detail=INDUSTRY&industr...
33,TECH,/C13_MarketSummary.jsp?detail=INDUSTRY&industr...


In [41]:
# Preview the first ten sector rows.
sector_df.head(10)

Unnamed: 0,Industry,Sector,URL
1,AGRO,AGRI,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=AGRI...
2,AGRO,FOOD,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=FOOD...
4,CONSUMP,FASHION,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=FASH...
5,CONSUMP,HOME,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=HOME...
6,CONSUMP,PERSON,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=PERS...
8,FINCIAL,BANK,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=BANK...
9,FINCIAL,FIN,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=FIN&...
10,FINCIAL,INSUR,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=INSU...
12,INDUS,AUTO,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=AUTO...
13,INDUS,IMM,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=IMM&...


In [42]:
# Save the industry table as CSV; index=False leaves the DataFrame index out of the file.
industry_df.to_csv('../../data/explore/industry_list.csv', index=False)

In [43]:
# Save the sector table as CSV; index=False leaves the DataFrame index out of the file.
sector_df.to_csv('../../data/explore/sector_list.csv', index=False)

In [44]:
# Number of industries scraped.
len(industry_df.index)

8

In [45]:
# Number of sectors scraped.
len(sector_df.index)

28

In [46]:
# Today's date as YYYY-MM-DD; strftime already returns a str, so no str() wrapper is needed.
datetime.now().strftime('%Y-%m-%d')

'2018-04-25'

## Get Stock List

In [47]:
# Load the sector list from its CSV file.
sector_table = pd.read_csv('../../data/explore/sector_list.csv')
# Preview the first rows.
sector_table.head()

Unnamed: 0,Industry,Sector,URL
0,AGRO,AGRI,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=AGRI...
1,AGRO,FOOD,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=FOOD...
2,CONSUMP,FASHION,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=FASH...
3,CONSUMP,HOME,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=HOME...
4,CONSUMP,PERSON,/C13_MarketSummary.jsp?detail=INDUSTRY&sector=PERS...


In [36]:
# Print each sector's name next to its relative URL.
# NOTE(review): `index` and `sector` stay defined in the kernel after the loop — confirm no later cell relies on them before renaming.
for index,sector in sector_table.iterrows():
    print(sector['Sector'],sector['URL']) 

AGRI /C13_MarketSummary.jsp?detail=INDUSTRY&sector=AGRI&market=SET
FOOD /C13_MarketSummary.jsp?detail=INDUSTRY&sector=FOOD&market=SET
FASHION /C13_MarketSummary.jsp?detail=INDUSTRY&sector=FASHION&market=SET
HOME /C13_MarketSummary.jsp?detail=INDUSTRY&sector=HOME&market=SET
PERSON /C13_MarketSummary.jsp?detail=INDUSTRY&sector=PERSON&market=SET
BANK /C13_MarketSummary.jsp?detail=INDUSTRY&sector=BANK&market=SET
FIN /C13_MarketSummary.jsp?detail=INDUSTRY&sector=FIN&market=SET
INSUR /C13_MarketSummary.jsp?detail=INDUSTRY&sector=INSUR&market=SET
AUTO /C13_MarketSummary.jsp?detail=INDUSTRY&sector=AUTO&market=SET
IMM /C13_MarketSummary.jsp?detail=INDUSTRY&sector=IMM&market=SET
PAPER /C13_MarketSummary.jsp?detail=INDUSTRY&sector=PAPER&market=SET
PETRO /C13_MarketSummary.jsp?detail=INDUSTRY&sector=PETRO&market=SET
PKG /C13_MarketSummary.jsp?detail=INDUSTRY&sector=PKG&market=SET
STEEL /C13_MarketSummary.jsp?detail=INDUSTRY&sector=STEEL&market=SET
CONMAT /C13_MarketSummary.jsp?detail=INDUSTRY&sector=CONMAT&market=SET
PROP /C13_MarketSummary.jsp?detail=INDU

สร้าง function Get หุ้นจาก แต่ละ Sector