In [1]:
import datetime
import io

import numpy as np
import pandas as pd
import requests

def get_historical_stock_data(date, stock_no, timeout=30):
    """Fetch the TWSE ``STOCK_DAY`` report for one stock and return its JSON.

    Args:
        date: Value sent as the ``date`` query parameter (the notebook passes
            an integer such as ``20180930``).
        stock_no: Value sent as the ``stockNo`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The response body decoded from JSON (whatever structure the endpoint
        sends back).

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    url = 'http://www.twse.com.tw/exchangeReport/STOCK_DAY'
    # Let requests build and URL-encode the query string instead of
    # interpolating raw values into the URL.
    r = requests.get(url, params={'date': date, 'stockNo': stock_no}, timeout=timeout)
    # Surface HTTP errors explicitly rather than trying to parse an error page.
    r.raise_for_status()
    return r.json()

def convert_date(date_str):
    """Parse a ``'Y/M/D'`` string whose year is offset by 1911 into a datetime.

    The year component is converted to an integer and 1911 is added to it
    (presumably the ROC/Minguo calendar used by the data source) before the
    result is parsed with the ``'%Y/%m/%d'`` format.

    Args:
        date_str: Slash-separated date with exactly three components.

    Returns:
        A ``datetime.datetime`` at midnight of the corresponding date.

    Raises:
        ValueError: If the string does not have three components or is not a
            valid date.
    """
    roc_year, month, day = date_str.split('/')
    gregorian = '/'.join([str(int(roc_year) + 1911), month, day])
    return datetime.datetime.strptime(gregorian, '%Y/%m/%d')

def convert_int(int_str):
    """Convert a string such as ``'1,234,567'`` to an int, ignoring commas.

    Args:
        int_str: Text holding an integer, optionally with ``,`` separators.

    Returns:
        The integer value.

    Raises:
        ValueError: If the text without commas is not a valid integer.
    """
    digits = ''.join(int_str.split(','))
    return int(digits)

def convert_float(float_str):
    """Convert a string such as ``'1,234.50'`` to a float, ignoring commas.

    Args:
        float_str: Text holding a number, optionally with ``,`` separators.

    Returns:
        The float value.

    Raises:
        ValueError: If the text without commas is not a valid float.
    """
    without_commas = float_str.translate({ord(','): None})
    return float(without_commas)

def convert_sign_float(sign_float):
    """Convert a signed change string to a float, ignoring commas.

    The exact text ``'X0.00'`` (after comma removal) is mapped to ``0.0``;
    presumably the source uses it as a marker for days without a comparable
    change — confirm against the data provider's documentation.

    Args:
        sign_float: Text such as ``'+1.50'``, ``'-2.00'`` or ``'X0.00'``.

    Returns:
        The float value, or ``0.0`` for the ``'X0.00'`` marker.

    Raises:
        ValueError: If the text is neither the marker nor a valid float.
    """
    cleaned = sign_float.replace(',', '')
    if cleaned == 'X0.00':
        return 0.0
    return float(cleaned)


def convert_data_vec(vec,i):
    """Turn one raw quote row into a single-row DataFrame labelled ``i``.

    The first nine positions of ``vec`` are parsed, in order, into the columns
    ``date``, ``volume``, ``amount``, ``open``, ``high``, ``low``, ``close``,
    ``change`` and ``num_of_trans``. Any further positions are ignored.

    Args:
        vec: Indexable sequence of strings with at least nine entries.
        i: Index label for the resulting row.

    Returns:
        A ``pandas.DataFrame`` with one row and the nine columns above.

    Raises:
        IndexError: If ``vec`` has fewer than nine entries.
        ValueError: If any field cannot be parsed by its converter.
    """
    # (column name, parser) pairs in the positional order of the raw row.
    layout = (
        ('date', convert_date),
        ('volume', convert_int),
        ('amount', convert_int),
        ('open', convert_float),
        ('high', convert_float),
        ('low', convert_float),
        ('close', convert_float),
        ('change', convert_sign_float),
        # NOTE(review): parsed as float although volume/amount use convert_int;
        # confirm whether a float column is intended here.
        ('num_of_trans', convert_float),
    )
    record = {name: parse(vec[pos]) for pos, (name, parse) in enumerate(layout)}

    return pd.DataFrame(record, index=[i])

def get_stock_df(stock_data, key='data'):
    """Build a DataFrame of daily quotes from a fetched stock payload.

    Args:
        stock_data: Mapping that holds the raw rows under ``key``.
        key: Name of the entry containing the list of raw rows.

    Returns:
        A ``pandas.DataFrame`` with one row per raw row, indexed by the row's
        position in the list. If there are no rows, an empty frame with the
        expected columns is returned.

    Raises:
        KeyError: If ``key`` is not present in ``stock_data``.
    """
    rows = stock_data[key]

    if not rows:
        # pd.concat raises ValueError on an empty sequence, so hand back an
        # empty frame instead. The column list mirrors the keys produced by
        # convert_data_vec and must be kept in sync with it.
        return pd.DataFrame(columns=['date', 'volume', 'amount', 'open', 'high',
                                     'low', 'close', 'change', 'num_of_trans'])

    return pd.concat([convert_data_vec(row, i) for i, row in enumerate(rows)], axis=0)

In [2]:
# Download the TWSE ISIN listing page. The timeout keeps a stalled connection
# from hanging the notebook, and raise_for_status stops an HTTP error page
# from being parsed as if it were the listing.
res = requests.get("http://isin.twse.com.tw/isin/C_public.jsp?strMode=2", timeout=30)
res.raise_for_status()

# Recent pandas versions deprecate passing a literal HTML string to read_html;
# wrapping the text in StringIO works on both old and new versions.
df = pd.read_html(io.StringIO(res.text))

# read_html returns a list with one DataFrame per <table> found on the page.
len(df)

1

In [3]:
# pd.read_html returned a one-element list (len(df) was 1 above), so keep the
# only table it found.
# NOTE(review): this rebinds `df` from a list to a DataFrame, so the cell is
# not idempotent — re-running it alone would select column 0 of the table.
df = df[0]

In [8]:
# List the distinct values in column 5; the next cell filters on two of them.
# NOTE(review): this cell's execution count (8) is out of sequence — it was
# run after the later cells. It only needs `df` from the previous cell.
df[5].unique()

array(['CFICode', '股票', 'ESVUFR', 'ESVTFR', '上市認購(售)權證', 'RWSCCE',
       'RWICPE', 'RWBCPE', 'RWBCCE', 'RWSCPE', 'RWICCE', 'RWBCCA',
       'RWSCCA', 'ETN', 'CMXXXU', '特別股', 'EPNRAR', 'EPNRQR', 'EPRRQR',
       'EPNNFR', 'EPNRFR', 'ETF', 'CEOGEU', 'CEOGDU', 'CEOGMU', 'CEOGBU',
       'CEOGCU', 'CEOJEU', 'CEOIBU', 'CEOIEU', 'CEOIRU', 'CEOJLU',
       '臺灣存託憑證(TDR)', 'EDSDDR', '受益證券-不動產投資信託', 'CBCIXU'], dtype=object)

In [4]:
# Keep only the rows whose column 5 is 'ESVUFR' or 'ESVTFR', renumber them,
# and retain columns 0, 2, 3 and 4 (which also drops the old index column
# added by reset_index).
stock_ref = df[df[5].isin(['ESVUFR', 'ESVTFR'])].reset_index()[[0,2,3,4]]
stock_ref.head()

Unnamed: 0,0,2,3,4
0,1101　台泥,1962/02/09,上市,水泥工業
1,1102　亞泥,1962/06/08,上市,水泥工業
2,1103　嘉泥,1969/11/14,上市,水泥工業
3,1104　環泥,1971/02/01,上市,水泥工業
4,1108　幸福,1990/06/06,上市,水泥工業


In [5]:
# Give the four positional columns (0, 2, 3, 4) descriptive names.
# NOTE(review): this cell is not idempotent — after the last line below,
# stock_ref has five columns, so re-running this assignment raises a
# length-mismatch ValueError.
stock_ref.columns = ['name_and_no','ipo_date','market','industry']

# Each entry looks like "<stock_no><whitespace><stock_name>"; split() with no
# argument splits on any whitespace, including the full-width space seen in
# the preview above.
# NOTE(review): only the second token is kept, so a name containing whitespace
# would be truncated, and an entry without a name would raise IndexError.
stock_ref['stock_no'] = stock_ref['name_and_no'].apply(lambda x: x.split()[0])
stock_ref['stock_name'] = stock_ref['name_and_no'].apply(lambda x: x.split()[1])

# Drop the combined column and put the two split-out columns first.
stock_ref = stock_ref[['stock_no','stock_name','ipo_date','market','industry']]

In [6]:
# Fetch the raw STOCK_DAY JSON payload for stock number 2330 with the date
# parameter 20180930.
# NOTE(review): presumably the endpoint returns the whole month containing
# that date — confirm against the TWSE API.
mystock = get_historical_stock_data(20180930, 2330)

In [7]:
# Left disabled by the author: uncomment to convert the fetched payload into a DataFrame.
#get_stock_df(mystock)