# 產生台股上市上櫃一覽表

表格說明：有價證券代號/名稱/國際證券辨識號碼(ISIN Code)/上市日/市場別/產業別/CFICode

英文對照：stock_id/stock_name/ISIN_Code/Listing_date/Listing_category/Industry_category/CFICode

In [None]:
import requests
import pandas as pd
from datetime import datetime
import time
import random

# Build the list of listed / OTC Taiwanese stocks and save it as stock_list.csv.
# The listed and OTC security tables are known to live on sub-pages
# strMode=2 and strMode=4 of the ISIN site, respectively.
urllist = [2, 4]
starttime = datetime.now()  # start of the whole download, used for the elapsed-time report

# Collect one raw table per sub-page and concatenate them once at the end.
# (DataFrame.append was removed in pandas 2.0; pd.concat works on every version.)
frames = []
for obj in urllist:
    url = "http://isin.twse.com.tw/isin/C_public.jsp?strMode=" + str(obj)
    data = pd.read_html(requests.get(url).text)[0]
    frames.append(data)
    time.sleep(random.randrange(1, 10))  # random 1-9 second pause between requests

df = pd.concat(frames)

# Keep only the rows whose sixth column (the CFICode column, see the header
# names assigned below) equals "ESVUFR".
# NOTE(review): presumably this is the CFI code of ordinary shares - confirm.
mask = df[5] == "ESVUFR"
df = df[mask].reset_index(drop=True)
df = df.iloc[:, 0:6].copy()  # copy so the column assignments below act on an independent frame
df.columns = ['有價證券代號及名稱', '國際證券辨識號碼(ISIN Code)', '上市日', '市場別', '產業別', 'CFICode']

# The first column holds "<code><whitespace><name>"; split it once and reuse.
code_and_name = df['有價證券代號及名稱'].str.split(r'\s+')
df['有價證券代號'] = code_and_name.str.get(0)
df['名稱'] = code_and_name.str.get(1)
df["上市日"] = pd.to_datetime(df["上市日"], format='%Y/%m/%d')

# Reorder the columns and switch to the English header names before export.
df = df[['有價證券代號', '名稱', '國際證券辨識號碼(ISIN Code)', '上市日', '市場別', '產業別', 'CFICode']]
df.columns = ["stock_id", "stock_name", 'ISIN_Code', 'Listing_date', 'Listing_category', 'Industry_category', 'CFICode']
df.to_csv("stock_list.csv", encoding='utf-8', index=False)

endtime = datetime.now()  # end of the download
print("執行時間:", endtime - starttime, "秒")  # report how long building the list took

# 證交所單股每日交易量（爬取上市股票）
表格說明：股票代碼/公司名稱/日期/成交量/成交金額/開盤價/最高價/最低價/收盤價/漲跌幅/交易筆數

英文對照：stock_id/stock_name/date/Volume/Volume_Cash/Open/High/Low/Close/Change/Order

In [3]:
def Stock_crawl(stockNo, Name, start_year=2021, end_year=2021):
    """Download the daily trading data of one stock from TWSE and save it as CSV.

    One request is sent per calendar month (the query date is the first day
    of that month), with a random 1-9 second pause after every request.
    The monthly rows are merged, cleaned and written to
    ``<stockNo>_daily.csv`` with the columns
    stock_id / stock_name / date / Volume / Volume_Cash / Open / High / Low /
    Close / Change / Order.

    Args:
        stockNo: Stock code placed in the request URL and in the output
            file name.
        Name: Company name; only used for messages and the ``stock_name``
            column.
        start_year: First Gregorian year to download (inclusive). The
            default of 2021 is the small test range; per the original
            author's note the exchange only provides data from ROC year 99
            (2010) onward.
        end_year: Last Gregorian year to download (inclusive).

    Returns:
        None. Progress and problems are reported with ``print``. Nothing is
        written when no rows could be downloaded or the rows cannot be
        parsed.
    """
    import pandas as pd
    import json
    import requests
    import time
    import random
    from datetime import datetime

    # First day of every month in the requested range, formatted YYYYMMDD.
    dates = [
        f"{year}{month:02d}01"
        for year in range(start_year, end_year + 1)
        for month in range(1, 13)
    ]
    starttime = datetime.now()  # used for the elapsed-time report at the end

    json_list = []  # one list of raw rows per successfully downloaded month
    for Date in dates:
        url = f'https://www.twse.com.tw/exchangeReport/STOCK_DAY?response=json&date={Date}&stockNo={stockNo}'
        try:
            # A timeout keeps one stalled connection from hanging the whole crawl.
            response = requests.get(url, timeout=30)
            json_list.append(json.loads(response.text)['data'])
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Network failure, a body that is not JSON, or a reply without a
            # 'data' entry: skip this month and carry on.
            print("No data of", Date, Name)
        time.sleep(random.randrange(1, 10))  # random pause after every request

    columns = ['date', 'Volume', 'Volume_Cash', 'Open', 'High', 'Low', 'Close', 'Change', 'Order']
    try:
        # Flatten the per-month lists into a single list of rows.
        json_stock = [row for month_rows in json_list for row in month_rows]
        if not json_stock:
            # Previously this case crashed because StockPrice was never created.
            print("No data for", stockNo, Name, "- nothing written")
            return

        StockPrice = pd.DataFrame(json_stock, columns=columns)

        # Dates arrive as ROC-calendar "yyy/mm/dd"; dropping the slashes and
        # adding 19110000 yields the Gregorian YYYYMMDD number.
        roc_dates = StockPrice['date'].str.replace('/', '', regex=False).astype(int)
        StockPrice['date'] = pd.to_datetime((roc_dates + 19110000).astype(str), format='%Y%m%d')

        for col in columns[1:]:
            cleaned = StockPrice[col].astype(str).str.replace(',', '', regex=False)
            if col == 'Change':
                # The change column may carry a '+' sign or an 'X' marker.
                cleaned = cleaned.str.replace('+', '', regex=False).str.replace('X', '', regex=False)
            # Values that are still not numeric become NaN instead of
            # aborting the conversion of the whole table.
            StockPrice[col] = pd.to_numeric(cleaned.str.strip(), errors='coerce').astype(float)
    except (ValueError, TypeError, AttributeError):
        # Unexpected row shape or cell type; there is no usable table to write.
        print("Woops, Something wrong~")
        return

    # Inserting at positions 0 and 1 gives the final column order:
    # stock_id, stock_name, date, Volume, ..., Order.
    StockPrice.insert(0, column='stock_id', value=stockNo)
    StockPrice.insert(1, column='stock_name', value=Name)

    file_name = "{}_daily.csv".format(stockNo)
    StockPrice.to_csv(file_name, index=False)
    print(file_name+"下載完成...", end="")
    endtime = datetime.now()  # end of the crawl for this stock
    print("執行時間:", endtime-starttime, "秒")  # report how long this stock took

In [4]:
import csv
# Crawl every stock listed in the CSV (columns used: stock_id, stock_name).
# 'stock_list_for_test.csv' is a small list for quick testing; switch the
# file name to 'stock_list.csv' for the full run.
# The file is opened explicitly as UTF-8 (stock_list.csv is written as UTF-8;
# 'utf-8-sig' also tolerates a leading BOM) instead of relying on the locale
# default, and with newline='' as the csv module recommends.
# NOTE(review): assumes stock_list_for_test.csv is UTF-8 encoded too - confirm.
with open('stock_list_for_test.csv', encoding='utf-8-sig', newline='') as f:
    list_of_stock = csv.DictReader(f)
    for obj in list_of_stock:
        Stock_crawl(obj['stock_id'], obj['stock_name'])

No data of 20210501 台泥
No data of 20210601 台泥
No data of 20210701 台泥
No data of 20210801 台泥
No data of 20210901 台泥
No data of 20211001 台泥
No data of 20211101 台泥
No data of 20211201 台泥
1101_daily.csv下載完成...執行時間: 0:01:06.839531 秒
