# 目前只使用以下package 抓取網頁資訊

In [15]:
import re
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime as dt
import json

## 抓取TWSE的json資訊

In [17]:
stock = "tse_2317.tw|tse_2002.tw"
now = dt.datetime.now()
timestamp = dt.datetime.timestamp(now)

url = "https://mis.twse.com.tw/stock/api/getStockInfo.jsp?ex_ch={StockNo}&json=1&delay=0&_={timeInterval}".format(StockNo=stock,timeInterval=timestamp )
req = requests.get(url)
req = json.loads(req.text.replace('\r','').replace('\n',''))
req['msgArray']

[{'ts': '0',
  'fv': '417',
  'tk0': '2317.tw_tse_20191104_B_9999036583',
  'tk1': '2317.tw_tse_20191104_B_9999013377',
  'oa': '88.50',
  'ob': '88.40',
  'tlong': '1572849000000',
  'ot': '14:30:00',
  'f': '900_472_614_376_424_',
  'ex': 'tse',
  'g': '1041_213_179_264_278_',
  'ov': '921158',
  'd': '20191104',
  'it': '12',
  'b': '88.90_88.80_88.70_88.60_88.50_',
  'c': '2317',
  'mt': '000000',
  'a': '89.00_89.10_89.20_89.30_89.40_',
  'n': '鴻海',
  'o': '86.10',
  'l': '86.10',
  'oz': '88.50',
  'h': '89.10',
  'ip': '0',
  'i': '31',
  'w': '76.40',
  'v': '154486',
  'u': '93.20',
  't': '13:30:00',
  's': '6038',
  'pz': '89.00',
  'tv': '6038',
  'p': '0',
  'nf': '鴻海精密工業股份有限公司',
  'ch': '2317.tw',
  'z': '89.00',
  'y': '84.80',
  'ps': '5894'},
 {'ts': '0',
  'fv': '108',
  'tk0': '2002.tw_tse_20191104_B_9999036619',
  'tk1': '2002.tw_tse_20191104_B_9999013197',
  'oa': '23.55',
  'ob': '23.50',
  'tlong': '1572849000000',
  'ot': '14:30:00',
  'f': '702_1081_931_524_266

### 稍微列一下比較清楚的資料
* f : 賣出數量
* ex : 交易所
* g : 買進數量
* d : 交易日
* b : 買進價格
* c : 股票代號
* a : 賣出價格
* n : 股票名稱
* l : 最低價
* h : 最高價
* w : 跌停價
* v : 總量
* u : 漲跌價
* t : 報價
* s : 整股最後一盤交易量
* tv : 單量
* nf : 公司名稱
* ch : 股票代號(全)
* z : 成交價/收盤價
* y : 平盤價

In [19]:
df = pd.DataFrame(req['msgArray'])
df['y'] =  df['y'].astype(float) #平盤價
df['z'] =  df['z'].astype(float) #成交價/收盤價
df['y'] = round((df["z"]/df["y"] - 1) *100,2) #算出漲幅
df['Point'] = df['z']- df['y']
df = df.loc[:,["ex","c","n","ch","z","y","Point"]]
df = df.rename(columns={"ex":"Exchange","c":"StockNo","n":"StockName","ch":"StockNoWithMarket","z":"ClosingPrice","y":"Change"})

In [20]:
df

Unnamed: 0,Exchange,StockNo,StockName,StockNoWithMarket,ClosingPrice,Change,Point
0,tse,2317,鴻海,2317.tw,89.0,4.95,84.05
1,tse,2002,中鋼,2002.tw,23.6,0.21,23.39


抓取股票基本資料(網頁有點慢)
--------------------------------------------------------------------

In [30]:
respStockList = requests.get("http://isin.twse.com.tw/isin/C_public.jsp?strMode=2")
df = pd.read_html(respStockList.text)[0]
df.columns = df.iloc[0]
df = df.dropna(thresh=3, axis=0).dropna(thresh=3, axis=1)
df

Unnamed: 0,有價證券代號及名稱,國際證券辨識號碼(ISIN Code),上市日,市場別,產業別,CFICode,備註
0,有價證券代號及名稱,國際證券辨識號碼(ISIN Code),上市日,市場別,產業別,CFICode,備註
1,股票,股票,股票,股票,股票,股票,股票
2,1101　台泥,TW0001101004,1962/02/09,上市,水泥工業,ESVUFR,
3,1102　亞泥,TW0001102002,1962/06/08,上市,水泥工業,ESVUFR,
4,1103　嘉泥,TW0001103000,1969/11/14,上市,水泥工業,ESVUFR,
...,...,...,...,...,...,...,...
16533,01003T　兆豐新光R1,TW00001003T4,2005/12/26,上市,,CBCIXU,
16534,01004T　土銀富邦R2,TW00001004T2,2006/04/13,上市,,CBCIXU,
16535,01007T　兆豐國泰R2,TW00001007T5,2006/10/13,上市,,CBCIXU,
16536,01009T　王道圓滿R1,TW00001009T1,2018/06/21,上市,,CBCIXU,


In [31]:
#只篩選股票和ETF
prefixes = ['ES','CE'] 
df = df[df['CFICode'].str.startswith(tuple(prefixes))]

In [32]:
df = df.loc[:,["有價證券代號及名稱","市場別"]]
df.rename(columns={"市場別":"Exchange"},inplace=True)
df['Exchange'] = df['Exchange'].replace('上市','tse')

#上市if(mktValue =="2"): 
df[["StockNo","StockName","None"]]=df.有價證券代號及名稱.str.split(expand=True)
#上櫃 else:
#上櫃    df[["StockNo","StockName"]]=df.有價證券代號及名稱.str.split(expand=True)        

df.loc[:,"Exchange":"StockName"] 

Unnamed: 0,Exchange,StockNo,StockName
2,tse,1101,台泥
3,tse,1102,亞泥
4,tse,1103,嘉泥
5,tse,1104,環泥
6,tse,1108,幸福
...,...,...,...
16501,tse,008201,BP上證50
16502,tse,00830,國泰費城半導體
16503,tse,00850,元大臺灣ESG永續
16504,tse,00851,台新全球AI
