In [1]:
import requests
import pandas as pd
from pprint import pprint
from bs4 import BeautifulSoup

In [2]:
# Target URL: TWSE after-trading report (MI_INDEX), requested as an HTML page.
# The query values are named so the trading day can be changed in one place.
TRADE_DATE = "20251107"     # trading day, formatted YYYYMMDD
REPORT_TYPE = "ALLBUT0999"  # value sent as the endpoint's `type` query parameter
url = (
    "https://www.twse.com.tw/rwd/zh/afterTrading/MI_INDEX"
    f"?date={TRADE_DATE}&type={REPORT_TYPE}&response=html"
)

In [3]:
# Fetch the page.
# A timeout is set explicitly: without one, requests waits indefinitely on a
# stalled connection and the kernel appears to hang.
resp = requests.get(url, timeout=30)

In [4]:
# Show the HTTP status code of the response
print(resp.status_code)

# Raise an exception if the status code signals a failure (4xx/5xx)
resp.raise_for_status()

200


In [5]:
# Print the fetched content.
# Only the first 1000 characters are shown: the page embeds tables with over a
# thousand rows, and dumping all of it floods the notebook output.
pprint(resp.text[:1000])

('<!doctype html>\r\n'
 '<html lang="zh">\r\n'
 '<head>\r\n'
 '    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">\r\n'
 '    <meta http-equiv="X-UA-Compatible" content="IE=edge">\r\n'
 '    <title> 報表 - TWSE 臺灣證券交易所 </title>\r\n'
 '    <style>\r\n'
 '    table { border-collapse: collapse; margin: 1em 0; }\r\n'
 '    table, th, td { border: 1px solid lightgray; }\r\n'
 '    table thead th, table thead td { text-align: center; background-color: '
 '#eee; }\r\n'
 '    table thead div { font-size: 1.5em; padding: 10px; }\r\n'
 '    table tbody tr:nth-child(even){ background-color:#f8f8f8; }\r\n'
 '    td { padding: 4px; }\r\n'
 '    td>p { margin: 0; text-align: center; }\r\n'
 '    div.notes { margin: 2em 0 4em 0; line-height: 1.5em; }\r\n'
 '    table tbody td { text-align: right; }\r\n'
 '    table tbody td:first-child { text-align: left; }\r\n'
 '    table tbody td:first-child + td { text-align: right; }\r\n'
 '    </style>\r\n'
 '</head>\r\n'
 '<body>\r\n'
 '<div>

In [6]:
# Parse the response HTML into a BeautifulSoup tree using the "html.parser" backend
soup = BeautifulSoup(resp.text, "html.parser")

In [7]:
# Preview the parsed document. str(soup) is the same serialisation print(soup)
# would emit; only the first 1000 characters are shown to keep the output readable.
print(str(soup)[:1000])

<!DOCTYPE html>

<html lang="zh">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<title> 報表 - TWSE 臺灣證券交易所 </title>
<style>
    table { border-collapse: collapse; margin: 1em 0; }
    table, th, td { border: 1px solid lightgray; }
    table thead th, table thead td { text-align: center; background-color: #eee; }
    table thead div { font-size: 1.5em; padding: 10px; }
    table tbody tr:nth-child(even){ background-color:#f8f8f8; }
    td { padding: 4px; }
    td>p { margin: 0; text-align: center; }
    div.notes { margin: 2em 0 4em 0; line-height: 1.5em; }
    table tbody td { text-align: right; }
    table tbody td:first-child { text-align: left; }
    table tbody td:first-child + td { text-align: right; }
    </style>
</head>
<body>
<div>
<table>
<thead>
<tr>
<th colspan="6">
<div>114年11月07日 價格指數(臺灣證券交易所)</div>
</th>
</tr>
<tr>
<th>指數</th>
<th>收盤指數</th>
<th>漲跌(+/-)</th>
<th>漲跌點數</th>
<th>漲跌百分比(%)</th>

In [8]:
# Collect every <table> element in the parsed document
tables = soup.find_all("table")

In [None]:
# Pick the target table by its position among the <table> elements.
# NOTE(review): index 8 is hard-coded and depends on the order of tables in the
# page — confirm it still selects the intended table if the report layout or
# the `type` query parameter changes.
t = tables[8]
t

In [None]:
# Extract the table contents as plain, whitespace-stripped text.

# Header cells: text of every <th> under <thead> (empty list when there is no <thead>)
thead = t.find("thead")
headers = (
    [th.get_text(strip=True) for th in thead.find_all("th")]
    if thead
    else []
)

# Data rows: one list of <td> texts per <tr> under <tbody> (empty list when there is no <tbody>)
tbody = t.find("tbody")
rows = (
    [
        [td.get_text(strip=True) for td in tr.find_all("td")]
        for tr in tbody.find_all("tr")
    ]
    if tbody
    else []
)

In [None]:
# Adjust the header list: the first <th> collected from <thead> is an extra
# cell (presumably the table's title banner — verify against the table markup),
# not a column name, so it is dropped.
# The drop is guarded so that re-running this cell is harmless: the leading
# entry is removed only while there are more headers than cells in a data row.
# With no data rows to compare against, the first entry is dropped unconditionally.
if not rows or len(headers) > len(rows[0]):
    headers = headers[1:]

In [None]:
# Build the DataFrame: one row per parsed <tr>, column names from the adjusted header list
df = pd.DataFrame(rows, columns=headers)
df

Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
0,0050,元大台灣50,111187957,136880,6952850646,62.65,62.75,62.40,62.55,-,0.75,62.55,380,62.60,1018,0.00
1,0051,元大中型100,42616,333,3888260,90.75,91.75,90.75,91.00,-,0.90,91.00,5,91.05,1,0.00
2,0052,富邦科技,3564264,11186,896588623,252.60,252.60,250.75,251.50,-,3.35,251.45,2,251.50,62,0.00
3,0053,元大電子,20137,148,2772275,138.30,138.30,137.00,138.10,-,1.90,137.50,2,137.80,1,0.00
4,0055,元大MSCI金融,150905,375,4630099,30.85,30.85,30.60,30.61,-,0.24,30.58,2,30.61,57,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1316,9944,新麗,9798,40,164936,16.85,17.00,16.70,16.85,,0.00,16.85,1,16.90,1,0.00
1317,9945,潤泰新,2377556,1502,71297485,30.15,30.15,29.80,29.80,-,0.35,29.80,172,29.95,19,8.19
1318,9946,三發地產,143873,111,2373122,16.70,16.70,16.40,16.55,-,0.20,16.55,8,16.65,6,51.72
1319,9955,佳龍,225893,192,6422433,28.30,28.55,28.25,28.50,+,0.25,28.50,5,28.55,3,0.00
