### `ETF資料傳輸系統_每日申購贖回資料爬蟲系統`
> 表定15:30所有申購贖回資料都會上系統，實際上需要等到16:00再進行操作比較好

> U:\【指數投資部】\證交所造市商報價\ETF_DB\0.開發中\每階段程式碼整理_除錯用，固定存放每階段HTML檔案

> 檔案餵給ChatGPT就可以解決問題

In [None]:
import asyncio
import os
from playwright.async_api import async_playwright
from datetime import datetime, timedelta

# Run date formatted as YYYYMMDD; run() embeds it in the saved file names.
today = f"{datetime.today():%Y%m%d}"
today  # bare expression kept from the notebook cell (no effect as a script)

# Login fields typed into the TWSE ETF logon form by run().
ETF_ID = "ETF ID"
USER_ID = "USER_ID"
PASSWORD = "ETF PASSWORD"

# Folder that receives the per-step HTML snapshots written by save_txt().
DEBUG_DIR = "U:/【指數投資部】/證交所造市商報價/ETF_DB/0.開發中/每階段程式碼整理_除錯用"
# Create the folder up front so later writes do not fail on a missing path.
os.makedirs(name=DEBUG_DIR, exist_ok=True)

# Save an HTML snapshot as a text file.
async def save_txt(filename, content):
    """Write *content* to ``DEBUG_DIR/filename`` and print the saved path.

    Args:
        filename: File name (relative to ``DEBUG_DIR``) to create or overwrite.
        content: Text to store; it is written with UTF-8 encoding.

    Note:
        Declared ``async`` so callers can ``await`` it, but the file write
        itself is an ordinary blocking call.
    """
    destination = os.path.join(DEBUG_DIR, filename)
    with open(destination, mode="w", encoding="utf-8") as handle:
        handle.write(content)
    print(f"📝 已儲存 HTML：{destination}")

# File kinds that run() downloads; every link is checked against them in this order.
_FILE_KINDS = ("MA1", "MA2", "MA3", "MA5")

# Root of the archive; a file is stored as <root>/<kind>/<kind>-<today>.dat.
_ARCHIVE_ROOT = "U:/【指數投資部】/證交所造市商報價/ETF_DB/1.ETF 歷史資料"


async def _log_in(page):
    """Submit the logon form and wait for the main frameset.

    Returns:
        True once the page source contains both ``<frameset`` and
        ``workspace``; False if that does not happen within 20 polls of
        0.5 s (an error snapshot is saved in that case).
    """
    await page.goto("https://clear.twse.com.tw/etf/FSys_Logon")
    await asyncio.sleep(2)

    # Fill in the login form.
    await page.fill("input[name='BRKID']", ETF_ID)
    await page.fill("input[name='USERID']", USER_ID)
    await page.fill("input[name='PASSWORD']", PASSWORD)
    # Force the IsNeedCheckCA form field to '0' before submitting.
    await page.evaluate("document.getElementById('IsNeedCheckCA').value = '0'")
    await page.click("input#send")
    await asyncio.sleep(2)

    await page.wait_for_load_state("networkidle")
    await save_txt("step_1_after_login.txt", await page.content())
    await asyncio.sleep(2)

    # The site shows a confirmation page when the account is already logged
    # in elsewhere; confirm it to force the login.
    if "強制登出前一個已登入使用者" in await page.content():
        print("⚠️ 偵測到已登入使用者，準備強制登入...")
        await page.click("input[type='submit']")  # the confirm button
        await page.wait_for_load_state("networkidle")
        await save_txt("step_1_force_login.txt", await page.content())
        await asyncio.sleep(2)

    # Poll until the frameset-based main screen has replaced the login page.
    for _ in range(20):
        content = await page.content()
        if "<frameset" in content and "workspace" in content:
            return True
        await asyncio.sleep(0.5)

    print("❌ 登入後頁面未切換到主畫面（frameset 未出現）")
    await save_txt("error_after_login.txt", await page.content())
    return False


async def _open_download_listing(page):
    """Click the download menu entry and return the frame listing the files.

    Returns:
        The ``result_messages`` frame once its URL contains ``ETF_D_RCV``,
        or None if either frame could not be found in time.
    """
    # Wait for the upper frame element to appear.
    try:
        await page.wait_for_selector("frame[name='workspace']", timeout=10000)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that cancellation
        # and Ctrl-C are not mistaken for a timeout.
        print("❌ workspace frame 等待逾時")
        await save_txt("error_no_workspace_frame.txt", await page.content())
        return None

    # The element can exist slightly before the frame object is available.
    top_frame = None
    for _ in range(20):
        top_frame = page.frame(name="workspace")
        if top_frame:
            break
        await asyncio.sleep(0.5)

    if not top_frame:
        print("❌ 找不到上層 workspace frame")
        print("📦 現有 frames:", [f.name for f in page.frames])
        return None

    await asyncio.sleep(2)
    await top_frame.click("text=檔案下載")
    await asyncio.sleep(2)
    await save_txt("step_2_after_click_download.txt", await page.content())

    # Lower frame that receives the file listing.
    try:
        await page.wait_for_selector("frame[name='result_messages']", timeout=10000)
    except Exception:
        print("❌ result_messages frame 等待逾時")
        return None

    for _ in range(20):
        bottom_frame = page.frame(name="result_messages")
        if bottom_frame and "ETF_D_RCV" in (bottom_frame.url or ""):
            print("✅ result_messages frame 載入 ETF_D_RCV")
            break
        await asyncio.sleep(0.5)
    else:
        print("❌ result_messages 沒有載入 ETF_D_RCV")
        return None

    await asyncio.sleep(2)
    await save_txt("step_3_result_page.txt", await bottom_frame.content())
    return bottom_frame


async def _save_download(page, frame, href, kind):
    """Click the link with this exact *href* and store the file under *kind*.

    NOTE: the target name contains only the kind and the date, so a second
    link of the same kind on the same day overwrites the first file.
    """
    print(f"✅ 準備下載：https://clear.twse.com.tw{href}")

    # Register the download listener before clicking so the event is not missed.
    async with page.expect_download() as download_info:
        await frame.click(f"a[href='{href}']")
    download = await download_info.value

    new_file_path = f"{_ARCHIVE_ROOT}/{kind}/{kind}-{today}.dat"
    await download.save_as(new_file_path)
    print(f"✅ 檔案重新命名並儲存完成，路徑：{new_file_path}")


async def _download_listed_files(page, frame):
    """Download every link in *frame* whose href names one of ``_FILE_KINDS``."""
    # Guard: a table with at most one row has a header only and no files.
    rows = await frame.query_selector_all("table.resultTable tr")
    if len(rows) <= 1:
        print("⚠️ 目前下層沒有可下載的檔案。")
        return

    for link in await frame.query_selector_all("a"):
        href = await link.get_attribute("href")
        if not href:
            continue
        for kind in _FILE_KINDS:
            if f"fname={kind}" in href:
                await _save_download(page, frame, href, kind)


# Main flow
async def run():
    """Log in to the TWSE ETF site and download today's MA1/MA2/MA3/MA5 files.

    Steps: launch Chrome through the configured proxy, log in (forcing out a
    previous session if the site asks), open the download listing, and save
    each matching file as ``<kind>-<today>.dat`` in its archive folder.
    HTML snapshots of each stage are written through ``save_txt``.

    On any failed step the function prints a message and returns early
    without raising. The browser is closed in all cases.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False,
                                           executable_path="C:/Program Files/Google/Chrome/Application/chrome.exe",
                                           proxy={"server": "http://99999"})
        try:
            context = await browser.new_context()
            page = await context.new_page()

            if not await _log_in(page):
                return

            bottom_frame = await _open_download_listing(page)
            if bottom_frame is None:
                return

            await _download_listed_files(page, bottom_frame)
        finally:
            # Runs on early returns and on errors too, so Chrome never lingers.
            await browser.close()


# When running inside Jupyter or a VS Code .ipynb notebook, start the crawler this way.
# nest_asyncio.apply() must be called before run() is awaited.
# The top-level `await` below is only valid in a notebook/REPL cell; in a
# plain .py script use `asyncio.run(run())` instead.
import nest_asyncio
nest_asyncio.apply()
await run()

📝 已儲存 HTML：U:/【指數投資部】/證交所造市商報價/ETF_DB/0.開發中/每階段程式碼整理_除錯用\step_1_after_login.txt
📝 已儲存 HTML：U:/【指數投資部】/證交所造市商報價/ETF_DB/0.開發中/每階段程式碼整理_除錯用\step_2_after_click_download.txt
✅ result_messages frame 載入 ETF_D_RCV
📝 已儲存 HTML：U:/【指數投資部】/證交所造市商報價/ETF_DB/0.開發中/每階段程式碼整理_除錯用\step_3_result_page.txt
✅ 準備下載：https://clear.twse.com.tw/etf/fileTransfer/SaveRecord.jsp?etfid=0098&fname=MA5-20250327-0315421771R.dat
✅ 檔案重新命名並儲存完成，路徑：U:/【指數投資部】/證交所造市商報價/ETF_DB/1.ETF 歷史資料/MA5/MA5-20250327.dat
✅ 準備下載：https://clear.twse.com.tw/etf/fileTransfer/SaveRecord.jsp?etfid=0098&fname=MA3-20250327-0315421289R.dat
✅ 檔案重新命名並儲存完成，路徑：U:/【指數投資部】/證交所造市商報價/ETF_DB/1.ETF 歷史資料/MA3/MA3-20250327.dat
✅ 準備下載：https://clear.twse.com.tw/etf/fileTransfer/SaveRecord.jsp?etfid=0098&fname=MA2-20250327-0315420712R.dat
✅ 檔案重新命名並儲存完成，路徑：U:/【指數投資部】/證交所造市商報價/ETF_DB/1.ETF 歷史資料/MA2/MA2-20250327.dat
✅ 準備下載：https://clear.twse.com.tw/etf/fileTransfer/SaveRecord.jsp?etfid=0098&fname=MA1-20250327-0314064613R.dat
✅ 檔案重新命名並儲存完成，路徑：U:/【指數投資部】/證交所造市商報價/ETF_D