In [5]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import csv
import datetime
import chromedriver_autoinstaller
import time

# 設置 Chrome 瀏覽器的選項
chrome_options = Options()
chrome_options.add_argument('--headless')  # 隱藏瀏覽器窗口
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# 自動安裝 ChromeDriver
chromedriver_autoinstaller.install()

# 初始化 WebDriver
driver = webdriver.Chrome(options=chrome_options)

# 訪問網站
url = "https://www.twse.com.tw/pcversion/zh/page/trading/fund/T86.html"
driver.get(url)

try:
    # 設置一個隨機時間間隔來避免過於頻繁的訪問，減少被封禁的風險
    time.sleep(5)  # 可以根據需要調整等待的秒數，這裡設置為 5 秒

    # 等待下拉選單元素加載出來
    wait = WebDriverWait(driver, 10)
    select_element = wait.until(EC.presence_of_element_located((By.NAME, 'selectType')))
    print("Successfully located select element for industry selection.")
    
    # 選擇 "航運業"
    select = Select(select_element)
    select.select_by_value('15')
    print("Successfully selected '航運業'.")

    # 設置另一個時間間隔以確保下拉選單更改後的穩定性
    time.sleep(3)

    # 按下查詢按鈕
    search_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@class='button search']")))
    search_button.click()
    print("Successfully clicked the search button.")

    # 設置一個時間間隔以確保頁面刷新後的穩定性
    time.sleep(5)

    # 等待表格加載完成後設置每頁顯示為 "全部"
    page_size_select_element = wait.until(EC.presence_of_element_located((By.NAME, 'report-table_length')))
    print("Successfully located page size selection element.")
    page_size_select = Select(page_size_select_element)
    page_size_select.select_by_value('-1')
    print("Successfully set page size to '全部'.")

    # 設置一個時間間隔以確保設置每頁顯示後的穩定性
    time.sleep(10)  # 增加延遲以確保資料已完全加載

    # 等待表格元素加載出來
    table = wait.until(EC.presence_of_element_located((By.ID, 'report-table')))
    print("Successfully located the report table.")

    # 找到所有行元素
    rows = table.find_elements(By.TAG_NAME, "tr")
    data = []
    headers = []
    for idx, row in enumerate(rows):
        if idx == 0:  # 第一行是表格的標題
            headers = [col.text.strip() for col in row.find_elements(By.TAG_NAME, "th")]
            print(f"Headers: {headers}")
        else:
            columns = row.find_elements(By.TAG_NAME, "td")
            row_data = [col.text.strip() for col in columns]
            # 使用證券代號來抓取指定的公司資料 (2603: 長榮, 2609: 陽明, 2615: 萬海)
            if row_data and row_data[0] in ['2603', '2609', '2615']:
                data.append(row_data)
                print(f"Row data for {row_data[0]}: {row_data}")

    # 如果沒有抓取到任何資料，打印提示訊息
    if not data:
        print("No data found for the specified companies (2603, 2609, 2615). Please check if the table has loaded correctly.")
    
    # 如果表格有表頭信息，將其與表格數據合併
    if headers:
        # 獲取當前日期，並使用 YYYY-MM-DD 格式保存文件
        today = datetime.date.today()
        year_str = today.strftime('%y')[-2:]
        date_str = today.strftime(year_str + '-%m%d')
        
        # 將數據寫入 CSV 文件，文件名包含日期
        filename = f'top_investors_daily_{date_str}.csv'
        with open(filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(headers)
            writer.writerows(data)
        
        print(f"Data has been written to {filename}")
        
        # 創建精簡版的 CSV 文件，只保存特定欄位
        simplified_headers = [headers[i] for i in [0, 1, 4, 10, 11, 18]]
        simplified_data = [[row[i] for i in [0, 1, 4, 10, 11, 18]] for row in data]
        simplified_filename = f'top_investors_daily_simplified_{date_str}.csv'
        with open(simplified_filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(simplified_headers)
            writer.writerows(simplified_data)
        
        print(f"Simplified data has been written to {simplified_filename}")
    else:
        print("Failed to find headers in the table.")
except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # 關閉瀏覽器
    driver.quit()

# 注意：
# 這段程式碼使用 Selenium 來模擬瀏覽器行為，因此可以抓取動態加載的數據。
# 請確保您已安裝 ChromeDriver，並根據您的環境設置 PATH。
# 加入了多個時間間隔 (time.sleep) 來減少過於頻繁的請求，從而減少被封禁的風險。

Successfully located select element for industry selection.
Successfully selected '航運業'.
Successfully clicked the search button.
Successfully located page size selection element.
Successfully set page size to '全部'.
Successfully located the report table.
Headers: ['證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數']
Row data for 2609: ['2609', '陽明', '34,208,090', '12,595,128', '21,612,962', '0', '0', '0', '148,000', '0', '148,000', '4,404,522', '2,185,128', '173,000', '2,012,128', '2,716,657', '324,263', '2,392,394', '26,165,484']
Row data for 2615: ['2615', '萬海', '26,739,250', '13,065,975', '13,673,275', '0', '0', '0', '1,283,000', '0', '1,283,000', '2,251,646', '2,385,718', '888,998', '1,496,720', '1,480,188', '725,262', '754,926', '17,207,921']
Row data for 2603: ['2603

# 以下都是中間錯誤的程式過程參考


In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import csv
import datetime
import chromedriver_autoinstaller
import time

# 設置 Chrome 瀏覽器的選項
chrome_options = Options()
chrome_options.add_argument('--headless')  # 隱藏瀏覽器窗口
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# 自動安裝 ChromeDriver
chromedriver_autoinstaller.install()

# 初始化 WebDriver
driver = webdriver.Chrome(options=chrome_options)

# 訪問網站
url = "https://www.twse.com.tw/pcversion/zh/page/trading/fund/T86.html"
driver.get(url)

try:
    # 設置一個隨機時間間隔來避免過於頻繁的訪問，減少被封禁的風險
    time.sleep(5)  # 可以根據需要調整等待的秒數，這裡設置為 5 秒

    # 等待下拉選單元素加載出來
    wait = WebDriverWait(driver, 10)
    select_element = wait.until(EC.presence_of_element_located((By.NAME, 'selectType')))
    
    # 選擇 "航運業"
    select = Select(select_element)
    select.select_by_value('15')

    # 設置另一個時間間隔以確保下拉選單更改後的穩定性
    time.sleep(3)

    # 等待表格加載完成後設置每頁顯示為 "全部"
    page_size_select_element = wait.until(EC.presence_of_element_located((By.NAME, 'report-table_length')))
    page_size_select = Select(page_size_select_element)
    page_size_select.select_by_value('-1')

    # 設置一個時間間隔以確保設置每頁顯示後的穩定性
    time.sleep(3)

    # 點擊 "查詢" 按鈕以獲取更新後的數據
    search_button = driver.find_element(By.XPATH, "//input[@type='submit']")
    search_button.click()

    # 設置另一個時間間隔，等待數據加載
    time.sleep(5)

    # 等待表格元素加載出來
    table = wait.until(EC.presence_of_element_located((By.ID, 'report-table')))
    
    # 找到所有行元素
    rows = table.find_elements(By.TAG_NAME, "tr")
    data = []
    headers = []
    for idx, row in enumerate(rows):
        if idx == 0:  # 第一行是表格的標題
            headers = [col.text.strip() for col in row.find_elements(By.TAG_NAME, "th")]
        else:
            columns = row.find_elements(By.TAG_NAME, "td")
            row_data = [col.text.strip() for col in columns]
            # 使用更精確的條件來確保只抓取長榮(2603)、陽明(2609)、萬海(2615)的資料
            if row_data and row_data[0] in ['2603', '2609', '2615']:
                data.append(row_data)
    
    # 如果表格有表頭信息，將其與表格數據合併
    if headers:
        # 獲取當前日期並計算是第幾週
        today = datetime.date.today()
        year, week_num, _ = today.isocalendar()
        week_str = f'w{week_num:02}'
        
        # 將數據寫入 CSV 文件，文件名包含年份和第幾週
        filename = f'shipping_share_distribution_{year}_{week_str}.csv'
        with open(filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(headers)
            writer.writerows(data)
        
        print(f"Data has been written to {filename}")
        
        # 印出長榮、陽明、萬海的數據
        for row in data:
            print(row)
    else:
        print("Failed to find headers in the table.")
except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # 關閉瀏覽器
    driver.quit()

# 注意：
# 這段程式碼使用 Selenium 來模擬瀏覽器行為，因此可以抓取動態加載的數據。
# 請確保您已安裝 ChromeDriver，並根據您的環境設置 PATH。
# 加入了多個時間間隔 (time.sleep) 來減少過於頻繁的請求，從而減少被封禁的風險。


An error occurred: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//input[@type='submit']"}
  (Session info: chrome=130.0.6723.92)
Stacktrace:
0   chromedriver                        0x000000010110f648 cxxbridge1$str$ptr + 3645404
1   chromedriver                        0x0000000101107ea8 cxxbridge1$str$ptr + 3614780
2   chromedriver                        0x0000000100b74104 cxxbridge1$string$len + 88416
3   chromedriver                        0x0000000100bb6364 cxxbridge1$string$len + 359360
4   chromedriver                        0x0000000100befbd0 cxxbridge1$string$len + 594988
5   chromedriver                        0x0000000100baaf54 cxxbridge1$string$len + 313264
6   chromedriver                        0x0000000100babba4 cxxbridge1$string$len + 316416
7   chromedriver                        0x00000001010da1e8 cxxbridge1$str$ptr + 3427196
8   chromedriver                        0x00000001010dd52c cxxbridge1$str$ptr + 3440320
9   chromedriver     

In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import csv
import datetime
import chromedriver_autoinstaller
import time

# 設置 Chrome 瀏覽器的選項
chrome_options = Options()
chrome_options.add_argument('--headless')  # 隱藏瀏覽器窗口
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# 自動安裝 ChromeDriver
chromedriver_autoinstaller.install()

# 初始化 WebDriver
driver = webdriver.Chrome(options=chrome_options)

# 訪問網站
url = "https://www.twse.com.tw/pcversion/zh/page/trading/fund/T86.html"
driver.get(url)

try:
    # 設置一個隨機時間間隔來避免過於頻繁的訪問，減少被封禁的風險
    time.sleep(5)  # 可以根據需要調整等待的秒數，這裡設置為 5 秒

    # 等待下拉選單元素加載出來
    wait = WebDriverWait(driver, 10)
    select_element = wait.until(EC.presence_of_element_located((By.NAME, 'selectType')))
    
    # 選擇 "航運業"
    select = Select(select_element)
    select.select_by_value('15')

    # 設置另一個時間間隔以確保下拉選單更改後的穩定性
    time.sleep(3)

    # 等待表格加載完成後設置每頁顯示為 "全部"
    page_size_select_element = wait.until(EC.presence_of_element_located((By.NAME, 'report-table_length')))
    page_size_select = Select(page_size_select_element)
    page_size_select.select_by_value('-1')

    # 設置一個時間間隔以確保設置每頁顯示後的穩定性
    time.sleep(3)

    # 使用新的查詢按鈕選擇器
    search_button = driver.find_element(By.CSS_SELECTOR, "button.btn")
    search_button.click()

    # 設置另一個時間間隔，等待數據加載
    time.sleep(5)

    # 等待表格元素加載出來
    table = wait.until(EC.presence_of_element_located((By.ID, 'report-table')))
    
    # 找到所有行元素
    rows = table.find_elements(By.TAG_NAME, "tr")
    data = []
    headers = []
    for idx, row in enumerate(rows):
        if idx == 0:  # 第一行是表格的標題
            headers = [col.text.strip() for col in row.find_elements(By.TAG_NAME, "th")]
        else:
            columns = row.find_elements(By.TAG_NAME, "td")
            row_data = [col.text.strip() for col in columns]
            # 使用更精確的條件來確保只抓取長榮(2603)、陽明(2609)、萬海(2615)的資料
            if row_data and row_data[0] in ['2603', '2609', '2615']:
                data.append(row_data)
    
    # 如果表格有表頭信息，將其與表格數據合併
    if headers:
        # 獲取當前日期並計算是第幾週
        today = datetime.date.today()
        year, week_num, _ = today.isocalendar()
        week_str = f'w{week_num:02}'
        
        # 將數據寫入 CSV 文件，文件名包含年份和第幾週
        filename = f'shipping_share_distribution_{year}_{week_str}.csv'
        with open(filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(headers)
            writer.writerows(data)
        
        print(f"Data has been written to {filename}")
        
        # 印出長榮、陽明、萬海的數據
        for row in data:
            print(row)
    else:
        print("Failed to find headers in the table.")
except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # 關閉瀏覽器
    driver.quit()

# 注意：
# 這段程式碼使用 Selenium 來模擬瀏覽器行為，因此可以抓取動態加載的數據。
# 請確保您已安裝 ChromeDriver，並根據您的環境設置 PATH。
# 加入了多個時間間隔 (time.sleep) 來減少過於頻繁的請求，從而減少被封禁的風險。


An error occurred: Message: no such element: Unable to locate element: {"method":"css selector","selector":"button.btn"}
  (Session info: chrome=130.0.6723.92)
Stacktrace:
0   chromedriver                        0x000000010147f648 cxxbridge1$str$ptr + 3645404
1   chromedriver                        0x0000000101477ea8 cxxbridge1$str$ptr + 3614780
2   chromedriver                        0x0000000100ee4104 cxxbridge1$string$len + 88416
3   chromedriver                        0x0000000100f26364 cxxbridge1$string$len + 359360
4   chromedriver                        0x0000000100f5fbd0 cxxbridge1$string$len + 594988
5   chromedriver                        0x0000000100f1af54 cxxbridge1$string$len + 313264
6   chromedriver                        0x0000000100f1bba4 cxxbridge1$string$len + 316416
7   chromedriver                        0x000000010144a1e8 cxxbridge1$str$ptr + 3427196
8   chromedriver                        0x000000010144d52c cxxbridge1$str$ptr + 3440320
9   chromedriver           

In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import csv
import datetime
import chromedriver_autoinstaller
import time

# 設置 Chrome 瀏覽器的選項
chrome_options = Options()
chrome_options.add_argument('--headless')  # 隱藏瀏覽器窗口
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# 自動安裝 ChromeDriver
chromedriver_autoinstaller.install()

# 初始化 WebDriver
driver = webdriver.Chrome(options=chrome_options)

# 訪問網站
url = "https://www.twse.com.tw/pcversion/zh/page/trading/fund/T86.html"
driver.get(url)

try:
    # 設置一個隨機時間間隔來避免過於頻繁的訪問，減少被封禁的風險
    time.sleep(5)  # 可以根據需要調整等待的秒數，這裡設置為 5 秒

    # 等待下拉選單元素加載出來
    wait = WebDriverWait(driver, 10)
    select_element = wait.until(EC.presence_of_element_located((By.NAME, 'selectType')))
    print("Successfully located select element for industry selection.")
    
    # 選擇 "航運業"
    select = Select(select_element)
    select.select_by_value('15')
    print("Successfully selected '航運業'.")

    # 設置另一個時間間隔以確保下拉選單更改後的穩定性
    time.sleep(3)

    # 等待表格加載完成後設置每頁顯示為 "全部"
    page_size_select_element = wait.until(EC.presence_of_element_located((By.NAME, 'report-table_length')))
    print("Successfully located page size selection element.")
    page_size_select = Select(page_size_select_element)
    page_size_select.select_by_value('-1')
    print("Successfully set page size to '全部'.")

    # 設置一個時間間隔以確保設置每頁顯示後的穩定性
    time.sleep(3)

    # 使用新的查詢按鈕選擇器
    try:
        search_button = driver.find_element(By.CSS_SELECTOR, "button.btn")
        search_button.click()
        print("Successfully clicked the search button.")
    except Exception as e:
        print("Failed to locate or click the search button. Exception:", e)
        raise

    # 設置另一個時間間隔，等待數據加載
    time.sleep(5)

    # 等待表格元素加載出來
    table = wait.until(EC.presence_of_element_located((By.ID, 'report-table')))
    print("Successfully located the report table.")
    
    # 找到所有行元素
    rows = table.find_elements(By.TAG_NAME, "tr")
    data = []
    headers = []
    for idx, row in enumerate(rows):
        if idx == 0:  # 第一行是表格的標題
            headers = [col.text.strip() for col in row.find_elements(By.TAG_NAME, "th")]
            print(f"Headers: {headers}")
        else:
            columns = row.find_elements(By.TAG_NAME, "td")
            row_data = [col.text.strip() for col in columns]
            # 使用更精確的條件來確保只抓取長榮(2603)、陽明(2609)、萬海(2615)的資料
            if row_data and row_data[0] in ['2603', '2609', '2615']:
                data.append(row_data)
                print(f"Row data for {row_data[0]}: {row_data}")
    
    # 如果表格有表頭信息，將其與表格數據合併
    if headers:
        # 獲取當前日期並計算是第幾週
        today = datetime.date.today()
        year, week_num, _ = today.isocalendar()
        week_str = f'w{week_num:02}'
        
        # 將數據寫入 CSV 文件，文件名包含年份和第幾週
        filename = f'shipping_share_distribution_{year}_{week_str}.csv'
        with open(filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(headers)
            writer.writerows(data)
        
        print(f"Data has been written to {filename}")
        
        # 印出長榮、陽明、萬海的數據
        for row in data:
            print(row)
    else:
        print("Failed to find headers in the table.")
except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # 關閉瀏覽器
    driver.quit()

# 注意：
# 這段程式碼使用 Selenium 來模擬瀏覽器行為，因此可以抓取動態加載的數據。
# 請確保您已安裝 ChromeDriver，並根據您的環境設置 PATH。
# 加入了多個時間間隔 (time.sleep) 來減少過於頻繁的請求，從而減少被封禁的風險。

Successfully located select element for industry selection.
Successfully selected '航運業'.
Successfully located page size selection element.
Successfully set page size to '全部'.
Failed to locate or click the search button. Exception: Message: no such element: Unable to locate element: {"method":"css selector","selector":"button.btn"}
  (Session info: chrome=130.0.6723.92)
Stacktrace:
0   chromedriver                        0x0000000102e1f648 cxxbridge1$str$ptr + 3645404
1   chromedriver                        0x0000000102e17ea8 cxxbridge1$str$ptr + 3614780
2   chromedriver                        0x0000000102884104 cxxbridge1$string$len + 88416
3   chromedriver                        0x00000001028c6364 cxxbridge1$string$len + 359360
4   chromedriver                        0x00000001028ffbd0 cxxbridge1$string$len + 594988
5   chromedriver                        0x00000001028baf54 cxxbridge1$string$len + 313264
6   chromedriver                        0x00000001028bbba4 cxxbridge1$string$len 

In [5]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import csv
import datetime
import chromedriver_autoinstaller
import time

# 設置 Chrome 瀏覽器的選項
chrome_options = Options()
chrome_options.add_argument('--headless')  # 隱藏瀏覽器窗口
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# 自動安裝 ChromeDriver
chromedriver_autoinstaller.install()

# 初始化 WebDriver
driver = webdriver.Chrome(options=chrome_options)

# 訪問網站
url = "https://www.twse.com.tw/pcversion/zh/page/trading/fund/T86.html"
driver.get(url)

try:
    # 設置一個隨機時間間隔來避免過於頻繁的訪問，減少被封禁的風險
    time.sleep(5)  # 可以根據需要調整等待的秒數，這裡設置為 5 秒

    # 等待下拉選單元素加載出來
    wait = WebDriverWait(driver, 10)
    select_element = wait.until(EC.presence_of_element_located((By.NAME, 'selectType')))
    print("Successfully located select element for industry selection.")
    
    # 選擇 "航運業"
    select = Select(select_element)
    select.select_by_value('15')
    print("Successfully selected '航運業'.")

    # 設置另一個時間間隔以確保下拉選單更改後的穩定性
    time.sleep(3)

    # 等待表格加載完成後設置每頁顯示為 "全部"
    page_size_select_element = wait.until(EC.presence_of_element_located((By.NAME, 'report-table_length')))
    print("Successfully located page size selection element.")
    page_size_select = Select(page_size_select_element)
    page_size_select.select_by_value('-1')
    print("Successfully set page size to '全部'.")

    # 設置一個時間間隔以確保設置每頁顯示後的穩定性
    time.sleep(3)

    # 使用新的查詢按鈕選擇器，嘗試使用 XPath 找到包含 "查詢" 文字的按鈕
    try:
        search_button = driver.find_element(By.XPATH, "//input[@type='button' and @value='查詢']")
        search_button.click()
        print("Successfully clicked the search button.")
    except Exception as e:
        print("Failed to locate or click the search button. Exception:", e)
        raise

    # 設置另一個時間間隔，等待數據加載
    time.sleep(5)

    # 等待表格元素加載出來
    table = wait.until(EC.presence_of_element_located((By.ID, 'report-table')))
    print("Successfully located the report table.")
    
    # 找到所有行元素
    rows = table.find_elements(By.TAG_NAME, "tr")
    data = []
    headers = []
    for idx, row in enumerate(rows):
        if idx == 0:  # 第一行是表格的標題
            headers = [col.text.strip() for col in row.find_elements(By.TAG_NAME, "th")]
            print(f"Headers: {headers}")
        else:
            columns = row.find_elements(By.TAG_NAME, "td")
            row_data = [col.text.strip() for col in columns]
            # 使用更精確的條件來確保只抓取長榮(2603)、陽明(2609)、萬海(2615)的資料
            if row_data and row_data[0] in ['2603', '2609', '2615']:
                data.append(row_data)
                print(f"Row data for {row_data[0]}: {row_data}")
    
    # 如果表格有表頭信息，將其與表格數據合併
    if headers:
        # 獲取當前日期並計算是第幾週
        today = datetime.date.today()
        year, week_num, _ = today.isocalendar()
        week_str = f'w{week_num:02}'
        
        # 將數據寫入 CSV 文件，文件名包含年份和第幾週
        filename = f'shipping_share_distribution_{year}_{week_str}.csv'
        with open(filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(headers)
            writer.writerows(data)
        
        print(f"Data has been written to {filename}")
        
        # 印出長榮、陽明、萬海的數據
        for row in data:
            print(row)
    else:
        print("Failed to find headers in the table.")
except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # 關閉瀏覽器
    driver.quit()

# 注意：
# 這段程式碼使用 Selenium 來模擬瀏覽器行為，因此可以抓取動態加載的數據。
# 請確保您已安裝 ChromeDriver，並根據您的環境設置 PATH。
# 加入了多個時間間隔 (time.sleep) 來減少過於頻繁的請求，從而減少被封禁的風險。

Successfully located select element for industry selection.
Successfully selected '航運業'.
Successfully located page size selection element.
Successfully set page size to '全部'.
Failed to locate or click the search button. Exception: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//input[@type='button' and @value='查詢']"}
  (Session info: chrome=130.0.6723.92)
Stacktrace:
0   chromedriver                        0x00000001051c3648 cxxbridge1$str$ptr + 3645404
1   chromedriver                        0x00000001051bbea8 cxxbridge1$str$ptr + 3614780
2   chromedriver                        0x0000000104c28104 cxxbridge1$string$len + 88416
3   chromedriver                        0x0000000104c6a364 cxxbridge1$string$len + 359360
4   chromedriver                        0x0000000104ca3bd0 cxxbridge1$string$len + 594988
5   chromedriver                        0x0000000104c5ef54 cxxbridge1$string$len + 313264
6   chromedriver                        0x0000000104c5fba4 

In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import csv
import datetime
import chromedriver_autoinstaller
import time

# 設置 Chrome 瀏覽器的選項
chrome_options = Options()
chrome_options.add_argument('--headless')  # 隱藏瀏覽器窗口
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# 自動安裝 ChromeDriver
chromedriver_autoinstaller.install()

# 初始化 WebDriver
driver = webdriver.Chrome(options=chrome_options)

# 訪問網站
url = "https://www.twse.com.tw/pcversion/zh/page/trading/fund/T86.html"
driver.get(url)

try:
    # 設置一個隨機時間間隔來避免過於頻繁的訪問，減少被封禁的風險
    time.sleep(5)  # 可以根據需要調整等待的秒數，這裡設置為 5 秒

    # 等待下拉選單元素加載出來
    wait = WebDriverWait(driver, 10)
    select_element = wait.until(EC.presence_of_element_located((By.NAME, 'selectType')))
    print("Successfully located select element for industry selection.")
    
    # 選擇 "航運業"
    select = Select(select_element)
    select.select_by_value('15')
    print("Successfully selected '航運業'.")

    # 設置另一個時間間隔以確保下拉選單更改後的穩定性
    time.sleep(3)

    # 等待表格加載完成後設置每頁顯示為 "全部"
    page_size_select_element = wait.until(EC.presence_of_element_located((By.NAME, 'report-table_length')))
    print("Successfully located page size selection element.")
    page_size_select = Select(page_size_select_element)
    page_size_select.select_by_value('-1')
    print("Successfully set page size to '全部'.")

    # 設置一個時間間隔以確保設置每頁顯示後的穩定性
    time.sleep(3)

    # 等待表格元素加載出來
    table = wait.until(EC.presence_of_element_located((By.ID, 'report-table')))
    print("Successfully located the report table.")
    
    # 找到所有行元素
    rows = table.find_elements(By.TAG_NAME, "tr")
    data = []
    headers = []
    for idx, row in enumerate(rows):
        if idx == 0:  # 第一行是表格的標題
            headers = [col.text.strip() for col in row.find_elements(By.TAG_NAME, "th")]
            print(f"Headers: {headers}")
        else:
            columns = row.find_elements(By.TAG_NAME, "td")
            row_data = [col.text.strip() for col in columns]
            # 使用更精確的條件來確保只抓取長榮(2603)、陽明(2609)、萬海(2615)的資料
            if row_data and row_data[0] in ['2603', '2609', '2615']:
                data.append(row_data)
                print(f"Row data for {row_data[0]}: {row_data}")
    
    # 如果表格有表頭信息，將其與表格數據合併
    if headers:
        # 獲取當前日期並計算是第幾週
        today = datetime.date.today()
        year, week_num, _ = today.isocalendar()
        week_str = f'w{week_num:02}'
        
        # 將數據寫入 CSV 文件，文件名包含年份和第幾週
        filename = f'shipping_share_distribution_{year}_{week_str}.csv'
        with open(filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(headers)
            writer.writerows(data)
        
        print(f"Data has been written to {filename}")
        
        # 印出長榮、陽明、萬海的數據
        for row in data:
            print(row)
    else:
        print("Failed to find headers in the table.")
except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # 關閉瀏覽器
    driver.quit()

# 注意：
# 這段程式碼使用 Selenium 來模擬瀏覽器行為，因此可以抓取動態加載的數據。
# 請確保您已安裝 ChromeDriver，並根據您的環境設置 PATH。
# 加入了多個時間間隔 (time.sleep) 來減少過於頻繁的請求，從而減少被封禁的風險。


Successfully located select element for industry selection.
Successfully selected '航運業'.
Successfully located page size selection element.
Successfully set page size to '全部'.
Successfully located the report table.
Headers: ['證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數']
Data has been written to shipping_share_distribution_2024_w45.csv


In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import csv
import datetime
import chromedriver_autoinstaller
import time

# 設置 Chrome 瀏覽器的選項
chrome_options = Options()
chrome_options.add_argument('--headless')  # 隱藏瀏覽器窗口
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# 自動安裝 ChromeDriver
chromedriver_autoinstaller.install()

# 初始化 WebDriver
driver = webdriver.Chrome(options=chrome_options)

# 訪問網站
url = "https://www.twse.com.tw/pcversion/zh/page/trading/fund/T86.html"
driver.get(url)

try:
    # 設置一個隨機時間間隔來避免過於頻繁的訪問，減少被封禁的風險
    time.sleep(5)  # 可以根據需要調整等待的秒數，這裡設置為 5 秒

    # 等待下拉選單元素加載出來
    wait = WebDriverWait(driver, 10)
    select_element = wait.until(EC.presence_of_element_located((By.NAME, 'selectType')))
    print("Successfully located select element for industry selection.")
    
    # 選擇 "航運業"
    select = Select(select_element)
    select.select_by_value('15')
    print("Successfully selected '航運業'.")

    # 設置另一個時間間隔以確保下拉選單更改後的穩定性
    time.sleep(5)

    # 等待表格加載完成後設置每頁顯示為 "全部"
    page_size_select_element = wait.until(EC.presence_of_element_located((By.NAME, 'report-table_length')))
    print("Successfully located page size selection element.")
    page_size_select = Select(page_size_select_element)
    page_size_select.select_by_value('-1')
    print("Successfully set page size to '全部'.")

    # 設置一個時間間隔以確保設置每頁顯示後的穩定性
    time.sleep(10)  # 增加延遲以確保資料已完全加載

    # 等待表格元素加載出來
    table = wait.until(EC.presence_of_element_located((By.ID, 'report-table')))
    print("Successfully located the report table.")
    
    # 找到所有行元素
    rows = table.find_elements(By.TAG_NAME, "tr")
    data = []
    headers = []
    for idx, row in enumerate(rows):
        if idx == 0:  # 第一行是表格的標題
            headers = [col.text.strip() for col in row.find_elements(By.TAG_NAME, "th")]
            print(f"Headers: {headers}")
        else:
            columns = row.find_elements(By.TAG_NAME, "td")
            row_data = [col.text.strip() for col in columns]
            # 使用更精確的條件來確保只抓取長榮(2603)、陽明(2609)、萬海(2615)的資料
            if row_data and row_data[0] in ['2603', '2609', '2615']:
                data.append(row_data)
                print(f"Row data for {row_data[0]}: {row_data}")

    # 如果沒有抓取到任何資料，打印提示訊息
    if not data:
        print("No data found for the specified companies (2603, 2609, 2615). Please check if the table has loaded correctly.")
    
    # 如果表格有表頭信息，將其與表格數據合併
    if headers:
        # 獲取當前日期並計算是第幾週
        today = datetime.date.today()
        year, week_num, _ = today.isocalendar()
        week_str = f'w{week_num:02}'
        
        # 將數據寫入 CSV 文件，文件名包含年份和第幾週
        filename = f'shipping_share_distribution_{year}_{week_str}.csv'
        with open(filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(headers)
            writer.writerows(data)
        
        print(f"Data has been written to {filename}")
        
        # 印出長榮、陽明、萬海的數據
        for row in data:
            print(row)
    else:
        print("Failed to find headers in the table.")
except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # 關閉瀏覽器
    driver.quit()

# 注意：
# 這段程式碼使用 Selenium 來模擬瀏覽器行為，因此可以抓取動態加載的數據。
# 請確保您已安裝 ChromeDriver，並根據您的環境設置 PATH。
# 加入了多個時間間隔 (time.sleep) 來減少過於頻繁的請求，從而減少被封禁的風險。


Successfully located select element for industry selection.
Successfully selected '航運業'.
Successfully located page size selection element.
Successfully set page size to '全部'.
Successfully located the report table.
Headers: ['證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數']
No data found for the specified companies (2603, 2609, 2615). Please check if the table has loaded correctly.
Data has been written to shipping_share_distribution_2024_w45.csv


In [10]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import csv
import datetime
import chromedriver_autoinstaller
import time

# 設置 Chrome 瀏覽器的選項
chrome_options = Options()
chrome_options.add_argument('--headless')  # 隱藏瀏覽器窗口
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# 自動安裝 ChromeDriver
chromedriver_autoinstaller.install()

# 初始化 WebDriver
driver = webdriver.Chrome(options=chrome_options)

# 訪問網站
url = "https://www.twse.com.tw/pcversion/zh/page/trading/fund/T86.html"
driver.get(url)

try:
    # 設置一個隨機時間間隔來避免過於頻繁的訪問，減少被封禁的風險
    time.sleep(5)  # 可以根據需要調整等待的秒數，這裡設置為 5 秒

    # 等待下拉選單元素加載出來
    wait = WebDriverWait(driver, 10)
    select_element = wait.until(EC.presence_of_element_located((By.NAME, 'selectType')))
    print("Successfully located select element for industry selection.")
    
    # 選擇 "航運業"
    select = Select(select_element)
    select.select_by_value('15')
    print("Successfully selected '航運業'.")

    # 設置另一個時間間隔以確保下拉選單更改後的穩定性
    time.sleep(5)

    # 等待表格加載完成後設置每頁顯示為 "全部"
    page_size_select_element = wait.until(EC.presence_of_element_located((By.NAME, 'report-table_length')))
    print("Successfully located page size selection element.")
    page_size_select = Select(page_size_select_element)
    page_size_select.select_by_value('-1')
    print("Successfully set page size to '全部'.")

    # 設置一個時間間隔以確保設置每頁顯示後的穩定性
    time.sleep(10)  # 增加延遲以確保資料已完全加載

    # 等待表格元素加載出來
    table = wait.until(EC.presence_of_element_located((By.ID, 'report-table')))
    print("Successfully located the report table.")
    
    # 找到所有行元素
    rows = table.find_elements(By.TAG_NAME, "tr")
    data = []
    headers = []
    for idx, row in enumerate(rows):
        if idx == 0:  # 第一行是表格的標題
            headers = [col.text.strip() for col in row.find_elements(By.TAG_NAME, "th")]
            print(f"Headers: {headers}")
        else:
            columns = row.find_elements(By.TAG_NAME, "td")
            row_data = [col.text.strip() for col in columns]
            # 使用更精確的條件來確保只抓取長榮、陽明、萬海的資料
            if row_data and row_data[1] in ['長榮', '陽明', '萬海']:
                data.append(row_data)
                print(f"Row data for {row_data[1]}: {row_data}")

    # 如果沒有抓取到任何資料，打印提示訊息
    if not data:
        print("No data found for the specified companies (長榮, 陽明, 萬海). Please check if the table has loaded correctly.")
    
    # 如果表格有表頭信息，將其與表格數據合併
    if headers:
        # 獲取當前日期並計算是第幾週
        today = datetime.date.today()
        year, week_num, _ = today.isocalendar()
        week_str = f'w{week_num:02}'
        
        # 將數據寫入 CSV 文件，文件名包含年份和第幾週
        filename = f'shipping_share_distribution_{year}_{week_str}.csv'
        with open(filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(headers)
            writer.writerows(data)
        
        print(f"Data has been written to {filename}")
        
        # 印出長榮、陽明、萬海的數據
        for row in data:
            print(row)
    else:
        print("Failed to find headers in the table.")
except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # 關閉瀏覽器
    driver.quit()

# 注意：
# 這段程式碼使用 Selenium 來模擬瀏覽器行為，因此可以抓取動態加載的數據。
# 請確保您已安裝 ChromeDriver，並根據您的環境設置 PATH。
# 加入了多個時間間隔 (time.sleep) 來減少過於頻繁的請求，從而減少被封禁的風險。

Successfully located select element for industry selection.
Successfully selected '航運業'.
Successfully located page size selection element.
Successfully set page size to '全部'.
Successfully located the report table.
Headers: ['證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數']
No data found for the specified companies (長榮, 陽明, 萬海). Please check if the table has loaded correctly.
Data has been written to shipping_share_distribution_2024_w45.csv


In [11]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import csv
import datetime
import chromedriver_autoinstaller
import time

# 設置 Chrome 瀏覽器的選項
chrome_options = Options()
chrome_options.add_argument('--headless')  # 隱藏瀏覽器窗口
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# 自動安裝 ChromeDriver
chromedriver_autoinstaller.install()

# 初始化 WebDriver
driver = webdriver.Chrome(options=chrome_options)

# 訪問網站
url = "https://www.twse.com.tw/pcversion/zh/page/trading/fund/T86.html"
driver.get(url)

try:
    # 設置一個隨機時間間隔來避免過於頻繁的訪問，減少被封禁的風險
    time.sleep(5)  # 可以根據需要調整等待的秒數，這裡設置為 5 秒

    # 等待下拉選單元素加載出來
    wait = WebDriverWait(driver, 10)
    select_element = wait.until(EC.presence_of_element_located((By.NAME, 'selectType')))
    print("Successfully located select element for industry selection.")
    
    # 選擇 "航運業"
    select = Select(select_element)
    select.select_by_value('15')
    print("Successfully selected '航運業'.")

    # 設置另一個時間間隔以確保下拉選單更改後的穩定性
    time.sleep(5)

    # 等待表格加載完成後設置每頁顯示為 "全部"
    page_size_select_element = wait.until(EC.presence_of_element_located((By.NAME, 'report-table_length')))
    print("Successfully located page size selection element.")
    page_size_select = Select(page_size_select_element)
    page_size_select.select_by_value('-1')
    print("Successfully set page size to '全部'.")

    # 設置一個時間間隔以確保設置每頁顯示後的穩定性
    time.sleep(10)  # 增加延遲以確保資料已完全加載

    # 等待表格元素加載出來
    table = wait.until(EC.presence_of_element_located((By.ID, 'report-table')))
    print("Successfully located the report table.")
    
    # 打印整個表格的 HTML 內容，確認其是否正確加載
    table_html = table.get_attribute('outerHTML')
    with open('table_snapshot.html', 'w', encoding='utf-8') as f:
        f.write(table_html)
    print("Table HTML content has been saved to 'table_snapshot.html' for verification.")

    # 找到所有行元素
    rows = table.find_elements(By.TAG_NAME, "tr")
    data = []
    headers = []
    for idx, row in enumerate(rows):
        if idx == 0:  # 第一行是表格的標題
            headers = [col.text.strip() for col in row.find_elements(By.TAG_NAME, "th")]
            print(f"Headers: {headers}")
        else:
            columns = row.find_elements(By.TAG_NAME, "td")
            row_data = [col.text.strip() for col in columns]
            # 使用更精確的條件來確保只抓取長榮、陽明、萬海的資料
            if row_data and row_data[1] in ['長榮', '陽明', '萬海']:
                data.append(row_data)
                print(f"Row data for {row_data[1]}: {row_data}")

    # 如果沒有抓取到任何資料，打印提示訊息
    if not data:
        print("No data found for the specified companies (長榮, 陽明, 萬海). Please check if the table has loaded correctly.")
    
    # 如果表格有表頭信息，將其與表格數據合併
    if headers:
        # 獲取當前日期並計算是第幾週
        today = datetime.date.today()
        year, week_num, _ = today.isocalendar()
        week_str = f'w{week_num:02}'
        
        # 將數據寫入 CSV 文件，文件名包含年份和第幾週
        filename = f'shipping_share_distribution_{year}_{week_str}.csv'
        with open(filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(headers)
            writer.writerows(data)
        
        print(f"Data has been written to {filename}")
        
        # 印出長榮、陽明、萬海的數據
        for row in data:
            print(row)
    else:
        print("Failed to find headers in the table.")
except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # 關閉瀏覽器
    driver.quit()

# 注意：
# 這段程式碼使用 Selenium 來模擬瀏覽器行為，因此可以抓取動態加載的數據。
# 請確保您已安裝 ChromeDriver，並根據您的環境設置 PATH。
# 加入了多個時間間隔 (time.sleep) 來減少過於頻繁的請求，從而減少被封禁的風險。

Successfully located select element for industry selection.
Successfully selected '航運業'.
Successfully located page size selection element.
Successfully set page size to '全部'.
Successfully located the report table.
Table HTML content has been saved to 'table_snapshot.html' for verification.
Headers: ['證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數']
No data found for the specified companies (長榮, 陽明, 萬海). Please check if the table has loaded correctly.
Data has been written to shipping_share_distribution_2024_w45.csv


In [12]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import csv
import datetime
import chromedriver_autoinstaller
import time

# 設置 Chrome 瀏覽器的選項
chrome_options = Options()
chrome_options.add_argument('--headless')  # 隱藏瀏覽器窗口
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# 自動安裝 ChromeDriver
chromedriver_autoinstaller.install()

# 初始化 WebDriver
driver = webdriver.Chrome(options=chrome_options)

# 訪問網站
url = "https://www.twse.com.tw/pcversion/zh/page/trading/fund/T86.html"
driver.get(url)

try:
    # 設置一個隨機時間間隔來避免過於頻繁的訪問，減少被封禁的風險
    time.sleep(5)  # 可以根據需要調整等待的秒數，這裡設置為 5 秒

    # 等待下拉選單元素加載出來
    wait = WebDriverWait(driver, 10)
    select_element = wait.until(EC.presence_of_element_located((By.NAME, 'selectType')))
    print("Successfully located select element for industry selection.")
    
    # 選擇 "航運業"
    select = Select(select_element)
    select.select_by_value('15')
    print("Successfully selected '航運業'.")

    # 設置另一個時間間隔以確保下拉選單更改後的穩定性
    time.sleep(3)

    # 按下查詢按鈕
    search_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@class='button search']")))
    search_button.click()
    print("Successfully clicked the search button.")

    # 設置一個時間間隔以確保頁面刷新後的穩定性
    time.sleep(5)

    # 等待表格加載完成後設置每頁顯示為 "全部"
    page_size_select_element = wait.until(EC.presence_of_element_located((By.NAME, 'report-table_length')))
    print("Successfully located page size selection element.")
    page_size_select = Select(page_size_select_element)
    page_size_select.select_by_value('-1')
    print("Successfully set page size to '全部'.")

    # 設置一個時間間隔以確保設置每頁顯示後的穩定性
    time.sleep(10)  # 增加延遲以確保資料已完全加載

    # 等待表格元素加載出來
    table = wait.until(EC.presence_of_element_located((By.ID, 'report-table')))
    print("Successfully located the report table.")
    
    # 打印整個表格的 HTML 內容，確認其是否正確加載
    table_html = table.get_attribute('outerHTML')
    with open('table_snapshot.html', 'w', encoding='utf-8') as f:
        f.write(table_html)
    print("Table HTML content has been saved to 'table_snapshot.html' for verification.")

    # 找到所有行元素
    rows = table.find_elements(By.TAG_NAME, "tr")
    data = []
    headers = []
    for idx, row in enumerate(rows):
        if idx == 0:  # 第一行是表格的標題
            headers = [col.text.strip() for col in row.find_elements(By.TAG_NAME, "th")]
            print(f"Headers: {headers}")
        else:
            columns = row.find_elements(By.TAG_NAME, "td")
            row_data = [col.text.strip() for col in columns]
            # 使用更精確的條件來確保只抓取長榮、陽明、萬海的資料
            if row_data and row_data[1] in ['長榮', '陽明', '萬海']:
                data.append(row_data)
                print(f"Row data for {row_data[1]}: {row_data}")

    # 如果沒有抓取到任何資料，打印提示訊息
    if not data:
        print("No data found for the specified companies (長榮, 陽明, 萬海). Please check if the table has loaded correctly.")
    
    # 如果表格有表頭信息，將其與表格數據合併
    if headers:
        # 獲取當前日期並計算是第幾週
        today = datetime.date.today()
        year, week_num, _ = today.isocalendar()
        week_str = f'w{week_num:02}'
        
        # 將數據寫入 CSV 文件，文件名包含年份和第幾週
        filename = f'shipping_share_distribution_{year}_{week_str}.csv'
        with open(filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(headers)
            writer.writerows(data)
        
        print(f"Data has been written to {filename}")
        
        # 印出長榮、陽明、萬海的數據
        for row in data:
            print(row)
    else:
        print("Failed to find headers in the table.")
except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # 關閉瀏覽器
    driver.quit()

# 注意：
# 這段程式碼使用 Selenium 來模擬瀏覽器行為，因此可以抓取動態加載的數據。
# 請確保您已安裝 ChromeDriver，並根據您的環境設置 PATH。
# 加入了多個時間間隔 (time.sleep) 來減少過於頻繁的請求，從而減少被封禁的風險。

Successfully located select element for industry selection.
Successfully selected '航運業'.
Successfully clicked the search button.
Successfully located page size selection element.
Successfully set page size to '全部'.
Successfully located the report table.
Table HTML content has been saved to 'table_snapshot.html' for verification.
Headers: ['證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數']
Row data for 陽明: ['2609', '陽明', '34,208,090', '12,595,128', '21,612,962', '0', '0', '0', '148,000', '0', '148,000', '4,404,522', '2,185,128', '173,000', '2,012,128', '2,716,657', '324,263', '2,392,394', '26,165,484']
Row data for 萬海: ['2615', '萬海', '26,739,250', '13,065,975', '13,673,275', '0', '0', '0', '1,283,000', '0', '1,283,000', '2,251,646', '2,385,718', '888,998', '1,496,720', '

In [13]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import csv
import datetime
import chromedriver_autoinstaller
import time

# 設置 Chrome 瀏覽器的選項
chrome_options = Options()
chrome_options.add_argument('--headless')  # 隱藏瀏覽器窗口
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# 自動安裝 ChromeDriver
chromedriver_autoinstaller.install()

# 初始化 WebDriver
driver = webdriver.Chrome(options=chrome_options)

# 訪問網站
url = "https://www.twse.com.tw/pcversion/zh/page/trading/fund/T86.html"
driver.get(url)

try:
    # 設置一個隨機時間間隔來避免過於頻繁的訪問，減少被封禁的風險
    time.sleep(5)  # 可以根據需要調整等待的秒數，這裡設置為 5 秒

    # 等待下拉選單元素加載出來
    wait = WebDriverWait(driver, 10)
    select_element = wait.until(EC.presence_of_element_located((By.NAME, 'selectType')))
    print("Successfully located select element for industry selection.")
    
    # 選擇 "航運業"
    select = Select(select_element)
    select.select_by_value('15')
    print("Successfully selected '航運業'.")

    # 設置另一個時間間隔以確保下拉選單更改後的穩定性
    time.sleep(3)

    # 按下查詢按鈕
    search_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@class='button search']")))
    search_button.click()
    print("Successfully clicked the search button.")

    # 設置一個時間間隔以確保頁面刷新後的穩定性
    time.sleep(5)

    # 等待表格加載完成後設置每頁顯示為 "全部"
    page_size_select_element = wait.until(EC.presence_of_element_located((By.NAME, 'report-table_length')))
    print("Successfully located page size selection element.")
    page_size_select = Select(page_size_select_element)
    page_size_select.select_by_value('-1')
    print("Successfully set page size to '全部'.")

    # 設置一個時間間隔以確保設置每頁顯示後的穩定性
    time.sleep(10)  # 增加延遲以確保資料已完全加載

    # 等待表格元素加載出來
    table = wait.until(EC.presence_of_element_located((By.ID, 'report-table')))
    print("Successfully located the report table.")

    # 找到所有行元素
    rows = table.find_elements(By.TAG_NAME, "tr")
    data = []
    headers = []
    for idx, row in enumerate(rows):
        if idx == 0:  # 第一行是表格的標題
            headers = [col.text.strip() for col in row.find_elements(By.TAG_NAME, "th")]
            print(f"Headers: {headers}")
        else:
            columns = row.find_elements(By.TAG_NAME, "td")
            row_data = [col.text.strip() for col in columns]
            # 使用證券代號來抓取指定的公司資料 (2603: 長榮, 2609: 陽明, 2615: 萬海)
            if row_data and row_data[0] in ['2603', '2609', '2615']:
                data.append(row_data)
                print(f"Row data for {row_data[0]}: {row_data}")

    # 如果沒有抓取到任何資料，打印提示訊息
    if not data:
        print("No data found for the specified companies (2603, 2609, 2615). Please check if the table has loaded correctly.")
    
    # 如果表格有表頭信息，將其與表格數據合併
    if headers:
        # 獲取當前日期，並使用 YYYY-MM-DD 格式保存文件
        today = datetime.date.today()
        date_str = today.strftime('%Y-%m-%d')
        
        # 將數據寫入 CSV 文件，文件名包含日期
        filename = f'shipping_share_distribution_{date_str}.csv'
        with open(filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(headers)
            writer.writerows(data)
        
        print(f"Data has been written to {filename}")
        
        # 創建精簡版的 CSV 文件，只保存特定欄位
        simplified_headers = [headers[i] for i in [0, 1, 4, 10, 11, 18]]
        simplified_data = [[row[i] for i in [0, 1, 4, 10, 11, 18]] for row in data]
        simplified_filename = f'shipping_share_distribution_simplified_{date_str}.csv'
        with open(simplified_filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(simplified_headers)
            writer.writerows(simplified_data)
        
        print(f"Simplified data has been written to {simplified_filename}")
    else:
        print("Failed to find headers in the table.")
except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # 關閉瀏覽器
    driver.quit()

# 注意：
# 這段程式碼使用 Selenium 來模擬瀏覽器行為，因此可以抓取動態加載的數據。
# 請確保您已安裝 ChromeDriver，並根據您的環境設置 PATH。
# 加入了多個時間間隔 (time.sleep) 來減少過於頻繁的請求，從而減少被封禁的風險。


Successfully located select element for industry selection.
Successfully selected '航運業'.
Successfully clicked the search button.
Successfully located page size selection element.
Successfully set page size to '全部'.
Successfully located the report table.
Headers: ['證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數']
Row data for 2609: ['2609', '陽明', '34,208,090', '12,595,128', '21,612,962', '0', '0', '0', '148,000', '0', '148,000', '4,404,522', '2,185,128', '173,000', '2,012,128', '2,716,657', '324,263', '2,392,394', '26,165,484']
Row data for 2615: ['2615', '萬海', '26,739,250', '13,065,975', '13,673,275', '0', '0', '0', '1,283,000', '0', '1,283,000', '2,251,646', '2,385,718', '888,998', '1,496,720', '1,480,188', '725,262', '754,926', '17,207,921']
Row data for 2603: ['2603