In [None]:
import sys
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
import logging

# 設置日誌
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def scrape_product_data(product_url):
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    driver = webdriver.Chrome(options=options)
    driver.get(product_url)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)

    data = {}

    try:
        data['Title'] = driver.find_element(By.XPATH, //li[@class = 'last']).text
    except Exception as e:
        logging.error(f"Error getting Title: {e}")
        data['Title'] = "N/A"
    try:
        data['Price'] = driver.find_element(By.CLASS_NAME, 'price').text
    except Exception as e:
        logging.error(f"Error getting Price: {e}")
        data['Price'] = "N/A"
    try:
        data['Price_Discount'] = driver.find_element(By.CLASS_NAME, 'discount').text
    except Exception as e:
        logging.error(f"Error getting Price_Discount: {e}")
        data['Price_Discount'] = "N/A"
    try:
        data['Rate'] = driver.find_element(By.CLASS_NAME, 'averageRating').text
    except Exception as e:
        logging.error(f"Error getting Rate: {e}")
        data['Rate'] = "N/A"
    try:
        origin_elements = driver.find_elements(By.CLASS_NAME, 'productPackingSpec')
        origin = "N/A"
        for element in origin_elements:
            if "產地" in element.text:
                origin = element.text.split("產地 ")[1]
                break
        data['Origin'] = origin
    except Exception as e:
        logging.error(f"Error getting Origin: {e}")
        data['Origin'] = "N/A"
    try:
        short_desc_top_note = "N/A"
        short_desc_element = driver.find_element(By.CLASS_NAME, 'short-desc').text
        # for line in short_desc_element.split('\n'):
        #     if '前調：' in line:
        #         short_desc_top_note = line.split('：')[1].split('、')
        #         break
        # data['Short_Desc_Top_Note'] = short_desc_top_note
    except Exception as e:
        logging.error(f"Error getting Short_Desc_Top_Note: {e}")
        data['Short_Desc_Top_Note'] = "N/A"

    driver.quit()
    return data

# 初始化 WebDriver 選項以無頭模式運行
options = webdriver.ChromeOptions()
options.add_argument('--headless')

# 初始化 WebDriver
driver = webdriver.Chrome(options=options)
driver.get("https://www.hktvmall.com/hktv/zh/search_a?keyword=%E9%A6%99%E6%B0%B4&sort=hktvProductSellingPriceAscV2&page=0")

# 初始化 WebDriverWait
wait = WebDriverWait(driver, 20)

# 獲取總頁數
Total_Page_element = wait.until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="search-result-wrapper"]/div/div[3]/div[3]/div/span'))
).text
page_TOP_num = int(re.search(r'\d+', Total_Page_element).group())

# 創建一個空的 DataFrame
columns = ["Title", "Price", "Price_Discount", "Rate", "Short_Desc_Top_Note", "Origin"]
data = pd.DataFrame(columns=columns)

page_number = 1
product_urls = []
last_page_reached = False

# 在循環外初始化 ThreadPoolExecutor
executor = ThreadPoolExecutor(max_workers=20)

while page_number <= page_TOP_num and not last_page_reached:
    logging.info(f"正在處理第 {page_number} 頁")
    product_index = 1

    while True:
        try:
            xpath = f'//*[@id="algolia-search-result-container"]/div/div/span[{product_index}]/div/a'
            product_link = wait.until(EC.presence_of_element_located((By.XPATH, xpath)))
            product_urls.append(product_link.get_attribute('href'))
            product_index += 1
        except:
            break

    # 從每個商品 URL 抓取數據
    future_to_url = {executor.submit(scrape_product_data, url): url for url in product_urls}
    for future in as_completed(future_to_url):
        try:
            product_data = future.result()
            new_row = pd.DataFrame([product_data])
            data = pd.concat([data, new_row], ignore_index=True)
        except Exception as e:
            logging.error(f"抓取 {future_to_url[future]} 時出錯: {e}")

    # 重置 product_urls 以便處理下一頁
    product_urls = []

    page_number += 1
    try:
        nextPageButton = wait.until(EC.presence_of_element_located((By.XPATH, "//button[contains(@id, 'paginationMenu_nextBtn')]")))
        if "disabled" in nextPageButton.get_attribute("class"):
            logging.info("已經到達最後一頁")
            last_page_reached = True
            break
        
        driver.execute_script("arguments[0].scrollIntoView();", nextPageButton)
        time.sleep(1)
        driver.execute_script("arguments[0].click();", nextPageButton)
        time.sleep(5)
        wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="algolia-search-result-container"]/div/div/span[1]/div/a')))
    except Exception as e:
        logging.error(f"導航到下一頁時出錯: {e}")
        last_page_reached = True
        break

driver.quit()
executor.shutdown()

timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
file_name = f'product_{timestamp}.csv'

data.to_csv(file_name, index=False)
logging.info(f"數據已保存到 {file_name}")

In [None]:
display(data)

test version

In [None]:
import sys
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
import logging

# 設置日誌
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def scrape_product_data(product_url):
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    driver = webdriver.Chrome(options=options)
    driver.get(product_url)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)

    data = {}

    try:
        data['Title'] = driver.find_element(By.XPATH, //li[@class = 'last']).text
    except Exception as e:
        logging.error(f"Error getting Title: {e}")
        data['Title'] = "N/A"
    try:
        data['Price'] = driver.find_element(By.CLASS_NAME, 'price').text
    except Exception as e:
        logging.error(f"Error getting Price: {e}")
        data['Price'] = "N/A"
    try:
        data['Price_Discount'] = driver.find_element(By.CLASS_NAME, 'discount').text
    except Exception as e:
        logging.error(f"Error getting Price_Discount: {e}")
        data['Price_Discount'] = "N/A"
    try:
        data['Rate'] = driver.find_element(By.CLASS_NAME, 'averageRating').text
    except Exception as e:
        logging.error(f"Error getting Rate: {e}")
        data['Rate'] = "N/A"
    try:
        origin_elements = driver.find_elements(By.CLASS_NAME, 'productPackingSpec')
        origin = "N/A"
        for element in origin_elements:
            if "產地" in element.text:
                origin = element.text.split("產地 ")[1]
                break
        data['Origin'] = origin
    except Exception as e:
        logging.error(f"Error getting Origin: {e}")
        data['Origin'] = "N/A"
    try:
        short_desc_top_note = "N/A"
        short_desc_element = driver.find_element(By.CLASS_NAME, 'short-desc').text
        # for line in short_desc_element.split('\n'):
        #     if '前調：' in line:
        #         short_desc_top_note = line.split('：')[1].split('、')
        #         break
        # data['Short_Desc_Top_Note'] = short_desc_top_note
    except Exception as e:
        logging.error(f"Error getting Short_Desc_Top_Note: {e}")
        data['Short_Desc_Top_Note'] = "N/A"

    driver.quit()
    return data

# 初始化 WebDriver 選項以無頭模式運行
options = webdriver.ChromeOptions()
options.add_argument('--headless')

# 初始化 WebDriver
driver = webdriver.Chrome(options=options)
driver.get("https://www.hktvmall.com/hktv/zh/search_a?keyword=%E9%A6%99%E6%B0%B4&sort=hktvProductSellingPriceAscV2&page=0")

# 初始化 WebDriverWait
wait = WebDriverWait(driver, 20)

# 獲取總頁數
Total_Page_element = wait.until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="search-result-wrapper"]/div/div[3]/div[3]/div/span'))
).text
page_TOP_num = int(re.search(r'\d+', Total_Page_element).group())

# 創建一個空的 DataFrame
columns = ["Title", "Price", "Price_Discount", "Rate", "Short_Desc_Top_Note", "Origin"]
data = pd.DataFrame(columns=columns)

page_number = 1
product_urls = []
last_page_reached = False

# 在循環外初始化 ThreadPoolExecutor
executor = ThreadPoolExecutor(max_workers=20)

while page_number <= page_TOP_num and not last_page_reached:
    logging.info(f"正在處理第 {page_number} 頁")
    product_index = 1

    while True:
        try:
            xpath = f'//*[@id="algolia-search-result-container"]/div/div/span[{product_index}]/div/a'
            product_link = wait.until(EC.presence_of_element_located((By.XPATH, xpath)))
            product_urls.append(product_link.get_attribute('href'))
            product_index += 1
        except:
            break

    # 從每個商品 URL 抓取數據
    future_to_url = {executor.submit(scrape_product_data, url): url for url in product_urls}
    for future in as_completed(future_to_url):
        try:
            product_data = future.result()
            new_row = pd.DataFrame([product_data])
            data = pd.concat([data, new_row], ignore_index=True)
        except Exception as e:
            logging.error(f"抓取 {future_to_url[future]} 時出錯: {e}")

    # 重置 product_urls 以便處理下一頁
    product_urls = []

    page_number += 1
    try:
        nextPageButton = wait.until(EC.presence_of_element_located((By.XPATH, "//button[contains(@id, 'paginationMenu_nextBtn')]")))
        if "disabled" in nextPageButton.get_attribute("class"):
            logging.info("已經到達最後一頁")
            last_page_reached = True
            break
        
        driver.execute_script("arguments[0].scrollIntoView();", nextPageButton)
        time.sleep(1)
        driver.execute_script("arguments[0].click();", nextPageButton)
        time.sleep(5)
        wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="algolia-search-result-container"]/div/div/span[1]/div/a')))
    except Exception as e:
        logging.error(f"導航到下一頁時出錯: {e}")
        last_page_reached = True
        break

driver.quit()
executor.shutdown()

timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
file_name = f'product_{timestamp}.csv'

data.to_csv(file_name, index=False)
logging.info(f"數據已保存到 {file_name}")



In [None]:
display(data)

5/6 5pm version by Tak

In [None]:
import sys
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
import logging

# 設置日誌
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def scrape_product_data(product_url):
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    driver = webdriver.Chrome(options=options)
    driver.get(product_url)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)

    data = {}

    try:
        data['Title'] = driver.find_element(By.XPATH, //li[@class = 'last']).text
    except Exception as e:
        logging.error(f"Error getting Title: {e}")
        data['Title'] = "N/A"
    try:
        data['Price'] = driver.find_element(By.CLASS_NAME, 'price').text
    except Exception as e:
        logging.error(f"Error getting Price: {e}")
        data['Price'] = "N/A"
    try:
        data['Price_Discount'] = driver.find_element(By.CLASS_NAME, 'discount').text
    except Exception as e:
        logging.error(f"Error getting Price_Discount: {e}")
        data['Price_Discount'] = "N/A"
    try:
        data['Rate'] = driver.find_element(By.CLASS_NAME, 'averageRating').text
    except Exception as e:
        logging.error(f"Error getting Rate: {e}")
        data['Rate'] = "N/A"
    try:
        origin_elements = driver.find_elements(By.CLASS_NAME, 'productPackingSpec')
        origin = "N/A"
        for element in origin_elements:
            if "產地" in element.text:
                origin = element.text.split("產地 ")[1]
                break
        data['Origin'] = origin
    except Exception as e:
        logging.error(f"Error getting Origin: {e}")
        data['Origin'] = "N/A"
    try:
        short_desc_top_note = "N/A"
        short_desc_element = driver.find_element(By.CLASS_NAME, 'short-desc').text
        # for line in short_desc_element.split('\n'):
        #     if '前調：' in line:
        #         short_desc_top_note = line.split('：')[1].split('、')
        #         break
        # data['Short_Desc_Top_Note'] = short_desc_top_note
    except Exception as e:
        logging.error(f"Error getting Short_Desc_Top_Note: {e}")
        data['Short_Desc_Top_Note'] = "N/A"

    driver.quit()
    return data

# 初始化 WebDriver 選項以無頭模式運行
options = webdriver.ChromeOptions()
options.add_argument('--headless')

# 初始化 WebDriver
driver = webdriver.Chrome(options=options)
driver.get("https://www.hktvmall.com/hktv/zh/search_a?keyword=%E9%A6%99%E6%B0%B4&sort=hktvProductSellingPriceAscV2&page=0")

# 初始化 WebDriverWait
wait = WebDriverWait(driver, 20)

# 獲取總頁數
Total_Page_element = wait.until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="search-result-wrapper"]/div/div[3]/div[3]/div/span'))
).text
page_TOP_num = int(re.search(r'\d+', Total_Page_element).group())

# 創建一個空的 DataFrame
columns = ["Title", "Price", "Price_Discount", "Rate", "Short_Desc_Top_Note", "Origin"]
data = pd.DataFrame(columns=columns)

page_number = 1
product_urls = []
last_page_reached = False

# 在循環外初始化 ThreadPoolExecutor
executor = ThreadPoolExecutor(max_workers=20)

while page_number <= page_TOP_num and not last_page_reached:
    logging.info(f"正在處理第 {page_number} 頁")
    product_index = 1

    while True:
        try:
            xpath = f'//*[@id="algolia-search-result-container"]/div/div/span[{product_index}]/div/a'
            product_link = wait.until(EC.presence_of_element_located((By.XPATH, xpath)))
            product_urls.append(product_link.get_attribute('href'))
            product_index += 1
        except:
            break

    # 從每個商品 URL 抓取數據
    future_to_url = {executor.submit(scrape_product_data, url): url for url in product_urls}
    for future in as_completed(future_to_url):
        try:
            product_data = future.result()
            new_row = pd.DataFrame([product_data])
            data = pd.concat([data, new_row], ignore_index=True)
        except Exception as e:
            logging.error(f"抓取 {future_to_url[future]} 時出錯: {e}")

    # 重置 product_urls 以便處理下一頁
    product_urls = []

    page_number += 1
    try:
        nextPageButton = wait.until(EC.presence_of_element_located((By.XPATH, "//button[contains(@id, 'paginationMenu_nextBtn')]")))
        if "disabled" in nextPageButton.get_attribute("class"):
            logging.info("已經到達最後一頁")
            last_page_reached = True
            break
        
        driver.execute_script("arguments[0].scrollIntoView();", nextPageButton)
        time.sleep(1)
        driver.execute_script("arguments[0].click();", nextPageButton)
        time.sleep(5)
        wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="algolia-search-result-container"]/div/div/span[1]/div/a')))
    except Exception as e:
        logging.error(f"導航到下一頁時出錯: {e}")
        last_page_reached = True
        break

driver.quit()
executor.shutdown()

timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
file_name = f'product_{timestamp}.csv'

data.to_csv(file_name, index=False)
logging.info(f"數據已保存到 {file_name}")

Bill version

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import re


# 初始化 WebDriver
driver = webdriver.Chrome()

# 瀏覽到目標頁面
driver.get("https://www.hktvmall.com/hktv/zh/search_a?keyword=%E9%A6%99%E6%B0%B4&page=0")

# 初始化 WebDriverWait
wait = WebDriverWait(driver, 5)

# 定義一個變量來跟踪產品索引
index = 1

# how many total page count , a count to help stop loop in the last page
Total_Page_element = WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="search-result-wrapper"]/div/div[3]/div[3]/div/span'))
).text
page_TOP_num = int(re.search(r'\d+', Total_Page_element).group())

print (page_TOP_num )

# 定義一個變量來跟踪當前頁數
web_page_count = 1

# 創建一個空的 DataFrame
columns = ["Title", "Price", "Rate", "Short_Desc"]
data = pd.DataFrame(columns=columns)

while True:
    try:
        print(f"正在處理第 {web_page_count} 頁")
        # roll to the bottom to load full page
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # 循環處理當前頁面的所有產品
        while True:
            try:
                # 動態生成 XPath 表達式
                open_page_btn = driver.find_element(By.XPATH, f'//*[@id="algolia-search-result-container"]/div/div/span[{index}]')
                open_page_btn.click()
                driver.switch_to.window(driver.window_handles[-1])
                
                time.sleep(2)
                #close add
                try:
                    click_add = driver.find_element(By.XPATH,'/html/body/div[2]/div[6]/div/i')
                    click_add.click()
                except:
                    pass
                # 在產品頁面上提取數據
                try:
                    title = driver.find_element(By.XPATH, '//*[@id="breadcrumb"]/div[2]/ul/li[2]/h1').text
                except:
                    title = "N/A"
                try:
                    price_text = driver.find_element(By.CLASS_NAME, 'price').text
                    price = price_text.split('\n')[0].strip('$')
                except:
                    price = "N/A"
                try:
                    rate = driver.find_element(By.CLASS_NAME, 'averageRating').text
                except:
                    rate = "N/A"
                try:
                    short_desc = driver.find_element(By.XPATH, "//span[contains(@class = 'short-desc')]").text
                except:
                    short_desc = "N/A"
                try:
                    origin = driver.find_elements(By.CLASS_NAME, 'productPackingSpec')[0].text
                    if '包裝' in origin:
                        origin = driver.find_elements(By.CLASS_NAME, 'productPackingSpec')[1].text
                    origin = origin.split(' ')[1]   
                except:
                    origin = "N/A"

                # 將數據添加到 DataFrame
                new_row = pd.DataFrame({"Title": [title], "Price": [price], "Rate": [rate], "Short_Desc": [short_desc], "Origin": [origin]})
                data = pd.concat([data, new_row], ignore_index=True)

                # 關閉新窗口並切換回主窗口
                driver.close()
                driver.switch_to.window(driver.window_handles[0])
                
                # 更新索引以定位下一個產品
                index += 1
                
            except Exception as e:
                # 如果出現異常，說明沒有更多的產品鏈接
                print(f"第 {web_page_count} 頁已經處理完所有產品或出現錯誤：", e)
                break

        # if no more page to click , BREAK
        web_page_count = web_page_count + 1
        print (f'{web_page_count}____________')
        print (page_TOP_num)
        # if get up to the max number of page , BREAK
        if web_page_count > page_TOP_num:
            break
        # 重置產品索引
        index = 1

        # 嘗試找到下一頁按鈕並檢查是否可點擊
        try:
            nextPageButton = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(@id, 'paginationMenu_nextBtn')]")))
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            nextPageButton.click()
            time.sleep(3)
            print ("Next _page _GO! ")
            
        except:
            print("已經到達最後一頁")
            break

    except Exception as e:
        print(f"已經處理完所有頁面或出現錯誤：", e)
        break

# 完成後關閉瀏覽器
driver.quit()

# 輸出 DataFrame
print(data)

display(data) 

data.to_csv(file_name, index=False)


In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import re

# 初始化 WebDriver
driver = webdriver.Chrome()

# 瀏覽到目標頁面
driver.get("https://www.hktvmall.com/hktv/zh/search_a?keyword=%E9%A6%99%E6%B0%B4&page=0")

# 初始化 WebDriverWait
wait = WebDriverWait(driver, 5)

# 定義一個變量來跟踪產品索引
index = 1

# how many total page count , a count to help stop loop in the last page
Total_Page_element = WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="search-result-wrapper"]/div/div[3]/div[3]/div/span'))
).text
page_TOP_num = int(re.search(r'\d+', Total_Page_element).group())

print(page_TOP_num)

# 定義一個變量來跟踪當前頁數
web_page_count = 1

# 創建一個空的 DataFrame
columns = ["Title", "Price", "Rate", "Short_Desc"]
data = pd.DataFrame(columns=columns)

while True:
    try:
        print(f"正在處理第 {web_page_count} 頁")
        # 滾動到頁面底部以加載完整頁面
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # 循環處理當前頁面的所有產品
        while True:
            try:
                # 動態生成 XPath 表達式
                open_page_btn = driver.find_element(By.XPATH, f'//*[@id="algolia-search-result-container"]/div/div/span[{index}]')
                open_page_btn.click()
                driver.switch_to.window(driver.window_handles[-1])
                
                time.sleep(2)
                # 關閉彈出廣告
                try:
                    click_add = driver.find_element(By.XPATH, '/html/body/div[2]/div[6]/div/i')
                    click_add.click()
                except:
                    pass
                # 在產品頁面上提取數據
                try:
                    title = driver.find_element(By.XPATH, '//*[@id="breadcrumb"]/div[2]/ul/li[2]/h1').text
                except:
                    title = "N/A"
                try:
                    price_text = driver.find_element(By.CLASS_NAME, 'price').text
                    price = price_text.split('\n')[0].strip('$')
                except:
                    price = "N/A"
                try:
                    rate = driver.find_element(By.CLASS_NAME, 'averageRating').text
                except:
                    rate = "N/A"
                try:
                    short_desc_elements = driver.find_elements(By.XPATH, "//span[contains(@class, 'short-desc')]/p")
                    short_desc = "\n".join([element.text for element in short_desc_elements])
                except:
                    short_desc = "N/A"
                try:
                    origin = driver.find_elements(By.CLASS_NAME, 'productPackingSpec')[0].text
                    if '包裝' in origin:
                        origin = driver.find_elements(By.CLASS_NAME, 'productPackingSpec')[1].text
                    origin = origin.split(' ')[1]
                except:
                    origin = "N/A"

                # 將數據添加到 DataFrame
                new_row = pd.DataFrame({"Title": [title], "Price": [price], "Rate": [rate], "Short_Desc": [short_desc], "Origin": [origin]})
                data = pd.concat([data, new_row], ignore_index=True)

                # 關閉新窗口並切換回主窗口
                driver.close()
                driver.switch_to.window(driver.window_handles[0])
                
                # 更新索引以定位下一個產品
                index += 1
                
            except Exception as e:
                # 如果出現異常，說明沒有更多的產品鏈接
                print(f"第 {web_page_count} 頁已經處理完所有產品或出現錯誤：", e)
                break

        # 如果沒有更多頁面，則退出循環
        web_page_count += 1
        if web_page_count > page_TOP_num:
            break
        # 重置產品索引
        index = 1

        # 嘗試找到下一頁按鈕並檢查是否可點擊
        try:
            nextPageButton = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(@id, 'paginationMenu_nextBtn')]")))
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            nextPageButton.click()
            time.sleep(3)
            print("Next page GO!")
            
        except:
            print("已經到達最後一頁")
            break

    except Exception as e:
        print(f"已經處理完所有頁面或出現錯誤：", e)
        break

# 完成後關閉瀏覽器
driver.quit()

# 輸出 DataFrame
print(data)

data.to_csv("output.csv", index=False)

17
正在處理第 1 頁
第 1 頁已經處理完所有產品或出現錯誤： Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=125.0.6422.142)
Stacktrace:
0   chromedriver                        0x0000000102f164c8 chromedriver + 4302024
1   chromedriver                        0x0000000102f0ee10 chromedriver + 4271632
2   chromedriver                        0x0000000102b4019c chromedriver + 278940
3   chromedriver                        0x0000000102b1b474 chromedriver + 128116
4   chromedriver                        0x0000000102ba7394 chromedriver + 701332
5   chromedriver                        0x0000000102bacfd4 chromedriver + 724948
6   chromedriver                        0x0000000102b77004 chromedriver + 503812
7   chromedriver                        0x0000000102b779ec chromedriver + 506348
8   chromedriver                        0x0000000102ede510 chromedriver + 4072720
9   chromedriver                        0x0000000102ee2fbc chromedriver + 4091836
10  chr

In [None]:
#testing 06/06

17
正在處理第 1 頁
第 1 頁已經處理完所有產品或出現錯誤： Message: invalid session id
Stacktrace:
0   chromedriver                        0x0000000100df24c8 chromedriver + 4302024
1   chromedriver                        0x0000000100deae10 chromedriver + 4271632
2   chromedriver                        0x0000000100a1c000 chromedriver + 278528
3   chromedriver                        0x0000000100a52eb8 chromedriver + 503480
4   chromedriver                        0x0000000100a539ec chromedriver + 506348
5   chromedriver                        0x0000000100dba510 chromedriver + 4072720
6   chromedriver                        0x0000000100dbefbc chromedriver + 4091836
7   chromedriver                        0x0000000100da1754 chromedriver + 3970900
8   chromedriver                        0x0000000100dbf8a4 chromedriver + 4094116
9   chromedriver                        0x0000000100d946d4 chromedriver + 3917524
10  chromedriver                        0x0000000100ddcb08 chromedriver + 4213512
11  chromedriver           