In [None]:
# 基礎套件
import os
import re
import time
from datetime import datetime
import numpy as np

# 資料處理
import pandas as pd

# Selenium 相關
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options as ChromeOptions

# ChromeDriver 管理
from webdriver_manager.chrome import ChromeDriverManager
import undetected_chromedriver as uc

# 其他自動化工具
import pyautogui

# 網頁解析
from bs4 import BeautifulSoup

In [None]:
# Load the factory list and keep only the first row for each distinct
# factory address before any further processing.
df = pd.read_excel(
    '/Users/daniel0522/Downloads/非登食品業者.xlsx'
).drop_duplicates(subset=['工廠地址'], keep='first')
df.info()
df.head()

In [None]:
# Translation table built once: the full-width space (U+3000) becomes an
# ASCII space, and the full-width forms U+FF01..U+FF5E are shifted down by
# 0xFEE0 onto their ASCII counterparts.
_FULL_TO_HALF_TABLE = {0x3000: 0x20}
_FULL_TO_HALF_TABLE.update(
    (code, code - 0xFEE0) for code in range(0xFF01, 0xFF5F)
)


def full_to_half(text):
    """Convert full-width characters in *text* to their half-width forms.

    Args:
        text: The string to normalize. Non-string values (for example the
            NaN that pandas uses for empty cells, or ``None``) are returned
            unchanged instead of raising ``TypeError``.

    Returns:
        A new string in which the full-width space and the characters
        U+FF01..U+FF5E are replaced by their ASCII equivalents; every other
        character is kept as is.
    """
    if not isinstance(text, str):
        # Missing cells must survive a column-wide apply/map untouched.
        return text
    return text.translate(_FULL_TO_HALF_TABLE)

# Normalize full-width characters in every address before it is processed.
df['工廠地址'] = df['工廠地址'].map(full_to_half)

In [None]:
# Shared lookup state: the addresses already attempted, plus the latitude and
# longitude values recorded so far. Each key starts with its own empty list.
資料 = {key: [] for key in ('looked', 'latitude_list', 'longitude_list')}

In [None]:
def process_address(driver, address, a, check):
    """Search one address on the open Bing Maps page and record the outcome.

    An address already present in ``資料['looked']`` is skipped. Otherwise the
    address is typed into the map search box, the resulting page is parsed
    with BeautifulSoup, and the outcome is written to the module-level
    ``資料`` lists and to the ``經度`` / ``緯度`` columns of the matching rows
    of ``df``: the coordinates, ``'查無地址'`` (an error overlay was found) or
    ``'無明確地址'`` (no usable result).

    Args:
        driver: Selenium WebDriver with the Bing Maps page already loaded.
        address: Address string to search for.
        a: Running count of processed addresses.
        check: Per-browser-session list of results already seen (parsed
            coordinate tags and the two marker strings); mutated in place.

    Returns:
        ``a`` plus one when the address was processed, ``a`` unchanged when
        the address had been looked up before.

    Raises:
        Exception: Any error raised during the lookup is printed and
            re-raised, including the explicit "ambiguous address" error
            raised when no usable result was found, so that the caller can
            restart the browser.
    """
    if address in 資料['looked']:
        return a

    # The address is marked as attempted up front, so a failure below does
    # not make it eligible for another attempt.
    資料['looked'].append(address)
    a += 1
    print(f"\n處理地址: {address}")

    # `check` is reset by the caller on every browser restart while
    # 資料['looked'] is not, so "did this lookup add exactly one entry?" has
    # to be measured against the size of `check` at the start of this call.
    entries_before = len(check)

    try:
        wait = WebDriverWait(driver, 3)
        search_box = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="maps_sb"]')))
        print("✓ 搜尋框準備就緒")

        # Clear any previous query through JavaScript before typing.
        driver.execute_script("arguments[0].value = '';", search_box)
        search_box.send_keys(f'{address}')
        print("✓ 輸入地址完成")

        element = driver.find_element(By.XPATH, '//*[@id="maps_sb_container"]/div[1]/div[2]/a')
        element.click()
        print("✓ 執行搜尋")

        time.sleep(1.5)
        page_source = driver.execute_script("return document.documentElement.outerHTML")
        soup = BeautifulSoup(page_source, 'lxml')

        time.sleep(1)
        info = soup.find_all('div', class_='panelWrapper multi leftPanel noScroll')

        # Default to "nothing found" so a page without any result panel is
        # handled below instead of failing on unbound names.
        data = []
        error = []
        for infos in info:
            # When several panels match, the last one wins.
            data = infos.find_all('div', {'class': 'geochainModuleLatLong'})
            error = infos.find_all('div', {'class': 'overlay-container error'})

        matching_rows = df['工廠地址'] == address

        for div_tag in data:
            # Skip coordinate tags already recorded earlier in this session.
            if div_tag in check:
                continue
            check.append(div_tag)
            latitude, longitude = div_tag.text.split('、')

            # NOTE(review): membership is tested on the raw text while the
            # lists hold stripped values; kept as in the original - confirm
            # whether stripping before the test is intended.
            if latitude in 資料['latitude_list'] or longitude in 資料['longitude_list']:
                continue

            latitude = latitude.strip()
            longitude = longitude.strip()
            資料['latitude_list'].append(latitude)
            資料['longitude_list'].append(longitude)
            df.loc[matching_rows, '經度'] = longitude
            df.loc[matching_rows, '緯度'] = latitude
            print(f"✓ 成功獲取座標: {latitude}, {longitude}")

        # NOTE(review): presumably error overlays accumulate in the page
        # during one browser session, so a new "not found" result shows up
        # as one more overlay than the markers recorded so far - confirm.
        if error and check.count('查無地址') + 1 == len(error):
            check.append('查無地址')
            資料['latitude_list'].append('查無地址')
            資料['longitude_list'].append('查無地址')
            df.loc[matching_rows, '經度'] = '查無地址'
            df.loc[matching_rows, '緯度'] = '查無地址'
            print("✗ 查無此地址")

        elif len(check) != entries_before + 1:
            # This lookup did not contribute exactly one entry to `check`.
            check.append('無明確地址')
            資料['latitude_list'].append('無明確地址')
            資料['longitude_list'].append('無明確地址')
            df.loc[matching_rows, '經度'] = '無明確地址'
            df.loc[matching_rows, '緯度'] = '無明確地址'
            print("✗ 地址不明確")
            # Force the caller to restart the browser.
            raise Exception("地址不明確，需要重新啟動瀏覽器")

        print("-" * 50)

    except Exception as e:
        print(f"✗ 處理過程發生錯誤: {str(e)}")
        raise

    return a

def main():
    """Geocode every address in ``df`` and save the results to Excel.

    Repeatedly starts a headless Chrome session through
    undetected_chromedriver, opens Bing Maps and feeds the addresses that
    are not yet in ``資料['looked']`` to ``process_address``. ``df`` is
    written to ``工廠地址經緯度.xlsx`` after every successful lookup and once
    more when everything is done. When a lookup fails, the browser is closed
    and a fresh session continues with the remaining addresses; when the
    session itself fails, the loop waits 10 seconds before retrying.
    """
    a = len(資料['looked'])

    while True:
        # Work out what is left BEFORE launching a browser. Selecting by
        # membership (not by position) keeps the loop finite even when the
        # address column contains duplicates or empty cells.
        looked = set(資料['looked'])
        remaining_addresses = [
            address
            for address in dict.fromkeys(df['工廠地址'].dropna())
            if address not in looked
        ]
        total = len(remaining_addresses)

        if not remaining_addresses:
            # Persist markers written by a lookup that failed after the last
            # successful save.
            df.to_excel('工廠地址經緯度.xlsx', index=False)
            print("\n✓ 所有地址處理完畢！")
            break

        check = []  # per-session record of results already seen
        driver = None
        try:
            # Browser configuration
            options = uc.ChromeOptions()

            options.add_argument('--headless')
            options.add_argument('--disable-blink-features=AutomationControlled')
            options.add_argument('--disable-infobars')
            options.add_argument('--disable-dev-shm-usage')
            options.add_argument('--no-sandbox')
            options.add_argument('--disable-gpu')
            options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36')
            options.add_argument('--disable-extensions')
            options.add_argument('--disable-plugins-discovery')
            options.add_argument('--disable-bundled-ppapi-flash')

            driver = uc.Chrome(options=options)
            driver.get('https://www.bing.com/maps?cp=25.025262%7E121.509476&lvl=11.0')
            time.sleep(5)

            print(f"\n開始處理剩餘 {total} 筆地址")

            for idx, address in enumerate(remaining_addresses, 1):
                try:
                    a = process_address(driver, address, a, check)
                    print(f"進度: {idx}/{total} ({idx/total*100:.1f}%)")
                    df.to_excel('工廠地址經緯度.xlsx', index=False)

                except Exception as e:
                    print(f"✗ 此地址處理失敗: {str(e)}")
                    print("重新啟動瀏覽器並繼續...")
                    # The browser is closed once, in the finally clause.
                    break

        except Exception as e:
            print(f"發生錯誤: {str(e)}")
            print("等待 10 秒後重試...")
            time.sleep(10)

        finally:
            if driver:
                driver.quit()

# Start the geocoding loop only when this file is executed directly.
if __name__ == "__main__":
    main()