# Python3.9.12

## 使用套件

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup
from urllib.parse import quote
import re
import requests
import sys
import os
import time
import json
import html2text
import pandas as pd
import undetected_chromedriver as uc
from datetime import datetime
import json
from webdriver_manager.chrome import ChromeDriverManager

## 店家詳細資訊+評論

In [2]:
def initialize_chrome(_from="facebook",retry=0):
    global driver 
    try:
        print("Initializing chromedriver.")
        options = Options()
        options.add_argument("--start-maximized")
        options.add_argument("--incognito")
        options.add_argument("--disable-gpu")
        options.add_argument("--headless")
        options.add_argument('--log-level=3')
        driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
        time.sleep(2)
        return True
    except Exception as e:
        print(e)
        pass

def parse_description(description_tag):
    h = html2text.HTML2Text()
    h.ignore_links = True
    description_text = h.handle(str(description_tag)) 
    return description_text

def place_info():
    initialize_chrome()
    num = 1807
    for i in url_list:
        driver.get(i)
        response = driver.page_source
        soup = BeautifulSoup(response, 'html.parser')

        # 地點資訊區塊
        rating_and_review_div = soup.find('div',{'class':'k7jAl lJ3Kh miFGmb'})
        
        time.sleep(0.1)
        # 店名
        try:
            place_name = rating_and_review_div.find(class_="fontHeadlineLarge").text.strip()
        except:
            num+=1
            continue
        # 評論數
        total_reviews = rating_and_review_div.find('span',{'aria-label':re.compile("^\d.+ 則評論")})
        if total_reviews:
            total_reviews = total_reviews.get('aria-label','').strip()
            total_reviews = total_reviews.removesuffix('則評論').strip()
            total_reviews = total_reviews.replace(',', '')
        if not total_reviews:
            total_reviews = ""

        # 星級
        total_rating = rating_and_review_div.find('span',{'aria-label':re.compile("^\s+?\d+\.\d+ 星級")})
        if total_rating:
            total_rating = total_rating.get('aria-label','').strip() 
            total_rating = total_rating.removesuffix('星級').strip()
        if not total_rating:
            total_rating = ""

        # 標籤
        place_category = rating_and_review_div.find('button',{'jsaction':re.compile("pane.rating.category")})
        if place_category:
            place_category = place_category.getText()
        if not place_category:
            place_category = ""

        # 行政區
        district = rating_and_review_div.find('button',{'data-tooltip':re.compile("複製 Plus Code")})
        if district:
            district = district.get('aria-label','').strip() 
            district = district.removesuffix('Plus Code: ').strip()
        if not district:
            district = ""

        # 內用
        eat_in = rating_and_review_div.find('div',{'aria-label':re.compile("提供內用")})
        if eat_in:
            eat_in = 1
        if not eat_in:
            eat_in = 0

        # 外帶
        to_go_1 = rating_and_review_div.find('div',{'aria-label':re.compile("提供外帶服務")})
        if to_go_1:
            to_go_1 = 1
        if not to_go_1:
            to_go_1 = 0

        # 路邊取餐
        to_go_2 = rating_and_review_div.find('div',{'aria-label':re.compile("提供路邊取餐服務")})
        if to_go_2:
            to_go_2 = 1
        if not to_go_2:
            to_go_2 = 0

        # 外送
        delivery = rating_and_review_div.find('div',{'aria-label':re.compile("提供外送服務")})
        if delivery:
            delivery = 1
        if not delivery:
            delivery = 0

        # 價位
        cost = rating_and_review_div.find('span',{'aria-label':re.compile("^價格")})
        if cost:
            cost = cost.getText()
        if not cost:
            cost = ""

        #  是否關閉
        close = rating_and_review_div.find('span',{'style':re.compile("color:#D93025")})
        if close:
            close = close.getText()
        if not close:
            close = ""    

        # 地址
        address = rating_and_review_div.find('button',{'aria-label':re.compile("^地址\:")})
        if address:
            address = address.get('aria-label','').strip() 
            address = address.removeprefix('地址:')      
        if not address:
            address = ""

        # 電話
        phone = rating_and_review_div.find('button',{'aria-label':re.compile("^電話號碼\:")})
        if phone:
            phone = phone.get('aria-label','').strip()    
            phone = phone.removeprefix('電話號碼:')
        if not phone:
            phone = ""

        # 營業時間
        opening_hours = rating_and_review_div.find('div',{'aria-label':re.compile("^星期")})
        if opening_hours:
            opening_hours = opening_hours.get('aria-label','')   
        if not opening_hours:
            opening_hours = ""

        # 網站
        website = rating_and_review_div.find('a',{'aria-label':re.compile("^網站\:")})
        if website:
            website = website.get('href')   
        if not website:
            website = ""

        # 獲取日期
        place_acquisition_date = datetime.now().strftime("%Y-%m-%d")
        
        # append place_info to dict
        info_dict = {}
        place_data = []
        info_dict['google_url'] = i
        info_dict['place_name'] = place_name
        if close == "永久停業" or close == "暫時關閉":
            info_dict["total_rating"] = None
        else:
            info_dict["total_rating"] = total_rating
        info_dict["place_category"] = place_category                                  
        info_dict['total_reviews'] =  total_reviews             
        info_dict['cost'] = cost
        info_dict['address'] =  address
        info_dict['district'] = district
        info_dict['eat_in'] = eat_in
        info_dict['to_go_1'] = to_go_1
        info_dict['to_go_2'] = to_go_2
        info_dict['delivery'] = delivery
        info_dict['opening_hours'] = opening_hours
        info_dict["website"] = website
        info_dict["phone"] = phone
        info_dict["close"] = close
        info_dict["place_acquisition_date"] = place_acquisition_date 
        place_data.append(info_dict)
        print(str(num) + ' 完成店名(info): ' + place_name)

        # save place_info to csv                                      
        place_info_header = list(place_data[0].keys())
        place_info_df = pd.DataFrame.from_records(place_data,columns=place_info_header)
        place_info_df.to_csv(f'./place_info.csv', mode='a', header=False)
        if close == "永久停業" or close == "暫時關閉":
            num += 1
            continue
        if total_reviews == "":
            num += 1
            continue
        num += 1
    # 關閉瀏覽器
    driver.quit()

In [3]:
if __name__ == '__main__':
    print('請確定該py檔與url的cvs檔案放在相同路徑')
    file = input('請輸入url檔案名稱(含完整副檔名)，例如: url.csv\n')
    df = pd.read_csv(file, header=None)
    url_list = df[0][1806:2201]
    time1 = time.time()
    place_info()
    print(f'執行總花費時間: {time.time() - time1}')
    input('任務完成，若要關閉該視窗請輸入ENTER.....')
    os._exit()

請確定該py檔與url的cvs檔案放在相同路徑
請輸入url檔案名稱(含完整副檔名)，例如: url.csv
清整後的酒吧和中山中正_4Piko.csv
Initializing chromedriver.


  driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)


1807 完成店名(info): 小鵲sing紅豆餅 - 東南起家店
1808 完成店名(info): 透光棉花
1809 完成店名(info): 日京川主題攝影棚
1810 完成店名(info): 新世紀廣東粥-東南店
1811 完成店名(info): 玉蘭豆沙鮮乳
1812 完成店名(info): 阿桂的家-手作吐司專賣店
1813 完成店名(info): 米米香 日式飯糰
1814 完成店名(info): 新竹東南水煎包
1815 完成店名(info): 台東南豐鐵花棧 Inn by the Village
1816 完成店名(info): 粥大福 新竹東南店
1817 完成店名(info): 老王豆瓣麵
1818 完成店名(info): 台南虱目魚/鹽水意麵
1819 完成店名(info): D#minor男仕沙龍
1820 完成店名(info): 葉媽媽糯米飯魷魚羹（新竹店）
1821 完成店名(info): 粥坊Salty congee。創意粥品
1822 完成店名(info): 芽米健康鍋物
1823 完成店名(info): 獎水堂潤餅 冬瓜茶
1824 完成店名(info): 東南快餐
1825 完成店名(info): 鈞渝排骨
1826 完成店名(info): 食品路 老奶奶飯糰
1827 完成店名(info): 麵堂拉麵專賣店
1828 完成店名(info): 麻古茶坊MACU新竹食品店
1829 完成店名(info): 東南蜜餞舖
1830 完成店名(info): 藍玫瑰音樂酒吧
1831 完成店名(info): 台中 草悟廣場
1832 完成店名(info): MyTaiwanTour 飛亞旅行社
1833 完成店名(info): 淡水將捷金鬱金香酒店
1834 完成店名(info): TAP: The Ale Project
1835 完成店名(info): 甬城鳳潮演藝酒吧
1836 完成店名(info): 城市商旅 高雄真愛館
1837 完成店名(info): Jiejiao Western-style Food Pub
1838 完成店名(info): 精釀啤酒專賣店
1839 完成店名(info): 菜宅裡的小酒館 The Garden Tapas & Bar 🌿 取消麵飯主餐。預約制，請參見FB說明 | Reservation O

2099 完成店名(info): 七彩雲國際有限公司
2100 完成店名(info): 千千杯甜品（黑糖刨冰｜雪花冰｜甜湯｜傳統豆花）
2101 完成店名(info): 好樂迪板前店
2102 完成店名(info): Xiangdun Gewu Cheng
2103 完成店名(info): 墩樺室內裝修設計
2104 完成店名(info): 自由國度慢搖吧
2105 完成店名(info): 果凍KTV
2106 完成店名(info): 激旨燒き鳥 逢甲二店
2107 完成店名(info): 珍棧神奇小吃
2108 完成店名(info): 赫茲室內設計工作室
2109 完成店名(info): 極地酒吧
2110 完成店名(info): 豪季水餃專賣店
2111 完成店名(info): 蓮霧滷肉飯
2112 完成店名(info): 牛仔酒吧
2113 完成店名(info): STARBUCKS 星巴克 摩登典藏吧台 (大英門市)
2114 完成店名(info): 統聯客運水湳站
2115 完成店名(info): 佳民田中央
2116 完成店名(info): Chill Land 精釀餐酒吧
2117 完成店名(info): 夜底拾花
2118 完成店名(info): 巢吧
2119 完成店名(info): 漁夫俱樂部
2120 完成店名(info): 七星會館
2121 完成店名(info): Miaoko hostel
2122 完成店名(info): 花蓮民宿七星潭夏屋海邊包棟民宿（花蓮包棟民宿）
2123 完成店名(info): 樂園酒吧
2124 完成店名(info): 海邊的房間（毛小海洋俱樂部）
2125 完成店名(info): 七星日出民宿
2126 完成店名(info): 七星潭天使民宿
2127 完成店名(info): 漫漫日出海景民宿 Before Sunrise Seaview B&B
2128 完成店名(info): 九虹新月景觀民宿
2129 完成店名(info): 小森見晴民宿 Hualien ShiningForest Hostel
2130 完成店名(info): 呼吸民宿
2131 完成店名(info): 坲卡夏莊園民宿 Focaccia Manor
2132 完成店名(info): 汎水淩山花園民宿
2133 完成店名(info): 

TypeError: _exit() missing required argument 'status' (pos 1)