# Python3.9.12

## 使用套件

In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup
from urllib.parse import quote
import re
import requests
import sys
import os
import time
import json
import html2text
import pandas as pd
import undetected_chromedriver as uc
from datetime import datetime
import json
from webdriver_manager.chrome import ChromeDriverManager

## 店家詳細資訊+評論

In [4]:
def initialize_chrome(_from="facebook",retry=0):
    global driver 
    try:
        print("Initializing chromedriver.")
        options = Options()
        options.add_argument("--start-maximized")
        options.add_argument("--incognito")
        options.add_argument("--disable-gpu")
        options.add_argument("--headless")
        options.add_argument('--log-level=3')
        driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
        time.sleep(2)
        return True
    except Exception as e:
        print(e)
        pass

def parse_description(description_tag):
    h = html2text.HTML2Text()
    h.ignore_links = True
    description_text = h.handle(str(description_tag)) 
    return description_text

def place_info():
    initialize_chrome()
    num = 900
    for i in url_list:
        driver.get(i)
        response = driver.page_source
        soup = BeautifulSoup(response, 'html.parser')

        # 地點資訊區塊
        rating_and_review_div = soup.find('div',{'class':'k7jAl lJ3Kh miFGmb'})
        
        time.sleep(0.1)
        # 店名
        try:
            place_name = rating_and_review_div.find(class_="fontHeadlineLarge").text.strip()
        except:
            num+=1
            continue
        # 評論數
        total_reviews = rating_and_review_div.find('span',{'aria-label':re.compile("^\d.+ 則評論")})
        if total_reviews:
            total_reviews = total_reviews.get('aria-label','').strip()
            total_reviews = total_reviews.removesuffix('則評論').strip()
            total_reviews = total_reviews.replace(',', '')
        if not total_reviews:
            total_reviews = ""

        # 星級
        total_rating = rating_and_review_div.find('span',{'aria-label':re.compile("^\s+?\d+\.\d+ 星級")})
        if total_rating:
            total_rating = total_rating.get('aria-label','').strip() 
            total_rating = total_rating.removesuffix('星級').strip()
        if not total_rating:
            total_rating = ""

        # 標籤
        place_category = rating_and_review_div.find('button',{'jsaction':re.compile("pane.rating.category")})
        if place_category:
            place_category = place_category.getText()
        if not place_category:
            place_category = ""

        # 行政區
        district = rating_and_review_div.find('button',{'data-tooltip':re.compile("複製 Plus Code")})
        if district:
            district = district.get('aria-label','').strip() 
            district = district.removesuffix('Plus Code: ').strip()
        if not district:
            district = ""

        # 內用
        eat_in = rating_and_review_div.find('div',{'aria-label':re.compile("提供內用")})
        if eat_in:
            eat_in = 1
        if not eat_in:
            eat_in = 0

        # 外帶
        to_go_1 = rating_and_review_div.find('div',{'aria-label':re.compile("提供外帶服務")})
        if to_go_1:
            to_go_1 = 1
        if not to_go_1:
            to_go_1 = 0

        # 路邊取餐
        to_go_2 = rating_and_review_div.find('div',{'aria-label':re.compile("提供路邊取餐服務")})
        if to_go_2:
            to_go_2 = 1
        if not to_go_2:
            to_go_2 = 0

        # 外送
        delivery = rating_and_review_div.find('div',{'aria-label':re.compile("提供外送服務")})
        if delivery:
            delivery = 1
        if not delivery:
            delivery = 0

        # 價位
        cost = rating_and_review_div.find('span',{'aria-label':re.compile("^價格")})
        if cost:
            cost = cost.getText()
        if not cost:
            cost = ""

        #  是否關閉
        close = rating_and_review_div.find('span',{'style':re.compile("color:#D93025")})
        if close:
            close = close.getText()
        if not close:
            close = ""    

        # 地址
        address = rating_and_review_div.find('button',{'aria-label':re.compile("^地址\:")})
        if address:
            address = address.get('aria-label','').strip() 
            address = address.removeprefix('地址:')      
        if not address:
            address = ""

        # 電話
        phone = rating_and_review_div.find('button',{'aria-label':re.compile("^電話號碼\:")})
        if phone:
            phone = phone.get('aria-label','').strip()    
            phone = phone.removeprefix('電話號碼:')
        if not phone:
            phone = ""

        # 營業時間
        opening_hours = rating_and_review_div.find('div',{'aria-label':re.compile("^星期")})
        if opening_hours:
            opening_hours = opening_hours.get('aria-label','')   
        if not opening_hours:
            opening_hours = ""

        # 網站
        website = rating_and_review_div.find('a',{'aria-label':re.compile("^網站\:")})
        if website:
            website = website.get('href')   
        if not website:
            website = ""

        # 獲取日期
        place_acquisition_date = datetime.now().strftime("%Y-%m-%d")
        
        # append place_info to dict
        info_dict = {}
        place_data = []
        info_dict['google_url'] = i
        info_dict['place_name'] = place_name
        if close == "永久停業" or close == "暫時關閉":
            info_dict["total_rating"] = None
        else:
            info_dict["total_rating"] = total_rating
        info_dict["place_category"] = place_category                                  
        info_dict['total_reviews'] =  total_reviews             
        info_dict['cost'] = cost
        info_dict['address'] =  address
        info_dict['district'] = district
        info_dict['eat_in'] = eat_in
        info_dict['to_go_1'] = to_go_1
        info_dict['to_go_2'] = to_go_2
        info_dict['delivery'] = delivery
        info_dict['opening_hours'] = opening_hours
        info_dict["website"] = website
        info_dict["phone"] = phone
        info_dict["close"] = close
        info_dict["place_acquisition_date"] = place_acquisition_date 
        place_data.append(info_dict)
        print(str(num) + ' 完成店名(info): ' + place_name)

        # save place_info to csv                                      
        place_info_header = list(place_data[0].keys())
        place_info_df = pd.DataFrame.from_records(place_data,columns=place_info_header)
        place_info_df.to_csv(f'./place_info.csv', mode='a', header=False)
        if close == "永久停業" or close == "暫時關閉":
            num += 1
            continue
        if total_reviews == "":
            num += 1
            continue
        num += 1
    # 關閉瀏覽器
    driver.quit()

In [5]:
if __name__ == '__main__':
    print('請確定該py檔與url的cvs檔案放在相同路徑')
    file = input('請輸入url檔案名稱(含完整副檔名)，例如: url.csv\n')
    df = pd.read_csv(file, header=None)
    url_list = df[0][899:1101]
    time1 = time.time()
    place_info()
    print(f'執行總花費時間: {time.time() - time1}')
    input('任務完成，若要關閉該視窗請輸入ENTER.....')
    os._exit()

請確定該py檔與url的cvs檔案放在相同路徑
請輸入url檔案名稱(含完整副檔名)，例如: url.csv
清整後的酒吧和中山中正_4Piko.csv
Initializing chromedriver.


  driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)


900 完成店名(info): 黑殿男仕理髮廳
901 完成店名(info): 展賦教育
902 完成店名(info): 全亞汽車美容 洗車 汽車包膜 犀牛皮 換色貼 測速器 Li防護罩
903 完成店名(info): 蘇菲的小酒館
904 完成店名(info): 屏東居高風味餐廳/屏東景觀餐廳/屏東景點/三地門美食/三地門餐廳/三地門景點
905 完成店名(info): 德明財經科技大學推廣教育中心
906 完成店名(info): 概亞形象整合有限公司
907 完成店名(info): 花壇山中居景觀餐廳
908 完成店名(info): 竹林雞肉-中和員山店
909 完成店名(info): 轉角秘密基地（菜單更新無火鍋）
910 完成店名(info): 八番 私人住宅 Residence 8
911 完成店名(info): 【永慶房屋】西湖捷運直營店
912 完成店名(info): 望景咖啡WJ-CAFE
913 完成店名(info): 澎湖 三哥酒釀雞排
914 完成店名(info): Dew-chef’s wine and gourmet 露餐廳
915 完成店名(info): 俩吆久景觀貨櫃
916 完成店名(info): 7-ELEVEn 環冠門市
917 完成店名(info): 屈臣氏 德明門市
918 完成店名(info): 牡丹灣Villa
919 完成店名(info): 悠活渡假村 YOHO Beach Resort
920 完成店名(info): 暫停。慢食
921 完成店名(info): 中信房屋 內湖AIT加盟店
922 完成店名(info): 永慶房屋內湖捷運店
923 完成店名(info): 屈臣氏Watsons (來店取貨)
924 完成店名(info): 上海味香小吃店
925 完成店名(info): 小莊火雞肉飯
926 完成店名(info): DARTS TIME 飛鏢咖啡吧
927 完成店名(info): Whatever Homestyle Cuisine & Bar 隨意餐酒館--西屯人氣餐酒館|聚餐推薦|精緻料理|西餐中吃料理|必去餐酒館|熱門餐酒
928 完成店名(info): 音樂王 ktv
929 完成店名(info): 陸樓 Bar
930 完成店名(info): 夜月（採FB預約）
931 完成店名(info): 

TypeError: _exit() missing required argument 'status' (pos 1)