In [4]:
import logging
import os
import random
import re
import time

from typing import Any, Dict, List

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    TimeoutException,
    WebDriverException,
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [8]:
# Kakao Map place pages to scrape. Every page shares the same URL prefix, so
# only the numeric place IDs are listed; order matches the crawl order.
place_url_prefix = "https://place.map.kakao.com/"
place_ids = (
    "1847980851",
    "105514778",
    "793045923",
    "1616011566",
    "937621964",
    "1580396435",
    "463960151",
    "645041557",
    "1544111495",
    "1154232228",
    "705614953",
    "1400000257",
    "1931556617",
    "318213599",
    "1916721899",
    "946237913",
    "971673754",
    "313842124",
    "27225242",
    "1515321702",
    "15949152",
    "25421177",
    "24063108",
    "1985611346",
    "1460055984",
    "1056158711",
    "1450677913",
    "7833477",
    "609147936",
    "60742442",
    "1491565179",
    "26934691",
    "20060976",
    "1699172104",
    "1551124654",
    "873142956",
    "473922590",
    "1620291920",
    "1891298246",
    "564758628",
    "357461316",
    "174870783",
    "219591400",
    "621309181",
    "1990898649",
    "163876439",
    "7904958",
    "1195027127",
    "574598176",
    "1485465889",
    "108150613",
)
urls = [place_url_prefix + place_id for place_id in place_ids]

In [9]:
# Chrome options setup
chrome_options = Options()

# Command-line switches handed to Chrome, applied in this order. The last
# entry overrides the browser's user-agent string.
chrome_arguments = (
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--disable-gpu",
    "--log-level=3",
    (
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
)
for chrome_argument in chrome_arguments:
    chrome_options.add_argument(chrome_argument)

# Start the Selenium-controlled Chrome session used by the crawl cell.
driver = webdriver.Chrome(options=chrome_options)

# Short pause after the browser is launched.
time.sleep(1)

In [19]:
# Make the INFO-level progress messages below visible: with no logging
# configuration the root logger only emits WARNING and above. basicConfig does
# nothing if the root logger already has handlers.
logging.basicConfig(level=logging.INFO)

# XPath locators for the parts of a Kakao place page read by this cell.
NAME_XPATH = "//div[@class='inner_place']//h2[@class='tit_location']"
MORE_BUTTON_XPATH = "//ul[contains(@class, 'list_menu')]/following-sibling::a[contains(@class, 'link_more')]"
MENU_ITEM_XPATH = "//li[contains(@class, 'menu_fst') or @data-page]"
MENU_NAME_XPATH = ".//span[contains(@class, 'loss_word')]"
MENU_PRICE_XPATH = ".//em[contains(@class, 'price_menu')]"


def _parse_price(price_text):
    """Convert a price label to an int, or return None if it is not a plain amount.

    Surrounding whitespace, thousands separators and a trailing "원" are
    ignored, so "17,500" and "17,500원" both give 17500. Any other text
    (empty string, words, several numbers) gives None.
    """
    match = re.fullmatch(r"([\d,]+)\s*원?", price_text.strip())
    if match is None:
        return None
    digits = match.group(1).replace(",", "")
    # A label made only of commas leaves nothing to convert.
    return int(digits) if digits else None


def _read_restaurant_name(driver):
    """Return the restaurant title of the loaded page, or "Not Found"."""
    try:
        # Wait up to 5 seconds for the title element to be present in the DOM.
        title_element = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.XPATH, NAME_XPATH))
        )
    except (NoSuchElementException, TimeoutException):
        logging.warning("⚠️ 식당 이름 가져오기 실패")
        return "Not Found"
    restaurant_name = title_element.text.strip()
    logging.info(f"✅ 식당 이름 수집 완료: {restaurant_name}")
    return restaurant_name


def _expand_menu(driver):
    """Click the menu "more" link if the page has one; never raises on failure."""
    try:
        more_button = driver.find_element(By.XPATH, MORE_BUTTON_XPATH)
    except NoSuchElementException:
        logging.warning("'메뉴 더보기' 버튼 없음, 스킵")
        return
    try:
        driver.execute_script("arguments[0].scrollIntoView({block: 'center', behavior: 'smooth'});", more_button)
        time.sleep(2)  # let the smooth scroll finish before clicking
        more_button.click()
        time.sleep(2)  # give the page time to react to the click
        logging.info("'메뉴 더보기' 버튼 클릭 완료!")
    except WebDriverException as exc:
        # A click can fail for reasons other than a missing element (covered,
        # not interactable, stale). Continue with the menu rows already shown.
        logging.warning(f"⚠️ '메뉴 더보기' 버튼 클릭 실패, 스킵: {exc.__class__.__name__}")


def _read_menu_items(driver):
    """Return [[menu_name, menu_price], ...] for the loaded page.

    menu_name falls back to "Unknown" and menu_price to None when the
    corresponding element is missing or the price cannot be parsed.
    """
    menu_items = []
    menu_elements = driver.find_elements(By.XPATH, MENU_ITEM_XPATH)
    # find_elements returns an empty list instead of raising, so the
    # "no menu" case has to be detected explicitly.
    if not menu_elements:
        logging.warning("메뉴 없음")

    for item in menu_elements:
        menu_name = "Unknown"
        menu_price = None

        try:
            menu_name = item.find_element(By.XPATH, MENU_NAME_XPATH).text.strip()
        except NoSuchElementException:
            logging.warning("메뉴명을 찾을 수 없음")

        try:
            menu_price = _parse_price(item.find_element(By.XPATH, MENU_PRICE_XPATH).text)
        except NoSuchElementException:
            logging.warning(f"⚠️ '{menu_name}' - 가격 정보를 찾을 수 없음")

        menu_items.append([menu_name, menu_price])

    return menu_items


# One [restaurant_name, menu_items] row per URL, in crawl order.
data = []

try:
    for url in urls:
        logging.info(f"Processing: {url}")

        restaurant_name = "Not Found"
        menu_items = []

        try:
            driver.get(url)
            time.sleep(3)  # fixed wait for the page to load

            restaurant_name = _read_restaurant_name(driver)
            _expand_menu(driver)
            menu_items = _read_menu_items(driver)
        except WebDriverException as exc:
            # A single failing page must not discard the rows already
            # collected; record whatever was gathered and move on.
            logging.error(f"❌ 페이지 처리 실패, 스킵: {url} ({exc.__class__.__name__})")

        data.append([restaurant_name, menu_items])
finally:
    # Always close the browser, even if the crawl is interrupted.
    driver.quit()

# One row per URL; the "Menu" column holds the list of [name, price] pairs.
df = pd.DataFrame(data, columns=["Restaurant", "Menu"])

logging.info("✅ 모든 데이터 수집 완료!")



In [20]:
# Display the scraped table: one row per restaurant, with its list of
# [menu name, price] pairs in the "Menu" column.
df

Unnamed: 0,Restaurant,Menu
0,역전할머니맥주 서울홍대입구역점,"[[버터구이 오징어입, None], [먹태, None], [튀김 쥐포, None],..."
1,제이스피자,"[[치즈피자, 17500], [페퍼로니 피자, 18000], [초리조칠리 피자, 2..."
2,무무,"[[양지수육전골, 17900], [마라양지수육전골, 18900], [백후추탕수육, ..."
3,대표분식,[]
4,공간비틀즈,"[[몽골리안 비프, 17000], [오뎅탕, 15000], [라볶이, 15000],..."
5,추파 홍대점,"[[크림닭강정, 22000], [구운삼겹플레이트, 22000], [돼지고기김치찌개,..."
6,우리동네연탄고기,"[[뚱돼지 삼겹살, 15000], [두툼 뚱목살, 15000], [그 시절 연탄 생..."
7,목벌,"[[목벌 한우모둠세트, 87000], [목벌 한우 눈꽃갈비, 27000], [한우 ..."
8,구복만두 홍대합정본사직영점,"[[구복 전통만두, 7500], [구복 김치만두, 7500], [구복 통새우만두, ..."
9,온복비빔국수 본점,[]


In [22]:
# Save the scraped table without the index column. The "utf-8-sig" encoding
# writes a UTF-8 byte-order mark at the start of the file.
df.to_csv(
    "scraper_price_exception.csv",
    index=False,
    encoding="utf-8-sig",
)

In [23]:
# Input files: the restaurant table being filled in, and the scraped menus
# saved to "scraper_price_exception.csv" earlier in this notebook.
restaurant_temp_path = "restaurant_temp.csv"
scraper_price_exception_path = "scraper_price_exception.csv"

# Load both tables.
df_temp = pd.read_csv(restaurant_temp_path, encoding="utf-8-sig")
df_exception = pd.read_csv(scraper_price_exception_path, encoding="utf-8-sig")

# Rows still waiting for a menu: every row whose 'Processed' value is not "Yes".
unprocessed_rows = df_temp[df_temp["Processed"] != "Yes"]

# First two characters of each scraped restaurant name, computed once instead
# of on every loop iteration.
restaurant_prefixes = df_exception["Restaurant"].str[:2]

# NOTE(review): a two-character prefix is a loose key. Different restaurants
# sharing the same first two characters resolve to whichever scraped row comes
# first, and a scraped row with an empty menu still marks the row as
# processed. Confirm this is intended.
for index, row in unprocessed_rows.iterrows():
    business_name = row["사업장명"]

    # A missing name is read from the CSV as NaN (a float), which cannot be
    # sliced; skip such rows instead of raising TypeError.
    if not isinstance(business_name, str):
        logging.warning(f"⚠️ 사업장명 없음, 스킵 (index={index})")
        continue

    # Scraped rows whose name starts with the same two characters.
    matching_row = df_exception[restaurant_prefixes == business_name[:2]]

    if not matching_row.empty:
        # Copy the first match's menu and flag the row as handled.
        df_temp.at[index, "메뉴"] = matching_row.iloc[0]["Menu"]
        df_temp.at[index, "Processed"] = "Yes"

In [24]:
# Write the updated restaurant table to a new CSV file, without the index
# column and with a UTF-8 byte-order mark ("utf-8-sig").
output_path = "restaurant_final.csv"
df_temp.to_csv(
    output_path,
    index=False,
    encoding="utf-8-sig",
)
