In [5]:
!pip install selenium
!pip install --upgrade webdriver-manager

Collecting webdriver-manager
  Downloading webdriver_manager-4.0.1-py2.py3-none-any.whl (27 kB)
Collecting python-dotenv (from webdriver-manager)
  Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv, webdriver-manager
Successfully installed python-dotenv-1.0.1 webdriver-manager-4.0.1


In [15]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def _build_chrome_options():
    """Create the Chrome options object shared by every scraping call.

    The same two experimental options and three command-line arguments are
    applied as before; they are merely listed as data and applied in loops.
    """
    experimental_settings = (
        ("detach", True),
        ("excludeSwitches", ["enable-logging"]),
    )
    command_line_flags = (
        "--headless",
        "--no-sandbox",
        "--disable-dev-shm-usage",
    )

    options = Options()
    for option_name, option_value in experimental_settings:
        options.add_experimental_option(option_name, option_value)
    for flag in command_line_flags:
        options.add_argument(flag)
    return options


chrome_options = _build_chrome_options()

def _numeric_part(token):
    """Keep only the digits and decimal points of ``token`` ('37.00kcal' -> '37.00')."""
    return ''.join(ch for ch in token if ch.isdigit() or ch == '.')


def _parse_nutrient_text(text):
    """Turn the whitespace-separated text of one nutrient cell into a record.

    The first token is taken as the nutrient name, the second as the amount,
    and the last token as the percentage. The amount may be repeated in the
    text; only its first occurrence (the second token) is used.

    Raises:
        ValueError: if the text has fewer than two tokens.
    """
    parts = text.split()
    if len(parts) < 2:
        raise ValueError(f"expected at least a name and an amount, got {text!r}")

    # With only two tokens the last token IS the amount, so there is no
    # percentage to report; previously the amount was returned a second time
    # under 'percentage'.
    percentage_token = parts[-1] if len(parts) > 2 else ''

    return {
        'name': parts[0],
        'value_g': _numeric_part(parts[1]),
        'percentage': _numeric_part(percentage_token),
    }


def search_nutrient(input_value, result_timeout=10, detail_timeout=5, nutrient_count=5):
    """Search the food-safety nutrient site and scrape the first result.

    Opens the nutrient search page in a new Chrome session (configured by the
    module-level ``chrome_options``), submits ``input_value`` as the query,
    opens the first row of the result table and reads the nutrient cells of
    its detail page. The browser is always closed before returning or raising.

    Args:
        input_value: Text typed into the search box.
        result_timeout: Seconds to wait for the first search result to appear.
        detail_timeout: Seconds to wait for the detail page's first nutrient cell.
        nutrient_count: How many nutrient cells (1..nutrient_count) to read.

    Returns:
        ``{'dish_name': str, 'nutrients': list[dict]}`` where each nutrient dict
        has the string fields ``'name'``, ``'value_g'`` and ``'percentage'``.
        ``'value_g'`` is the amount with every non-numeric character stripped;
        NOTE(review): despite the key name the unit is whatever the page
        shows and is not necessarily grams -- confirm before doing arithmetic.
        Cells that cannot be located or parsed are reported on stdout and
        omitted from the list.

    Raises:
        Whatever ``WebDriverWait.until`` raises when a wait expires (Selenium
        documents this as ``TimeoutException``), plus any Selenium error from
        locating or interacting with the search box, button or first result.
    """
    first_name_xpath = '//*[@id="simpleDataBody"]/tr[1]/td[3]/a/em'
    first_link_css = '#simpleDataBody > tr:nth-child(1) > td:nth-child(3) > a:nth-child(1)'
    nutrient_xpath_template = '//*[@id="content"]/div[5]/div[1]/div/div[{}]'

    # Start a fresh browser session for this lookup.
    driver = webdriver.Chrome(options=chrome_options)

    try:
        driver.get("https://various.foodsafetykorea.go.kr/nutrient/")

        # Type the query and submit it.
        driver.find_element(By.ID, "searchText").send_keys(input_value)
        driver.find_element(By.CLASS_NAME, "btn").click()

        # Waiting for the results is the slow step. The wait returns the
        # element it found, so it is used directly instead of being looked
        # up a second time.
        dish_name_element = WebDriverWait(driver, result_timeout).until(
            EC.presence_of_element_located((By.XPATH, first_name_xpath))
        )
        dish_name = dish_name_element.text

        # Open the detail page of the first result and wait for its first
        # nutrient cell to be present.
        driver.find_element(By.CSS_SELECTOR, first_link_css).click()
        WebDriverWait(driver, detail_timeout).until(
            EC.presence_of_element_located((By.XPATH, nutrient_xpath_template.format(1)))
        )

        nutrient_data = []
        for i in range(1, nutrient_count + 1):
            try:
                nutrient_element = driver.find_element(
                    By.XPATH, nutrient_xpath_template.format(i)
                )
                nutrient_data.append(_parse_nutrient_text(nutrient_element.text))
            except Exception as e:
                # Deliberately best-effort: one missing or malformed cell must
                # not discard the nutrients that were read successfully.
                print(f"영양성분 {i} 추출 오류: {e}")

        return {'dish_name': dish_name, 'nutrients': nutrient_data}

    finally:
        # Always release the browser, even when a lookup or wait failed.
        driver.quit()

# Interactive driver: prompt for a food name, scrape the first matching entry
# and print the dish name followed by one nutrient record per line.
if __name__ == "__main__":
    # The prompt text asks (in Korean) for the food name to search for.
    input_value = input("검색할 식품명을 입력하세요: ")
    # Any exception raised by search_nutrient propagates from here uncaught.
    result = search_nutrient(input_value)
    print(f"Dish Name: {result['dish_name']}")
    # Each record is the dict built by search_nutrient, printed as-is.
    for r in result['nutrients']:
        print(r)


검색할 식품명을 입력하세요: 김치
Dish Name: 갓김치
{'name': '에너지', 'value_g': '37.00', 'percentage': '1.85'}
{'name': '나트륨', 'value_g': '720.00', 'percentage': '36.00'}
{'name': '탄수화물', 'value_g': '7.07', 'percentage': '2.18'}
{'name': '총당류', 'value_g': '2.46', 'percentage': '2.46'}
{'name': '지질', 'value_g': '0.40', 'percentage': '0.74'}
