In [25]:
import os
import json
import time
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

def init_driver():
    """Build and return a headless Chrome WebDriver.

    The chromedriver binary is resolved through ``ChromeDriverManager``
    and handed to Selenium via a ``Service`` object.

    Returns:
        The ``webdriver.Chrome`` instance; the caller is responsible for
        calling ``quit()`` on it.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless")

    chromedriver_path = ChromeDriverManager().install()
    chrome_service = Service(chromedriver_path)
    return webdriver.Chrome(service=chrome_service, options=chrome_options)

def scrape_page(driver, url):
    """Collect restaurant names and detail-page URLs from one listing page.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Address of the listing page to load.

    Returns:
        A list of ``{'name': ..., 'url': ...}`` dicts, one for each
        restaurant title heading that contains a link with a non-empty
        ``href``. Headings without a usable link are skipped.
    """
    from urllib.parse import urljoin

    driver.get(url)
    # Fixed pause before reading page_source (presumably to let the page
    # finish rendering -- confirm whether an explicit wait is needed).
    time.sleep(1)
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    restaurants = soup.find_all('h3', class_='card__menu-content--title pl-text pl-big js-match-height-title')
    restaurant_data = []

    for restaurant in restaurants:
        a_tag = restaurant.find('a')
        if a_tag is None:
            continue

        # An anchor without href would raise KeyError on a_tag['href'].
        href = a_tag.get('href')
        if not href:
            continue

        restaurant_data.append({
            'name': a_tag.text.strip(),
            # urljoin yields the same result as plain concatenation for
            # root-relative hrefs, and also copes with absolute hrefs or
            # hrefs lacking a leading slash.
            'url': urljoin("https://guide.michelin.com", href),
        })

    return restaurant_data

def scrape_restaurant_details(driver, restaurant):
    """Load a restaurant's detail page and extract phone and e-mail.

    Args:
        driver: Selenium WebDriver used to load the page.
        restaurant: Dict with at least ``'name'`` and ``'url'`` keys.
            It is updated in place with ``'phone'`` and ``'email'``.

    Returns:
        The same ``restaurant`` dict, with ``'phone'`` and ``'email'``
        set to the extracted strings or ``None`` when not found.
    """
    from urllib.parse import unquote

    driver.get(restaurant['url'])
    # Fixed pause before reading page_source (presumably to let the page
    # finish rendering -- confirm whether an explicit wait is needed).
    time.sleep(1)

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    email = None
    phone = None

    print(f"Scraping details for {restaurant['name']} at {restaurant['url']}")

    # NOTE(review): this positional selector is tied to the current page
    # layout; any markup change will make it return None.
    contact_section = soup.select_one('body > main > div:nth-child(4) > div:nth-child(1) > div > div:nth-child(2) > section:nth-child(4) > div:nth-child(2) > div > div > div:nth-child(1) > div > div')
    if contact_section:
        # The first <span> inside the contact section is taken as the phone.
        phone_tag = contact_section.select_one('span')
        if phone_tag:
            phone = phone_tag.text.strip()
        else:
            print(f"No phone number found for {restaurant['name']}")

        # Only hrefs that actually use the mailto scheme count, not hrefs
        # that merely contain the text "mailto:" somewhere.
        email_tag = contact_section.find(
            'a',
            href=lambda href: href and href.strip().lower().startswith("mailto:"),
        )
        if email_tag:
            address = email_tag['href'].strip()[len("mailto:"):]
            # Drop "?subject=..." style parameters and undo percent-encoding
            # so only the bare address is stored.
            email = unquote(address.split('?', 1)[0]).strip() or None
        if email is None:
            print(f"No email found for {restaurant['name']}")
    else:
        print(f"No contact section found for {restaurant['name']}")

    print(f"Details: Phone: {phone}, Email: {email}")

    restaurant['phone'] = phone
    restaurant['email'] = email

    return restaurant


def scrape_all_pages(start_url, total_pages):
    """Scrape every listing page, then every restaurant's detail page.

    Args:
        start_url: Listing URL without the ``?page=`` query parameter.
        total_pages: Number of listing pages to visit (pages 1..total_pages).

    Returns:
        A list of restaurant dicts with ``'name'``, ``'url'``, ``'phone'``
        and ``'email'`` keys.

    The browser is always shut down, even when a page load or parse
    raises, so no headless Chrome process is left behind.
    """
    driver = init_driver()
    all_restaurants = []

    try:
        # Pass 1: collect name/URL pairs from the paginated listing.
        for page_number in range(1, total_pages + 1):
            url = f"{start_url}?page={page_number}"
            print(f"Scraping page: {url}")
            all_restaurants.extend(scrape_page(driver, url))
            time.sleep(1)  # Reduced wait time for polite scraping

        # Pass 2: visit each detail page; the dict is updated in place,
        # so the return value does not need to be stored.
        for restaurant in all_restaurants:
            print(f"Scraping details for: {restaurant['name']}")
            scrape_restaurant_details(driver, restaurant)
            time.sleep(1)  # Reduced wait time for polite scraping
    finally:
        driver.quit()

    return all_restaurants


# Listing page for the Michelin Guide's Denmark restaurant selection.
start_url = "https://guide.michelin.com/dk/en/selection/denmark/restaurants"

# Number of paginated listing pages to walk through.
total_pages = 7

# Run the full scrape (listing pages first, then each detail page).
all_restaurants = scrape_all_pages(start_url, total_pages)

# Results are written as JSON into the current user's home directory.
writable_directory = os.path.expanduser("~/")
json_file_path = os.path.join(
    writable_directory,
    'michelin_restaurants_with_details.json',
)
data = {'restaurants': all_restaurants}

with open(json_file_path, 'w') as json_file:
    json.dump(data, json_file, indent=4)

print(f"All restaurant details have been saved to {json_file_path}")


Scraping page: https://guide.michelin.com/dk/en/selection/denmark/restaurants?page=1
Scraping page: https://guide.michelin.com/dk/en/selection/denmark/restaurants?page=2
Scraping page: https://guide.michelin.com/dk/en/selection/denmark/restaurants?page=3
Scraping page: https://guide.michelin.com/dk/en/selection/denmark/restaurants?page=4
Scraping page: https://guide.michelin.com/dk/en/selection/denmark/restaurants?page=5
Scraping page: https://guide.michelin.com/dk/en/selection/denmark/restaurants?page=6
Scraping page: https://guide.michelin.com/dk/en/selection/denmark/restaurants?page=7
Scraping details for: Trio
Scraping details for Trio at https://guide.michelin.com/dk/en/capital-region/copenhagen/restaurant/trio
No email found for Trio
Details: Phone: +45 44 22 74 74, Email: None
Scraping details for: Restaurant Aure
Scraping details for Restaurant Aure at https://guide.michelin.com/dk/en/capital-region/copenhagen/restaurant/restaurant-aure
No email found for Restaurant Aure
Detail