In [5]:
import json
import os
import tempfile
from io import BytesIO

import requests
from bs4 import BeautifulSoup
from fpdf import FPDF
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait



def _sibling_paragraph_text(soup, heading_tag, heading_text):
    """Return the stripped text of the <p> following a heading, or None.

    The heading is located by tag name and exact string; None is returned
    when either the heading or its sibling paragraph is absent.
    """
    heading = soup.find(heading_tag, string=heading_text)
    if heading is None:
        return None
    paragraph = heading.find_next_sibling('p')
    if paragraph is None:
        return None
    return paragraph.text.strip()


def _safe_filename(name):
    """Replace characters that are not allowed in file names with '_'."""
    return "".join("_" if ch in '\\/:*?"<>|' else ch for ch in name)


def get_hotel_data(hotel_name, driver_path=r'D:\Company\chromedriver.exe'):
    """Scrape details of the first Dineout search hit for a restaurant.

    The search page is fetched with ``requests``; the restaurant page itself
    is rendered with Selenium/Chrome. When menu image links are found they
    are handed to ``create_pdf_from_images`` and saved as
    ``"<name>'s menu.pdf"``.

    Args:
        hotel_name: Search string typed by the user.
        driver_path: Location of the chromedriver executable. Defaults to
            the path this script has always used.

    Returns:
        A dict with the keys ``name``, ``location``, ``cost_for_two``,
        ``rating``, ``type`` and ``moblie_number``. ``type`` is a bool
        telling whether the "TYPE" text mentions "Dineout Pay". Fields
        that cannot be found on the page are ``None`` (``type`` is
        ``False``). Returns ``None`` when the search fails, yields no
        result, or the restaurant page has no details section.

    Raises:
        Whatever Selenium raises while starting the browser, loading the
        page, or waiting (10 s) for the menu gallery; the browser is shut
        down before the exception propagates.
    """
    search_url = 'https://www.dineout.co.in/surat-restaurants'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
    }

    # Let requests build the query string so names containing spaces, '&',
    # '#', etc. are URL-encoded instead of corrupting the URL.
    try:
        response = requests.get(
            search_url,
            params={'search_str': hotel_name},
            headers=headers,
            timeout=15,
        )
    except requests.RequestException as exc:
        print(f"Failed to retrieve the webpage: {exc}")
        return None
    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Only the first search result is used.
    first_card = soup.find('div', class_='restnt-card-wrap-new')
    if first_card is None:
        print("No hotels found!")
        return None

    # The card's image div carries the relative link to the restaurant page.
    image_div = first_card.find('div', class_='img cursor')
    data_link = image_div.get('data-link') if image_div is not None else None
    if not data_link:
        print("No data-link found in the first object!")
        return None

    hotel_url = "https://www.dineout.co.in" + data_link

    # -------------------------------- Selenium for Dynamic Content --------------------------------
    service = Service(driver_path)
    driver = webdriver.Chrome(service=service)

    try:
        driver.get(hotel_url)

        # Wait for the menu gallery to load before reading the page source.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "demo-gallery"))
        )

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Collect the href of every link inside the menu gallery <ul>.
        menu_gallery = soup.find('ul', class_='d-flex demo-gallery')
        menu_links = []
        if menu_gallery is not None:
            menu_links = [
                link.get('href')
                for link in menu_gallery.find_all('a')
                if link.get('href')
            ]

        details_section = soup.find('div', class_='restnt-details_info')
        name_tag = details_section.find('h1') if details_section is not None else None
        if name_tag is None:
            print("Restaurant details not found on the hotel page!")
            return None
        name = name_tag.text.strip()

        # The cost block is split on '|' and the first part is kept; the
        # second space-separated token of that part is the amount.
        cost_for_two = None
        cost_div = details_section.find('div', class_='restnt-cost')
        if cost_div is not None:
            cost_text = cost_div.text.split('|')[0].strip()
            cost_parts = cost_text.split(' ')
            cost_for_two = cost_parts[1].strip() if len(cost_parts) > 1 else cost_text

        # Each of these may be absent; the helper yields None instead of
        # leaving a variable unbound or dereferencing None.
        type_text = _sibling_paragraph_text(soup, 'h4', "TYPE")
        moblie_number = _sibling_paragraph_text(soup, 'h5', "Call the restaurant")
        location = _sibling_paragraph_text(soup, 'h3', "Address:")

        # The rating div is matched on any class value containing 'rest-rating'.
        rating_div = soup.find('div', class_=lambda c: c and 'rest-rating' in c)
        rating = rating_div.text.strip() if rating_div is not None else None

        restaurant_details = {
            "name": name,
            "location": location,
            "cost_for_two": cost_for_two,
            "rating": rating,
            "type": "Dineout Pay" in (type_text or ""),
            # Key spelling is kept as-is: existing consumers read "moblie_number".
            "moblie_number": moblie_number,
        }

        if menu_links:
            file_menu_name = _safe_filename(name) + "'s menu.pdf"
            create_pdf_from_images(menu_links, file_menu_name)
        else:
            print("No menu images found; skipping PDF creation.")

        return restaurant_details

    finally:
        # Always shut the browser down, even when scraping fails.
        driver.quit()


def create_pdf_from_images(image_urls, output_file):
    """Download images and write them, one per page, into a single PDF.

    Each URL is fetched, re-encoded as a JPEG in a temporary directory and
    placed on its own page. A URL that cannot be downloaded or decoded is
    reported and skipped so the remaining images still end up in the PDF.

    Args:
        image_urls: Iterable of image URLs, in page order.
        output_file: Path of the PDF file to write.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)

    # The intermediate JPEGs live in a throwaway directory that is removed
    # when the block exits, so no temp_image_*.jpg files are left behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        for i, url in enumerate(image_urls, start=1):
            try:
                response = requests.get(url, timeout=30)
                response.raise_for_status()  # Raise an exception for HTTP errors

                img = Image.open(BytesIO(response.content))
                # JPEG cannot store alpha or palette modes (e.g. PNG menus);
                # convert those instead of losing the page.
                if img.mode not in ("L", "RGB", "CMYK"):
                    img = img.convert("RGB")

                img_path = os.path.join(tmp_dir, f"temp_image_{i}.jpg")
                img.save(img_path, "JPEG")

                pdf.add_page()
                pdf.image(img_path, x=10, y=10, w=190)  # Adjust dimensions as needed

            except Exception as e:
                # Deliberate best-effort: one bad image must not abort the PDF.
                print(f"Failed to process image {url}: {e}")

        # Write the PDF while the temporary images still exist.
        pdf.output(output_file)

    print(f"PDF saved as {output_file}")




In [4]:
# Ask the user for a restaurant name and look it up. The call is left as a
# bare expression so the notebook shows the returned dict as the cell output.
hotel_name = input("Enter the hotel name: ") # Ex. Zhingalala
get_hotel_data(hotel_name)

PDF saved as Zhingalala's menu.pdf


{'name': 'Zhingalala',
 'location': 'Luxuria Trade Hub, 7, Dumas Road Piplod Near Rundhnath Mahadev Mandir  South Surat 395007',
 'cost_for_two': '1,000',
 'rating': '4.3',
 'type': True,
 'moblie_number': '0261-2971918'}