In [14]:
import csv
import json
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


options = Options()
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

driver = webdriver.Chrome(options=options)

# Init list
restaurants = []


url = "https://www.zomato.com/roorkee/punjabi-dhaba-p-d-roorkee-locality/order"
driver.get(url)
print("Loading page, waiting for content...")
time.sleep(8)  


driver.save_screenshot("zomato_page.png")
print(f"Screenshot saved as zomato_page.png")

restaurant_data = {}


try:
    name_element = driver.find_element(By.TAG_NAME, "h1")
    restaurant_data["name"] = name_element.text.strip()
    print(f"Restaurant name: {restaurant_data['name']}")
except Exception as e:
    restaurant_data["name"] = "Unknown"
    print(f"Error getting restaurant name: {e}")


page_source = driver.page_source
with open("page_source.html", "w", encoding="utf-8") as f:
    f.write(page_source)
print("Page source saved to page_source.html")


rating_selectors = [
    "[data-testid='header-avgrating']",
    ".sc-fFubgz",
    ".sc-1q7bklc-1",
    "//span[contains(@class, 'rating')]",
    "//div[contains(@class, 'rating')]"
]

restaurant_data["rating"] = "N/A"
for selector in rating_selectors:
    try:
        if selector.startswith("//"):
            rating_element = driver.find_element(By.XPATH, selector)
        else:
            rating_element = driver.find_element(By.CSS_SELECTOR, selector)
        
        rating_text = rating_element.text.strip()
        if rating_text and any(c.isdigit() for c in rating_text):
            restaurant_data["rating"] = rating_text
            print(f"Rating found: {rating_text}")
            break
    except:
        continue


cuisine_selectors = [
    "[data-testid='header-cuisines']",
    ".sc-1s0saks-15",
    "//span[contains(text(), 'North Indian') or contains(text(), 'Chinese') or contains(text(), 'South Indian')]"
]

restaurant_data["cuisines"] = "N/A"
for selector in cuisine_selectors:
    try:
        if selector.startswith("//"):
            cuisine_element = driver.find_element(By.XPATH, selector)
        else:
            cuisine_element = driver.find_element(By.CSS_SELECTOR, selector)
        
        cuisine_text = cuisine_element.text.strip()
        if cuisine_text:
            restaurant_data["cuisines"] = cuisine_text
            print(f"Cuisines found: {cuisine_text}")
            break
    except:
        continue

# Menu approach
restaurant_data["menu_categories"] = []

# Print all h2, h3, h4 elements to help identify menu categories
print("\nPossible menu categories (h2, h3 elements):")
headers = driver.find_elements(By.XPATH, "//h2 | //h3")
for header in headers:
    try:
        print(f"Header text: '{header.text.strip()}'")
    except:
        pass


print("\nLooking for menu items...")


food_item_candidates = driver.find_elements(By.XPATH, 
    "//div[.//h4 and .//span[contains(text(), '₹')]]")

if food_item_candidates:
    print(f"Found {len(food_item_candidates)} potential food items using approach 1")
    

    all_items = []
    category_map = {} 
    
    for item in food_item_candidates:
        try:
            
            category_element = None
            try:
                
                category_element = driver.execute_script("""
                    var element = arguments[0];
                    while (element.previousElementSibling) {
                        element = element.previousElementSibling;
                        if (element.tagName === 'H2' || element.tagName === 'H3') {
                            return element;
                        }
                    }
                    return null;
                """, item)
            except:
                pass
                
            category_name = "Uncategorized"
            if category_element and category_element.text.strip():
                category_name = category_element.text.strip()
                
            # Get item details
            name = item.find_element(By.XPATH, ".//h4").text.strip()
            
            price = "N/A"
            price_elements = item.find_elements(By.XPATH, ".//span[contains(text(), '₹')]")
            if price_elements:
                price = price_elements[0].text.strip()
                
            description = ""
            desc_elements = item.find_elements(By.XPATH, ".//p")
            if desc_elements:
                description = desc_elements[0].text.strip()
                
            # Check if vegetarian
            is_veg = False
            try:
                veg_icon = block.find_element(By.XPATH, ".//div[@data-testid='veg-icon']")
                if veg_icon:
                    is_veg = True
            except:
   
                if "paneer" in name.lower() or ("veg" in name.lower() and "non-veg" not in name.lower()):
                    is_veg = True
                
            item_data = {
                "name": name,
                "price": price,
                "description": description,
                "is_vegetarian": is_vegetarian
            }
            
            # Group by category
            if category_name not in category_map:
                category_map[category_name] = []
            category_map[category_name].append(item_data)
            
            print(f"Found item: {name} - {price} (Category: {category_name})")
            
        except Exception as e:
            print(f"Error processing food item: {e}")
    
    # Add categories to restaurant data
    for category_name, items in category_map.items():
        restaurant_data["menu_categories"].append({
            "category_name": category_name,
            "items": items
        })
        
else:
    print("No food items found with approach 1. Trying approach 2...")
    
    
    price_elements = driver.find_elements(By.XPATH, "//span[contains(text(), '₹')]")
    food_items = []
    
    for price_element in price_elements:
        try:
            
            item_container = price_element.find_element(By.XPATH, "./ancestor::div[.//h4]")
            
            name = item_container.find_element(By.XPATH, ".//h4").text.strip()
            price = price_element.text.strip()
            
            description = ""
            desc_elements = item_container.find_elements(By.XPATH, ".//p")
            if desc_elements:
                description = desc_elements[0].text.strip()
                
            food_items.append({
                "name": name,
                "price": price,
                "description": description
            })
            
            print(f"Found item with approach 2: {name} - {price}")
            
        except Exception as e:
            continue
    
    if food_items:
        restaurant_data["menu_categories"].append({
            "category_name": "All Items",
            "items": food_items
        })
    else:
        print("No food items found with any approach.")


restaurants.append(restaurant_data)


total_items = sum(len(category["items"]) for category in restaurant_data["menu_categories"])
print(f"\nSummary for {restaurant_data['name']}:")
print(f"Rating: {restaurant_data['rating']}")
print(f"Cuisines: {restaurant_data['cuisines']}")
print(f"Menu categories: {len(restaurant_data['menu_categories'])}")
print(f"Total menu items: {total_items}")


with open("zomato_restaurants_cleaned.json", "w", encoding="utf-8") as file:
    json.dump(restaurants, file, indent=4, ensure_ascii=False)

# Save to CSV
with open("zomato_restaurants_cleaned.csv", "w", newline="", encoding="utf-8") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["Name", "Rating", "Cuisines", "Category", "Item Name", "Price", "Description", "Vegetarian"])

    for restaurant in restaurants:
        name = restaurant.get("name", "")
        rating = restaurant.get("rating", "")
        cuisines = restaurant.get("cuisines", "")
        
        for category in restaurant.get("menu_categories", []):
            category_name = category.get("category_name", "")
            
            for item in category.get("items", []):
                writer.writerow([
                    name,
                    rating,
                    cuisines,
                    category_name,
                    item.get("name", ""),
                    item.get("price", ""),
                    item.get("description", ""),
                    "Yes" if item.get("is_vegetarian", False) else "No"
                ])

driver.quit()
print(" Scraping completed and files saved.")

Loading page, waiting for content...
Screenshot saved as zomato_page.png
Restaurant name: Punjabi Dhaba - P.D
Page source saved to page_source.html
Rating found: 4.3

Possible menu categories (h2, h3 elements):
Header text: 'Order Online'
Header text: 'Reviews'
Header text: 'Order Online'
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''
Header text: ''

Looking for menu items...
Found 1688 potential food items using approach 1
Found item

In [17]:
import pandas as pd
punjd = pd.read_csv('zomato_restaurants_cleaned.csv')

punjd.head()

Unnamed: 0,Name,Rating,Cuisines,Category,Item Name,Price,Description,Vegetarian
0,Punjabi Dhaba - P.D,4.3,,Uncategorized,IPL Party Packs,₹1600,,No
1,Punjabi Dhaba - P.D,4.3,,Uncategorized,IPL Party Packs,₹1600,,No
2,Punjabi Dhaba - P.D,4.3,,Uncategorized,IPL Party Packs,₹1600,Order Online,No
3,Punjabi Dhaba - P.D,4.3,,Uncategorized,Butter Chicken [Full] with Afghani Chicken [Fu...,₹1600,(PACK OF 3 VARIETIES),No
4,Punjabi Dhaba - P.D,4.3,,Uncategorized,Butter Chicken [Full] with Afghani Chicken [Fu...,₹1600,A pack of 3 varieties of chicken which will be...,No


In [20]:
punjd.shape

(502, 8)

In [19]:
punjd = punjd.drop_duplicates()


In [21]:
punjd.head()

Unnamed: 0,Name,Rating,Cuisines,Category,Item Name,Price,Description,Vegetarian
0,Punjabi Dhaba - P.D,4.3,,Uncategorized,IPL Party Packs,₹1600,,No
2,Punjabi Dhaba - P.D,4.3,,Uncategorized,IPL Party Packs,₹1600,Order Online,No
3,Punjabi Dhaba - P.D,4.3,,Uncategorized,Butter Chicken [Full] with Afghani Chicken [Fu...,₹1600,(PACK OF 3 VARIETIES),No
4,Punjabi Dhaba - P.D,4.3,,Uncategorized,Butter Chicken [Full] with Afghani Chicken [Fu...,₹1600,A pack of 3 varieties of chicken which will be...,No
8,Punjabi Dhaba - P.D,4.3,,Uncategorized,Butter Chicken [Full] with Afghani Chicken [Fu...,₹1600,,No


In [22]:
punjd

Unnamed: 0,Name,Rating,Cuisines,Category,Item Name,Price,Description,Vegetarian
0,Punjabi Dhaba - P.D,4.3,,Uncategorized,IPL Party Packs,₹1600,,No
2,Punjabi Dhaba - P.D,4.3,,Uncategorized,IPL Party Packs,₹1600,Order Online,No
3,Punjabi Dhaba - P.D,4.3,,Uncategorized,Butter Chicken [Full] with Afghani Chicken [Fu...,₹1600,(PACK OF 3 VARIETIES),No
4,Punjabi Dhaba - P.D,4.3,,Uncategorized,Butter Chicken [Full] with Afghani Chicken [Fu...,₹1600,A pack of 3 varieties of chicken which will be...,No
8,Punjabi Dhaba - P.D,4.3,,Uncategorized,Butter Chicken [Full] with Afghani Chicken [Fu...,₹1600,,No
...,...,...,...,...,...,...,...,...
1669,Punjabi Dhaba - P.D,4.3,,Uncategorized,Handi Dal [Full],₹250,VEG MAINS,No
1670,Punjabi Dhaba - P.D,4.3,,Uncategorized,Handi Dal [Full],₹250,,No
1676,Punjabi Dhaba - P.D,4.3,,Uncategorized,Handi Paneer [Full],₹299,,No
1682,Punjabi Dhaba - P.D,4.3,,Uncategorized,Paneer Tikka Masala [Full],₹299,Paneer tikka masala is an Indian dish of marin...,No


In [23]:
duplicate_count = punjd.duplicated().sum()
print("Number of duplicate rows:", duplicate_count)


Number of duplicate rows: 0


In [24]:
punjd = punjd.drop_duplicates(subset=['Item Name', 'Price'])


In [25]:
punjd.shape

(251, 8)

In [26]:
punjd

Unnamed: 0,Name,Rating,Cuisines,Category,Item Name,Price,Description,Vegetarian
0,Punjabi Dhaba - P.D,4.3,,Uncategorized,IPL Party Packs,₹1600,,No
3,Punjabi Dhaba - P.D,4.3,,Uncategorized,Butter Chicken [Full] with Afghani Chicken [Fu...,₹1600,(PACK OF 3 VARIETIES),No
10,Punjabi Dhaba - P.D,4.3,,Uncategorized,Dal Makhani [Full] with Shahi Paneer [Full] an...,₹600,3 Varieties main-coarse dish.,No
16,Punjabi Dhaba - P.D,4.3,,Uncategorized,Malai Chaap [Full] with Paneer Tikka and Hara ...,₹600,,No
22,Punjabi Dhaba - P.D,4.3,,Uncategorized,Boneless Chilli Chicken [Full] with Chicken Ma...,₹1600,Boneless Chilli Chicken [Full]+Chicken Malai T...,No
...,...,...,...,...,...,...,...,...
1657,Punjabi Dhaba - P.D,4.3,,Uncategorized,Chicken Changezi [Full],₹700,"Chicken changezi is a rich, mild, creamy gravy...",No
1663,Punjabi Dhaba - P.D,4.3,,Uncategorized,Tawa Chicken [Full],₹700,Tawa chicken is a dish of chunks of roasted ma...,No
1669,Punjabi Dhaba - P.D,4.3,,Uncategorized,Handi Dal [Full],₹250,VEG MAINS,No
1676,Punjabi Dhaba - P.D,4.3,,Uncategorized,Handi Paneer [Full],₹299,,No


In [28]:

punjd.loc[:, 'Item Name Lower'] = punjd['Item Name'].str.lower()


non_veg_keywords = ['chicken', 'mutton', 'fish', 'egg', 'prawn', 'keema', 'meat', 'afghani']
punjd.loc[:, 'Vegetarian'] = punjd['Item Name Lower'].apply(
    lambda x: 'No' if any(keyword in x for keyword in non_veg_keywords) else 'Yes'
)


In [29]:
punjd

Unnamed: 0,Name,Rating,Cuisines,Category,Item Name,Price,Description,Vegetarian,Item Name Lower
0,Punjabi Dhaba - P.D,4.3,,Uncategorized,IPL Party Packs,₹1600,,Yes,ipl party packs
3,Punjabi Dhaba - P.D,4.3,,Uncategorized,Butter Chicken [Full] with Afghani Chicken [Fu...,₹1600,(PACK OF 3 VARIETIES),No,butter chicken [full] with afghani chicken [fu...
10,Punjabi Dhaba - P.D,4.3,,Uncategorized,Dal Makhani [Full] with Shahi Paneer [Full] an...,₹600,3 Varieties main-coarse dish.,Yes,dal makhani [full] with shahi paneer [full] an...
16,Punjabi Dhaba - P.D,4.3,,Uncategorized,Malai Chaap [Full] with Paneer Tikka and Hara ...,₹600,,Yes,malai chaap [full] with paneer tikka and hara ...
22,Punjabi Dhaba - P.D,4.3,,Uncategorized,Boneless Chilli Chicken [Full] with Chicken Ma...,₹1600,Boneless Chilli Chicken [Full]+Chicken Malai T...,No,boneless chilli chicken [full] with chicken ma...
...,...,...,...,...,...,...,...,...,...
1657,Punjabi Dhaba - P.D,4.3,,Uncategorized,Chicken Changezi [Full],₹700,"Chicken changezi is a rich, mild, creamy gravy...",No,chicken changezi [full]
1663,Punjabi Dhaba - P.D,4.3,,Uncategorized,Tawa Chicken [Full],₹700,Tawa chicken is a dish of chunks of roasted ma...,No,tawa chicken [full]
1669,Punjabi Dhaba - P.D,4.3,,Uncategorized,Handi Dal [Full],₹250,VEG MAINS,Yes,handi dal [full]
1676,Punjabi Dhaba - P.D,4.3,,Uncategorized,Handi Paneer [Full],₹299,,Yes,handi paneer [full]


In [30]:
punjd.drop(columns=['Item Name Lower'], inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  punjd.drop(columns=['Item Name Lower'], inplace=True)


In [31]:
punjd


Unnamed: 0,Name,Rating,Cuisines,Category,Item Name,Price,Description,Vegetarian
0,Punjabi Dhaba - P.D,4.3,,Uncategorized,IPL Party Packs,₹1600,,Yes
3,Punjabi Dhaba - P.D,4.3,,Uncategorized,Butter Chicken [Full] with Afghani Chicken [Fu...,₹1600,(PACK OF 3 VARIETIES),No
10,Punjabi Dhaba - P.D,4.3,,Uncategorized,Dal Makhani [Full] with Shahi Paneer [Full] an...,₹600,3 Varieties main-coarse dish.,Yes
16,Punjabi Dhaba - P.D,4.3,,Uncategorized,Malai Chaap [Full] with Paneer Tikka and Hara ...,₹600,,Yes
22,Punjabi Dhaba - P.D,4.3,,Uncategorized,Boneless Chilli Chicken [Full] with Chicken Ma...,₹1600,Boneless Chilli Chicken [Full]+Chicken Malai T...,No
...,...,...,...,...,...,...,...,...
1657,Punjabi Dhaba - P.D,4.3,,Uncategorized,Chicken Changezi [Full],₹700,"Chicken changezi is a rich, mild, creamy gravy...",No
1663,Punjabi Dhaba - P.D,4.3,,Uncategorized,Tawa Chicken [Full],₹700,Tawa chicken is a dish of chunks of roasted ma...,No
1669,Punjabi Dhaba - P.D,4.3,,Uncategorized,Handi Dal [Full],₹250,VEG MAINS,Yes
1676,Punjabi Dhaba - P.D,4.3,,Uncategorized,Handi Paneer [Full],₹299,,Yes


In [32]:
punjd.to_csv('zomato_restaurants_processed.csv', index=False)



In [38]:
import csv
import json
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException

def scrape_zomato_restaurant(driver, url):
    """
    Scrape a single Zomato restaurant menu
    
    Args:
        driver: Selenium WebDriver instance
        url: Restaurant URL to scrape
        
    Returns:
        Dictionary containing restaurant data
    """
    
    driver.get(url)
    print(f"\nLoading page: {url}")
    print("Waiting for content...")
    time.sleep(8)  

    restaurant_data = {}

    # Restaurant name
    try:
        name_element = driver.find_element(By.TAG_NAME, "h1")
        restaurant_data["name"] = name_element.text.strip()
        print(f"Restaurant name: {restaurant_data['name']}")
    except Exception as e:
        restaurant_data["name"] = "Unknown"
        print(f"Error getting restaurant name: {e}")

    # Restaurant rating
    restaurant_data["rating"] = "N/A"
    try:
        rating_element = driver.find_element(By.CSS_SELECTOR, "[data-testid='header-avgrating']")
        restaurant_data["rating"] = rating_element.text.strip()
    except:
        pass

    # Cuisines
    restaurant_data["cuisines"] = "N/A"
    try:
        cuisine_element = driver.find_element(By.CSS_SELECTOR, "[data-testid='header-cuisines']")
        restaurant_data["cuisines"] = cuisine_element.text.strip()
    except:
        pass

    
    all_headings = driver.find_elements(By.XPATH, "//h4")
    print(f"All h4 headings found: {len(all_headings)}")

    
    category_candidates = []
    try:
        
        candidate_headers = driver.find_elements(By.XPATH, "//h4[not(ancestor::div[contains(@class, 'menu-item') or contains(@class, 'sc-1s0saks-17')])]")
        
        for header in candidate_headers:
            header_text = header.text.strip()
            if header_text and "₹" not in header_text:  
                try:
                    parent = header.find_element(By.XPATH, "./..")
                    category_candidates.append(header_text)
                except:
                    pass
    except Exception as e:
        print(f"Error finding category headers: {e}")

   
    if not category_candidates:
        print("Using backup category detection...")
        category_elements = driver.find_elements(By.CSS_SELECTOR, "section h4")
        category_candidates = [el.text.strip() for el in category_elements 
                               if el.text.strip() and not el.text.strip().startswith("₹")]

    
    if not category_candidates:
        category_candidates = ["Menu Items"]

    print(f"Potential categories found: {category_candidates}")

    
    food_blocks = driver.find_elements(By.XPATH, "//div[.//h4 and .//span[contains(text(), '₹')]]")
    print(f"Found {len(food_blocks)} food item blocks.")

    restaurant_data["menu_categories"] = []

    
    current_category = category_candidates[0] if category_candidates else "Uncategorized"
    category_map = {}

    
    for cat in category_candidates:
        category_map[cat] = []

    print("Processing food items...")
    for block in food_blocks:
        try:
            
            previous_elements = driver.execute_script("""
                let elements = [];
                let elem = arguments[0].previousElementSibling;
                while(elem && elements.length < 3) {  // Check a few siblings back
                    elements.push({tag: elem.tagName.toLowerCase(), text: elem.textContent.trim()});
                    elem = elem.previousElementSibling;
                }
                return elements;
            """, block)
            
            for elem in previous_elements:
                if elem['tag'] == 'h4' and elem['text'] in category_candidates:
                    current_category = elem['text']
                    break
            
            
            name = block.find_element(By.XPATH, ".//h4").text.strip()
            price_elem = block.find_element(By.XPATH, ".//span[contains(text(), '₹')]")
            price = price_elem.text.strip()

            description = ""
            desc_elements = block.find_elements(By.XPATH, ".//p")
            if desc_elements:
                description = desc_elements[0].text.strip()


            is_vegetarian = False
            try:
                
                veg_selectors = [
                    ".//div[@data-testid='veg-icon']",
                    ".//div[contains(@class, 'veg-icon')]",
                    ".//img[contains(@alt, 'veg')]",
                    ".//div[contains(@class, 'sc-') and @aria-label='vegetarian']"
                ]
                
                for selector in veg_selectors:
                    veg_elements = block.find_elements(By.XPATH, selector)
                    if veg_elements:
                        is_vegetarian = True
                        break
                        
               
                if not is_vegetarian:
                    if "paneer" in name.lower() or ("veg" in name.lower() and "non-veg" not in name.lower()):
                        is_vegetarian = True
            except Exception as ve:
                print(f"Error detecting vegetarian status: {ve}")

            item_data = {
                "name": name,
                "price": price,
                "description": description,
                "is_vegetarian": is_vegetarian
            }

            # Grouping
            if current_category not in category_map:
                category_map[current_category] = []
            category_map[current_category].append(item_data)

        except Exception as e:
            print(f"Error processing food block: {e}")

    
    for category, items in category_map.items():
        if items:  
            restaurant_data["menu_categories"].append({
                "category_name": category,
                "items": items
            })

  
    total_items = sum(len(cat["items"]) for cat in restaurant_data["menu_categories"])
    print(f"\nSummary for {restaurant_data['name']}:")
    print(f"Rating: {restaurant_data['rating']}")
    print(f"Cuisines: {restaurant_data['cuisines']}")
    print(f"Menu Categories: {len(restaurant_data['menu_categories'])}")
    print(f"Total Items: {total_items}")
    
  
    return restaurant_data

def main():
    # List of restaurant URLs to scrape
    restaurant_urls = [
        "https://www.zomato.com/roorkee/hotel-prakash-restaurant-roorkee-locality/order",
        "https://www.zomato.com/roorkee/hungry-point-roorkee-locality/order",
        "https://www.zomato.com/roorkee/baap-of-rolls-roorkee-locality/order",
        "https://www.zomato.com/roorkee/the-cook-house-roorkee-locality/order",
        "https://www.zomato.com/roorkee/tanishas-restaurant-royal-hyderabadi-biryani-roorkee-locality/order",
        "https://www.zomato.com/roorkee/bhalla-vaishav-dhaba-roorkee-locality/order",
        "https://www.zomato.com/roorkee/foodbay-roorkee-locality/order",
        "https://www.zomato.com/roorkee/punjabi-dhaba-p-d-roorkee-locality/order",
        # Add more URLs here
        
    ]
    
   
    options = Options()
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
    
   
    driver = webdriver.Chrome(options=options)
    
  
    all_restaurants = []
    
   
    for url in restaurant_urls:
        try:
            restaurant_data = scrape_zomato_restaurant(driver, url)
            all_restaurants.append(restaurant_data)
            # Add a short delay between requests to avoid getting blocked
            time.sleep(3)
        except Exception as e:
            print(f"Error processing restaurant at {url}: {e}")
    
   
    driver.quit()
    
    
    with open("zomato_all_restaurants.json", "w", encoding="utf-8") as f:
        json.dump(all_restaurants, f, indent=4, ensure_ascii=False)
    
    # Save all data to a single CSV file
    with open("zomato_all_restaurants.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["Name", "Rating", "Cuisines", "Category", "Item Name", "Price", "Description", "Vegetarian"])
        
        for restaurant in all_restaurants:
            for cat in restaurant.get("menu_categories", []):
                for item in cat.get("items", []):
                    writer.writerow([
                        restaurant.get("name", "Unknown"),
                        restaurant.get("rating", "N/A"),
                        restaurant.get("cuisines", "N/A"),
                        cat.get("category_name", "Uncategorized"),
                        item.get("name", ""),
                        item.get("price", ""),
                        item.get("description", ""),
                        "Yes" if item.get("is_vegetarian", False) else "No"
                    ])
    
    print(f"\n Scraping completed for {len(all_restaurants)} restaurants.")
    print(f" Data saved to zomato_all_restaurants.json and zomato_all_restaurants.csv")

if __name__ == "__main__":
    main()


Loading page: https://www.zomato.com/roorkee/hotel-prakash-restaurant-roorkee-locality/order
Waiting for content...
Restaurant name: Hotel Prakash & Restaurant
All h4 headings found: 189
Potential categories found: ['Breakfast Specials', 'Bun Butter', 'Veg Sandwich', 'Butter Toast', 'Butter Toast with Jam', 'Mix Veg Grilled Sandwich', 'Cheese Sandwich', 'Cheese Grilled Sandwich', 'Pav Bhaji', 'Poori Bhaji [4 Poori]', 'Chana Bhatura [2 Pieces]', 'Paneer Chana Bhatura', 'Starters', 'Veg Seekh Kabab', 'Dahi Kabab', 'Hara Bhara Kabab', 'Paneer Hariyali Tikka', 'Paneer Tikka', 'South Indian', 'Idli [1 Piece]', '1 Vada', 'Idli Sambar [Home Style Preparation]', 'Vada Sambar [2 Vada]', 'Idli Sambar [2 Pieces]', 'Mini Dosa', 'Plain Dosa', 'Masala Dosa', 'Onion Masala Dosa', 'Butter Masala Dosa', 'Paneer Masala Dosa', 'Rava Butter Masala Dosa', 'Special Masala Dosa', 'Mysore Masala Dosa', 'Onion Tomato Uttapam', 'Paneer Uttapam', 'Mixed Uttapam', 'Chinese Chopsuey Dosa', 'American Delight Dosa'

In [39]:
all=pd.read_csv('zomato_all_restaurants.csv')
all.shape

(8338, 8)

In [40]:
all.head()

Unnamed: 0,Name,Rating,Cuisines,Category,Item Name,Price,Description,Vegetarian
0,Hotel Prakash & Restaurant,,,Breakfast Specials,Breakfast Specials,₹70,,No
1,Hotel Prakash & Restaurant,,,Breakfast Specials,Breakfast Specials,₹70,,No
2,Hotel Prakash & Restaurant,,,Breakfast Specials,Breakfast Specials,₹70,Order Online,No
3,Hotel Prakash & Restaurant,,,Breakfast Specials,Bun Butter,₹70,BREAKFAST,No
4,Hotel Prakash & Restaurant,,,Breakfast Specials,Bun Butter,₹70,Bun and butter.,No


In [41]:
all = all.drop_duplicates(subset=['Name','Item Name', 'Price'])

In [42]:
all.shape

(1340, 8)

In [43]:
all.head()

Unnamed: 0,Name,Rating,Cuisines,Category,Item Name,Price,Description,Vegetarian
0,Hotel Prakash & Restaurant,,,Breakfast Specials,Breakfast Specials,₹70,,No
3,Hotel Prakash & Restaurant,,,Breakfast Specials,Bun Butter,₹70,BREAKFAST,No
10,Hotel Prakash & Restaurant,,,Breakfast Specials,Veg Sandwich,₹100,Vegetarian sandwich consisting of vegetable fi...,Yes
16,Hotel Prakash & Restaurant,,,Breakfast Specials,Butter Toast,₹100,Toast with butter.,No
22,Hotel Prakash & Restaurant,,,Breakfast Specials,Butter Toast with Jam,₹100,"Crispy, golden toast slathered with creamy but...",No


In [44]:
# Define your condition
print(all['Name'].unique())





['Hotel Prakash & Restaurant' 'Hungry Point' 'Baap Of Rolls'
 'The Cook House' "Tanisha's Restaurant Royal Hyderabadi Biryani"
 'Bhalla Vaishav Dhaba' 'Foodbay' 'Punjabi Dhaba - P.D']


In [48]:
fix_ratings = {
    'Punjabi Dhaba - P.D': 4.3,
    'Hotel Prakash & Restaurant': 4.0,
    'Foodbay': 3.6,
    'Hungry Point': 3.9,
    'Baap Of Rolls': 4.2,
    'The Cook House': 3.2,
    "Tanisha's Restaurant Royal Hyderabadi Biryani": 4.2,
    'Bhalla Vaishav Dhaba': 3.9
}

for name, rating in fix_ratings.items():
    all.loc[all['Name'] == name, 'Rating'] = rating



In [49]:
all.head()

Unnamed: 0,Name,Rating,Cuisines,Category,Item Name,Price,Description,Vegetarian
0,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Breakfast Specials,₹70,,No
3,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Bun Butter,₹70,BREAKFAST,No
10,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Veg Sandwich,₹100,Vegetarian sandwich consisting of vegetable fi...,Yes
16,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Butter Toast,₹100,Toast with butter.,No
22,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Butter Toast with Jam,₹100,"Crispy, golden toast slathered with creamy but...",No


In [52]:

non_veg_keywords = ['chicken', 'mutton', 'fish', 'egg', 'prawn', 'keema', 'meat', 'afghani', 'non veg']


all['Text Combined'] = (
    all['Item Name'].fillna('') + ' ' + all['Description'].fillna('')
).str.lower()


all['Vegetarian'] = all['Text Combined'].apply(
    lambda x: 'No' if any(word in x for word in non_veg_keywords) else 'Yes'
)


In [53]:
all.head()

Unnamed: 0,Name,Rating,Cuisines,Category,Item Name,Price,Description,Vegetarian,Item Name Lower,Text Combined
0,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Breakfast Specials,₹70,,Yes,breakfast specials,breakfast specials
3,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Bun Butter,₹70,BREAKFAST,Yes,bun butter,bun butter breakfast
10,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Veg Sandwich,₹100,Vegetarian sandwich consisting of vegetable fi...,Yes,veg sandwich,veg sandwich vegetarian sandwich consisting of...
16,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Butter Toast,₹100,Toast with butter.,Yes,butter toast,butter toast toast with butter.
22,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Butter Toast with Jam,₹100,"Crispy, golden toast slathered with creamy but...",Yes,butter toast with jam,"butter toast with jam crispy, golden toast sla..."


In [54]:
all.drop(columns=['Text Combined'], inplace=True)

In [55]:
all.drop(columns=['Item Name Lower'], inplace=True)

In [56]:
all.head()

Unnamed: 0,Name,Rating,Cuisines,Category,Item Name,Price,Description,Vegetarian
0,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Breakfast Specials,₹70,,Yes
3,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Bun Butter,₹70,BREAKFAST,Yes
10,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Veg Sandwich,₹100,Vegetarian sandwich consisting of vegetable fi...,Yes
16,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Butter Toast,₹100,Toast with butter.,Yes
22,Hotel Prakash & Restaurant,4.0,,Breakfast Specials,Butter Toast with Jam,₹100,"Crispy, golden toast slathered with creamy but...",Yes


In [57]:
all.shape
all.to_csv('zomato_restaurants_final.csv', index=False)