In [None]:
# Install required libraries
!pip install requests beautifulsoup4 pandas

import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import json

# Seconds to wait for the restaurant page before giving up and using fallback data.
_REQUEST_TIMEOUT_SECONDS = 30

# Short page strings starting with these (lower-cased) prefixes are never treated as dish names.
_NON_DISH_PREFIXES = ("inspired", "pronounced", "award", "choose", "exemplifies", "potato cracker")

# Minimum number of scraped (dish, description) pairs required to trust the page heuristic.
_MIN_SCRAPED_DISHES = 6

# Known tasting-menu dishes, used when the live page yields too few plausible candidates.
_FALLBACK_MENU = (
    ("Potato cracker", "with tamarind ghee glaze"),
    ("Coriander chicken", "with mini appam"),
    ("Spiced bolteus", "with aerated rice bun"),
    ("Tomato and millet", "with rice crisp"),
    ("Spiced aubergine", "with byadgi chili emulsion and sago"),
    ("Sago and yoghurt", "with tamarind & dried berry sauce"),
    ("Pan seared quail", "with areated rice cake"),
    ("Crispy chili potato", "with pineapple and mint"),
    ("Raw mango pudding", "with ghee candle"),
    ("Stir fried chicken", "with buttermilk mousse curry leaf tempura"),
    ("Crab claws batter fried", "with red chili chutney"),
    ("Mussels in coconut broth", "coriander chili"),
    ("Seafood fritter rice", "with sesame and palm nectar"),
)


def _fetch_page_texts(url, headers):
    """Return the page's stripped text fragments, or [] if the page cannot be fetched."""
    try:
        # A timeout prevents the notebook cell from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)
        # Do not parse 4xx/5xx error pages as if they were restaurant content.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Warning: could not fetch {url} ({exc}); using built-in fallback data.")
        return []
    soup = BeautifulSoup(response.content, 'html.parser')
    return list(soup.stripped_strings)


def _find_dinner_timing(texts):
    """Return the last fragment mentioning "dinner" together with a clock time, or None."""
    found = None
    for fragment in texts:
        if "dinner" in fragment.lower() and re.search(r"\d{1,2}:\d{2}\s*(am|pm)", fragment, re.IGNORECASE):
            found = fragment
    return found


def _looks_like_dish_name(text):
    """Heuristic: a short, capitalised, multi-word, purely alphabetic phrase."""
    if len(text) >= 40 or text.lower().startswith(_NON_DISH_PREFIXES):
        return False
    words = text.split()
    # Rejects link labels such as "TnC" (single word) and phone numbers (no leading capital).
    if len(words) < 2 or not text[0].isupper():
        return False
    return all(
        word == "&" or word.replace("-", "").replace("'", "").isalpha()
        for word in words
    )


def _extract_menu_candidates(texts):
    """Collect unique (dish, description) pairs from consecutive text fragments."""
    candidates = []
    seen = set()
    for name, description in zip(texts, texts[1:]):
        if not _looks_like_dish_name(name):
            continue
        # Descriptions on the page start with "with" or are otherwise written in lowercase.
        if not (description.lower().startswith("with") or description[:1].islower()):
            continue
        pair = (name, description)
        if pair not in seen:
            seen.add(pair)
            candidates.append(pair)
    return candidates


def scrape_avartana_itcmaurya():
    """Collect details and the tasting menu for Avartana (ITC Maurya, New Delhi).

    Fetches the restaurant page and tries to extract the dinner timing and
    (dish, description) pairs from its text. Name, address, contact details,
    cuisine and degustation menus are hard-coded. If the page cannot be
    fetched, or fewer than six plausible dishes are found, a built-in list of
    known dishes is used instead.

    Side effects: writes ``avartana_itcmaurya_details.json``,
    ``avartana_itcmaurya_menu.csv`` and ``avartana_itcmaurya_menu.json`` to the
    current working directory and prints a summary.

    Returns:
        tuple: ``(details, menu_items)`` where ``details`` is a dict of
        restaurant information and ``menu_items`` is a list of dicts with the
        keys ``section``, ``item``, ``description`` and ``price``.
    """
    url = "https://www.itchotels.com/in/en/itcmaurya-new-delhi/fine-dine/avartana"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    texts = _fetch_page_texts(url, headers)

    # Restaurant details (static values; only the dinner timing may be refreshed from the page)
    name = "Avartana"
    location = "ITC Maurya, Sardar Patel Marg, Diplomatic Enclave, Chanakyapuri, New Delhi 110021, India"
    phone = "+91 11 2611 2233"
    email = "reservations@itchotels.in"
    cuisine = "Progressive South Indian"
    timings = {
        "dinner": "7:00 pm - 11:00 pm (Daily)"
    }
    scraped_dinner = _find_dinner_timing(texts)
    if scraped_dinner is not None:
        timings["dinner"] = scraped_dinner

    # Menu extraction: prefer the live page, fall back to the known menu when it yields too little
    menu_candidates = _extract_menu_candidates(texts)
    if len(menu_candidates) < _MIN_SCRAPED_DISHES:
        menu_candidates = list(_FALLBACK_MENU)
    menu_items = [
        {
            "section": "Tasting Menu",
            "item": item,
            "description": desc,
            "price": "Set menu"
        }
        for item, desc in menu_candidates
    ]

    # Degustation menu details (static values)
    degustation_menus = [
        {"name": "Maya", "courses": 7, "price": "₹3,000+"},
        {"name": "Bela", "courses": 9, "price": "₹3,500+"},
        {"name": "Jiaa", "courses": 11, "price": "₹4,000+"},
        {"name": "Anika", "courses": 13, "price": "₹4,500+"},
        {"name": "Tara", "courses": 13, "price": "₹4,750+"},
    ]

    details = {
        "name": name,
        "location": location,
        "contact": {
            "phone": phone,
            "email": email
        },
        "timings": timings,
        "cuisine": cuisine,
        "degustation_menus": degustation_menus
    }

    # Save to files
    with open("avartana_itcmaurya_details.json", "w", encoding="utf-8") as f:
        json.dump(details, f, indent=2, ensure_ascii=False)
    menu_df = pd.DataFrame(menu_items)
    menu_df.to_csv("avartana_itcmaurya_menu.csv", index=False)
    with open("avartana_itcmaurya_menu.json", "w", encoding="utf-8") as f:
        json.dump(menu_items, f, indent=2, ensure_ascii=False)

    print("=== Restaurant Details ===")
    print(json.dumps(details, indent=2, ensure_ascii=False))
    print("\n=== Sample Menu Items ===")
    print(menu_df.head(10))
    print("\nData saved to 'avartana_itcmaurya_details.json', 'avartana_itcmaurya_menu.csv', and 'avartana_itcmaurya_menu.json'.")

    return details, menu_items

# Run the scraper. Besides returning the two values bound here, the call prints a
# summary and writes three output files (two JSON, one CSV) to the current working directory.
details, menu_items = scrape_avartana_itcmaurya()


=== Restaurant Details ===
{
  "name": "Avartana",
  "location": "ITC Maurya, Sardar Patel Marg, Diplomatic Enclave, Chanakyapuri, New Delhi 110021, India",
  "contact": {
    "phone": "+91 11 2611 2233",
    "email": "reservations@itchotels.in"
  },
  "timings": {
    "dinner": "7:00 pm - 11:00 pm (Daily)"
  },
  "cuisine": "Progressive South Indian",
  "degustation_menus": [
    {
      "name": "Maya",
      "courses": 7,
      "price": "₹3,000+"
    },
    {
      "name": "Bela",
      "courses": 9,
      "price": "₹3,500+"
    },
    {
      "name": "Jiaa",
      "courses": 11,
      "price": "₹4,000+"
    },
    {
      "name": "Anika",
      "courses": 13,
      "price": "₹4,500+"
    },
    {
      "name": "Tara",
      "courses": 13,
      "price": "₹4,750+"
    }
  ]
}

=== Sample Menu Items ===
        section                 item  \
0  Tasting Menu                  TnC   
1  Tasting Menu                  TnC   
2  Tasting Menu       +91 1126112233   
3  Tasting Menu    Coria

In [None]:
# prompt: download all files

import os
from google.colab import files

def download_all_files(directory="."):
    """Pass every regular file directly inside ``directory`` to ``files.download``.

    Sub-directories are skipped and the listing is not recursive.
    """
    # Build the full paths lazily so each entry is checked right before it is downloaded.
    entry_paths = (os.path.join(directory, entry) for entry in os.listdir(directory))
    for regular_file in filter(os.path.isfile, entry_paths):
        files.download(regular_file)

# Call the function with its default directory ("."), i.e. hand every regular file
# in the current working directory to files.download
download_all_files()


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>