In [None]:
# Install required libraries
!pip install requests beautifulsoup4 pandas

import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import json

# Dishes used when the live page does not yield enough menu entries.
# The list was collected by hand from search results and reviews.
_FALLBACK_MENU = [
    ("Potato cracker", "with tamarind ghee glaze"),
    ("Coriander chicken", "with mini appam"),
    ("Spiced boletus", "with aerated rice bun"),
    ("Tomato and millet", "with rice crisp"),
    ("Spiced aubergine", "with byadgi chili emulsion and sago"),
    ("Sago and yoghurt", "with tamarind & dried berry sauce"),
    ("Pan seared quail", "with aerated rice cake"),
    ("Crispy chili potato", "with pineapple and mint"),
    ("Raw mango pudding", "with ghee candle"),
    ("Stir fried chicken", "with buttermilk mousse curry leaf tempura"),
    ("Crab claws batter fried", "with red chili chutney"),
    ("Mussels in coconut broth", "coriander chili"),
    ("Seafood fritter rice", "with sesame and palm nectar"),
    # A few more taken from reviews
    ("Asparagus and coconut stew", "with turmeric coconut, paired with idiyappam"),
    ("Uthukuli morel", "with Malabar parotta and butter toffee"),
    ("Fried sea bass", ""),
    ("Braised lamb cheeks", ""),
    ("Chicken rice", ""),
    ("Almond creamer", "with candied orange"),
    ("Nest dessert", ""),
]

# Minimum number of (dish, description) pairs that must be scraped from the
# page before the scraped list is preferred over _FALLBACK_MENU.
_MIN_SCRAPED_DISHES = 6

# Matches a clock time such as "7:00 pm" or "12:30AM".
_TIME_PATTERN = re.compile(r"\d{1,2}:\d{2}\s*(am|pm)", re.IGNORECASE)


def _fetch_page_soup(url, headers, timeout):
    """Download ``url`` and parse it; return an empty document if the request fails."""
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Treat 4xx/5xx responses as failures instead of parsing an error page.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Warning: could not fetch {url} ({exc}); using built-in fallback data.")
        return BeautifulSoup("", 'html.parser')
    return BeautifulSoup(response.content, 'html.parser')


def _extract_timings(soup, timings):
    """Overwrite entries of ``timings`` with page strings naming a clock time and a meal."""
    for text in soup.stripped_strings:
        if not _TIME_PATTERN.search(text):
            continue
        lowered = text.lower()
        # A later matching string replaces an earlier one (last match wins).
        if "lunch" in lowered:
            timings["lunch"] = text
        elif "dinner" in lowered:
            timings["dinner"] = text
    return timings


def _extract_menu_candidates(soup):
    """Collect (dish, description) pairs: a short <p>/<li> followed by a sibling starting with "with "."""
    candidates = []
    for tag in soup.find_all(['p', 'li']):
        text = tag.get_text(strip=True)
        # Dish names are expected to be short; skip empty or long text.
        if not text or len(text) >= 40:
            continue
        next_tag = tag.find_next_sibling()
        if next_tag is None or next_tag.name not in ('p', 'li'):
            continue
        next_text = next_tag.get_text(strip=True)
        if next_text.lower().startswith("with "):
            candidates.append((text, next_text))
    return candidates


def scrape_avartana_itcmaratha(timeout=15):
    """Collect details and menu items for Avartana (ITC Maratha), save and print them.

    The restaurant page is downloaded and scanned for opening times and for
    dish/description pairs. Hard-coded values are used wherever the page does
    not provide the data, including when the download itself fails.

    Side effects: writes ``avartana_itcmaratha_details.json``,
    ``avartana_itcmaratha_menu.csv`` and ``avartana_itcmaratha_menu.json`` to
    the current working directory and prints a summary.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(details, menu_items)`` tuple: ``details`` is a dict with the
        restaurant information, ``menu_items`` is a list of dicts with the
        keys ``section``, ``item``, ``description`` and ``price``.
    """
    url = "https://www.itchotels.com/in/en/itcmaratha-mumbai/fine-dine/avartana"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    soup = _fetch_page_soup(url, headers, timeout)

    # Restaurant details (hard-coded)
    name = "Avartana"
    location = "ITC Maratha, Ashok Nagar, Andheri East, Near Chhatrapati Shivaji Maharaj Int'l Airport Rd, Mumbai 400099, India"
    phone = "+91 22 2830 3030"
    email = "reservations@itchotels.in"
    cuisine = "Progressive South Indian"

    # Default timings, replaced by page content when the page mentions them.
    timings = _extract_timings(soup, {
        "lunch": "Sunday 12:00 pm - 3:00 pm",
        "dinner": "Daily 7:00 pm - 11:00 pm"
    })

    # Prefer dishes scraped from the page; fall back to the built-in list
    # when too few were found.
    menu_candidates = _extract_menu_candidates(soup)
    if len(menu_candidates) < _MIN_SCRAPED_DISHES:
        menu_candidates = _FALLBACK_MENU
    menu_items = [
        {
            "section": "Tasting Menu",
            "item": item,
            "description": desc,
            "price": "Set menu"
        }
        for item, desc in menu_candidates
    ]

    # Degustation menu details (hard-coded, taken from reviews)
    degustation_menus = [
        {"name": "Maya", "courses": 7, "price": "₹2,500+"},
        {"name": "Bela", "courses": 9, "price": "₹3,000+"},
        {"name": "Jiaa", "courses": 11, "price": "₹3,500+"},
        {"name": "Anika", "courses": 13, "price": "₹3,950+"},
        {"name": "Tara", "courses": 14, "price": "₹4,750+"},
    ]

    details = {
        "name": name,
        "location": location,
        "contact": {
            "phone": phone,
            "email": email
        },
        "timings": timings,
        "cuisine": cuisine,
        "degustation_menus": degustation_menus
    }

    # Save to files (ensure_ascii=False keeps the rupee sign readable)
    with open("avartana_itcmaratha_details.json", "w", encoding="utf-8") as f:
        json.dump(details, f, indent=2, ensure_ascii=False)
    menu_df = pd.DataFrame(menu_items)
    menu_df.to_csv("avartana_itcmaratha_menu.csv", index=False)
    with open("avartana_itcmaratha_menu.json", "w", encoding="utf-8") as f:
        json.dump(menu_items, f, indent=2, ensure_ascii=False)

    print("=== Restaurant Details ===")
    print(json.dumps(details, indent=2, ensure_ascii=False))
    print("\n=== Sample Menu Items ===")
    print(menu_df.head(10))
    print("\nData saved to 'avartana_itcmaratha_details.json', 'avartana_itcmaratha_menu.csv', and 'avartana_itcmaratha_menu.json'.")

    return details, menu_items

# Run the scraper: it requests the restaurant page, writes the JSON/CSV output
# files, prints a summary, and returns the details dict and the menu item list,
# which are kept here as notebook-level variables.
details, menu_items = scrape_avartana_itcmaratha()


=== Restaurant Details ===
{
  "name": "Avartana",
  "location": "ITC Maratha, Ashok Nagar, Andheri East, Near Chhatrapati Shivaji Maharaj Int'l Airport Rd, Mumbai 400099, India",
  "contact": {
    "phone": "+91 22 2830 3030",
    "email": "reservations@itchotels.in"
  },
  "timings": {
    "lunch": "Sunday 12:00 pm - 3:00 pm",
    "dinner": "Daily 7:00 pm - 11:00 pm"
  },
  "cuisine": "Progressive South Indian",
  "degustation_menus": [
    {
      "name": "Maya",
      "courses": 7,
      "price": "₹2,500+"
    },
    {
      "name": "Bela",
      "courses": 9,
      "price": "₹3,000+"
    },
    {
      "name": "Jiaa",
      "courses": 11,
      "price": "₹3,500+"
    },
    {
      "name": "Anika",
      "courses": 13,
      "price": "₹3,950+"
    },
    {
      "name": "Tara",
      "courses": 14,
      "price": "₹4,750+"
    }
  ]
}

=== Sample Menu Items ===
        section                 item  \
0  Tasting Menu       Potato cracker   
1  Tasting Menu    Coriander chicken   
2

In [None]:
# prompt: download files

# Trigger a browser download for each output file (requires Google Colab).
from google.colab import files

for output_file in (
    'avartana_itcmaratha_details.json',
    'avartana_itcmaratha_menu.csv',
    'avartana_itcmaratha_menu.json',
):
    files.download(output_file)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>