In [None]:
from bs4 import BeautifulSoup
import pandas as pd
import re
import requests


def clean(text):
    """Trim ``text`` and collapse each run of whitespace into one space.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    if not text:
        return ""
    trimmed = text.strip()
    return re.sub(r'\s+', ' ', trimmed)

# Restaurant details. "location" and "contact" are hard-coded here; "name"
# and "hours" start empty and are populated from the parsed page.
restaurant_info = {
    "name": "",
    "location": "20, Church Street, Bangalore, Karnataka, India",
    "contact": "preferred@speciality.co.in",
    "hours": [],
}

# Fetch and Parse HTML Content
url = "https://mainlandchina.in/mlc/menu.html"
headers = {"User-Agent": "Mozilla/5.0"}
# requests applies no timeout by default, so an unresponsive server would
# block this script forever; bound the wait explicitly.
response = requests.get(url, headers=headers, timeout=30)
# Fail loudly on an HTTP error status instead of parsing an error page and
# silently producing an empty menu.
response.raise_for_status()
soup = BeautifulSoup(response.content, "lxml")

# Extract restaurant name and hours dynamically
title_tag = soup.find("title")
if title_tag:
    # assumes title like "Mainland China | Menu": keep the part before "|"
    restaurant_info["name"] = clean(title_tag.text.split("|")[0])

footer = soup.find("footer")
if footer:
    # A footer line counts as opening hours when it mentions "timing" or
    # contains a clock time such as "7:00 pm", "11.30 pm" or "4 pm".
    # A bare substring test for "am"/"pm" would also match unrelated words
    # like "name" or "Instagram", so require digits before the meridiem.
    hours_pattern = re.compile(
        r"timing|\b\d{1,2}(?:[:.]\d{2})?\s*(?:am|pm)\b",
        re.IGNORECASE,
    )
    footer_text = footer.get_text(separator="\n")
    for line in footer_text.splitlines():
        if hours_pattern.search(line):
            restaurant_info["hours"].append(clean(line))

#  Extract Menu Items
menu_items = []


def extract_menu(menu_block, category, dish_type):
    """Append one record per menu entry found in ``menu_block`` to ``menu_items``.

    Entries are the ``<p>`` elements inside the block's ``left-box-menu`` and
    ``right-box-menu`` divs (left column first). Empty paragraphs and
    "not available" placeholders are skipped.

    Args:
        menu_block: Parsed element containing the menu columns.
        category: Category label stored on every record.
        dish_type: Dish type label (e.g. "Veg"); stored under both the
            "type" and "special_features" keys.

    Returns:
        None. Results are appended to the module-level ``menu_items`` list.
    """
    columns = (
        menu_block.find_all("div", class_="left-box-menu")
        + menu_block.find_all("div", class_="right-box-menu")
    )
    for col in columns:
        for p in col.find_all("p"):
            text = clean(p.get_text())
            if text.lower() in ("not available", ""):
                continue

            kcal_info = ""
            # Calorie annotation looks like "(250 kcal)"; capture the part
            # inside the parentheses.
            kcal_match = re.search(r"\(([\d\s\w\/]+kcal)\)", text)
            if kcal_match:
                kcal_info = kcal_match.group(1)
                # Removing an annotation from the middle of the description
                # leaves a doubled space behind, so re-normalise whitespace
                # rather than only stripping the ends.
                text = clean(text.replace(f"({kcal_info})", ""))

            menu_items.append({
                "category": category,
                "type": dish_type,
                "item_description": text,
                "calories_info": kcal_info,
                "special_features": dish_type,
            })

# Walk every tab pane on the page and collect its menu entries
tabs = soup.select(".tab-pane")
for section in tabs:
    tab_id = section.get("id")
    # The category label is read from the button whose data-bs-target
    # points at this pane's id.
    button = soup.find("button", attrs={"data-bs-target": f"#{tab_id}"})
    if button:
        category = clean(button.text)
    else:
        category = "Unknown Category"

    # Vegetarian block first, then non-vegetarian; a pane may lack either.
    for block_class, block_label in (("veg-block", "Veg"), ("nonveg-block", "Non-Veg")):
        found_block = section.find("div", class_=block_class)
        if found_block:
            extract_menu(found_block, category, block_label)

# Save Menu to CSV
df = pd.DataFrame(menu_items)
df.to_csv("mainland_china_full_menu.csv", index=False)

# Print & Save Restaurant Info
# Build the report lines once; the console and the text file both use them.
info_lines = [
    f"Name: {restaurant_info['name']}",
    f"Location: {restaurant_info['location']}",
    f"Contact: {restaurant_info['contact']}",
    "Hours:",
]
info_lines.extend(f" - {h}" for h in restaurant_info['hours'])

print("Restaurant Info:")
for info_line in info_lines:
    print(info_line)

with open("mainland_china_restaurant_info.txt", "w", encoding="utf-8") as f:
    f.writelines(info_line + "\n" for info_line in info_lines)

print("\nData saved:")
for saved_path in ("mainland_china_full_menu.csv", "mainland_china_restaurant_info.txt"):
    print(" -", saved_path)


🏢 Restaurant Info:
Name: Mainland China
Location: 20, Church Street, Bangalore, Karnataka, India
Contact: preferred@speciality.co.in
Hours:
 - 12:00 pm to 4 pm
 - 7:00 pm to 11.30 pm

✅ Data saved:
 - mainland_china_full_menu.csv
 - mainland_china_restaurant_info.txt
