In [4]:
import requests
from bs4 import BeautifulSoup
import re
import csv

def extract_dishes_from_url(url, section_name, timeout=10):
    """Scrape one menu page and return the dish/price pairs found in its text.

    The page is downloaded, flattened to plain text (one text node per line)
    and searched for "<name> ₹<price>" pairs. Names that contain a section
    header or navigation keyword as a whole word are discarded.

    Args:
        url: Address of the menu page to download.
        section_name: Label stored under the "section" key of every result.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument. Defaults to 10.

    Returns:
        A list of dicts with the keys "section", "dish" and "price", in page
        order. "price" is the matched digits (thousands separators kept)
        prefixed with "₹".

    Raises:
        requests.HTTPError: Raised by ``raise_for_status`` for 4xx/5xx replies.
    """
    # Without an explicit timeout requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    text = soup.get_text(separator="\n")

    # "<name> ₹<price>". The name cannot span lines ('.' excludes newlines) but
    # the whitespace before ₹ can, so a price on the following line still pairs
    # with the name above it. The price accepts comma-grouped amounts ("1,200")
    # or any run of two or more digits, so five-digit prices are not truncated.
    pattern = re.compile(r"(.+?)\s*₹\s?(\d{1,3}(?:,\d{3})+|\d{2,})")
    matches = pattern.findall(text)

    # Section headers / navigation labels that must not be reported as dishes.
    skip_keywords = ["STARTERS", "MAIN COURSE", "DESSERTS", "COCKTAILS", "QUICK", "FOOD", "DRINKS", "BAR BITES", "SOUPS", "SALADS"]
    # Whole-word matching: a plain substring test would also discard real
    # dishes such as "Seafood Platter" because "SEAFOOD" contains "FOOD".
    skip_pattern = re.compile(
        r"\b(?:" + "|".join(re.escape(keyword) for keyword in skip_keywords) + r")\b"
    )

    dishes = []
    for raw_name, price in matches:
        name = raw_name.strip()
        if not name or skip_pattern.search(name.upper()):
            continue
        dishes.append({"section": section_name, "dish": name, "price": f"₹{price}"})
    return dishes

def scrape_multiple_sections(sections):
    """Scrape every configured menu section and merge the results.

    Args:
        sections: Iterable of dicts, each providing a "url" and a "name" key.

    Returns:
        One flat list holding the dishes returned by
        ``extract_dishes_from_url`` for each section, in input order.
    """
    collected = []
    for entry in sections:
        page_url, label = entry["url"], entry["name"]
        # Progress line so a long run shows which page is being fetched.
        print(f"Scraping {label} from {page_url}")
        collected += extract_dishes_from_url(page_url, label)
    return collected

def save_to_csv(dishes, filename="Just_BLR_menu.csv"):
    """Write dish records to a UTF-8 CSV file with a header row.

    Args:
        dishes: Iterable of dicts keyed by "section", "dish" and "price".
        filename: Destination path; an existing file is overwritten.
    """
    columns = ["section", "dish", "price"]
    # newline="" lets the csv module control line endings itself.
    with open(filename, "w", newline="", encoding="utf-8") as handle:
        sink = csv.DictWriter(handle, fieldnames=columns)
        sink.writeheader()
        for record in dishes:
            sink.writerow(record)

# --- Run the Scraper ---
# Scrapes every configured menu page and writes the combined rows to a CSV file.
if __name__ == "__main__":
    # Each entry pairs the label stored in the CSV "section" column with the
    # page that is scraped for it.
    # NOTE(review): the "Soups & Salads" label points at the "food-drinks" URL;
    # confirm this is the intended page.
    sections = [
        {"name": "Bar Bites", "url": "https://justblr.com/food-bar-bites/"},
        {"name": "Soups & Salads", "url": "https://justblr.com/food-drinks/"},
        {"name": "Main Course", "url": "https://justblr.com/food-main-course/"},
        {"name": "Desserts", "url": "https://justblr.com/food-desserts/"}
    ]

    all_dishes = scrape_multiple_sections(sections)
    # No filename is passed, so save_to_csv falls back to its own default.
    save_to_csv(all_dishes)
    print("\nMenu data saved to Just_BLR_menu.csv")


Scraping Bar Bites from https://justblr.com/food-bar-bites/
Scraping Soups & Salads from https://justblr.com/food-drinks/
Scraping Main Course from https://justblr.com/food-main-course/
Scraping Desserts from https://justblr.com/food-desserts/

Menu data saved to Just_BLR_menu.csv


In [3]:
import requests
from bs4 import BeautifulSoup
import re
import csv

def extract_info_from_url(url, timeout=10):
    """Download a page and pull basic restaurant details out of its text.

    The page is flattened to non-empty, stripped text lines which are then
    scanned with simple keyword and regex heuristics.

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument. Defaults to 10.

    Returns:
        A dict with these keys:
            "restaurant_name": the fixed string "Just BLR".
            "location": first line mentioning "mg road" or "bangalore"
                (case-insensitive), or "Not found".
            "special_features": lines mentioning a dietary/spice keyword.
            "contact_info": distinct "Email: ..." / "Phone: ..." entries found
                in the last 30 lines, in order of first appearance.
            "operating_hours": lines containing a clock time with AM/PM.

    Raises:
        requests.HTTPError: Raised by ``raise_for_status`` for 4xx/5xx replies.
    """
    # Without an explicit timeout requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    text = soup.get_text(separator="\n")
    lines = [line.strip() for line in text.splitlines() if line.strip()]

    restaurant_name = "Just BLR"

    # Try to find address-like line
    location = next((line for line in lines if "mg road" in line.lower() or "bangalore" in line.lower()), "Not found")

    # Special features: Dietary terms
    keywords = ["vegetarian", "vegan", "gluten", "allergen", "spicy", "spice"]
    special_features = [line for line in lines if any(k in line.lower() for k in keywords)]

    # Contact info: Scan only the last 30 lines (footer area)
    footer_lines = lines[-30:]
    contact_info = []
    email_pattern = re.compile(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+")
    phone_pattern = re.compile(r"\+91[-\s]?\d{5}[-\s]?\d{5}")

    for line in footer_lines:
        found = (
            ("Email", email_pattern.search(line)),
            ("Phone", phone_pattern.search(line)),
        )
        for label, match in found:
            if not match:
                continue
            entry = f"{label}: {match.group()}"
            # Compare the full formatted entry: testing for the bare words
            # "email"/"phone" never matched anything, so repeated footer lines
            # used to be appended again and again.
            if entry not in contact_info:
                contact_info.append(entry)

    # Operating hours: only lines with time-like formats. Minutes are optional
    # so "12 PM" counts as well as "12:30 PM"; the digit lookbehind and the
    # trailing word boundary keep longer numbers and words such as
    # "10 amazing" from matching.
    time_pattern = re.compile(r"(?<!\d)\d{1,2}(?:[:.]\d{2})?\s?(?:AM|PM)\b", re.IGNORECASE)
    operating_hours = [line for line in lines if time_pattern.search(line)]

    return {
        "restaurant_name": restaurant_name,
        "location": location,
        "special_features": special_features,
        "contact_info": contact_info,
        "operating_hours": operating_hours
    }

def save_to_csv(data, filename="restaurant_info.csv"):
    """Write a single record to a UTF-8 CSV file as a header row plus one data row.

    List values are collapsed into one cell by joining their items with ", ";
    all other values are written unchanged.

    Note: this notebook defines another ``save_to_csv`` that takes a list of
    dish dicts; whichever cell ran last is the one bound to the name.

    Args:
        data: Mapping of column name to value (a list of strings or a scalar).
        filename: Destination path; an existing file is overwritten.
    """
    with open(filename, "w", newline="", encoding="utf-8") as handle:
        sink = csv.DictWriter(handle, fieldnames=data.keys())
        sink.writeheader()
        flattened = {}
        for column, value in data.items():
            if isinstance(value, list):
                flattened[column] = ", ".join(value)
            else:
                flattened[column] = value
        sink.writerow(flattened)

# Example usage
# Fetches the home page, prints each extracted field, then writes the record to CSV.
if __name__ == "__main__":
    url = "https://justblr.com/"
    data = extract_info_from_url(url)

    # Print summary
    for key, value in data.items():
        # Turn a key such as "contact_info" into the heading "Contact Info".
        print(f"\n🔹 {key.replace('_', ' ').title()}:")
        if isinstance(value, list) and value:
            # Non-empty lists are printed one bullet per item.
            for v in value:
                print(f"  - {v}")
        else:
            # Scalars and empty lists land here; any falsy value prints "Not found".
            print(f"  {value if value else 'Not found'}")

    # Save to CSV
    # No filename is passed, so save_to_csv falls back to its own default.
    save_to_csv(data)
    print("\n✅ Data saved to restaurant_info.csv")



🔹 Restaurant Name:
  Just BLR

🔹 Location:
  Just BLR - Have Fun & Flavors for your Perfect Party at MG Road in Bengaluru - Justblr.com

🔹 Special Features:
  Not found

🔹 Contact Info:
  - Phone: +91-82779-96372
  - Email: info@justblr.com

🔹 Operating Hours:
  Not found

✅ Data saved to restaurant_info.csv
