In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import re
import urllib.robotparser

# Delivery/menu page whose items will be extracted
url = 'https://magicpin.in/Bangalore/Hal-2Nd-Stage/Restaurant/The-Rameshwaram-Cafe/store/5b72aa/delivery/'

# Download and parse the site's robots.txt so its rules can be consulted
# before the page itself is requested.
rp = urllib.robotparser.RobotFileParser(url='https://magicpin.in/robots.txt')
rp.read()

def _clean_price(price_text):
    """Return the first numeric amount found in a price label.

    Commas (thousands separators) are removed and a decimal part, if present,
    is preserved, so "Rs 1,250.50" becomes "1250.50". Stripping every
    non-digit instead would turn that label into "125050". An empty string is
    returned when the text contains no digits (e.g. the "No price"
    placeholder).
    """
    amount = re.search(r'\d+(?:\.\d+)?', price_text.replace(',', ''))
    return amount.group(0) if amount else ''


def _parse_menu_item(category_name, item):
    """Build one CSV row [category, name, description, price] from an item node.

    Missing sub-elements fall back to placeholder text rather than raising.
    """
    name_tag = item.select_one('article.itemInfo a')
    desc_tag = item.select_one('section.description')
    price_tag = item.select_one('span.itemPrice')

    name = name_tag.text.strip() if name_tag else "Not found"
    desc = desc_tag.text.strip() if desc_tag else "No description"
    price = price_tag.text.strip() if price_tag else "No price"
    return [category_name, name, desc, _clean_price(price)]


# Only fetch the page when robots.txt permits it for a generic user agent.
if not rp.can_fetch("*", url):
    print("Scraping disallowed by robots.txt")
else:
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise HTTPError for bad responses

        soup = BeautifulSoup(response.content, 'html.parser')

        # Collect every row before touching the output file, so an unexpected
        # parsing failure cannot leave a half-written CSV behind.
        rows = []
        for section in soup.select('article.categoryListing'):
            category = section.select_one('h4.categoryHeading')
            category_name = category.text.strip() if category else "Uncategorized"

            for item in section.select('div section.categoryItemHolder'):
                try:
                    rows.append(_parse_menu_item(category_name, item))
                except Exception as e:
                    # Best effort: report the bad item and keep going.
                    print(f"Skipping item due to error: {e}")

        with open('TRC_items.csv', mode='w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['Category', 'Item Name', 'Description', 'Price'])
            writer.writerows(rows)

        if rows:
            print("Data scraped and saved to TRC_items.csv")
        else:
            # Do not claim success when the selectors matched nothing.
            print("No menu items matched the expected selectors; TRC_items.csv contains only the header row")

    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch page: {e}")
    except Exception as e:
        print(f"Unexpected error: {e}")


Data scraped and saved to TRC_items.csv


In [4]:
import requests
from bs4 import BeautifulSoup
import json

url = "https://magicpin.in/Bangalore/Hal-2Nd-Stage/Restaurant/The-Rameshwaram-Cafe/store/5b72aa/delivery/"
# A browser-like User-Agent header is sent with the request.
headers = {"User-Agent": "Mozilla/5.0"}

# requests applies no timeout unless one is given, so bound the wait; then
# fail on HTTP error statuses instead of parsing an error page as if it were
# the restaurant page.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Every <script type="application/ld+json"> element on the page; these are
# searched for the restaurant record.
script_tags = soup.find_all("script", type="application/ld+json")

# Defaults, so the code that uses these names afterwards still runs when the
# page carries no usable Restaurant record.
name = ""
address = ""
phone = "N/A"
hours = []


def _find_restaurant(payload):
    """Return the first dict with "@type" == "Restaurant" in a JSON-LD payload.

    The payload may be a single object or a list of objects; anything that is
    not a dict is ignored. Returns None when no such object exists.
    """
    candidates = payload if isinstance(payload, list) else [payload]
    for candidate in candidates:
        if isinstance(candidate, dict) and candidate.get("@type") == "Restaurant":
            return candidate
    return None


def _format_opening_hours(spec):
    """Format one opening-hours entry as "DAY: opens to closes".

    "dayOfWeek" may be a single string or a list of strings; several days are
    joined with ", ". Returns None for entries that are not dicts or that lack
    a day, "opens" or "closes".
    """
    if not isinstance(spec, dict):
        return None
    days = spec.get("dayOfWeek")
    if isinstance(days, str):
        # Indexing a plain string with [0] would yield only its first letter.
        days = [days]
    if not isinstance(days, list) or not days:
        return None
    if "opens" not in spec or "closes" not in spec:
        return None
    return f"{', '.join(map(str, days))}: {spec['opens']} to {spec['closes']}"


for tag in script_tags:
    # A script element without a single text child has no .string.
    if not tag.string:
        continue
    try:
        payload = json.loads(tag.string)
    except json.JSONDecodeError:
        # Malformed JSON in one block should not hide a valid one elsewhere.
        continue

    restaurant = _find_restaurant(payload)
    if restaurant is None:
        continue

    name = restaurant.get("name")

    # The address may be a nested object or plain text.
    address_info = restaurant.get("address") or {}
    if isinstance(address_info, dict):
        address = address_info.get("streetAddress", "")
    else:
        address = str(address_info)

    # The telephone may be a single value or a list; use the first entry and
    # fall back to "N/A" when it is missing or empty.
    telephone = restaurant.get("telephone")
    if isinstance(telephone, list):
        telephone = telephone[0] if telephone else None
    phone = telephone or "N/A"

    # Accept either one specification object or a list of them.
    specs = restaurant.get("openingHoursSpecification") or []
    if isinstance(specs, dict):
        specs = [specs]
    hours = [line for line in map(_format_opening_hours, specs) if line]
    break
else:
    # Loop finished without a break: nothing usable was found.
    print("No Restaurant JSON-LD record found; restaurant details left at their defaults")

# Echo the extracted details: one line per scalar field, then one bullet per
# opening-hours entry.
for label, value in (("Restaurant:", name), ("Location:", address), ("Phone:", phone)):
    print(label, value)

print("Opening Hours:")
for slot in hours:
    print(" -", slot)
import csv

# Persist the extracted details as a one-row CSV with a header line.
info_columns = ["restaurant_name", "location", "contact", "opening_hours"]

with open("rameshwaram_cafe_info.csv", "w", newline='', encoding="utf-8") as csv_file:
    info_writer = csv.DictWriter(csv_file, fieldnames=info_columns)
    info_writer.writeheader()
    # All opening-hours lines are flattened into a single "; "-separated cell.
    info_values = (name, address, phone, "; ".join(hours))
    info_writer.writerow(dict(zip(info_columns, info_values)))

print("Info saved to 'rameshwaram_cafe_info.csv'")


Restaurant: The-Rameshwaram-Cafe
Location: Ground Floor, No 2984, 12th Main Rd, Indiranagar, HAL 2nd Stage, Bangalore
Phone: +918151999191
Opening Hours:
 - MONDAY: 06:30:00 to 01:00:00
 - TUESDAY: 06:30:00 to 01:00:00
 - WEDNESDAY: 06:30:00 to 01:00:00
 - THURSDAY: 06:30:00 to 01:00:00
 - FRIDAY: 06:30:00 to 01:00:00
 - SATURDAY: 06:30:00 to 01:00:00
 - SUNDAY: 06:30:00 to 01:00:00
Info saved to 'rameshwaram_cafe_info.csv'
