ASSIGNMENT -
Part 1 -
Get name, address, open times for Village from Yelp API.

In [42]:
import requests
from bs4 import BeautifulSoup
import re

class RestaurantMenuScraper:
    def __init__(self, yelp_api_key):
        self.YELP_API_KEY = yelp_api_key
        self.HEADERS = {
            "Authorization": f"Bearer {self.YELP_API_KEY}",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }

    def get_restaurant_details(self, restaurant_name, location):
        """
        Retrieve restaurant details from Yelp API
        
        :param restaurant_name: Name of the restaurant
        :param location: Location to search
        :return: Detailed restaurant information
        """
        search_url = "https://api.yelp.com/v3/businesses/search"
        params = {
            "term": restaurant_name,
            "location": location,
            "limit": 1
        }
        
        try:
            response = requests.get(search_url, headers=self.HEADERS, params=params)
            response.raise_for_status()
            data = response.json()
            
            if "businesses" in data and len(data["businesses"]) > 0:
                business = data["businesses"][0]
                business_id = business["id"]
                
                # Fetch detailed information
                details_url = f"https://api.yelp.com/v3/businesses/{business_id}"
                details_response = requests.get(details_url, headers=self.HEADERS)
                details_response.raise_for_status()
                details_data = details_response.json()
                
                return details_data
            else:
                print(f"No data found for {restaurant_name}.")
                return None
        
        except requests.exceptions.RequestException as e:
            print(f"Error retrieving restaurant details: {e}")
            return None

    def extract_open_times(self, hours_data):
        # List of days in a standard week
        days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
        formatted_times = {}
        
        # Initialize all days as "Closed"
        for day in days:
            formatted_times[day] = "Closed"
        
        if hours_data:
            # Loop through each entry in 'hours_data'
            for period in hours_data:
                # Check if 'open' field exists in the current period
                if 'open' in period:
                    for time_slot in period['open']:
                        # Get the corresponding day from the index in the 'days' list
                        day = days[time_slot['day']]
                        
                        # Extract start and end times
                        start_time = f"{time_slot['start'][:2]}:{time_slot['start'][2:]}"
                        end_time = f"{time_slot['end'][:2]}:{time_slot['end'][2:]}"
                        
                        # Format and assign the opening times
                        if formatted_times[day] == "Closed":
                            formatted_times[day] = f"{start_time} - {end_time}"
                        else:
                            formatted_times[day] += f", {start_time} - {end_time}"
    
        return formatted_times


def main():
    # Yelp API Key
    YELP_API_KEY = "-He3gbhB7wZ21g2U0nXCiyVdXkUZiYx" # Replace with your Yelp API Key
    
    # Initialize scraper
    scraper = RestaurantMenuScraper(YELP_API_KEY)
    
    # Get restaurant details
    restaurant_details = scraper.get_restaurant_details(
        "Village - The Soul of India", 
        "Hicksville"
    )
    
    if restaurant_details:
        # Extract and print basic information
        print("Restaurant Details:")
        print("Name:", restaurant_details["name"])
        print("Address:", ", ".join(restaurant_details["location"]["display_address"]))
        
        # Parse and print open times
        open_times = scraper.extract_open_times(restaurant_details.get('hours', []))
        print("\nOpen Times:")
        for day, times in open_times.items():
            print(f"{day}: {times}")

if __name__ == "__main__":
    main()


Restaurant Details:
Name: Village the soul of india
Address: 11 West Marie St, Hicksville, NY 11801

Open Times:
Monday: 11:00 - 15:00, 17:00 - 22:00
Tuesday: Closed
Wednesday: 11:00 - 15:00, 17:00 - 22:00
Thursday: 11:00 - 15:00, 17:00 - 22:00
Friday: 11:00 - 15:00, 17:00 - 22:00
Saturday: 11:30 - 22:00
Sunday: 11:30 - 22:00


 Print menu items, & prices for Village from Yelp API. Also, save a village_menu.csv file

In [34]:
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.action_chains import ActionChains

# Setup WebDriver
options = webdriver.ChromeOptions()
options.add_argument('--headless')  # Run headlessly (without a UI)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# URL to scrape
url = 'https://www.yelp.com/menu/village-the-soul-of-india-hicksville'
driver.get(url)

# Optionally, you can wait for JavaScript to load the page if needed
driver.implicitly_wait(5)

# Extract Menu Item Names
menu_items = driver.find_elements(By.CSS_SELECTOR, '.menu-item-details h4')

# Extract Menu Descriptions
descriptions = driver.find_elements(By.CSS_SELECTOR, '.menu-item-details .menu-item-details-description')

# Extract Prices
prices = driver.find_elements(By.CSS_SELECTOR, '.menu-item-price-amount')

# Prepare data to be written to CSV and print to console
menu_data = []
for item, desc, price in zip(menu_items, descriptions, prices):
    item_data = [item.text, desc.text, price.text]
    menu_data.append(item_data)
    # Print extracted data
    print(f"Item: {item.text}")
    print(f"Description: {desc.text}")
    print(f"Price: {price.text}")
    print('---')

# Save data to CSV file
with open('village_menu.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Item', 'Description', 'Price'])  # Write header
    writer.writerows(menu_data)  # Write the menu data

# Close the browser after scraping
driver.quit()

print("Data has been saved to 'village_menu.csv'.")


Item: Medu Vada
Description: Lentil flour donuts.
Price: $6.00
---
Item: Medu Vada in Sambar Bowl
Description: Lentil flour donuts smothered with sambar.
Price: $6.99
---
Item: Medu Vada in Rasam Bowl
Description: Lentil flour donuts smothered with Rasam soup.
Price: $6.99
---
Item: Combo
Description: Rice cake and lentil donut.
Price: $5.99
---
Item: Combo in Sambar Bowl
Description: Idly and Vada.
Price: $6.99
---
Item: Masala Vada
Description: Crispy Chenna Dal mixed with spiced and fried.
Price: $7.45
---
Item: Dahi Vada
Description: Lentil donut soaked in sweet yogurt.
Price: $8.95
---
Item: Batata Vada
Description: Mashed potato spices and fried with chickpeas flour.
Price: $6.00
---
Item: Mysore Bonda
Description: Lentil balls spices and cashew fried.
Price: $6.00
---
Item: Chennai Pakkoda
Description: Onions and Chenna Dal spices fried.
Price: $6.00
---
Item: Spring Rolls
Description: Vegetables stuffed platter.
Price: $7.45
---
Item: Paneer Pakkoda
Description: Cottage cheese 

Find latitude and longitude using API of Village

In [11]:
import requests

def get_coordinates_open_cage(address, api_key):
    """Get coordinates using OpenCage Geocoding API"""
    url = f'https://api.opencagedata.com/geocode/v1/json?q={address}&key={api_key}'
    response = requests.get(url)
    data = response.json()

    if data['status']['code'] == 200:
        lat = data['results'][0]['geometry']['lat']
        lng = data['results'][0]['geometry']['lng']
        return lat, lng
    else:
        print(f"Error: {data['status']['message']}")
        return None, None

# Example usage
api_key = 'abc'  # Replace with your OpenCage API key
address = "Village - The Soul of India, 11 West Marie St, Hicksville, NY 11801"
lat, lng = get_coordinates_open_cage(address, api_key)
print(f"Latitude: {lat}, Longitude: {lng}")


Latitude: 40.766876, Longitude: -73.523663


Get top-rated 5 restaurants in 2 km with similar menu items

In [13]:
import requests

def find_similar_restaurants(api_key, latitude, longitude, term="Indian Vegetarian", radius=2000, limit=5):
    """Fetch top-rated Vegetarian Indian restaurants near the specified coordinates."""
    url = "https://api.yelp.com/v3/businesses/search"
    headers = {"Authorization": f"Bearer {api_key}"}
    params = {
        "term": term,
        "latitude": latitude,
        "longitude": longitude,
        "radius": radius,
        "categories": "indian,dosa",
        "sort_by": "rating",
        "limit": limit + 1  # Include one extra to exclude the main restaurant
    }

    response = requests.get(url, headers=headers, params=params)
    data = response.json()

    # Filter out "Village - The Soul of India" and keep the top 5
    refined_restaurants = []
    for business in data.get("businesses", []):
        if "village" not in business["name"].lower():
            refined_restaurants.append({
                "name": business["name"],
                "address": ", ".join(business["location"]["display_address"]),
                "rating": business["rating"],
                "phone": business.get("display_phone", "N/A")
            })
            if len(refined_restaurants) == limit:
                break

    return refined_restaurants

# Example Usage
api_key = "ngjMz-"  # Replace with your Yelp API Key
latitude, longitude = 40.766876, -73.523663  # Coordinates of "Village - The Soul of India"

restaurants = find_similar_restaurants(api_key, latitude, longitude)
for idx, restaurant in enumerate(restaurants, start=1):
    print(f"{idx}. Restaurant: {restaurant['name']}\n   Address: {restaurant['address']}\n   Rating: {restaurant['rating']}\n   Phone: {restaurant['phone']}\n")


1. Restaurant: Kunga Kitchen
   Address: 390 Woodbury Rd, Hicksville, NY 11801
   Rating: 4.6
   Phone: (516) 490-5501

2. Restaurant: Taste of Mumbai
   Address: 153 Levittown Pkwy, Hicksville, NY 11801
   Rating: 4.6
   Phone: (516) 261-9070

3. Restaurant: Kathis & Kababs
   Address: 55 Broadway, Ste A, Hicksville, NY 11801
   Rating: 4.5
   Phone: (516) 595-7775

4. Restaurant: Kabul Grill
   Address: 129 N Broadway, Hicksville, NY 11801
   Rating: 4.5
   Phone: (516) 933-8999

5. Restaurant: Taste Of Chennai
   Address: 39 W John St, Hicksville, NY 11801
   Rating: 4.4
   Phone: (516) 396-1449



SCRAPING and DISPLAYING menu items & prices for Village + each restaurant

In [19]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

def scrape_menu(url):
    """Scrape menu items and prices from the given restaurant URL."""
    # Setup WebDriver
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # Run headlessly (without a UI)
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    try:
        # Visit the URL
        driver.get(url)

        # Optionally, wait for JavaScript to load the page if needed
        driver.implicitly_wait(5)

        # Extract Menu Item Names
        menu_items = driver.find_elements(By.CSS_SELECTOR, '.menu-item-details h4')

        # Extract Prices
        prices = driver.find_elements(By.CSS_SELECTOR, '.menu-item-price-amount')

        # Collect Data
        menu = []
        for item, price in zip(menu_items, prices):
            menu.append({
                "item": item.text,
                "price": price.text
            })
        
        return menu

    except Exception as e:
        print(f"Error occurred: {e}")
        return []

    finally:
        # Close the browser
        driver.quit()

# URLs for Village and other restaurants
restaurants = {
    "Village - The Soul of India": "https://www.yelp.com/menu/village-the-soul-of-india-hicksville",
    "Kunga Kitchen": "https://www.yelp.com/menu/kunga-kitchen-hicksville",  
    "Taste of Mumbai": "https://www.yelp.com/menu/taste-of-mumbai-hicksville-3",
    "Kathis & Kababs": "https://www.yelp.com/menu/kathis-and-kababs-hicksville", 
    "Kabul Grill": "https://www.yelp.com/menu/kabul-grill-hicksville",         
    "Taste Of Chennai": "https://www.yelp.com/menu/taste-of-chennai-hicksville" 
}

# Scrape menus for all restaurants
for restaurant, url in restaurants.items():
    print(f"\nScraping menu for {restaurant}...")
    menu = scrape_menu(url)
    if menu:
        print(f"Menu for {restaurant}:")
        for dish in menu:
            print(f"  - {dish['item']}: ${dish['price']}")
    else:
        print(f"No menu data available for {restaurant}.")



Scraping menu for Village - The Soul of India...
Menu for Village - The Soul of India:
  - Medu Vada: $$6.00
  - Medu Vada in Sambar Bowl: $$6.99
  - Medu Vada in Rasam Bowl: $$6.99
  - Combo: $$5.99
  - Combo in Sambar Bowl: $$6.99
  - Masala Vada: $$7.45
  - Dahi Vada: $$8.95
  - Batata Vada: $$6.00
  - Mysore Bonda: $$6.00
  - Chennai Pakkoda: $$6.00
  - Spring Rolls: $$7.45
  - Paneer Pakkoda: $$8.95
  - Samosa: $$6.00
  - Aloo Tikki: $$6.00
  - Chilly Pakora: $$6.00
  - Tomato Soup: $$7.45
  - Vegetables Manchow Soup: $$7.45
  - Rasam: $$7.45
  - Sambar: $$7.45
  - Mulligatawny Soup: $$7.45
  - Garden Soup: $$7.45
  - Katchumbar Salad: $$7.45
  - Garden Salad: $$7.45
  - Idly: $$7.45
  - Masala Idly: $$9.95
  - Mini Idly: $$7.45
  - Ghee Fried Idly: $$9.95
  - Podi Idly: $$8.95
  - Chilli Idly: $$9.95
  - Idly in Sambar Bow: $$8.95
  - Idly Rasam Bowl: $$8.95
  - Dami Idly: $$7.45
  - Bhel Puri: $$8.95
  - Samosa Chat: $$8.95
  - Aloo Tikki Chat: $$9.95
  - Papdee Chat: $$8.95
  