# Problem Statement: Develop a scraper that collects restaurant data from GrabFood delivery listings

Writing the web-scraping code:

In [40]:
# Import necessary modules

# 1. requests: This library is used to send HTTP requests in Python.
# In this code, it is used to make GET requests that fetch HTML content from the GrabFood website.
# The `get` function from the `requests` module performs these HTTP GET requests.

# 2. BeautifulSoup: BeautifulSoup is a Python library used for web scraping. 
# It facilitates parsing HTML and XML documents, extracting useful data, and navigating through the document tree. 
# In this code, BeautifulSoup is used to parse the HTML content retrieved from the GrabFood website, making it easier to 
# extract specific information such as restaurant names, cuisines, ratings, and delivery times.

# 3. threading: Threading is a built-in Python module used for working with threads. 
# Threads allow concurrent execution of tasks, which can improve performance in certain scenarios, such as web scraping. 
# In this code, threading is used to scrape multiple restaurants simultaneously, making the scraping process faster. 
# The `Thread` class from the `threading` module is used to create new threads, and the `start` and `join` methods are used to 
# start and synchronize the execution of threads, respectively.

# These libraries provide essential functionality for web scraping and concurrent execution of tasks, enabling efficient 
# retrieval of restaurant information from the GrabFood website.

import requests
from bs4 import BeautifulSoup
import threading
import csv 
import pandas as pd

# Request headers: an iPad-style User-Agent string sent with every GET request
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) '
        'AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
    ),
}
# Root of the website; relative page paths are appended to it
BASE_URL = "https://food.grab.com"

# Function to scrape individual restaurant information

# scrape_restaurant_info is run on one thread per restaurant, so appends to the
# shared out.csv file are serialized through this lock.
_CSV_LOCK = threading.Lock()


def _node_text(parent, tag, classes):
    """Return the text of the first matching `tag` under `parent`, or None if absent."""
    if parent is None:
        return None
    node = parent.find(tag, {"class": classes})
    return None if node is None else node.text


def _extract_promotions(soup):
    """Return a list of [name, description, discounted price] per promotion item.

    Each field is looked up independently, so a missing description no longer
    discards the price; a field that is not present is recorded as None.
    """
    # NOTE(review): this element id looks specific to one promotions category;
    # confirm it matches the pages of every restaurant being scraped.
    sections = soup.find_all(
        "div",
        {"class": ["category___3C8lX"], "id": ["Promotions_SGCAT20231030020149015078"]},
    )
    promotions = []
    for section in sections:
        for item in section.find_all("div", {"class": "menuItemWrapper___1xIAB"}):
            info = item.find("div", {"class": "ant-row menuItemInfo___PyfMY"})
            title = _node_text(info, "p", ["itemNameTitle___1sFBq"])
            description = _node_text(info, "p", ["itemDescription___2cIzt"])
            price = _node_text(info, "h6", ["discountedPrice___3MBVA"])
            print(title, description, price)
            promotions.append([title, description, price])
    return promotions


def scrape_restaurant_info(url):
    """Scrape one restaurant page and append its details as a row to out.csv.

    The row contains name, cuisine, rating, delivery time, delivery fee (or
    None) and the list of promotions. Nothing is written when the page cannot
    be fetched or when any of the first four fields is missing from the HTML.

    Args:
        url: Restaurant path relative to BASE_URL (for example an href taken
            from the listing page).

    Returns:
        None. Results are printed and appended to out.csv.
    """
    # Print a debug message indicating which restaurant info is being scraped
    print(f"Scraping restaurant info for: {url}")
    try:
        # A timeout keeps a stalled connection from blocking this thread forever
        r = requests.get(BASE_URL + url, headers=HEADERS, timeout=30)
        r.raise_for_status()
    except requests.RequestException as exc:
        print(f"Failed to fetch {url}: {exc}")
        return

    # Parse the HTML content of the response
    soup = BeautifulSoup(r.content, 'html.parser')

    try:
        name = soup.find("h1", {"class": ["name___1Ls94"]}).text
        cuisine = soup.find("h3", {"class": ["cuisine___3sorn", "infoRow___3TzCZ"]}).text
        rating = soup.find("div", {"class": ["rating___1ZywF"]}).find('div').text
        delivery_time = soup.find("div", {"class": ["distance___3UWcK"]}).find('div').text
    except AttributeError:
        # find() returned None: the expected element is not on this page
        print(f"Skipping {url}: name, cuisine, rating or delivery time not found")
        return

    promotions = _extract_promotions(soup)

    # The second promoName element, when present, is reported as the delivery fee
    promo_names = soup.find_all("div", {"class": ["promoName___2qJQm"]})
    delivery_fee = promo_names[1].text if len(promo_names) >= 2 else None

    # Print the extracted information
    print('=' * 100)
    print(f"Name: {name}")
    print(f"Cuisine: {cuisine}")
    print(f"Rating: {rating}")
    print(f"Delivery Time: {delivery_time}")
    if delivery_fee is not None:
        print("Delivery Fee: ", delivery_fee)

    # newline='' lets the csv module control line endings itself
    with _CSV_LOCK:
        with open("out.csv", 'a', newline='') as csvfile:
            csv.writer(csvfile).writerow(
                [name, cuisine, rating, delivery_time, delivery_fee, promotions]
            )

# Function to scrape restaurants from a list of URLs
# Every thread started by scrape_restaurants is also recorded in this module-level list.
threads = []


def scrape_restaurants(urls):
    """Scrape every restaurant URL on its own thread and wait for all of them.

    Args:
        urls: Iterable of restaurant paths; each one is passed to
            scrape_restaurant_info on a separate thread.

    Returns:
        None. The call returns only after every thread it started has finished.
    """
    print(urls)

    started = []
    for url in urls:
        # Create a new thread to scrape restaurant info
        thread = threading.Thread(target=scrape_restaurant_info, args=(url,))
        threads.append(thread)
        started.append(thread)
        thread.start()
        # Print a debug message indicating the thread is started
        print(f"Thread started for: {url}")

    # Join only the threads started by this call, so the caller knows all
    # scraping work has completed once the function returns.
    for thread in started:
        thread.join()

# Main function to orchestrate the scraping process
def main():
    """Fetch the restaurant listing page and scrape every restaurant linked from it.

    Prints a message and returns without scraping when the listing page cannot
    be fetched.
    """
    # Path of the main page containing restaurant links
    url = "/sg/en/restaurants"
    try:
        # A timeout keeps a stalled connection from hanging the script forever
        r = requests.get(BASE_URL + url, headers=HEADERS, timeout=30)
        r.raise_for_status()
    except requests.RequestException as exc:
        print(f"Failed to fetch restaurant listing: {exc}")
        return

    # Parse the HTML content of the response
    soup = BeautifulSoup(r.content, 'html.parser')
    # Find all elements carrying any of the listing-column classes
    columns = soup.find_all(class_=["ant-col-24 RestaurantListCol___1FZ8V", "ant-col-md-12", "ant-col-lg-6"])

    # Collect each restaurant link once, in page order: a repeated href would
    # start a duplicate thread and write a duplicate row.
    seen = set()
    restaurant_urls = []
    for column in columns:
        anchor = column.find('a', href=True)
        if anchor is None:
            continue
        href = anchor["href"]
        if href not in seen:
            seen.add(href)
            restaurant_urls.append(href)

    # Scrape restaurant information from the list of URLs
    scrape_restaurants(restaurant_urls)


# Entry point of the script
if __name__ == "__main__":
    main()


['/sg/en/restaurant/dapur-penyet-north-bridge-road-delivery/4-C6EFRUMTJU2JBA?', '/sg/en/restaurant/mcdonald-s-boat-quay-delivery/SGDD04944?', '/sg/en/restaurant/wok-hey-funan-delivery/4-C2VKJ3U2C2JZTN?', '/sg/en/restaurant/koi-th%C3%A9-funan-delivery/4-C4NJGEDEE2X1TN?', '/sg/en/restaurant/tandoori-zaika-70-boat-quay-delivery/4-CZD3LRMAWEKVLJ?', '/sg/en/restaurant/stuff-d-funan-delivery/4-C2VKLAC2MBT2NJ?', '/sg/en/restaurant/liho-tea-funan-delivery/4-CZCBCGNXAY5ACA?', '/sg/en/restaurant/sanook-kitchen-funan-mall-delivery/4-C2E3GCCFEEWXAN?', '/sg/en/restaurant/subway-funan-delivery/4-CZDKG25AVYVAT6?', '/sg/en/restaurant/paradise-dynasty-funan-delivery/4-CYUTT6CDRUXXL6?', '/sg/en/restaurant/ramen-matsuri-7-north-canal-road-delivery/SGDD00979?', '/sg/en/restaurant/briyani-hut-boat-quay-delivery/4-C32KFE3EFB31T2?', '/sg/en/restaurant/guzman-y-gomez-funan-delivery/4-CZEUR7LAUCDEC2?', '/sg/en/restaurant/sukiya-funan-delivery/4-C3XTAN6WCCLJKA?', '/sg/en/restaurant/authentic-hock-lam-beef-noodl

In [38]:
import csv
import json

# Convert out.csv (written by the scraper without a header row) into
# newline-delimited JSON, one object per restaurant.

# Column names in the order the scraper writes them. The sixth column holds the
# promotions list; without a name for it DictReader files it under the None key.
fieldnames = ("Name", "Cuisine", "Rating", "Delivery Time", "Delivery Fee", "Promotions")

# The with-statement closes both files, so all output is flushed to disk;
# newline='' is what the csv module expects for files it reads.
with open('out.csv', 'r', newline='') as csvfile, open('out.ndjson', 'w') as jsonfile:
    reader = csv.DictReader(csvfile, fieldnames)
    for row in reader:
        json.dump(row, jsonfile)
        jsonfile.write('\n')