In [1]:
import time
import pandas as pd
import re
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [2]:
# URL to scrape
url = "https://food.grab.com/sg/en/"

# Setup Chrome WebDriver with options for scraping
chrome_options = webdriver.ChromeOptions()
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134"
chrome_options.add_argument(f'user-agent={user_agent}')
chrome_options.add_argument("--incognito")  # Use incognito mode to avoid saving history
chrome_options.add_argument("--disable-blink-features=AutomationControlled")  # Avoid detection
chrome_prefs = {"profile.default_content_setting_values.geolocation": 2}  # Disable geolocation
chrome_options.experimental_options["prefs"] = chrome_prefs


# Initialize WebDriver
driver = webdriver.Chrome(options=chrome_options)
driver.get(url)

In [3]:
# Accept cookies if the button is present
try:
    WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Accept")]'))).click()
except TimeoutException:
    pass  # If the button doesn't appear, continue without action

In [4]:
# Wait for the main page layout to load
WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".ant-layout")))


# Input the location for delivery
location_input = driver.find_element(By.ID, 'location-input')
location_input.click()
time.sleep(2)  # Wait for the input to be clickable and ready for input
location_input.clear()
location_input.send_keys("PT Singapore - Choa Chu Kang North 6, Singapore, 689577")

# Submit the location
submit_button = driver.find_element(By.CSS_SELECTOR, '.ant-btn.submitBtn___2roqB.ant-btn-primary')
submit_button.click()

In [5]:
# Ensure restaurants list is loaded
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".ant-layout")))

# Scroll to the bottom to load all restaurants
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)  # Wait for the page to load
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height




In [6]:
# Get the page source and close the driver
page_source = driver.page_source
driver.quit()
print(page_source)

<html lang="en" class=""><head><meta charset="utf-8"><meta name="google" content="notranslate"><meta http-equiv="Content-Security-Policy" content="script-src 'unsafe-eval' 'unsafe-inline' 'self' www.googletagmanager.com www.google.com scribe-web-sdk.grab.com static.hotjar.com www.google-analytics.com connect.facebook.net websdk.appsflyer.com www.gstatic.com script.hotjar.com *.sentry.io cdn.ravenjs.com food.grab.com www.googleadservices.com googleads.g.doubleclick.net cdn-apac.onetrust.com blob: data:;worker-src https://food.grab.com blob: data:;"><script async="" src="https://websdk.appsflyer.com?st=banners&amp;"></script><script type="text/javascript" async="" src="https://www.googletagmanager.com/gtag/destination?id=DC-6254042&amp;l=dataLayer&amp;cx=c"></script><script type="text/plain" src="https://connect.facebook.net/signals/config/517824045640036?v=2.9.151&amp;r=stable&amp;domain=food.grab.com&amp;hme=8ce74e881727851b4427183947937854816d72704925561b9de6420cd43214ee&amp;ex_m=66%2

In [7]:
with open('page_source.html', 'w', encoding='utf-8') as f:
    f.write(page_source)

In [8]:
file_path = 'page_source.html'

# Read the HTML content from the file
with open(file_path, 'r', encoding='utf-8') as file:
    html_content = file.read()


In [9]:
# Process the page source with BeautifulSoup
soup = BeautifulSoup(page_source, 'html.parser')
restaurants_divs = soup.find_all("div", class_="ant-col-24 RestaurantListCol___1FZ8V ant-col-md-12 ant-col-lg-6")

all_restaurant_data = []  # Container for all extracted data


# List to hold all restaurant data
all_restaurant_data = []

# Extract data from each restaurant block
for restaurant_div in restaurants_divs:
    restaurant_data = {
        "Restaurant Name": restaurant_div.find("p", class_="name___2epcT").text if restaurant_div.find("p", class_="name___2epcT") else "N/A",
        "Restaurant Cuisine": restaurant_div.find("div", class_="cuisine___T2tCh").text if restaurant_div.find("div", class_="cuisine___T2tCh") else "N/A",
        # Additional fields extracted similarly...
    }

    # Extract the image link, handle cases where it might be missing
    image_link_element = restaurant_div.find("img", class_="realImage___2TyNE")
    restaurant_data["Image Link"] = image_link_element['src'] if image_link_element and 'src' in image_link_element.attrs else "N/A"
    
    # Append the restaurant data to the list
    all_restaurant_data.append(restaurant_data)
    print(f"Total restaurants scraped: {len(all_restaurant_data)}")



Total restaurants scraped: 1
Total restaurants scraped: 2
Total restaurants scraped: 3
Total restaurants scraped: 4
Total restaurants scraped: 5
Total restaurants scraped: 6
Total restaurants scraped: 7
Total restaurants scraped: 8
Total restaurants scraped: 9
Total restaurants scraped: 10
Total restaurants scraped: 11
Total restaurants scraped: 12
Total restaurants scraped: 13
Total restaurants scraped: 14
Total restaurants scraped: 15
Total restaurants scraped: 16
Total restaurants scraped: 17
Total restaurants scraped: 18
Total restaurants scraped: 19
Total restaurants scraped: 20
Total restaurants scraped: 21
Total restaurants scraped: 22
Total restaurants scraped: 23
Total restaurants scraped: 24
Total restaurants scraped: 25
Total restaurants scraped: 26
Total restaurants scraped: 27
Total restaurants scraped: 28
Total restaurants scraped: 29
Total restaurants scraped: 30
Total restaurants scraped: 31
Total restaurants scraped: 32
Total restaurants scraped: 33
Total restaurants s

In [290]:
# Example: Printing out the extracted data for all restaurants
for restaurant in all_restaurant_data:
    for key, value in restaurant.items():
        print(f"{key}: {value}")
    print("-----")  # Separating entries for readability

Restaurant Name: The Coffee Bean & Tea Leaf - Raffles City Shopping Centre
Restaurant Cuisine: Breakfast & Brunch, Coffee & Tea, Halal, Dessert
Image Link: https://d1sag4ddilekf6.cloudfront.net/compressed_webp/merchants/4-C2LXSBCJSET2LX/hero/eba0e478f3954100b2c1dc01bed520df_1648777871908336709.webp
-----
Restaurant Name: Holly Wine - Beach Road 🏍
Restaurant Cuisine: Alcohol, Islandwide Delivery
Image Link: https://d1sag4ddilekf6.cloudfront.net/compressed_webp/merchants/4-C3UFDA3VTFMTEJ/hero/3f908b09c64e42a2aaddf32165d136bf_1662546260338125845.webp
-----
Restaurant Name: Old Street Victoria Cafe - Victoria Street
Restaurant Cuisine: Local & Malaysian, Breakfast & Brunch, Coffee & Tea
Image Link: https://d1sag4ddilekf6.cloudfront.net/compressed_webp/merchants/4-C4AAAPAHGKE1PE/hero/f399746f2ee94cc18f6bb230abc248a5_1698846925448720655.webp
-----
Restaurant Name: Mr. Coconut - Raffles City Shopping Centre
Restaurant Cuisine: Drinks & Beverages, Bubble Tea, Dessert
Image Link: https://food-c

In [291]:
import json
import gzip

# Define the path for the NDJSON file and the gzipped file
ndjson_file_path = 'restaurants_readable.ndjson'
gzip_file_path = 'restaurants_readable.ndjson.gz'

# Writing the data to an NDJSON file with pretty-printing
with open(ndjson_file_path, 'w', encoding='utf-8') as f:
    for restaurant in all_restaurant_data:
        # Convert each restaurant data dictionary to a pretty-printed JSON string
        json_str = json.dumps(restaurant, ensure_ascii=False, indent=2)
        # Write the JSON string to the file, followed by a newline character
        f.write(json_str + '\n')

# Compressing the NDJSON file using gzip
with open(ndjson_file_path, 'rb') as f_in:
    with gzip.open(gzip_file_path, 'wb') as f_out:
        # Copy the contents of the original file into the compressed file
        f_out.writelines(f_in)

print(f"Data has been successfully written to {gzip_file_path} in a more readable format.")


Data has been successfully written to restaurants_readable.ndjson.gz in a more readable format.
