In [1]:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from tqdm.notebook import tqdm
import pandas as pd
import time
import uuid

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

# Chrome options container. No arguments (proxy, headless, ...) are added here,
# so the browser starts with its default configuration.
chrome_options = Options()

# Start the Chrome session that every later cell uses through the global `driver`.
# No ChromeDriver path is passed; presumably Selenium resolves the driver binary
# on its own (PATH / Selenium Manager) -- TODO confirm for the installed version.
driver = webdriver.Chrome(options=chrome_options)

In [8]:
# Open the GoFood listing page (Jakarta / Menteng, burger-sandwich-steak category)
# in the shared `driver` session; the scraping cells below read from this page.
driver.get('https://gofood.co.id/en/jakarta/menteng-restaurants/burger_sandwich_steak')

In [10]:
# Do scroll
def page_scrolling(scroll_pause_time: float = 3, add_index: float = 0.5,
                   max_retries: int = 20, retry_pause: float = 0.5):
    """Scroll the page loaded in the global ``driver`` step by step to its bottom.

    Each step scrolls to ``screen_height * i`` pixels, waits ``scroll_pause_time``
    seconds, and re-reads ``document.body.scrollHeight`` (the height can grow
    after a scroll). ``i`` starts at 1 and grows by ``add_index`` per step; the
    loop ends once the next target lies beyond the current scroll height.

    Args:
        scroll_pause_time: Seconds to wait after each scroll before measuring.
        add_index: Fraction of a screen height added to the target per step.
        max_retries: Consecutive failures tolerated for a single step before the
            last error is re-raised (previously the step was retried forever).
        retry_pause: Seconds to wait between two attempts of a failing step.

    Raises:
        Exception: Whatever the driver raised, once a step has failed more than
            ``max_retries`` times in a row. ``KeyboardInterrupt`` is never
            swallowed, so the cell can be interrupted.
    """
    # The physical screen height is used as the scroll unit.
    screen_height = driver.execute_script("return window.screen.height;")
    i = 1

    while True:
        failures = 0
        while True:
            try:
                driver.execute_script("window.scrollTo(0, {screen_height}*{i});".format(screen_height=screen_height, i=i))
                time.sleep(scroll_pause_time)
                # Re-measure after every scroll: the page height can change once scrolled.
                scroll_height = driver.execute_script("return document.body.scrollHeight;")
                break
            except Exception:
                # Best-effort retry of the same position, but bounded so that a
                # dead browser session surfaces as an error instead of a hang.
                failures += 1
                if failures > max_retries:
                    raise
                time.sleep(retry_pause)

        # Advance only after a fully successful step, so a retry never skips a position.
        i += add_index

        # Stop when the next scroll target is past the end of the page.
        if screen_height * i > scroll_height:
            break

In [9]:
# Scroll the listing page to the bottom with the default pause/step settings
# (presumably so that lazily loaded restaurant cards are present before scraping).
page_scrolling()

AttributeError: 'WebDriver' object has no attribute 'navigate'

In [10]:
# Every restaurant on the listing page is an <a> card under this container
restaurant_boxes = driver.find_elements(by = By.XPATH, value = '//*[@id="__next"]/div/div[3]/div[1]/a')

# Column-oriented store: one list per field, all filled in lock-step below
restaurant_metadata = {
    field: [] for field in ('id', 'name', 'category', 'price_level', 'link')
}

for card in restaurant_boxes:
    # Fresh random identifier for this restaurant
    restaurant_metadata['id'].append(str(uuid.uuid4()))

    # First paragraph of the card body holds the restaurant name
    card_name = card.find_element(By.XPATH, 'div/div[2]/p[1]').text
    restaurant_metadata['name'].append(card_name)

    # Second paragraph holds the comma-separated categories
    card_categories = card.find_element(By.XPATH, 'div/div[2]/p[2]').text
    restaurant_metadata['category'].append(card_categories)

    # Price level = number of 'text-gf-content-primary' elements in the card, minus one
    highlighted = card.find_elements(By.CLASS_NAME, 'text-gf-content-primary')
    restaurant_metadata['price_level'].append(len(highlighted) - 1)

    # The card itself is the anchor, so its href is the restaurant page
    restaurant_metadata['link'].append(card.get_attribute('href'))

In [11]:
# Turn the column-wise metadata dict into a table: one row per restaurant,
# with columns id, name, category, price_level and link.
restaurant_df = pd.DataFrame(restaurant_metadata)

In [12]:
# Lookup table: restaurant ID -> restaurant page URL
id_to_link = restaurant_df.set_index('id')['link'].to_dict()

In [22]:
# Display the scraped restaurant table as the cell output
restaurant_df

Unnamed: 0,id,name,category,price_level,link
0,285093fb-6f91-446b-a252-cf98b876170f,"One Nine Kitchen, Menteng","Snacks, Western, Fast food",3,https://gofood.co.id/en/jakarta/restaurant/one...
1,b6953795-73f7-4a53-9891-f2fccfe11add,"Belly Bandit Burger, Menteng","Western, Fast food, Snacks",4,https://gofood.co.id/en/jakarta/restaurant/bel...
2,bf484ca1-eb59-4cbd-bd7e-9b9031408497,"Cikang Resto Graha Anam, Gondangdia Menteng","Rice, Chicken & duck, Western",3,https://gofood.co.id/en/jakarta/restaurant/cik...
3,1c183c79-fe30-4d00-b864-0f5b5fe395ac,"Repha's Pastry & Tea, Plaza Menteng","Western, Beverages, Sweets",3,https://gofood.co.id/en/jakarta/restaurant/rep...
4,e7d7b9c5-103e-4f91-9588-fd143a5c71d7,"Berliner Imbiss, Menteng",Western,2,https://gofood.co.id/en/jakarta/restaurant/ber...
5,998457ab-612a-466e-847e-d0a12075110c,"Subshack By Quiznos, Menteng","Western, Fast food, Bakery",3,https://gofood.co.id/en/jakarta/restaurant/sub...
6,67cd52e9-51cc-487d-9d74-9f8f2a51ce4d,"Delico, Menteng","Coffee, Bakery, Western",3,https://gofood.co.id/en/jakarta/restaurant/del...
7,005fed2a-e69f-41fe-ac32-434166a9091d,"Pizza Hut Delivery - PHD, Jl. Cikini Raya","Pizza & pasta, Fast food, Snacks, Western, Chi...",4,https://gofood.co.id/en/jakarta/restaurant/piz...
8,ac8ff7ec-725e-4438-94a7-36893b9c3c47,"The Acre, Menteng","Sweets, Coffee, Western",4,https://gofood.co.id/en/jakarta/restaurant/the...
9,8b36a459-a3ae-4354-b7ed-a81498a210bb,"Odelice Cafe, HOS Cokroamino","Western, Coffee, Beverages",3,https://gofood.co.id/en/jakarta/restaurant/ode...


In [19]:
# Column order of the frame returned by get_menu_metadata
_MENU_COLUMNS = ['id', 'resto_id', 'section', 'menu_name', 'menu_detail', 'price']


def _parse_price(text):
    """Convert a price label such as ``'85.000'`` to an int by dropping the dots."""
    return int(text.replace('.', ''))


def _read_menu_detail(menu):
    """Return the description text of one menu card, or ``''`` when it cannot be read."""
    try:
        return menu.find_element(by = By.XPATH, value = 'div/div[1]/div[1]/p').text
    except Exception:
        # Best effort: a card without a description yields an empty string.
        return ''


def _read_menu_price(menu):
    """Return the integer price of one menu card, falling back to the promo layout."""
    try:
        label = menu.find_element(by = By.XPATH, value = 'div/div[1]/div[1]/div').text
        return _parse_price(label)
    except Exception:
        # The regular price could not be read as a plain number. This happens
        # when the menu is on promo and the layout changes; collect the
        # original menu price instead. Errors here propagate to the caller.
        label = menu.find_element(by = By.XPATH, value = 'div/div[1]/div[1]/div/div[1]/span[2]').text
        return _parse_price(label)


def get_menu_metadata(resto_id):
    """Scrape every menu item of one restaurant into a DataFrame.

    Uses the module-level ``driver``, ``id_to_link`` and ``page_scrolling``:
    the restaurant page is opened, scrolled to the bottom, and each
    ``section-*`` block except the recommendation section is walked.

    Args:
        resto_id: Key of ``id_to_link`` identifying the restaurant to scrape.

    Returns:
        pandas.DataFrame with one row per menu item and the columns ``id``
        (fresh UUID string), ``resto_id``, ``section``, ``menu_name``,
        ``menu_detail`` (``''`` when absent) and ``price`` (int).

    Raises:
        KeyError: If ``resto_id`` is not in ``id_to_link``.
        Exception: Any lookup/parse error for a section title, a menu name, or
            a price that fits neither the regular nor the promo layout.
    """
    # Get link and go to it
    link = id_to_link[resto_id]
    driver.get(link)

    # Scroll page
    page_scrolling(scroll_pause_time=1, add_index=0.7)

    # Get all section
    sections = driver.find_elements(by = By.XPATH, value = "//div[contains(@id, 'section-')]")

    # Remove section-0 since this is recommendation section
    section_0 = driver.find_elements(by = By.XPATH, value = "//div[@id='section--0']")
    if section_0:
        sections.remove(section_0[0])

    # One record (dict) per menu item; turned into a frame once at the end
    records = []
    for section in sections:
        section_name = section.find_element(by = By.XPATH, value = "h2").text

        for menu in section.find_elements(by = By.XPATH, value = "div/div"):
            records.append({
                'id': str(uuid.uuid4()),
                'resto_id': resto_id,
                'section': section_name,
                'menu_name': menu.find_element(by = By.XPATH, value = 'div/div[1]/div[1]/h3').text,
                'menu_detail': _read_menu_detail(menu),
                'price': _read_menu_price(menu),
            })

    # Explicit columns keep the schema stable even when no menu item was found
    return pd.DataFrame(records, columns=_MENU_COLUMNS)

In [20]:
# Scrape the menu of every restaurant; each call returns one DataFrame.
# (The loop variable is no longer called `id`, which shadowed the builtin.)
menu_frames = [
    get_menu_metadata(resto_id)
    for resto_id in tqdm(restaurant_metadata['id'])
]

# Concatenate once at the end instead of re-copying the growing frame on every
# iteration; fall back to an empty frame when there was nothing to scrape,
# because pd.concat raises on an empty list.
if menu_frames:
    menu_metadata = pd.concat(menu_frames, axis = 0, ignore_index = True)
else:
    menu_metadata = pd.DataFrame()

  0%|          | 0/12 [00:00<?, ?it/s]

In [21]:
# Display the combined menu table (all scraped restaurants) as the cell output
menu_metadata

Unnamed: 0,id,resto_id,section,menu_name,menu_detail,price
0,54d555c5-f11b-42dd-91ad-fe1b105d6f71,285093fb-6f91-446b-a252-cf98b876170f,Savory Snack,Bitterballen (Frozen),One Nine signature beef bitterballen. Contains...,85000
1,26c0edf5-d011-47df-9083-3273743aeae6,285093fb-6f91-446b-a252-cf98b876170f,Creamy Dessert,Very Berry Creamy,Creamy jiggly wigly diaries with homemade stra...,25000
2,2d0ba145-6b5a-4b01-95ec-e98ea609abb5,285093fb-6f91-446b-a252-cf98b876170f,Creamy Dessert,Creamy Caramel,Creamy jiggly wiggly diaries mixed well with h...,25000
3,e730ef66-2010-4274-9bb0-dddd3f192028,b6953795-73f7-4a53-9891-f2fccfe11add,Salads,Caesar Salad,"Fresh baby romaine, tomato, anchovies and topp...",88400
4,f7624692-667e-49cc-af97-2105c5660bec,b6953795-73f7-4a53-9891-f2fccfe11add,Salads,Additional : Poached Egg,,23400
...,...,...,...,...,...,...
838,237d629d-de47-4077-bd3b-a15e03a93adc,2544e03a-d226-456f-8f66-d35f0769788a,Drinks,Extra Coca-Cola,,13000
839,5fe3b9ff-8e61-44fb-ac8c-dc03d7f534a8,2544e03a-d226-456f-8f66-d35f0769788a,Drinks,Extra Zero Coca-Cola,,13000
840,18fae51e-56f9-48f1-b80e-9d9cdb8e4820,2544e03a-d226-456f-8f66-d35f0769788a,Drinks,Extra Fanta,,13000
841,852f5a20-5cf5-40f0-8c20-1781c78216e9,2544e03a-d226-456f-8f66-d35f0769788a,Drinks,Extra Sprite,,13000
