# Deliveroo

In [None]:
#| default_exp deliveroo_utils

In [None]:
#| export
from deliveroo_editions.selenium_utils import *
from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from tqdm import tqdm
from bs4 import BeautifulSoup
from ratelimit import limits, RateLimitException, sleep_and_retry
import time

In [None]:
#| export
@sleep_and_retry
@limits(calls=1, period=20)
def get_restaurant_tags(url:str, # URL for Deliveroo restaurants page
                        driver=None # Initialised Selenium webdriver; a temporary one is created (and closed) if omitted
                       ): # list of top-level `<li>` BeautifulSoup tags from the restaurants grid
    "Returns all list elements from Deliveroo restaurants webpage corresponding to a restaurant"
    # The decorators above throttle this function to one call per 20-second period,
    # sleeping and retrying instead of raising when the limit is hit.
    owns_driver = not driver
    if owns_driver:
        driver = initialise_driver(service,True)
    try:
        time.sleep(1)
        driver.get(url)
        wait = WebDriverWait(driver, 10)  # Maximum wait time in seconds
        ul_element = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'ul[class*="HomeFeedGrid"]')))
        # Parse a static copy of the grid's HTML so the result does not depend on the live browser session
        soup = BeautifulSoup(ul_element.get_attribute('innerHTML'), 'html.parser')
        # Keep only the outermost <li> elements; <li> nested inside another <li> belong to a card's inner lists
        return [li for li in soup.find_all('li') if not li.find_parents('li')]
    finally:
        # Only close a browser this function started; a caller-supplied driver stays open for reuse
        if owns_driver:
            driver.quit()

# Smoke test against an archived snapshot of the Brighton editions page (needs network access and a browser driver).
# NOTE(review): these statements sit in the same `#| export` cell as `get_restaurant_tags`, so they presumably
# end up in the exported module as well — confirm, and consider moving them to a cell of their own.
tags = get_restaurant_tags("https://web.archive.org/web/20201019/https://deliveroo.co.uk/restaurants/brighton/brighton-editions?tags=deliveroo+editions")
assert len(tags) == 13

In [None]:
#| export
def get_timestamp(url:str # URL for Deliveroo
                 ): # `YYYYMMDD` string, or `None` when the url does not contain such a timestamp
    "Returns YYYYMMDD timestamp from url of format: https://web.archive.org/web/YYYYMMDD/"
    # For the documented format the timestamp is the fifth '/'-separated segment:
    # ['https:', '', 'web.archive.org', 'web', '<timestamp>', ...]
    segments = url.split('/')
    timestamp = segments[4] if len(segments) > 4 else ""
    # Longer all-digit timestamps are truncated to their first 8 digits;
    # shorter or non-numeric segments are rejected instead of being returned as a bogus date.
    if timestamp.isascii() and timestamp.isdigit() and len(timestamp) >= 8:
        return timestamp[:8]
    print("Could not extract timestamp of format YYYYMMDD from url provided")
    return None

# Pin the exact value: a bare truthiness check would also pass for any wrong but non-empty string.
assert get_timestamp("https://web.archive.org/web/20201019/https://deliveroo.co.uk/restaurants/brighton/brighton-editions?tags=deliveroo+editions") == "20201019"

In [None]:
#| export
def add_timestamps_to_restaurants(restaurants, url):
    "Stamps every restaurant dict in place with `timestamp_url` (the url) and `timestamp` (from `get_timestamp`), returning the same list"
    for record in restaurants:
        # The source url is stored first, then the date extracted from it
        record['timestamp_url'] = url
        record['timestamp'] = get_timestamp(url)
    # The input list itself is handed back (its dicts were mutated, not copied)
    return restaurants


# Input fixture: one restaurant dict without any timestamp fields.
restaurants = [{'name': 'Oowee Vegan',
  'location': 'brighton-editions',
  'restaurant_url': 'https://deliveroo.co.uk/menu/brighton/brighton-editions/oowee-vegan-editions-bnc?day=today&geohash=gcpc5qr68ee1&time=ASAP'}]

# Expected result: the same dict plus `timestamp` and `timestamp_url`.
timestamped_restaurants = [{'name': 'Oowee Vegan',
  'location': 'brighton-editions',
  'timestamp': '20201019',
  'restaurant_url': 'https://deliveroo.co.uk/menu/brighton/brighton-editions/oowee-vegan-editions-bnc?day=today&geohash=gcpc5qr68ee1&time=ASAP',
  'timestamp_url': 'https://web.archive.org/web/20201019/https://deliveroo.co.uk/restaurants/brighton/brighton-editions?tags=deliveroo+editions'}]

# Dict equality ignores key order, so only the keys and values have to match.
assert add_timestamps_to_restaurants(restaurants, timestamped_restaurants[0]['timestamp_url']) == timestamped_restaurants

In [None]:
#| export
def get_restaurants(url:str, # URL for Deliveroo restaurants page
                    driver= None # Initialised Selenium webdriver; a temporary one is created (and closed) if omitted
                   ): # list of dicts with keys `name`, `location`, `edition` and `restaurant_url`
    """Gets the restaurant `name`, `location`, `edition` and Deliveroo `restaurant_url`
    for each restaurant on url page.

    `location` and `edition` are the two path segments that follow `/menu/` in the
    restaurant link. When the link cannot be read, `location` and `restaurant_url`
    are left as empty strings and a message is printed.
    """
    owns_driver = not driver
    if owns_driver:
        driver = initialise_driver(service,True)
    try:
        tags = get_restaurant_tags(url, driver)
    finally:
        # The tags are parsed copies of the page, so a browser started here can be closed straight away
        if owns_driver:
            driver.quit()

    restaurants = []
    # Defined up front so a failed lookup on the very first card cannot hit an unbound variable.
    # NOTE(review): `edition` is deliberately not reset per card — a card whose link cannot be read
    # keeps the edition of the previous successfully parsed card (existing behaviour); confirm this is wanted.
    edition = ""
    for tag in tags:
        name, restaurant_url, location = "", "", ""
        list_sections = tag.find_all('ul')
        if not list_sections:
            print(f"No restaurants found at {url}")
            continue
        for list_section in list_sections:
            list_items = list_section.find_all('li')
            # Only lists with at least three items are treated as a restaurant card; the first item is the name
            if len(list_items) < 3:
                continue
            name = list_items[0].text
            try:
                restaurant_url = tag.find_all('a')[0]['href']
                # Relative links are made absolute so the segment positions below are stable
                if restaurant_url.startswith('/menu'):
                    restaurant_url = "https://deliveroo.co.uk" + restaurant_url
                # ['https:', '', 'deliveroo.co.uk', 'menu', <location>, <edition>, ...]
                location = restaurant_url.split("/")[4]
                edition = restaurant_url.split("/")[5]
            except Exception as e:
                # Best effort: report the problem and still record the restaurant with what is known
                print(e)
                print(f"Couldn't get metadata for {name} in {url}")
            restaurants.append({'name': name, 'location': location, 'edition': edition, 'restaurant_url': restaurant_url})
    return restaurants

In [None]:
# Regression check against an archived snapshot of the Brighton editions page (needs network access and a browser driver).
metadata = get_restaurants("https://web.archive.org/web/20201019/https://deliveroo.co.uk/restaurants/brighton/brighton-editions?tags=deliveroo+editions")
# The last two entries are cards whose metadata lookup failed (see the messages printed below the cell):
# `location` and `restaurant_url` stay empty, and `edition` is the value left over from the preceding card.
assert metadata == [{'name': 'Oowee Vegan',
  'location': 'brighton',
  'edition': 'brighton-editions',
  'restaurant_url': 'https://deliveroo.co.uk/menu/brighton/brighton-editions/oowee-vegan-editions-bnc?day=today&geohash=gcpc5qr68ee1&time=ASAP'},
 {'name': 'Shake Shack',
  'location': 'brighton',
  'edition': 'brighton-editions',
  'restaurant_url': 'https://deliveroo.co.uk/menu/brighton/brighton-editions/shake-shack-editions-bnc?day=today&geohash=gcpc5qr68ee1&time=ASAP'},
 {'name': 'Lost Boys Chicken',
  'location': 'brighton',
  'edition': 'hove',
  'restaurant_url': 'https://deliveroo.co.uk/menu/brighton/hove/lost-boys-chicken-editions?day=today&geohash=gcpc5qr68ee1&time=ASAP'},
 {'name': 'The Athenian',
  'location': 'brighton',
  'edition': 'brighton-editions',
  'restaurant_url': 'https://deliveroo.co.uk/menu/brighton/brighton-editions/the-athenian-editions-bnc-new?day=today&geohash=gcpc5qr68ee1&time=ASAP'},
 {'name': 'The Great British Cheesecake Company ',
  'location': 'brighton',
  'edition': 'brighton-editions',
  'restaurant_url': 'https://deliveroo.co.uk/menu/brighton/brighton-editions/the-great-british-cheesecake-company-editions-bnc?day=today&geohash=gcpc5qr68ee1&time=ASAP'},
 {'name': 'The Athenian Plant Based',
  'location': 'brighton',
  'edition': 'brighton-editions',
  'restaurant_url': 'https://deliveroo.co.uk/menu/brighton/brighton-editions/the-athenian-plant-based-editions-bnc-new?day=today&geohash=gcpc5qr68ee1&time=ASAP'},
 {'name': 'The Ice Cream Store',
  'location': 'brighton',
  'edition': 'brighton-editions',
  'restaurant_url': 'https://deliveroo.co.uk/menu/brighton/brighton-editions/the-ice-cream-store-editions-bnc?day=today&geohash=gcpc5qr68ee1&time=ASAP'},
 {'name': 'Pleesecakes - cheesecake',
  'location': 'brighton',
  'edition': 'brighton-editions',
  'restaurant_url': 'https://deliveroo.co.uk/menu/brighton/brighton-editions/pleesecakes-editions-bnc?day=today&geohash=gcpc5qr68ee1&time=ASAP'},
 {'name': 'TRIP CBD Store\t',
  'location': 'brighton',
  'edition': 'brighton-editions',
  'restaurant_url': 'https://deliveroo.co.uk/menu/brighton/brighton-editions/trip-cbd-store-editions-bnc?day=today&geohash=gcpc5qr68ee1&time=ASAP'},
 {'name': 'Halo Top',
  'location': 'brighton',
  'edition': 'brighton-editions',
  'restaurant_url': 'https://deliveroo.co.uk/menu/brighton/brighton-editions/halo-top?day=today&geohash=gcpc5qr68ee1&time=ASAP'},
 {'name': 'VIP Very Italian Pizza',
  'location': 'brighton',
  'edition': 'brighton-editions',
  'restaurant_url': 'https://deliveroo.co.uk/menu/brighton/brighton-editions/vip-italy-limited-hove?day=today&geohash=gcpc5qr68ee1&time=1715'},
 {'name': 'A Burgers Veggie Kitchen  by Taster',
  'location': '',
  'edition': 'brighton-editions',
  'restaurant_url': ''},
 {'name': 'Saucybird',
  'location': '',
  'edition': 'brighton-editions',
  'restaurant_url': ''}]

list index out of range
Couldn't get metadata for A Burgers Veggie Kitchen  by Taster in https://web.archive.org/web/20201019/https://deliveroo.co.uk/restaurants/brighton/brighton-editions?tags=deliveroo+editions
list index out of range
Couldn't get metadata for Saucybird in https://web.archive.org/web/20201019/https://deliveroo.co.uk/restaurants/brighton/brighton-editions?tags=deliveroo+editions


In [None]:
#| export
def search_deliveroo(address:str, # UK address containing a UK postcode
                     driver= None  # Initialised Selenium webdriver
                    ):
    "Searches Deliveroo for an address, returning webdriver element once search results page has loaded."
    if not driver:
        driver = initialise_driver(service,True)
    driver.get("https://deliveroo.co.uk/")
    timeout = 20  # Maximum wait time in seconds, used for both waits below
    # Wait for the location search box, then type the address and submit it with RETURN
    search_box = WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'input#location-search'))
    )
    search_box.send_keys(address)
    search_box.send_keys(Keys.RETURN)
    # The results page counts as loaded once the restaurants grid is present in the DOM
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'ul[class*="HomeFeedGrid"]'))
    )
    return driver

# Live smoke test: searches the real Deliveroo site (needs network access and a browser driver).
test_address = "144 Cambridge Heath Rd, Bethnal Green, London E1 5QJ"
driver = search_deliveroo(test_address)
# NOTE(review): compares the full results url, including the geohash query parameter — presumably brittle if the site changes how it resolves the address.
assert driver.current_url == 'https://deliveroo.co.uk/restaurants/london/stepney-green?fulfillment_method=DELIVERY&geohash=gcpvng8jvn74'

In [None]:
#| export
def results_to_editions_url(url:str, # Deliveroo search results url
                           ): # url whose query string is replaced by the editions filter
    "Apply `deliveroo+editions` filter to Deliveroo search results url"
    # Everything from the first '?' onwards is discarded and replaced by the fixed filter query
    page, _, _ = url.partition('?')
    return f"{page}?fulfillment_method=DELIVERY&tags=deliveroo+editions"

# The geohash query parameter is dropped and replaced by the editions tag filter.
test_url = 'https://deliveroo.co.uk/restaurants/london/stepney-green?fulfillment_method=DELIVERY&geohash=gcpvng8jvn74'
assert results_to_editions_url(test_url) == 'https://deliveroo.co.uk/restaurants/london/stepney-green?fulfillment_method=DELIVERY&tags=deliveroo+editions'

In [None]:
#| export
def get_editions(url:str, # URL for Deliveroo search results page
                 driver= None # Initialised Selenium webdriver; a temporary one is created (and closed) if omitted
                ): # list of unique lower-cased `city/edition` strings, in page order
    """Returns a list of editions locations
    from all the editions restaurants on url page ie 'london/whitechapel-editions'."""
    owns_driver = not driver
    if owns_driver:
        driver = initialise_driver(service,True)
    try:
        tags = get_restaurant_tags(results_to_editions_url(url), driver)
    finally:
        # The tags are parsed copies of the page, so a browser started here can be closed straight away
        if owns_driver:
            driver.quit()

    editions_list = []
    for tag in tags:
        # Some cards carry no link at all; skip them instead of aborting the whole page
        anchors = tag.find_all('a')
        href = anchors[0].get('href') if anchors else None
        # A link such as '/menu/London/whitechapel-editions/...' splits into
        # ['', 'menu', <city>, <edition>, ...], hence the need for at least four segments
        edition_tags = href.split('/') if href else []
        if len(edition_tags) < 4:
            print(f"Couldn't get edition for a restaurant in {url}")
            continue
        edition = edition_tags[2].lower() + '/' + edition_tags[3].lower()
        if edition not in editions_list:
            editions_list.append(edition)
    return editions_list

# Live demo call (needs network access and a browser driver); the list shown below the cell is its captured output.
test_url = "https://deliveroo.co.uk/restaurants/london/globe-town?fulfillment_method=DELIVERY&geohash=gcpvnuuyrtud"
get_editions(test_url)

['london/whitechapel-editions',
 'london/canary-wharf',
 'london/caledonian-road-and-barnsbury',
 'london/canning-town-editions',
 'london/fish-island-area',
 'london/blackwall']

In [None]:
'https://deliveroo.co.uk/menu/brighton/brighton-editions/oowee-vegan-editions-bnc?day=today&geohash=gcpc5qr68ee1&time=ASAP'

In [None]:
'/menu/London/whitechapel-editions/dishoom-shoreditch?day=today&geohash=gcpvpqp8znt2&time=1215'

In [None]:
#| export
def get_restaurants_from_editions_location(editions_list:list # list of editions locations ie ['london/whitechapel-editions','london/canary-wharf']
                                          ): # list of restaurant dicts as returned by `get_restaurants`, concatenated in input order
    "gets restaurant metadata for all restaurants based at listed editions locations"
    restaurants = []
    if not editions_list:
        # Nothing to fetch, so no browser needs to be started
        return restaurants
    # One browser is shared across all editions and closed at the end,
    # rather than letting every `get_restaurants` call start its own.
    driver = initialise_driver(service,True)
    try:
        for edition in editions_list:
            edition_url = "https://deliveroo.co.uk/restaurants/" + edition + "?fulfillment_method=DELIVERY&tags=deliveroo+editions"
            restaurants += get_restaurants(edition_url, driver)
    finally:
        driver.quit()
    return restaurants

# Live smoke test (needs network access and a browser driver): only checks that at least one restaurant is returned.
editions_list = ['london/fish-island-area','london/blackwall']
restaurants = get_restaurants_from_editions_location(editions_list)
assert restaurants

In [None]:
#| export
def get_editions_locations_near_addresses(addresses:list,  # list of address strings to search Deliveroo's website for
                                          driver= None # Initialised Selenium webdriver; a temporary one is created (and closed) if omitted
                                         ): # list of unique `city/edition` strings, in order of first appearance
    "Returns a list of all editions locations found when searching all the restaurants at or near the list of addresses"
    # Honour a caller-supplied driver; only start (and later close) a browser when none was given
    owns_driver = not driver
    if owns_driver:
        driver = initialise_driver(service,True)
    editions_locations = []
    try:
        for address in addresses:
            driver = search_deliveroo(address, driver)
            # Reuse the same browser for the editions lookup instead of starting another one per address
            editions = get_editions(driver.current_url, driver)
            editions_locations.extend([item for item in editions if item not in editions_locations])
    finally:
        if owns_driver:
            driver.quit()
    return editions_locations

# Live smoke test (needs network access and a browser driver).
addresses = ['144 Cambridge Heath Rd, Bethnal Green, London E1 5QJ',
            '20 Fonthill Rd, Finsbury Park, London N4 3HU']
# Editions locations seen for these addresses when the notebook was written.
test_editions = ['london/whitechapel-editions',
 'london/canary-wharf',
 'london/caledonian-road-and-barnsbury',
 'london/canning-town-editions',
 'london/fish-island-area',
 'london/blackwall',
 'london/hornsey-station',
 'london/kentish-town',
 'london/wood-green']
# Deliberately loose: passes as soon as one returned location is among the known ones.
assert any(edition_location in test_editions for edition_location in get_editions_locations_near_addresses(addresses))

In [None]:
#| hide
# Runs nbdev's export step from inside the notebook.
import nbdev; nbdev.nbdev_export()