# 1. Import Libraries

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException, ElementNotInteractableException, StaleElementReferenceException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
import pandas as pd
import random

# 2. Define Helper Functions

In [2]:
def _find_text(driver, xpath, default):
    """Return the text of the first element matching `xpath`, or `default` when nothing matches."""
    try:
        return driver.find_element(By.XPATH, xpath).text
    except NoSuchElementException:
        return default


def _collect_comments(driver, max_pages):
    """Read review text from up to `max_pages` review pages and return it as one space-joined string."""
    comment_xpath = './/span[contains(@class, "l9bbXUdC9v0- ZatlKKd1hyc- ukvN6yaH1Ds-")]'
    next_xpath = '//a[@aria-label="Go to the next page"]'
    pages = []

    for current_page in range(1, max_pages + 1):
        try:
            # find_elements returns an empty list when nothing matches (it never
            # raises NoSuchElementException), so "no reviews" is handled below.
            comments_elements = driver.find_elements(By.XPATH, comment_xpath)
            # The first matching span is dropped -- presumably it is not a review;
            # TODO confirm against the page markup.
            comments = (
                " ".join([element.text for element in comments_elements[1:]])
                if comments_elements
                else "Comments not found"
            )
        except StaleElementReferenceException:
            # The review list was re-rendered while its text was being read.
            print(f"Review elements went stale while reading page {current_page}. Ending pagination.")
            break

        pages.append(comments)
        print(f"Page {current_page} Comments:", comments)

        # Do not navigate past the last page that will be read: the extra click
        # only costs time (or a 10 s timeout when there is no further page).
        if current_page == max_pages:
            break

        try:
            next_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, next_xpath))
            )
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            time.sleep(1)
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click();", next_button)
            # Fixed pause before reading the next page -- presumably to let the
            # new reviews load; TODO replace with an explicit wait if possible.
            time.sleep(2)
        except (TimeoutException, ElementNotInteractableException, StaleElementReferenceException):
            print("Failed to locate or click the next button. Ending pagination.")
            break

    return " ".join(pages)


def scrape_restaurant(driver, url, max_pages=3):
    """Load one restaurant page, scrape it, and append the result to the global `data` dict.

    Parameters
    ----------
    driver : Selenium WebDriver used to load and query the page.
    url : address of the restaurant page to scrape.
    max_pages : maximum number of review pages to read (default 3).

    Side effects
    ------------
    Appends one value to each of the lists in the module-level `data` dict
    (keys: url, rest_name, number_of_reviews, rating, food_type, coupon, food,
    service, ambience, value, about_rest, comments, image_url), increments the
    module-level `rest_count`, and prints progress messages.

    A field whose element cannot be found is stored as a "... not found"
    placeholder string instead of raising.
    """
    global rest_count
    driver.get(url)

    # (data column, XPath, placeholder used when the element is missing).
    # NOTE(review): several class names look machine-generated and may change
    # whenever the site is redeployed -- verify the selectors before a long run.
    header_fields = [
        ("rest_name", '//h1', "Name not found"),
        ("number_of_reviews", '//*[@id="reviewInfo"]/span[2]', "Reviews not found"),
        ("rating", './/span[contains(@class, "m1KNa9XKCHY- C7Tp-bANpE4-")]', "Rating not found"),
        ("food_type", '//*[@id="cuisineInfo"]/span[2]', "Food type not found"),
        ("coupon", '//div[contains(@id, "priceBandInfo")]//span[last()]', "Coupon not found"),
        ("food", '//span[text()="Food"]/preceding-sibling::span', "Food not found"),
        ("service", '//span[text()="Service"]/preceding-sibling::span', "Service not found"),
        ("ambience", '//span[text()="Ambience"]/preceding-sibling::span', "Ambience not found"),
        ("value", '//span[text()="Value"]/preceding-sibling::span', "Value not found"),
    ]

    record = {"url": url}
    for column, xpath, placeholder in header_fields:
        record[column] = _find_text(driver, xpath, placeholder)

    # The second matching image is the one kept; find_elements yields an empty
    # list when there is no match, so no exception handling is needed here.
    image_elements = driver.find_elements(By.XPATH, '//img[contains(@src, "otstatic.com")]')
    if len(image_elements) > 1:
        record["image_url"] = image_elements[1].get_attribute('src')
    else:
        record["image_url"] = "No valid image found"

    record["comments"] = _collect_comments(driver, max_pages)

    # Read after pagination, matching the original order of page queries.
    record["about_rest"] = _find_text(
        driver,
        './/span[contains(@class, "l9bbXUdC9v0- ZatlKKd1hyc- ukvN6yaH1Ds- l-AMWW5ZrIg-")]',
        "About Restaurant not found",
    )
    print("About Restaurant:", record["about_rest"])

    for column, scraped in record.items():
        data[column].append(scraped)

    rest_count += 1
    print(f"Total Restaurants scraped: {rest_count}\n")

In [3]:
def get_restaurant_links(driver, scroll_steps=22, scroll_increment=500):
    """Scroll the current search-results page and return the restaurant URLs found on it.

    Parameters
    ----------
    driver : Selenium WebDriver already showing a search-results page.
    scroll_steps : number of scroll increments to perform (default 22).
    scroll_increment : pixels scrolled per step (default 500).

    Returns
    -------
    list of str
        Unique `href` values of the matching anchors, in first-seen order.
        Collection stops early if no matching link becomes visible within
        10 seconds of a scroll step.
    """
    link_xpath = "//a[contains(@class, 'qCITanV81-Y-')]"
    all_urls = []
    seen = set()  # O(1) membership test; all_urls keeps the first-seen order

    # Random 2-3 s pause before scrolling begins.
    time.sleep(random.randint(2, 3))

    for _ in range(scroll_steps):
        driver.execute_script(f"window.scrollBy(0, {scroll_increment});")

        try:
            WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.XPATH, link_xpath))
            )
        except TimeoutException:
            print("Timeout waiting for restaurant links to become visible")
            break

        for elem in driver.find_elements(By.XPATH, link_xpath):
            try:
                url = elem.get_attribute('href')
            except StaleElementReferenceException:
                # The card was re-rendered between lookup and read; the list is
                # re-queried after the next scroll step, so skipping is safe.
                continue
            if url and url not in seen:
                seen.add(url)
                all_urls.append(url)

    print(f"Total URLs collected so far: {len(all_urls)}")
    return all_urls


def click_next_page(driver):
    """Click the search results' "next page" link.

    Returns True when the link was found, became clickable within 10 seconds
    and was clicked; returns False (after printing the reason) otherwise.
    """
    locator = (By.XPATH, "//a[@aria-label='Go to the next page']")
    try:
        next_button = driver.find_element(*locator)
        driver.execute_script("arguments[0].scrollIntoView();", next_button)
        # Click the element handed back by the wait rather than the reference
        # fetched before scrolling, which may have been re-rendered meanwhile.
        next_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(locator))
        next_button.click()
        return True
    except (
        NoSuchElementException,
        ElementClickInterceptedException,
        ElementNotInteractableException,
        StaleElementReferenceException,
        TimeoutException,
    ) as e:
        print(f"Could not click next page: {e}")
        return False

# 3. Initialize Data Structure and Set Up Selenium

In [4]:
# One list per output column; scrape_restaurant appends one value to each
# list for every restaurant it processes.
data = {
    column: []
    for column in (
        "url",
        "rest_name",
        "number_of_reviews",
        "rating",
        "food_type",
        "coupon",
        "food",
        "service",
        "ambience",
        "value",
        "about_rest",
        "comments",
        "image_url",
    )
}

# Search-results page that the crawl starts from.
search_url = 'https://www.opentable.com/s?dateTime=2024-11-10T19%3A00%3A00&covers=4&latitude=53.4777352&longitude=-2.2443015&shouldUseLatLongSearch=true&originCorrelationId=89f047c1-373a-400d-8bd6-1dfe32993784'

# Start a Chrome session and open the search results.
driver = webdriver.Chrome()
driver.get(search_url)

# Restaurant URLs gathered so far, and the number of restaurants scraped.
all_urls = list()
rest_count = 0


# 4. Scrape Restaurant URLs

In [5]:
# Walk the paginated search results, gathering the restaurant links of each page.
# URLs already present in all_urls are skipped, so a restaurant that appears on
# several result pages (or a re-run of this cell) is not queued twice.
seen_urls = set(all_urls)
while True:
    for url in get_restaurant_links(driver):
        if url not in seen_urls:
            seen_urls.add(url)
            all_urls.append(url)

    if not click_next_page(driver):
        print("No more pages to navigate.")
        break

Total URLs collected so far: 53
Total URLs collected so far: 52
Total URLs collected so far: 50
Total URLs collected so far: 50
Total URLs collected so far: 10
Could not click next page: Message: 

No more pages to navigate.


# 5. Scrape Data for Each Restaurant

In [None]:
# Scrape every collected restaurant page. scrape_restaurant records each URL in
# data["url"], so URLs found there are skipped: re-running this cell after an
# interruption resumes the crawl instead of appending duplicate rows.
already_scraped = set(data["url"])
for url in all_urls:
    if url in already_scraped:
        continue
    scrape_restaurant(driver, url)
    already_scraped.add(url)

Page 1 Comments: Read more Lovely tasting food,  served soon after ordering. The staff were friendly and eager to please.  Food was amazing as always, service was extremely slow ..  Delicious food, lovely friendly, helpful staff. Beautiful clean restaurant. Reasonable prices. Highly recommended.  Always love Zouk! We took our children and they were catered to very well, never had a better curry! X  Amazing  never been before . Came for my daughter  Birthday everything was brilliant  Was actually very good. Starter portions pretty basic but tasty. Mains are a lot larger. Will def come again when in Manchester.  Was a good experience , serving waiter was good , others were a little ignorant.
A little below standards of zouk.  The food was absolutly amazing, i ordered the sunday dinner platter for 2 it was delicious! The only down fall was that i ordered 2 soft drinks and my food arrived before the drinks.  Fantastic food. Will definitely be returning. Great for afyer the theatre  Very ni

# 6. Save Data and Close the Driver

In [None]:
# Write the scraped rows to CSV. The browser is closed in `finally` so the
# Chrome session is released even if building or writing the DataFrame fails.
try:
    pd.DataFrame(data).to_csv('Manchester.csv', index=False)
finally:
    driver.quit()