# Sentiment Analysis of Italian Restaurant Reviews

The project aims to analyze customer sentiment towards Italian restaurants in France and Italy. The analysis is based on textual reviews collected from two different data sources: OpenTable and Yelp.

By employing sentiment analysis, we aim to uncover trends and compare how Italian cuisine and the general quality of the related businesses are perceived across the two regions.

## Yelp Fusion API

In [None]:
import requests

MAX_RESTAURANTS = 30
API_KEY = "3lX3EqE4bLsHCwaN8tyZ3kNNg_tykrIiw8cgEDcbNOeGYo9m22YYW5as-1dPp-f0Gy_X8_12CDEiqVgbM0SdKgKE2x94_w4-_PLu8Kfufdj-kBvbYWCGNmUUjyZHZ3Yx"
BASE_URL = "https://api.yelp.com/v3/businesses/"
API_URL = f"{BASE_URL}search?sort_by=rating&limit={MAX_RESTAURANTS}"

# Define the search parameters
headers = {"Authorization": f"Bearer {API_KEY}"}
params = {
    "term": "",
    "location": "Rome, Italy",
    "categories": "restaurants",
}

# Make the request
response = requests.get(API_URL, headers=headers, params=params)

# Parse the response
if response.status_code == 200:
    data = response.json()
    for business in data.get("businesses"):
        print(f"Name: {business['name']}")
        print(f"Rating: {business['rating']}")
        # print(f"Address: {', '.join(business['location']['display_address'])}")
        # print(f"Phone: {business.get('phone', 'N/A')}")
        url = BASE_URL + business['id']
        response_business = requests.get(url, headers=headers)
        if response_business.status_code == 200:
            data_business = response_business.json()
            review_count = data_business.get('review_count')
            print(f"Review count: {review_count}")
        print("-" * 40)
else:
    print(f"Error: {response.status_code} - {response.text}")


Error: 429 - {"error": {"code": "ACCESS_LIMIT_REACHED", "description": "You've reached the access limit for this client. See instructions for requesting a higher access limit at https://docs.developer.yelp.com/docs/fusion-rate-limiting"}}


# OpenTable
The following queries the OpenTable website with the following parameters:
* search term: Paris
* cuisine: Italian
* sort by: rating

This query returns a list of the best 30 italian restaurants in Paris (at the moment of the search).
This list is the used to open each restaurant's page so to scrape their reviews. All the sown reviews are captured.

For demonstration purposes the obtained reviews are printed in the output.

In [1]:
%pip install selenium --quiet

Note: you may need to restart the kernel to use updated packages.


In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

# needed so to have the lists (restaurants and reviews) loaded from the website
def scroll_down_page(driver, speed=8):
    current_scroll_position, new_height= 0, 1
    while current_scroll_position <= new_height:
        current_scroll_position += speed
        driver.execute_script("window.scrollTo(0, {});".format(current_scroll_position))
        new_height = driver.execute_script("return document.body.scrollHeight")

# obtain the list of restaurants based on the predefined criteria
def scrape_opentable_restaurants(keep_open=False, max_restaurants=10):
    # queries OpenTable restaurants in 'Paris' under 'Italian' cousine category, ordered by rating
    url = "https://www.opentable.com/s?term=paris&cuisineIds%5B%5D=48e9d049-40cf-4cb9-98d9-8c47d0d58986&sortBy=rating"
    
    # open the browser
    driver = webdriver.Chrome()
    driver.get(url)
    time.sleep(3)
    # scroll down the page so to have all the restaurants loaded
    scroll_down_page(driver)
    time.sleep(3)

    restaurants = []

    try:
        # Extract restaurant elements
        restaurant_elements = driver.find_elements(By.CLASS_NAME, 'qCITanV81-Y-')

        for restaurant_element in restaurant_elements:
            try:
                restaurant_name = restaurant_element.text # name
                restaurant_link = restaurant_element.get_attribute('href') # link
                restaurant_link = restaurant_link[:-122] # remove parameters
                restaurants.append({'restaurant_name': restaurant_name, 'restaurant_link': restaurant_link})

            except Exception as e:
                print(f"Error extracting restaurants: {e}")
                continue

    except Exception as e:
        print(f"Error during scraping: {e}")
    finally:
        if not keep_open:
            driver.quit()

    return restaurants, driver

# scrape reviews from the given restaurant
def scrape_opentable_reviews(driver, url, keep_open=False, max_reviews=10):
    
    # open the browser if not already open
    if driver is None:
        driver = webdriver.Chrome()
    driver.get(url)
    time.sleep(3)

    reviews = []

    try:
        # scroll down the page so to have all the reviews loaded
        scroll_down_page(driver)
        while len(reviews) < max_reviews:
            review_elements = driver.find_elements(By.CLASS_NAME, 'afkKaa-4T28-')

            for review_element in review_elements:
                try:
                    review_text = review_element.find_element(By.CLASS_NAME, '_6rFG6U7PA6M-').text
                    review_date = review_element.find_element(By.CLASS_NAME, 'iLkEeQbexGs-').text
                    reviews.append({'review_text': review_text, 'review_date': review_date})

                    if len(reviews) >= max_reviews or len(review_elements) < 10:
                        break
                except Exception as e:
                    print(f"Error extracting review: {e}")
                    continue

            # Attempt to click "Load More Reviews" if it exists
            try:
                navigation_buttons = driver.find_elements(By.CLASS_NAME, 'c7WhV7jX-Yc-')
                navigation_buttons[1].click()
                time.sleep(3)
            except Exception as e:
                print(f"Error clicking 'Load More Reviews': {e}")
                break

    except Exception as e:
        print(f"Error during scraping: {e}")
    finally:
        if not keep_open:
            driver.quit()

    return reviews

In [3]:
restaurants, driver = scrape_opentable_restaurants(keep_open=True)

print("restaurants count: " + str(len(restaurants)))

for n, restaurant in enumerate(restaurants):
    print(f"Restaurant: {restaurant['restaurant_name']}")
    reviews = scrape_opentable_reviews(driver, restaurant['restaurant_link'], keep_open= True if n < len(restaurants) else False)
    
    for idx, review in enumerate(reviews, 1):
      print(f"Review {idx}:")
      print(f"Date: {review['review_date']}")
      print(f"Text: {review['review_text'][0:10]}...\n")

restaurants count: 3
Restaurant: Truffes Folies Paris 7
Error clicking 'Load More Reviews': Message: element click intercepted: Element <div class="c7WhV7jX-Yc-" data-test="pagination-next">...</div> is not clickable at point (658, 12). Other element would receive the click: <section class="aIgkg62Jlb4-" data-testid="tabs-container">...</section>
  (Session info: chrome=131.0.6778.139)
Stacktrace:
0   chromedriver                        0x0000000104d63af0 cxxbridge1$str$ptr + 3651580
1   chromedriver                        0x0000000104d5c340 cxxbridge1$str$ptr + 3620940
2   chromedriver                        0x00000001047c44b4 cxxbridge1$string$len + 89224
3   chromedriver                        0x000000010480e12c cxxbridge1$string$len + 391424
4   chromedriver                        0x000000010480c77c cxxbridge1$string$len + 384848
5   chromedriver                        0x000000010480a6c0 cxxbridge1$string$len + 376468
6   chromedriver                        0x0000000104809af8 cxxbr