In [1]:
import csv
from decimal import Decimal
import requests
from bs4 import BeautifulSoup

def _parse_rating(rating_element):
    """Return the leading number of an element's ``aria-label`` as a Decimal.

    Returns None when the element is missing, has no (or an empty)
    ``aria-label``, or the first whitespace-separated token of the label is
    not a valid decimal number.
    """
    # Local import: the file only imports ``Decimal`` from ``decimal``.
    from decimal import InvalidOperation

    if rating_element is None:
        return None

    # NOTE(review): the label presumably looks like "5 star rating" -- confirm
    # against the live page markup.
    label_tokens = (rating_element.get("aria-label") or "").split()
    if not label_tokens:
        return None

    try:
        return Decimal(label_tokens[0])
    except InvalidOperation:
        return None


# Function to extract restaurant information from the Yelp page
def extract_restaurant_info(url, timeout=30):
    """Download a Yelp business page and scrape its name, review count and reviews.

    The CSS class names used as selectors are generated by the site, so any
    of the lookups may come back empty; a missing element yields the
    ``"Not Found"`` placeholder instead of raising ``AttributeError``.

    Args:
        url: Address of the business page to download.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block forever.

    Returns:
        A ``(restaurant_name, review_count, reviews)`` tuple.
        ``restaurant_name`` and ``review_count`` are strings (``"Not Found"``
        when the element is absent). ``reviews`` is a list of dicts with the
        keys ``"Review_text"``, ``"Reviewer"`` and ``"Rating"`` (a ``Decimal``
        or ``None``); it is empty when the name or the review count could not
        be found.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    page = requests.get(url, timeout=timeout)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, 'html.parser')

    # Extract restaurant name
    name_element = soup.find("h1", class_="css-1se8maq")
    restaurant_name = name_element.text.strip() if name_element else "Not Found"

    # Extract total reviews (first whitespace-separated token of the link text)
    count_element = soup.find("a", class_="css-19v1rkv")
    count_tokens = count_element.text.split() if count_element else []
    review_count = count_tokens[0] if count_tokens else "Not Found"

    # Reviews are only reported for a page whose header fields were found.
    # The check does not depend on the individual review, so do it once here.
    if "Not Found" in (restaurant_name, review_count):
        return restaurant_name, review_count, []

    # Extract individual reviews
    reviews = []
    seen_texts = set()  # Review texts already collected, to drop duplicates

    for review_element in soup.find_all("div", class_="css-1qn0b6x"):
        text_element = review_element.find("p", class_="comment__09f24__D0cxf css-qgunke")
        if text_element is None:
            # This container holds no review comment; nothing to record.
            continue

        review_text = text_element.text.strip()
        if review_text in seen_texts:
            continue
        seen_texts.add(review_text)

        reviewer_element = review_element.find("a", class_="css-19v1rkv")
        reviewer = reviewer_element.text.strip() if reviewer_element else "Not Found"

        rating_element = review_element.find(
            "div",
            class_="five-stars__09f24__mBKym five-stars--regular__09f24__DgBNj css-1jq1ouh",
        )

        reviews.append({
            "Review_text": review_text,
            "Reviewer": reviewer,
            "Rating": _parse_rating(rating_element),
        })

    return restaurant_name, review_count, reviews

# CSV File name
csv_file_name = "restaurant_reviews.csv"

# List of restaurant URLs
restaurant_urls = [
    "https://www.yelp.ca/biz/pai-northern-thai-kitchen-toronto-5?osq=Restaurants",
]

# Scrape every page BEFORE opening the output file. Opening with mode='w'
# truncates the file immediately, so fetching inside the `with` block would
# wipe a previous export (and leave an empty or partial CSV) whenever a
# request fails.
scraped_restaurants = []
for url in restaurant_urls:
    scraped_restaurants.append(extract_restaurant_info(url))

# Write the data to a CSV file
# newline='' lets the csv module control line endings itself.
with open(csv_file_name, mode='w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["Restaurant Name", "Total Reviews"])  # Write a header row

    for restaurant_name, review_count, reviews in scraped_restaurants:
        csv_writer.writerow([restaurant_name, review_count])

        # Write individual reviews
        csv_writer.writerow(["Review_text", "Reviewer", "Rating"])  # Header for reviews
        for review in reviews:
            csv_writer.writerow([review["Review_text"], review["Reviewer"], review["Rating"]])

print(f"Data has been written to {csv_file_name}")

Data has been written to restaurant_reviews.csv
